diff mbox series

[RFC,v5.1,9/9,DON'T,APPLY] cache: sifive-ccache: add cache flushing capability

Message ID 20230103210400.3500626-10-conor@kernel.org (mailing list archive)
State Changes Requested
Headers show
Series Generic function based cache management operations (was Re: [PATCH v5 6/6] soc: renesas: Add L2 cache management for RZ/Five SoC) | expand

Commit Message

Conor Dooley Jan. 3, 2023, 9:04 p.m. UTC
From: Daire McNamara <daire.mcnamara@microchip.com>

The SiFive L2 cache controller can flush the L2 cache. Expose this capability
via the driver.

Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
[Conor: rebase on top of move to cache subsystem]
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---
This commit needs more work, and a way to enable it from errata. I've
not gone and done this as PolarFire SoC has archid etc all set to zero.
So we need to go figure out a workaround for this, before adding in
errata enabling code for this. I've included it here as a second user of
the cache management stuff, since what's currently upstream for the
ccache driver does not do any cache management.
---
 drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

Palmer Dabbelt Jan. 3, 2023, 9:25 p.m. UTC | #1
On Tue, 03 Jan 2023 13:04:01 PST (-0800), Conor Dooley wrote:
> From: Daire McNamara <daire.mcnamara@microchip.com>
>
> SiFive L2 cache controller can flush L2 cache. Expose this capability via

Do you have a pointer to the datasheet for whatever L2 controller is in 
the PFSOC?  IIRC whether it's possible to correctly flush the cache is 
kind of subtle, as it depends on what else is floating around the SOC.

> driver.
>
> Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
> [Conor: rebase on top of move to cache subsystem]
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---
> This commit needs more work, and a way to enable it from errata. I've
> not gone and done this as PolarFire SoC has archid etc all set to zero.
> So we need to go figure out a workaround for this, before adding in
> errata enabling code for this. I've included it here as a second user of
> the cache management stuff, since what's currently upstream for the
> ccache driver does not do any cache management.
> ---
>  drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
>
> diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
> index 47e7d6557f85..3c00f205bace 100644
> --- a/drivers/cache/sifive_ccache.c
> +++ b/drivers/cache/sifive_ccache.c
> @@ -9,12 +9,14 @@
>  #define pr_fmt(fmt) "CCACHE: " fmt
>
>  #include <linux/debugfs.h>
> +#include <linux/dma-direction.h>
>  #include <linux/interrupt.h>
>  #include <linux/of_irq.h>
>  #include <linux/of_address.h>
>  #include <linux/device.h>
>  #include <linux/bitfield.h>
>  #include <asm/cacheinfo.h>
> +#include <asm/cacheflush.h>
>  #include <cache/sifive_ccache.h>
>
>  #define SIFIVE_CCACHE_DIRECCFIX_LOW 0x100
> @@ -42,11 +44,15 @@
>  #define SIFIVE_CCACHE_WAYENABLE 0x08
>  #define SIFIVE_CCACHE_ECCINJECTERR 0x40
>
> +#define SIFIVE_CCACHE_FLUSH64 0x200
> +#define SIFIVE_CCACHE_FLUSH32 0x240
> +
>  #define SIFIVE_CCACHE_MAX_ECCINTR 4
>
>  static void __iomem *ccache_base;
>  static int g_irq[SIFIVE_CCACHE_MAX_ECCINTR];
>  static struct riscv_cacheinfo_ops ccache_cache_ops;
> +static struct riscv_cache_maint_ops ccache_cmos;
>  static int level;
>
>  enum {
> @@ -205,6 +211,42 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
>  	return IRQ_HANDLED;
>  }
>
> +static void sifive_ccache_dma_wback_inv(void* vaddr, unsigned long size)
> +{
> +	void * __iomem flush = ccache_base + SIFIVE_CCACHE_FLUSH64;
> +	phys_addr_t start = virt_to_phys(vaddr);
> +	phys_addr_t aligned_start = start & ~0x3f;
> +	u64 addr;
> +	u64 end;
> +	u64 aligned_end;
> +
> +	size += start - aligned_start;
> +	end = start + size;
> +	aligned_end = end += 0x3f;
> +	aligned_end &= ~0x3f;
> +
> +	for (addr = aligned_start; addr < aligned_end; addr += 64)
> +		writeq(addr, flush);
> +}
> +
> +static void sifive_ccache_cmo(unsigned int cache_size, void *vaddr, size_t size,
> +			      int dir, int ops)
> +{
> +	switch (dir) {
> +	case DMA_TO_DEVICE:
> +		sifive_ccache_dma_wback_inv(vaddr, size);
> +		break;
> +	case DMA_FROM_DEVICE:
> +		sifive_ccache_dma_wback_inv(vaddr, size);
> +		break;
> +	case DMA_BIDIRECTIONAL:
> +		sifive_ccache_dma_wback_inv(vaddr, size);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>  static int __init sifive_ccache_init(void)
>  {
>  	struct device_node *np;
> @@ -254,6 +296,9 @@ static int __init sifive_ccache_init(void)
>  	ccache_cache_ops.get_priv_group = ccache_get_priv_group;
>  	riscv_set_cacheinfo_ops(&ccache_cache_ops);
>
> +	ccache_cmos.cmo_patchfunc = sifive_ccache_cmo;
> +	riscv_set_cache_maint_ops(&ccache_cmos);
> +
>  #ifdef CONFIG_DEBUG_FS
>  	setup_sifive_debug();
>  #endif
Arnd Bergmann Jan. 3, 2023, 9:28 p.m. UTC | #2
On Tue, Jan 3, 2023, at 22:04, Conor Dooley wrote:
> From: Daire McNamara <daire.mcnamara@microchip.com>
>
> SiFive L2 cache controller can flush L2 cache. Expose this capability via
> driver.
>
> Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
> [Conor: rebase on top of move to cache subsystem]
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---
> This commit needs more work, and a way to enable it from errata. I've
> not gone and done this as PolarFire SoC has archid etc all set to zero.
> So we need to go figure out a workaround for this, before adding in
> errata enabling code for this. I've included it here as a second user of
> the cache management stuff, since what's currently upstream for the
> ccache driver does not do any cache management.
> ---
>  drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)

My feeling here is that the cacheflush code is unrelated to the
EDAC code and it should just be a separate file. From what I can
tell, all of the existing contents of this file can simply
get merged into drivers/edac/sifive_edac.c, with the newly
added code becoming a standalone driver.

     Arnd
Conor Dooley Jan. 4, 2023, midnight UTC | #3
On Tue, Jan 03, 2023 at 10:28:19PM +0100, Arnd Bergmann wrote:
> On Tue, Jan 3, 2023, at 22:04, Conor Dooley wrote:
> > From: Daire McNamara <daire.mcnamara@microchip.com>
> >
> > SiFive L2 cache controller can flush L2 cache. Expose this capability via
> > driver.
> >
> > Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
> > [Conor: rebase on top of move to cache subsystem]
> > Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> > ---
> > This commit needs more work, and a way to enable it from errata. I've
> > not gone and done this as PolarFire SoC has archid etc all set to zero.
> > So we need to go figure out a workaround for this, before adding in
> > errata enabling code for this. I've included it here as a second user of
> > the cache management stuff, since what's currently upstream for the
> > ccache driver does not do any cache management.
> > ---
> >  drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
> >  1 file changed, 45 insertions(+)
> 
> My feeling here is that the cacheflush code is unrelated to the
> EDAC code and it should just be a separate file. From what I can
> tell, all of the existing contents of this file can simply
> get merged into drivers/edac/sifive_edac.c, with the newly
> added code becoming a standalone driver.

Sure? I'd like to do that independently of whatever is done for the
ax45mp CMOs though, don't think it's worth holding up that platform's
support on me splitting this out.

Thanks,
Conor.
Arnd Bergmann Jan. 4, 2023, 8:17 a.m. UTC | #4
On Wed, Jan 4, 2023, at 01:00, Conor Dooley wrote:
> On Tue, Jan 03, 2023 at 10:28:19PM +0100, Arnd Bergmann wrote:
>> On Tue, Jan 3, 2023, at 22:04, Conor Dooley wrote:
>> > From: Daire McNamara <daire.mcnamara@microchip.com>
>> >
>> > SiFive L2 cache controller can flush L2 cache. Expose this capability via
>> > driver.
>> >
>> > Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
>> > [Conor: rebase on top of move to cache subsystem]
>> > Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>> > ---
>> > This commit needs more work, and a way to enable it from errata. I've
>> > not gone and done this as PolarFire SoC has archid etc all set to zero.
>> > So we need to go figure out a workaround for this, before adding in
>> > errata enabling code for this. I've included it here as a second user of
>> > the cache management stuff, since what's currently upstream for the
>> > ccache driver does not do any cache management.
>> > ---
>> >  drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
>> >  1 file changed, 45 insertions(+)
>> 
>> My feeling here is that the cacheflush code is unrelated to the
>> EDAC code and it should just be a separate file. From what I can
>> tell, all of the existing contents of this file can simply
>> get merged into drivers/edac/sifive_edac.c, with the newly
>> added code becoming a standalone driver.
>
> Sure? I'd like to do that independently of whatever is done for the
> ax45mp CMOs though, don't think it's worth holding up that platform's
> support on me splitting this out.

Right, no need to touch the existing file as part of this series,
it probably just gets in the way of defining a good interface here.

    Arnd
Conor Dooley Jan. 4, 2023, 9:23 a.m. UTC | #5
On 4 January 2023 08:17:41 GMT, Arnd Bergmann <arnd@arndb.de> wrote:
>On Wed, Jan 4, 2023, at 01:00, Conor Dooley wrote:
>> On Tue, Jan 03, 2023 at 10:28:19PM +0100, Arnd Bergmann wrote:
>>> On Tue, Jan 3, 2023, at 22:04, Conor Dooley wrote:
>>> > From: Daire McNamara <daire.mcnamara@microchip.com>
>>> >
>>> > SiFive L2 cache controller can flush L2 cache. Expose this capability via
>>> > driver.
>>> >
>>> > Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
>>> > [Conor: rebase on top of move to cache subsystem]
>>> > Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>>> > ---
>>> > This commit needs more work, and a way to enable it from errata. I've
>>> > not gone and done this as PolarFire SoC has archid etc all set to zero.
>>> > So we need to go figure out a workaround for this, before adding in
>>> > errata enabling code for this. I've included it here as a second user of
>>> > the cache management stuff, since what's currently upstream for the
>>> > ccache driver does not do any cache management.
>>> > ---
>>> >  drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
>>> >  1 file changed, 45 insertions(+)
>>> 
>>> My feeling here is that the cacheflush code is unrelated to the
>>> EDAC code and it should just be a separate file. From what I can
>>> tell, all of the existing contents of this file can simply
>>> get merged into drivers/edac/sifive_edac.c, with the newly
>>> added code becoming a standalone driver.
>>
>> Sure? I'd like to do that independently of whatever is done for the
>> ax45mp CMOs though, don't think it's worth holding up that platform's
>> support on me splitting this out.
>
>Right, no need to touch the existing file as part of this series,
>it probably just gets in the way of defining a good interface here.

Sure. Can leave it where it was & I'll sort it out later when its errata etc. get added.

Btw, would you mind pointing out where you wanted to have that if/else you mentioned on IRC?

Thanks,
Conor.
Ben Dooks Jan. 4, 2023, 9:45 a.m. UTC | #6
On 03/01/2023 21:04, Conor Dooley wrote:
> From: Daire McNamara <daire.mcnamara@microchip.com>
> 
> SiFive L2 cache controller can flush L2 cache. Expose this capability via
> driver.
> 
> Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
> [Conor: rebase on top of move to cache subsystem]
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---
> This commit needs more work, and a way to enable it from errata. I've
> not gone and done this as PolarFire SoC has archid etc all set to zero.
> So we need to go figure out a workaround for this, before adding in
> errata enabling code for this. I've included it here as a second user of
> the cache management stuff, since what's currently upstream for the
> ccache driver does not do any cache management.

I think errata isn't the right word here, it's more of a system 
requirement for anything that isn't coherent. All the SiFive systems
I have are coherent so won't need this.

> ---
>   drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
>   1 file changed, 45 insertions(+)
> 
> diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
> index 47e7d6557f85..3c00f205bace 100644
> --- a/drivers/cache/sifive_ccache.c
> +++ b/drivers/cache/sifive_ccache.c
> @@ -9,12 +9,14 @@
>   #define pr_fmt(fmt) "CCACHE: " fmt
>   
>   #include <linux/debugfs.h>
> +#include <linux/dma-direction.h>
>   #include <linux/interrupt.h>
>   #include <linux/of_irq.h>
>   #include <linux/of_address.h>
>   #include <linux/device.h>
>   #include <linux/bitfield.h>
>   #include <asm/cacheinfo.h>
> +#include <asm/cacheflush.h>
>   #include <cache/sifive_ccache.h>
>   
>   #define SIFIVE_CCACHE_DIRECCFIX_LOW 0x100
> @@ -42,11 +44,15 @@
>   #define SIFIVE_CCACHE_WAYENABLE 0x08
>   #define SIFIVE_CCACHE_ECCINJECTERR 0x40
>   
> +#define SIFIVE_CCACHE_FLUSH64 0x200
> +#define SIFIVE_CCACHE_FLUSH32 0x240
> +
>   #define SIFIVE_CCACHE_MAX_ECCINTR 4
>   
>   static void __iomem *ccache_base;
>   static int g_irq[SIFIVE_CCACHE_MAX_ECCINTR];
>   static struct riscv_cacheinfo_ops ccache_cache_ops;
> +static struct riscv_cache_maint_ops ccache_cmos;
>   static int level;
>   
>   enum {
> @@ -205,6 +211,42 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
>   	return IRQ_HANDLED;
>   }
>   
> +static void sifive_ccache_dma_wback_inv(void* vaddr, unsigned long size)
> +{
> +	void * __iomem flush = ccache_base + SIFIVE_CCACHE_FLUSH64;
> +	phys_addr_t start = virt_to_phys(vaddr);
> +	phys_addr_t aligned_start = start & ~0x3f;
> +	u64 addr;
> +	u64 end;
> +	u64 aligned_end;
> +
> +	size += start - aligned_start;
> +	end = start + size;
> +	aligned_end = end += 0x3f;

I think you meant + 0x3f here. There is an align macro in the kernel
headers, and I'm not sure by inspection if you'd miss the last line
with this code.

> +	aligned_end &= ~0x3f;
> +
> +	for (addr = aligned_start; addr < aligned_end; addr += 64)
> +		writeq(addr, flush);
> +}

The p550 manual states that the zero device flush method is quicker for
any large area flush. However, I'm not sure what that level is or whether it
is worth dealing with here. If so, we need to have the L3 zero area mapped.

> +
> +static void sifive_ccache_cmo(unsigned int cache_size, void *vaddr, size_t size,
> +			      int dir, int ops)
> +{

technically dir should have been of type "enum dma_data_direction"

> +	switch (dir) {
> +	case DMA_TO_DEVICE:
> +		sifive_ccache_dma_wback_inv(vaddr, size);
> +		break;
> +	case DMA_FROM_DEVICE:
> +		sifive_ccache_dma_wback_inv(vaddr, size);
> +		break;
> +	case DMA_BIDIRECTIONAL:
> +		sifive_ccache_dma_wback_inv(vaddr, size);
> +		break;
> +	default:
> +		break;
> +	}
> +}

I'm not sure why you'd bother checking the dir here, the cache can
only be flushed (I hope DMA_FROM_DEVICE is done /before/ the DMA op).

You could have saved yourself an include if just ignoring dir.

> +
>   static int __init sifive_ccache_init(void)
>   {
>   	struct device_node *np;
> @@ -254,6 +296,9 @@ static int __init sifive_ccache_init(void)
>   	ccache_cache_ops.get_priv_group = ccache_get_priv_group;
>   	riscv_set_cacheinfo_ops(&ccache_cache_ops);
>   
> +	ccache_cmos.cmo_patchfunc = sifive_ccache_cmo;
> +	riscv_set_cache_maint_ops(&ccache_cmos);
> +
>   #ifdef CONFIG_DEBUG_FS
>   	setup_sifive_debug();
>   #endif
Conor Dooley Jan. 4, 2023, 9:57 a.m. UTC | #7
On 4 January 2023 09:45:30 GMT, Ben Dooks <ben.dooks@codethink.co.uk> wrote:
>On 03/01/2023 21:04, Conor Dooley wrote:
>> From: Daire McNamara <daire.mcnamara@microchip.com>
>> 
>> SiFive L2 cache controller can flush L2 cache. Expose this capability via
>> driver.
>> 
>> Signed-off-by: Daire McNamara <daire.mcnamara@microchip.com>
>> [Conor: rebase on top of move to cache subsystem]
>> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>> ---
>> This commit needs more work, and a way to enable it from errata. I've
>> not gone and done this as PolarFire SoC has archid etc all set to zero.
>> So we need to go figure out a workaround for this, before adding in
>> errata enabling code for this. I've included it here as a second user of
>> the cache management stuff, since what's currently upstream for the
>> ccache driver does not do any cache management.
>
>I think errata isn't the right word here, it's more of a system requirement for anything that isn't coherent. All the SiFive systems
>I have are coherent so won't need this.

That's the mechanism we currently have for turning this stuff on.
This patch is going away anyway for an actual submission, so we can debate this sort of thing when it shows up for real.
This patch does certainly seem to be distracting from the main point, which was supposed to be the interface to arch/riscv.


>> ---
>>   drivers/cache/sifive_ccache.c | 45 +++++++++++++++++++++++++++++++++++
>>   1 file changed, 45 insertions(+)
>> 
>> diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
>> index 47e7d6557f85..3c00f205bace 100644
>> --- a/drivers/cache/sifive_ccache.c
>> +++ b/drivers/cache/sifive_ccache.c
>> @@ -9,12 +9,14 @@
>>   #define pr_fmt(fmt) "CCACHE: " fmt
>>     #include <linux/debugfs.h>
>> +#include <linux/dma-direction.h>
>>   #include <linux/interrupt.h>
>>   #include <linux/of_irq.h>
>>   #include <linux/of_address.h>
>>   #include <linux/device.h>
>>   #include <linux/bitfield.h>
>>   #include <asm/cacheinfo.h>
>> +#include <asm/cacheflush.h>
>>   #include <cache/sifive_ccache.h>
>>     #define SIFIVE_CCACHE_DIRECCFIX_LOW 0x100
>> @@ -42,11 +44,15 @@
>>   #define SIFIVE_CCACHE_WAYENABLE 0x08
>>   #define SIFIVE_CCACHE_ECCINJECTERR 0x40
>>   +#define SIFIVE_CCACHE_FLUSH64 0x200
>> +#define SIFIVE_CCACHE_FLUSH32 0x240
>> +
>>   #define SIFIVE_CCACHE_MAX_ECCINTR 4
>>     static void __iomem *ccache_base;
>>   static int g_irq[SIFIVE_CCACHE_MAX_ECCINTR];
>>   static struct riscv_cacheinfo_ops ccache_cache_ops;
>> +static struct riscv_cache_maint_ops ccache_cmos;
>>   static int level;
>>     enum {
>> @@ -205,6 +211,42 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
>>   	return IRQ_HANDLED;
>>   }
>>   +static void sifive_ccache_dma_wback_inv(void* vaddr, unsigned long size)
>> +{
>> +	void * __iomem flush = ccache_base + SIFIVE_CCACHE_FLUSH64;
>> +	phys_addr_t start = virt_to_phys(vaddr);
>> +	phys_addr_t aligned_start = start & ~0x3f;
>> +	u64 addr;
>> +	u64 end;
>> +	u64 aligned_end;
>> +
>> +	size += start - aligned_start;
>> +	end = start + size;
>> +	aligned_end = end += 0x3f;
>
>I think you meant + 0x3f here. There is an align macro in the kernel
>headers, and I'm not sure by inspection if you'd miss the last line
>with this code.
>
>> +	aligned_end &= ~0x3f;
>> +
>> +	for (addr = aligned_start; addr < aligned_end; addr += 64)
>> +		writeq(addr, flush);
>> +}
>
>The p550 manual states that the zero device flush method is quicker for
>any large area flush. However not sure what that level is and whether it
>is worth dealing with here? If so we need to have the L3 zero are mapped.
>
>> +
>> +static void sifive_ccache_cmo(unsigned int cache_size, void *vaddr, size_t size,
>> +			      int dir, int ops)
>> +{
>
>technically dir should have been of type "enum dma_data_direction"
>
>> +	switch (dir) {
>> +	case DMA_TO_DEVICE:
>> +		sifive_ccache_dma_wback_inv(vaddr, size);
>> +		break;
>> +	case DMA_FROM_DEVICE:
>> +		sifive_ccache_dma_wback_inv(vaddr, size);
>> +		break;
>> +	case DMA_BIDIRECTIONAL:
>> +		sifive_ccache_dma_wback_inv(vaddr, size);
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +}
>
>I'm not sure why you'd bother checking the dir here, the cache can
>only be flushed (I hope DMA_FROM_DEVICE is done /before/ the DMA op).
>
>You could have saved yourself an include if just ignoring dir.
>
>> +
>>   static int __init sifive_ccache_init(void)
>>   {
>>   	struct device_node *np;
>> @@ -254,6 +296,9 @@ static int __init sifive_ccache_init(void)
>>   	ccache_cache_ops.get_priv_group = ccache_get_priv_group;
>>   	riscv_set_cacheinfo_ops(&ccache_cache_ops);
>>   +	ccache_cmos.cmo_patchfunc = sifive_ccache_cmo;
>> +	riscv_set_cache_maint_ops(&ccache_cmos);
>> +
>>   #ifdef CONFIG_DEBUG_FS
>>   	setup_sifive_debug();
>>   #endif
>
Arnd Bergmann Jan. 4, 2023, 10:19 a.m. UTC | #8
On Wed, Jan 4, 2023, at 10:23, Conor Dooley wrote:
>>Right, no need to touch the existing file as part of this series,
>>it probably just gets in the way of defining a good interface here.
>
> Sure. Can leave it where it was & I'll sort it out later when it's 
> errata etc get added.
>
> Btw, would you mind pointing out where you wanted to have that if/else 
> you mentioned on IRC?

I meant replacing both of the runtime patching indirections in
arch_sync_dma_for_device(). At the moment, this function calls
ALT_CMO_OP(), which is patched to either call the ZICBOM or the
THEAD variant, and if I read this right you add a third case
there with another level of indirection using static_branch.

I would try to replace both of these indirections and instead
handle it all from C code in arch_sync_dma_for_device() directly,
for the purpose of readability and maintainability.

/*
 * Proposed helper: dispatch a DMA cache "clean" for (vaddr, size).
 *
 * If no cache_maint_ops.clean callback is set, call zicbom_cache_clean()
 * directly, so that path has no indirect function call. Otherwise call the
 * cache_maint_ops.clean callback. Both callees are passed
 * riscv_cbom_block_size as their third argument.
 *
 * NOTE(review): cache_maint_ops and zicbom_cache_clean() are not defined in
 * this sketch; presumably the ops are registered by a cache driver - confirm.
 */
static inline void dma_cache_clean(void *vaddr, size_t size)
{
        if (!cache_maint_ops.clean)
               zicbom_cache_clean(vaddr, size, riscv_cbom_block_size);
        else
               cache_maint_ops.clean(vaddr, size, riscv_cbom_block_size);
}

/*
 * Proposed arch_sync_dma_for_device(): choose the cache operation per DMA
 * direction in plain C.
 *
 * paddr is converted to a virtual address with phys_to_virt(). Then:
 *   - DMA_TO_DEVICE and DMA_FROM_DEVICE both call dma_cache_clean();
 *   - DMA_BIDIRECTIONAL calls dma_cache_flush();
 *   - any other direction value does nothing.
 *
 * NOTE(review): dma_cache_flush() is not shown in this sketch; presumably it
 * mirrors dma_cache_clean() with a flush callback - confirm.
 */
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
                              enum dma_data_direction dir)
{
        void *vaddr = phys_to_virt(paddr);

        switch (dir) {
        case DMA_TO_DEVICE:
        case DMA_FROM_DEVICE:
                dma_cache_clean(vaddr, size);
                break;
        case DMA_BIDIRECTIONAL:
                dma_cache_flush(vaddr, size);
                break;
        default:
                break;
        }
}

which then makes it very clear what the actual code path
is, while leaving the zicbom case free of indirect function
calls. You can still use a static_branch() to optimize the
conditional, but I would try to avoid any extra indirection
levels or errata checks.

     Arnd
Conor Dooley Jan. 4, 2023, 11:56 a.m. UTC | #9
Hey Arnd,

On Wed, Jan 04, 2023 at 11:19:44AM +0100, Arnd Bergmann wrote:
> On Wed, Jan 4, 2023, at 10:23, Conor Dooley wrote:
> >>Right, no need to touch the existing file as part of this series,
> >>it probably just gets in the way of defining a good interface here.
> >
> > Sure. Can leave it where it was & I'll sort it out later when it's 
> > errata etc get added.
> >
> > Btw, would you mind pointing out where you wanted to have that if/else 
> > you mentioned on IRC?
> 
> I meant replacing both of the runtime patching indirections in
> arch_sync_dma_for_device(). At the moment, this function calls
> ALT_CMO_OP(), which is patched to either call the ZICBOM or the
> THEAD variant, and if I read this right you add a third case
> there with another level of indirection using static_branch.

Yah, pretty much.

> I would try to replace both of these indirections and instead
> handle it all from C code in arch_sync_dma_for_device() directly,
> for the purpose of readability and maintainability.
> static inline void dma_cache_clean(void *vaddr, size_t size)
> {
>         if (!cache_maint_ops.clean)
>                zicbom_cache_clean(vaddr, size, riscv_cbom_block_size);

And I figure that this function is effectively a wrapper around ALT_CMO_OP()?

>         else
>                cache_maint_ops.clean(vaddr, size, riscv_cbom_block_size);

And this one gets registered by the driver using an interface like the
one I already proposed, just with the cache_maint_ops struct expanded?

Extrapolating, with these changes having an errata would not even be
needed in order to do cache maintenance.
Since the ALT_CMO_OP() version would only be used inside
zicbom_cache_clean(), assuming I understood correctly, a driver could
just register cache_maint_ops for a given platform without having to
muck around with errata.
If so, that seems like a distinct improvement over my suggestion & gets
around the thing I mentioned in 0/9 about a shared case in the
alternative application code.

Again, assuming I understood correctly, I like this a lot.

> }
> 
> void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
>                               enum dma_data_direction dir)
> {
>         void *vaddr = phys_to_virt(paddr);
> 
>         switch (dir) {
>         case DMA_TO_DEVICE:
>         case DMA_FROM_DEVICE:
>                 dma_cache_clean(vaddr, size);
>                 break;
>         case DMA_BIDIRECTIONAL:
>                 dma_cache_flush(vaddr, size);
>                 break;
>         default:
>                 break;
>         }
> }
> 
> which then makes it very clear what the actual code path
> is, while leaving the zicbom case free of indirect function
> calls. You can still use a static_branch() to optimize the
> conditional, but I would try to avoid any extra indirection
> levels or errata checks.

The other thing that I like about this is we can then remove the various
calls to ALT_CMO_OP() that are scattered around arch/riscv now & replace
them with functions that have more understandable names.

Thanks Arnd!
Conor.
Arnd Bergmann Jan. 4, 2023, 12:18 p.m. UTC | #10
On Wed, Jan 4, 2023, at 12:56, Conor Dooley wrote:
> On Wed, Jan 04, 2023 at 11:19:44AM +0100, Arnd Bergmann wrote:
>> On Wed, Jan 4, 2023, at 10:23, Conor Dooley wrote:
>> I would try to replace both of these indirections and instead
>> handle it all from C code in arch_sync_dma_for_device() directly,
>> for the purpose of readability and maintainability.
>> static inline void dma_cache_clean(void *vaddr, size_t size)
>> {
>>         if (!cache_maint_ops.clean)
>>                zicbom_cache_clean(vaddr, size, riscv_cbom_block_size);
>
> And I figure that this function is effectively a wrapper around ALT_CMO_OP()?
>
>>         else
>>                cache_maint_ops.clean(vaddr, size, riscv_cbom_block_size);
>
> And this one gets registered by the driver using an interface like the
> one I already proposed, just with the cache_maint_ops struct expanded?

Yes, exactly.

> Extrapolating, with these changes having an errata would not even be
> needed in order to do cache maintenance.
> Since the ALT_CMO_OP() version would only be used inside
> zicbom_cache_clean(), assuming I understood correctly, a driver could
> just register cache_maint_ops for a given platform without having to
> muck around with errata.

That is the idea, and ALT_CMO_OP() itself can just go away
by putting the inline asm without the alternative into
the zicbom_cache_clean() version, making the THEAD branch yet
another cache_maint_ops instance.

>> which then makes it very clear what the actual code path
>> is, while leaving the zicbom case free of indirect function
>> calls. You can still use a static_branch() to optimize the
>> conditional, but I would try to avoid any extra indirection
>> levels or errata checks.
>
> The other thing that I like about this is we can then remove the various
> calls to ALT_CMO_OP() that are scattered around arch/riscv now & replace
> them with functions that have more understandable names.

I only see them in arch/riscv/mm/dma-noncoherent.c and arch/riscv/mm/pmem.c,
but yes, both of these should just call the new functions, whatever the
calling conventions end up being.

    Arnd
Conor Dooley Jan. 4, 2023, 1:20 p.m. UTC | #11
On Wed, Jan 04, 2023 at 01:18:45PM +0100, Arnd Bergmann wrote:
> On Wed, Jan 4, 2023, at 12:56, Conor Dooley wrote:
> > On Wed, Jan 04, 2023 at 11:19:44AM +0100, Arnd Bergmann wrote:
> >> On Wed, Jan 4, 2023, at 10:23, Conor Dooley wrote:
> >> I would try to replace both of these indirections and instead
> >> handle it all from C code in arch_sync_dma_for_device() directly,
> >> for the purpose of readability and maintainability.
> >> static inline void dma_cache_clean(void *vaddr, size_t size)
> >> {
> >>         if (!cache_maint_ops.clean)
> >>                zicbom_cache_clean(vaddr, size, riscv_cbom_block_size);
> >
> > And I figure that this function is effectively a wrapper around ALT_CMO_OP()?
> >
> >>         else
> >>                cache_maint_ops.clean(vaddr, size, riscv_cbom_block_size);
> >
> > And this one gets registered by the driver using an interface like the
> > one I already proposed, just with the cache_maint_ops struct expanded?
> 
> Yes, exactly.
> 
> > Extrapolating, with these changes having an errata would not even be
> > needed in order to do cache maintenance.
> > Since the ALT_CMO_OP() version would only be used inside
> > zicbom_cache_clean(), assuming I understood correctly, a driver could
> > just register cache_maint_ops for a given platform without having to
> > muck around with errata.
> 
> That is the idea, and ALT_CMO_OP() itself can just go away
> as by just putting the inline asm without the alternative into
> the zicbom_cache_clean() version, making the THEAD branch yet
> another cache_maint_ops instance.

Perhaps more of a question for Palmer than you, but how about leaving
ALT_CMO_OP as-is in riscv/for-next at the moment, wrapping it in
zicbom_cache_foo() & leaving that extraction for a follow-on work?
There's another conversation going on about expanding the THEAD stuff,
so that could be done on top of Prabhakar's v6.

That series is here:
https://lore.kernel.org/linux-riscv/CAJF2gTQp1bOp9kfoOkbvNnSXQhzrCpG3rn8C+LPPoJtMCCDOdA@mail.gmail.com/T/#t
Although unfortunately Icenowy is having issues getting their patches to
the lists so I assume it'll get let through at some point today.

> >> which then makes it very clear what the actual code path
> >> is, while leaving the zicbom case free of indirect function
> >> calls. You can still use a static_branch() to optimize the
> >> conditional, but I would try to avoid any extra indirection
> >> levels or errata checks.
> >
> > The other thing that I like about this is we can then remove the various
> > calls to ALT_CMO_OP() that are scattered around arch/riscv now & replace
> > them with functions that have more understandable names.
> 
> I only see them in arch/riscv/mm/dma-noncoherent.c and arch/riscv/mm/pmem.c,
> but yes, both of these should just call the new functions, whatever the
> calling conventions end up being.

Dunno why I had it in my head there was a third place. Seeing ghosts
maybe!

Thanks,
Conor.
Arnd Bergmann Jan. 4, 2023, 2:15 p.m. UTC | #12
On Wed, Jan 4, 2023, at 14:20, Conor Dooley wrote:
> On Wed, Jan 04, 2023 at 01:18:45PM +0100, Arnd Bergmann wrote:
>> On Wed, Jan 4, 2023, at 12:56, Conor Dooley wrote:
>> > On Wed, Jan 04, 2023 at 11:19:44AM +0100, Arnd Bergmann wrote:
> Perhaps more of a question for Palmer than you, but how about leaving
> ALT_CMO_OP as-is in riscv/for-next at the moment, wrapping it in
> zicbom_cache_foo() & leaving that extraction for a follow-on work?
> There's another conversation going on about expanding the THEAD stuff,
> so that could be done on top of Prabhakar's v6.

Right, makes sense to me.

      Arnd
diff mbox series

Patch

diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
index 47e7d6557f85..3c00f205bace 100644
--- a/drivers/cache/sifive_ccache.c
+++ b/drivers/cache/sifive_ccache.c
@@ -9,12 +9,14 @@ 
 #define pr_fmt(fmt) "CCACHE: " fmt
 
 #include <linux/debugfs.h>
+#include <linux/dma-direction.h>
 #include <linux/interrupt.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 #include <linux/device.h>
 #include <linux/bitfield.h>
 #include <asm/cacheinfo.h>
+#include <asm/cacheflush.h>
 #include <cache/sifive_ccache.h>
 
 #define SIFIVE_CCACHE_DIRECCFIX_LOW 0x100
@@ -42,11 +44,15 @@ 
 #define SIFIVE_CCACHE_WAYENABLE 0x08
 #define SIFIVE_CCACHE_ECCINJECTERR 0x40
 
+#define SIFIVE_CCACHE_FLUSH64 0x200
+#define SIFIVE_CCACHE_FLUSH32 0x240
+
 #define SIFIVE_CCACHE_MAX_ECCINTR 4
 
 static void __iomem *ccache_base;
 static int g_irq[SIFIVE_CCACHE_MAX_ECCINTR];
 static struct riscv_cacheinfo_ops ccache_cache_ops;
+static struct riscv_cache_maint_ops ccache_cmos;
 static int level;
 
 enum {
@@ -205,6 +211,42 @@  static irqreturn_t ccache_int_handler(int irq, void *device)
 	return IRQ_HANDLED;
 }
 
+static void sifive_ccache_dma_wback_inv(void* vaddr, unsigned long size)
+{
+	void * __iomem flush = ccache_base + SIFIVE_CCACHE_FLUSH64;
+	phys_addr_t start = virt_to_phys(vaddr);
+	phys_addr_t aligned_start = start & ~0x3f;
+	u64 addr;
+	u64 end;
+	u64 aligned_end;
+
+	size += start - aligned_start;
+	end = start + size;
+	aligned_end = end += 0x3f;
+	aligned_end &= ~0x3f;
+
+	for (addr = aligned_start; addr < aligned_end; addr += 64)
+		writeq(addr, flush);
+}
+
+static void sifive_ccache_cmo(unsigned int cache_size, void *vaddr, size_t size,
+			      int dir, int ops)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		sifive_ccache_dma_wback_inv(vaddr, size);
+		break;
+	case DMA_FROM_DEVICE:
+		sifive_ccache_dma_wback_inv(vaddr, size);
+		break;
+	case DMA_BIDIRECTIONAL:
+		sifive_ccache_dma_wback_inv(vaddr, size);
+		break;
+	default:
+		break;
+	}
+}
+
 static int __init sifive_ccache_init(void)
 {
 	struct device_node *np;
@@ -254,6 +296,9 @@  static int __init sifive_ccache_init(void)
 	ccache_cache_ops.get_priv_group = ccache_get_priv_group;
 	riscv_set_cacheinfo_ops(&ccache_cache_ops);
 
+	ccache_cmos.cmo_patchfunc = sifive_ccache_cmo;
+	riscv_set_cache_maint_ops(&ccache_cmos);
+
 #ifdef CONFIG_DEBUG_FS
 	setup_sifive_debug();
 #endif