[5/6] net: fec: use dma_alloc_noncoherent for m532x

Message ID 20231009074121.219686-6-hch@lst.de (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series [1/6] dma-direct: add dependencies to CONFIG_DMA_GLOBAL_POOL

Checks

Context Check Description
netdev/series_format warning Series does not have a cover letter
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1362 this patch: 1362
netdev/cc_maintainers warning 4 maintainers not CCed: kuba@kernel.org davem@davemloft.net pabeni@redhat.com edumazet@google.com
netdev/build_clang success Errors and warnings before: 1387 this patch: 1387
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1387 this patch: 1387
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: networking block comments don't use an empty /* line, use /* Comment...
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Christoph Hellwig Oct. 9, 2023, 7:41 a.m. UTC
The coldfire platforms can't properly implement dma_alloc_coherent and
currently just return noncoherent memory from dma_alloc_coherent.

The fec driver then works around this with a flush of all caches in the
receive path. Make this hack a little less bad by using the explicit
dma_alloc_noncoherent API and documenting the hacky cache flushes so
that the DMA API level hack can be removed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/net/ethernet/freescale/fec_main.c | 84 ++++++++++++++++++++---
 1 file changed, 75 insertions(+), 9 deletions(-)
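
For context, memory returned by dma_alloc_noncoherent() normally has to be
bracketed with explicit sync calls around CPU accesses; this series
deliberately keeps the old flush_cache_all() band-aid instead of doing that
per buffer. A minimal sketch of the canonical noncoherent pattern
(illustrative only, not code from the patch):

#include <linux/dma-mapping.h>

static int example_noncoherent_use(struct device *dev, size_t size)
{
	dma_addr_t handle;
	void *buf;

	buf = dma_alloc_noncoherent(dev, size, &handle,
				    DMA_BIDIRECTIONAL, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* before the CPU reads data the device wrote: */
	dma_sync_single_for_cpu(dev, handle, size, DMA_BIDIRECTIONAL);
	/* ... CPU reads/writes buf ... */
	/* before handing the buffer back to the device: */
	dma_sync_single_for_device(dev, handle, size, DMA_BIDIRECTIONAL);

	dma_free_noncoherent(dev, size, buf, handle, DMA_BIDIRECTIONAL);
	return 0;
}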

Comments

Robin Murphy Oct. 9, 2023, 10:29 a.m. UTC | #1
On 2023-10-09 08:41, Christoph Hellwig wrote:
> The coldfire platforms can't properly implement dma_alloc_coherent and
> currently just return noncoherent memory from dma_alloc_coherent.
> 
> The fec driver then works around this with a flush of all caches in the
> receive path. Make this hack a little less bad by using the explicit
> dma_alloc_noncoherent API and documenting the hacky cache flushes so
> that the DMA API level hack can be removed.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   drivers/net/ethernet/freescale/fec_main.c | 84 ++++++++++++++++++++---
>   1 file changed, 75 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 77c8e9cfb44562..aa032ea0ebf0c2 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -406,6 +406,70 @@ static void fec_dump(struct net_device *ndev)
>   	} while (bdp != txq->bd.base);
>   }
>   
> +/*
> + * Coldfire does not support DMA coherent allocations, and has historically used
> + * a band-aid with a manual flush in fec_enet_rx_queue.
> + */
> +#ifdef CONFIG_COLDFIRE

It looks a bit odd that this ends up applying to all of Coldfire, while 
the associated cache flush only applies to the M532x platform, which 
implies that we'd now be relying on the non-coherent allocation actually 
being coherent on other Coldfire platforms.

Would it work to do something like this to make sure dma-direct does the 
right thing on such platforms (which presumably don't have caches?), and 
then reduce the scope of this FEC hack accordingly, to clean things up 
even better?

diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index b826e9c677b2..1851fa3fe077 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -27,6 +27,7 @@ config COLDFIRE
  	select CPU_HAS_NO_BITFIELDS
  	select CPU_HAS_NO_CAS
  	select CPU_HAS_NO_MULDIV64
+	select DMA_DEFAULT_COHERENT if !MMU && !M523x
  	select GENERIC_CSUM
  	select GPIOLIB
  	select HAVE_LEGACY_CLK


Thanks,
Robin.

> +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> +		gfp_t gfp)
> +{
> +	return dma_alloc_noncoherent(dev, size, handle, DMA_BIDIRECTIONAL, gfp);
> +}
> +
> +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
> +		dma_addr_t handle)
> +{
> +	dma_free_noncoherent(dev, size, cpu_addr, handle, DMA_BIDIRECTIONAL);
> +}
> +#else /* CONFIG_COLDFIRE */
> +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> +		gfp_t gfp)
> +{
> +	return dma_alloc_coherent(dev, size, handle, gfp);
> +}
> +
> +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
> +		dma_addr_t handle)
> +{
> +	dma_free_coherent(dev, size, cpu_addr, handle);
> +}
> +#endif /* !CONFIG_COLDFIRE */
> +
> +struct fec_dma_devres {
> +	size_t		size;
> +	void		*vaddr;
> +	dma_addr_t	dma_handle;
> +};
> +
> +static void fec_dmam_release(struct device *dev, void *res)
> +{
> +	struct fec_dma_devres *this = res;
> +
> +	fec_dma_free(dev, this->size, this->vaddr, this->dma_handle);
> +}
> +
> +static void *fec_dmam_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> +		gfp_t gfp)
> +{
> +	struct fec_dma_devres *dr;
> +	void *vaddr;
> +
> +	dr = devres_alloc(fec_dmam_release, sizeof(*dr), gfp);
> +	if (!dr)
> +		return NULL;
> +	vaddr = fec_dma_alloc(dev, size, handle, gfp);
> +	if (!vaddr) {
> +		devres_free(dr);
> +		return NULL;
> +	}
> +	dr->vaddr = vaddr;
> +	dr->dma_handle = *handle;
> +	dr->size = size;
> +	devres_add(dev, dr);
> +	return vaddr;
> +}
> +
>   static inline bool is_ipv4_pkt(struct sk_buff *skb)
>   {
>   	return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
> @@ -1661,6 +1725,10 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
>   #endif
>   
>   #ifdef CONFIG_M532x
> +	/*
> +	 * Hacky flush of all caches instead of using the DMA API for the TSO
> +	 * headers.
> +	 */
>   	flush_cache_all();
>   #endif
>   	rxq = fep->rx_queue[queue_id];
> @@ -3288,10 +3356,9 @@ static void fec_enet_free_queue(struct net_device *ndev)
>   	for (i = 0; i < fep->num_tx_queues; i++)
>   		if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) {
>   			txq = fep->tx_queue[i];
> -			dma_free_coherent(&fep->pdev->dev,
> -					  txq->bd.ring_size * TSO_HEADER_SIZE,
> -					  txq->tso_hdrs,
> -					  txq->tso_hdrs_dma);
> +			fec_dma_free(&fep->pdev->dev,
> +				     txq->bd.ring_size * TSO_HEADER_SIZE,
> +				     txq->tso_hdrs, txq->tso_hdrs_dma);
>   		}
>   
>   	for (i = 0; i < fep->num_rx_queues; i++)
> @@ -3321,10 +3388,9 @@ static int fec_enet_alloc_queue(struct net_device *ndev)
>   		txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
>   		txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
>   
> -		txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
> +		txq->tso_hdrs = fec_dma_alloc(&fep->pdev->dev,
>   					txq->bd.ring_size * TSO_HEADER_SIZE,
> -					&txq->tso_hdrs_dma,
> -					GFP_KERNEL);
> +					&txq->tso_hdrs_dma, GFP_KERNEL);
>   		if (!txq->tso_hdrs) {
>   			ret = -ENOMEM;
>   			goto alloc_failed;
> @@ -4043,8 +4109,8 @@ static int fec_enet_init(struct net_device *ndev)
>   	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
>   
>   	/* Allocate memory for buffer descriptors. */
> -	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
> -				       GFP_KERNEL);
> +	cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma,
> +				  GFP_KERNEL);
>   	if (!cbd_base) {
>   		ret = -ENOMEM;
>   		goto free_queue_mem;
Christoph Hellwig Oct. 9, 2023, 12:58 p.m. UTC | #2
On Mon, Oct 09, 2023 at 11:29:12AM +0100, Robin Murphy wrote:
> It looks a bit odd that this ends up applying to all of Coldfire, while the 
> associated cache flush only applies to the M532x platform, which implies 
> that we'd now be relying on the non-coherent allocation actually being 
> coherent on other Coldfire platforms.
>
> Would it work to do something like this to make sure dma-direct does the 
> right thing on such platforms (which presumably don't have caches?), and 
> then reduce the scope of this FEC hack accordingly, to clean things up even 
> better?

Probably.  Actually Greg commented something along those lines last
time, and mentioned something about just instruction vs instruction
and data cache.

>
> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> index b826e9c677b2..1851fa3fe077 100644
> --- a/arch/m68k/Kconfig.cpu
> +++ b/arch/m68k/Kconfig.cpu
> @@ -27,6 +27,7 @@ config COLDFIRE
>  	select CPU_HAS_NO_BITFIELDS
>  	select CPU_HAS_NO_CAS
>  	select CPU_HAS_NO_MULDIV64
> +	select DMA_DEFAULT_COHERENT if !MMU && !M523x

Although it would probably make more sense to simply not select
CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE and
CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU for these platforms and not
build the non-coherent code at all.  This should also include
all coldfire platforms with mmu (M54xx/M548x/M5441x).  Then
again for many of the coldfire platforms the Kconfig allows
to select CACHE_WRITETHRU/CACHE_COPYBACK which looks related.

Greg, any chance you could help out with the caching modes on
coldfire and legacy m68knommu?
Greg Ungerer Oct. 10, 2023, 2:20 p.m. UTC | #3
Hi Robin,

On 9/10/23 20:29, Robin Murphy wrote:
> On 2023-10-09 08:41, Christoph Hellwig wrote:
>> The coldfire platforms can't properly implement dma_alloc_coherent and
>> currently just return noncoherent memory from dma_alloc_coherent.
>>
>> The fec driver then works around this with a flush of all caches in the
>> receive path. Make this hack a little less bad by using the explicit
>> dma_alloc_noncoherent API and documenting the hacky cache flushes so
>> that the DMA API level hack can be removed.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>   drivers/net/ethernet/freescale/fec_main.c | 84 ++++++++++++++++++++---
>>   1 file changed, 75 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
>> index 77c8e9cfb44562..aa032ea0ebf0c2 100644
>> --- a/drivers/net/ethernet/freescale/fec_main.c
>> +++ b/drivers/net/ethernet/freescale/fec_main.c
>> @@ -406,6 +406,70 @@ static void fec_dump(struct net_device *ndev)
>>       } while (bdp != txq->bd.base);
>>   }
>> +/*
>> + * Coldfire does not support DMA coherent allocations, and has historically used
>> + * a band-aid with a manual flush in fec_enet_rx_queue.
>> + */
>> +#ifdef CONFIG_COLDFIRE
> 
> It looks a bit odd that this ends up applying to all of Coldfire, while the associated cache flush only applies to the M532x platform, which implies that we'd now be relying on the non-coherent allocation actually being coherent on other Coldfire platforms.
> 
> Would it work to do something like this to make sure dma-direct does the right thing on such platforms (which presumably don't have caches?), and then reduce the scope of this FEC hack accordingly, to clean things up even better?
> 
> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> index b826e9c677b2..1851fa3fe077 100644
> --- a/arch/m68k/Kconfig.cpu
> +++ b/arch/m68k/Kconfig.cpu
> @@ -27,6 +27,7 @@ config COLDFIRE
>       select CPU_HAS_NO_BITFIELDS
>       select CPU_HAS_NO_CAS
>       select CPU_HAS_NO_MULDIV64
> +    select DMA_DEFAULT_COHERENT if !MMU && !M523x

That should be M532x.

I am pretty sure the code as-is today is broken for the case of using
the split cache arrangement (so both instruction and data cache) for any
of the version 2 cores too (denoted by the HAVE_CACHE_SPLIT option).
But that has probably not been picked up because the default on those
has always been instruction cache only.

The reason for the special case for the M532x series is that it is a version 3
core and they have a unified instruction and data cache. The 532x series is the
only version 3 core that Linux supports that has the FEC hardware module.

Regards
Greg


>       select GENERIC_CSUM
>       select GPIOLIB
>       select HAVE_LEGACY_CLK
> 
> 
> Thanks,
> Robin.
> 
>> +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>> +        gfp_t gfp)
>> +{
>> +    return dma_alloc_noncoherent(dev, size, handle, DMA_BIDIRECTIONAL, gfp);
>> +}
>> +
>> +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
>> +        dma_addr_t handle)
>> +{
>> +    dma_free_noncoherent(dev, size, cpu_addr, handle, DMA_BIDIRECTIONAL);
>> +}
>> +#else /* CONFIG_COLDFIRE */
>> +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>> +        gfp_t gfp)
>> +{
>> +    return dma_alloc_coherent(dev, size, handle, gfp);
>> +}
>> +
>> +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
>> +        dma_addr_t handle)
>> +{
>> +    dma_free_coherent(dev, size, cpu_addr, handle);
>> +}
>> +#endif /* !CONFIG_COLDFIRE */
>> +
>> +struct fec_dma_devres {
>> +    size_t        size;
>> +    void        *vaddr;
>> +    dma_addr_t    dma_handle;
>> +};
>> +
>> +static void fec_dmam_release(struct device *dev, void *res)
>> +{
>> +    struct fec_dma_devres *this = res;
>> +
>> +    fec_dma_free(dev, this->size, this->vaddr, this->dma_handle);
>> +}
>> +
>> +static void *fec_dmam_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>> +        gfp_t gfp)
>> +{
>> +    struct fec_dma_devres *dr;
>> +    void *vaddr;
>> +
>> +    dr = devres_alloc(fec_dmam_release, sizeof(*dr), gfp);
>> +    if (!dr)
>> +        return NULL;
>> +    vaddr = fec_dma_alloc(dev, size, handle, gfp);
>> +    if (!vaddr) {
>> +        devres_free(dr);
>> +        return NULL;
>> +    }
>> +    dr->vaddr = vaddr;
>> +    dr->dma_handle = *handle;
>> +    dr->size = size;
>> +    devres_add(dev, dr);
>> +    return vaddr;
>> +}
>> +
>>   static inline bool is_ipv4_pkt(struct sk_buff *skb)
>>   {
>>       return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
>> @@ -1661,6 +1725,10 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
>>   #endif
>>   #ifdef CONFIG_M532x
>> +    /*
>> +     * Hacky flush of all caches instead of using the DMA API for the TSO
>> +     * headers.
>> +     */
>>       flush_cache_all();
>>   #endif
>>       rxq = fep->rx_queue[queue_id];
>> @@ -3288,10 +3356,9 @@ static void fec_enet_free_queue(struct net_device *ndev)
>>       for (i = 0; i < fep->num_tx_queues; i++)
>>           if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) {
>>               txq = fep->tx_queue[i];
>> -            dma_free_coherent(&fep->pdev->dev,
>> -                      txq->bd.ring_size * TSO_HEADER_SIZE,
>> -                      txq->tso_hdrs,
>> -                      txq->tso_hdrs_dma);
>> +            fec_dma_free(&fep->pdev->dev,
>> +                     txq->bd.ring_size * TSO_HEADER_SIZE,
>> +                     txq->tso_hdrs, txq->tso_hdrs_dma);
>>           }
>>       for (i = 0; i < fep->num_rx_queues; i++)
>> @@ -3321,10 +3388,9 @@ static int fec_enet_alloc_queue(struct net_device *ndev)
>>           txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
>>           txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
>> -        txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
>> +        txq->tso_hdrs = fec_dma_alloc(&fep->pdev->dev,
>>                       txq->bd.ring_size * TSO_HEADER_SIZE,
>> -                    &txq->tso_hdrs_dma,
>> -                    GFP_KERNEL);
>> +                    &txq->tso_hdrs_dma, GFP_KERNEL);
>>           if (!txq->tso_hdrs) {
>>               ret = -ENOMEM;
>>               goto alloc_failed;
>> @@ -4043,8 +4109,8 @@ static int fec_enet_init(struct net_device *ndev)
>>       bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
>>       /* Allocate memory for buffer descriptors. */
>> -    cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
>> -                       GFP_KERNEL);
>> +    cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma,
>> +                  GFP_KERNEL);
>>       if (!cbd_base) {
>>           ret = -ENOMEM;
>>           goto free_queue_mem;
Greg Ungerer Oct. 10, 2023, 2:44 p.m. UTC | #4
Hi Christoph,

On 9/10/23 22:58, Christoph Hellwig wrote:
> On Mon, Oct 09, 2023 at 11:29:12AM +0100, Robin Murphy wrote:
>> It looks a bit odd that this ends up applying to all of Coldfire, while the
>> associated cache flush only applies to the M532x platform, which implies
>> that we'd now be relying on the non-coherent allocation actually being
>> coherent on other Coldfire platforms.
>>
>> Would it work to do something like this to make sure dma-direct does the
>> right thing on such platforms (which presumably don't have caches?), and
>> then reduce the scope of this FEC hack accordingly, to clean things up even
>> better?
> 
> Probably.  Actually Greg commented something along those lines last
> time, and mentioned something about just instruction vs instruction
> and data cache.

I just elaborated on that point a little in response to Robin's email.

>>
>> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
>> index b826e9c677b2..1851fa3fe077 100644
>> --- a/arch/m68k/Kconfig.cpu
>> +++ b/arch/m68k/Kconfig.cpu
>> @@ -27,6 +27,7 @@ config COLDFIRE
>>   	select CPU_HAS_NO_BITFIELDS
>>   	select CPU_HAS_NO_CAS
>>   	select CPU_HAS_NO_MULDIV64
>> +	select DMA_DEFAULT_COHERENT if !MMU && !M523x
> 
> Although it would probably make more sense to simply not select
> CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE and
> CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU for these platforms and not
> build the non-coherent code at all.  This should also include
> all coldfire platforms with mmu (M54xx/M548x/M5441x).  Then
> again for many of the coldfire platforms the Kconfig allows
> to select CACHE_WRITETHRU/CACHE_COPYBACK which looks related.
> 
> Greg, any chance you could help out with the caching modes on
> coldfire and legacy m68knommu?

Sure, yep. I am not aware that the legacy 68000 or 68328 had any caches
at all.

The cache modes change a bit throughout the various ColdFire family series, but
can be broken down roughly into 2 groups.

1.  Version 2 cores (so everything named 52xx). Early members (5206, 5206e, 5272)
     had instruction cache only. Later members (5208, 5271/5275, 523x, etc) had
     a selectable instruction or data or both cache arrangement. Kconfig lets you
     select which you want - the default is instruction cache only.

2.  Version 3 and 4 cores (so everything named 53xx and 54xx). They have a unified
     instruction and data cache. Data caching can be selected to be write-through
     (the default) or write-back.

Some of the version 4 cores also have an MMU.

The M532x hack in the fec driver is to deal with its unified cache, and it is the
only ColdFire version 3 or 4 SoC that has the fec hardware module (thus no others
listed out there). I suspect that if you select data cache on the version 2 cores
that have it, the fec driver would break there too.

Regards
Greg
Christoph Hellwig Oct. 11, 2023, 5:52 a.m. UTC | #5
On Wed, Oct 11, 2023 at 12:20:57AM +1000, Greg Ungerer wrote:
> That should be M532x.
>
> I am pretty sure the code as-is today is broken for the case of using
> the split cache arrangement (so both instruction and data cache) for any
> of the version 2 cores too (denoted by the HAVE_CACHE_SPLIT option).
> But that has probably not been picked up because the default on those
> has always been instruction cache only.
>
> The reason for the special case for the M532x series is that it is a version 3
> core and they have a unified instruction and data cache. The 532x series is the
> only version 3 core that Linux supports that has the FEC hardware module.

So what config option should we check for supporting coherent allocations
and which not having the hack in fec?

Here are my guesses based on the above:

in m68k support coherent allocations with no work if

CONFIG_COLDFIRE is set and neither CONFIG_CACHE_D nor CONFIG_CACHE_BOTH
is set.

in the fec driver do the alloc_noncoherent and global cache flush
hack if:

CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)

?
Greg Ungerer Oct. 11, 2023, 1:09 p.m. UTC | #6
On 11/10/23 15:52, Christoph Hellwig wrote:
> On Wed, Oct 11, 2023 at 12:20:57AM +1000, Greg Ungerer wrote:
>> That should be M532x.
>>
>> I am pretty sure the code as-is today is broken for the case of using
>> the split cache arrangement (so both instruction and data cache) for any
>> of the version 2 cores too (denoted by the HAVE_CACHE_SPLIT option).
>> But that has probably not been picked up because the default on those
>> has always been instruction cache only.
>>
>> The reason for the special case for the M532x series is that it is a version 3
>> core and they have a unified instruction and data cache. The 532x series is the
>> only version 3 core that Linux supports that has the FEC hardware module.
> 
> So what config option should we check for supporting coherent allocations
> and which not having the hack in fec?
> 
> Here are my guesses based on the above:
> 
> in m68k support coherent allocations with no work if
> 
> CONFIG_COLDFIRE is set and neither CONFIG_CACHE_D nor CONFIG_CACHE_BOTH
> is set.

I think this needs to be CONFIG_COLDFIRE is set and none of CONFIG_HAVE_CACHE_CB or
CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.



> in the fec driver do the alloc_noncoherent and global cache flush
> hack if:
> 
> CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)

And then this becomes:

CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
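
Spelled out as a driver-side guard, that refined condition would read roughly
as below (a sketch of the idea only; the eventual patch may structure it
differently):

/* sketch: take the noncoherent path only when a ColdFire data cache can
 * actually be configured (HAVE_CACHE_CB marks the unified-cache V3/V4
 * parts per the discussion above) */
#if defined(CONFIG_COLDFIRE) && \
    (defined(CONFIG_HAVE_CACHE_CB) || defined(CONFIG_CACHE_D) || \
     defined(CONFIG_CACHE_BOTH))
/* data caching possible: use dma_alloc_noncoherent() plus the cache flush */
#else
/* no data cache configured: plain dma_alloc_coherent() is fine */
#endif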
Michael Schmitz Oct. 11, 2023, 6:21 p.m. UTC | #7
Hi Greg,

On 12/10/23 02:09, Greg Ungerer wrote:
>
> I think this needs to be CONFIG_COLDFIRE is set and none of 
> CONFIG_HAVE_CACHE_CB or
> CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.
>
>
>
>> in the fec driver do the alloc_noncoherent and global cache flush
>> hack if:
>>
>> COMFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
>
> And then this becomes:
>
> CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D || 
> CONFIG_CACHE_BOTH)

You appear to have dropped a '!' there ...

Cheers,

     Michael
Greg Ungerer Oct. 12, 2023, 1:25 p.m. UTC | #8
Hi Michael,

On 12/10/23 04:21, Michael Schmitz wrote:
> Hi Greg,
> 
> On 12/10/23 02:09, Greg Ungerer wrote:
>>
>> I think this needs to be CONFIG_COLDFIRE is set and none of CONFIG_HAVE_CACHE_CB or
>> CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.
>>
>>
>>
>>> in the fec driver do the alloc_noncoherent and global cache flush
>>> hack if:
>>>
>>> CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
>>
>> And then this becomes:
>>
>> CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
> 
> You appear to have dropped a '!' there ...

Not sure I follow. This is the opposite of the case above. The noncoherent alloc
and cache flush should be performed if ColdFire and any of CONFIG_HAVE_CACHE_CB,
CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set - since that means there is data
caching involved.

Regards
Greg
Christoph Hellwig Oct. 12, 2023, 2 p.m. UTC | #9
On Thu, Oct 12, 2023 at 11:25:00PM +1000, Greg Ungerer wrote:
> Not sure I follow. This is the opposite of the case above. The noncoherent alloc
> and cache flush should be performed if ColdFire and any of CONFIG_HAVE_CACHE_CB,
> CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set - since that means there is data
> caching involved.

FYI, this is what I ended up with this morning:

http://git.infradead.org/users/hch/misc.git/commitdiff/ea7c8c5ca3f158f88594f4f1c9a52735115f9aca

Whole branch:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/dma-coherent-deps
Michael Schmitz Oct. 12, 2023, 7:18 p.m. UTC | #10
Hi Greg,

On 13/10/23 02:25, Greg Ungerer wrote:
> Hi Michael,
>
> On 12/10/23 04:21, Michael Schmitz wrote:
>> Hi Greg,
>>
>> On 12/10/23 02:09, Greg Ungerer wrote:
>>>
>>> I think this needs to be CONFIG_COLDFIRE is set and none of 
>>> CONFIG_HAVE_CACHE_CB or
>>> CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.
>>>
>>>
>>>> in the fec driver do the alloc_noncoherent and global cache flush
>>>> hack if:
>>>>
>>>> CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
>>>
>>> And then this becomes:
>>>
>>> CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D || 
>>> CONFIG_CACHE_BOTH)
>>
>> You appear to have dropped a '!' there ...
>
> Not sure I follow. This is the opposite of the case above. The 
> noncoherent alloc
> and cache flush should be performed if ColdFire and any of 
> CONFIG_HAVE_CACHE_CB,
> CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set - since that means there 
> is data
> caching involved.

Now I see - I had read that definition to correspond to the 'select 
coherent allocations on m68k' case, not the latter 'use non-coherent 
allocations in the fec driver'.

Your definitions are correct as written (and for all I can see, 
Christoph's implementation is correct, too).

Apologies for the noise - I blame jet lag for getting my wires crossed ...

Cheers,

     Michael

>
> Regards
> Greg
>
Greg Ungerer Oct. 13, 2023, 1:48 a.m. UTC | #11
Hi Christoph,

On 13/10/23 00:00, Christoph Hellwig wrote:
> On Thu, Oct 12, 2023 at 11:25:00PM +1000, Greg Ungerer wrote:
>> Not sure I follow. This is the opposite of the case above. The noncoherent alloc
>> and cache flush should be performed if ColdFire and any of CONFIG_HAVE_CACHE_CB,
>> CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set - since that means there is data
>> caching involved.
> 
> FYI, this is what I ended up with this morning:
> 
> http://git.infradead.org/users/hch/misc.git/commitdiff/ea7c8c5ca3f158f88594f4f1c9a52735115f9aca

That looks nice.

> Whole branch:
> 
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/dma-coherent-deps

I'll try and give this some testing next week.

Thanks
Greg
Geert Uytterhoeven Oct. 16, 2023, 9:12 a.m. UTC | #12
Hi Greg,

On Tue, Oct 10, 2023 at 4:45 PM Greg Ungerer <gerg@linux-m68k.org> wrote:
> On 9/10/23 22:58, Christoph Hellwig wrote:
> > On Mon, Oct 09, 2023 at 11:29:12AM +0100, Robin Murphy wrote:
> >> It looks a bit odd that this ends up applying to all of Coldfire, while the
> >> associated cache flush only applies to the M532x platform, which implies
> >> that we'd now be relying on the non-coherent allocation actually being
> >> coherent on other Coldfire platforms.
> >>
> >> Would it work to do something like this to make sure dma-direct does the
> >> right thing on such platforms (which presumably don't have caches?), and
> >> then reduce the scope of this FEC hack accordingly, to clean things up even
> >> better?
> >
> > Probably.  Actually Greg commented something along those lines last
> > time, and mentioned something about just instruction vs instruction
> > and data cache.
>
> I just elaborated on that point a little in response to Robin's email.
>
> >>
> >> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> >> index b826e9c677b2..1851fa3fe077 100644
> >> --- a/arch/m68k/Kconfig.cpu
> >> +++ b/arch/m68k/Kconfig.cpu
> >> @@ -27,6 +27,7 @@ config COLDFIRE
> >>      select CPU_HAS_NO_BITFIELDS
> >>      select CPU_HAS_NO_CAS
> >>      select CPU_HAS_NO_MULDIV64
> >> +    select DMA_DEFAULT_COHERENT if !MMU && !M523x
> >
> > Although it would probably make more sense to simply not select
> > CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE and
> > CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU for these platforms and not
> > build the non-coherent code at all.  This should also include
> > all coldfire platforms with mmu (M54xx/M548x/M5441x).  Then
> > again for many of the coldfire platforms the Kconfig allows
> > to select CACHE_WRITETHRU/CACHE_COPYBACK which looks related.
> >
> > Greg, any chance you could help out with the caching modes on
> > coldfire and legacy m68knommu?
>
> Sure, yep. I am not aware that the legacy 68000 or 68328 had any caches
> at all.

68000 (and derivatives like 68*328) does not have any cache.
68360 (which is no longer supported by Linux) is based on CPU32, and
does not seem to have any caches, although the documentation does
mention the use of "external cache memories".

Gr{oetje,eeting}s,

                        Geert
Geert Uytterhoeven Oct. 17, 2023, 8:31 a.m. UTC | #13
On Mon, Oct 16, 2023 at 11:12 AM Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> On Tue, Oct 10, 2023 at 4:45 PM Greg Ungerer <gerg@linux-m68k.org> wrote:
> > On 9/10/23 22:58, Christoph Hellwig wrote:
> > > On Mon, Oct 09, 2023 at 11:29:12AM +0100, Robin Murphy wrote:
> > >> It looks a bit odd that this ends up applying to all of Coldfire, while the
> > >> associated cache flush only applies to the M532x platform, which implies
> > >> that we'd now be relying on the non-coherent allocation actually being
> > >> coherent on other Coldfire platforms.
> > >>
> > >> Would it work to do something like this to make sure dma-direct does the
> > >> right thing on such platforms (which presumably don't have caches?), and
> > >> then reduce the scope of this FEC hack accordingly, to clean things up even
> > >> better?
> > >
> > > Probably.  Actually Greg commented something along those lines last
> > > time, and mentioned something about just instruction vs instruction
> > > and data cache.
> >
> > I just elaborated on that point a little in response to Robin's email.
> >
> > >>
> > >> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> > >> index b826e9c677b2..1851fa3fe077 100644
> > >> --- a/arch/m68k/Kconfig.cpu
> > >> +++ b/arch/m68k/Kconfig.cpu
> > >> @@ -27,6 +27,7 @@ config COLDFIRE
> > >>      select CPU_HAS_NO_BITFIELDS
> > >>      select CPU_HAS_NO_CAS
> > >>      select CPU_HAS_NO_MULDIV64
> > >> +    select DMA_DEFAULT_COHERENT if !MMU && !M523x
> > >
> > > Although it would probably make more sense to simply not select
> > > CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE and
> > > CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU for these platforms and not
> > > build the non-coherent code at all.  This should also include
> > > all coldfire platforms with mmu (M54xx/M548x/M5441x).  Then
> > > again for many of the coldfire platforms the Kconfig allows
> > > to select CACHE_WRITETHRU/CACHE_COPYBACK which looks related.
> > >
> > > Greg, any chance you could help out with the caching modes on
> > > coldfire and legacy m68knommu?
> >
> > Sure, yep. I am not aware that the legacy 68000 or 68328 had any caches
> > at all.
>
> 68000 (and derivatives like 68*328) does not have any cache.
> 68360 (which is no longer supported by Linux) is based on CPU32, and
> does not seem to have any caches, although the documentation does
> mention the use of "external cache memories".

As M68K selects NO_DMA if !MMU && !COLDFIRE anyway, this doesn't
matter for legacy^Wclassic m68knommu.

Gr{oetje,eeting}s,

                        Geert
Patch

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 77c8e9cfb44562..aa032ea0ebf0c2 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -406,6 +406,70 @@  static void fec_dump(struct net_device *ndev)
 	} while (bdp != txq->bd.base);
 }
 
+/*
+ * Coldfire does not support DMA coherent allocations, and has historically used
+ * a band-aid with a manual flush in fec_enet_rx_queue.
+ */
+#ifdef CONFIG_COLDFIRE
+static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp)
+{
+	return dma_alloc_noncoherent(dev, size, handle, DMA_BIDIRECTIONAL, gfp);
+}
+
+static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t handle)
+{
+	dma_free_noncoherent(dev, size, cpu_addr, handle, DMA_BIDIRECTIONAL);
+}
+#else /* CONFIG_COLDFIRE */
+static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp)
+{
+	return dma_alloc_coherent(dev, size, handle, gfp);
+}
+
+static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t handle)
+{
+	dma_free_coherent(dev, size, cpu_addr, handle);
+}
+#endif /* !CONFIG_COLDFIRE */
+
+struct fec_dma_devres {
+	size_t		size;
+	void		*vaddr;
+	dma_addr_t	dma_handle;
+};
+
+static void fec_dmam_release(struct device *dev, void *res)
+{
+	struct fec_dma_devres *this = res;
+
+	fec_dma_free(dev, this->size, this->vaddr, this->dma_handle);
+}
+
+static void *fec_dmam_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp)
+{
+	struct fec_dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(fec_dmam_release, sizeof(*dr), gfp);
+	if (!dr)
+		return NULL;
+	vaddr = fec_dma_alloc(dev, size, handle, gfp);
+	if (!vaddr) {
+		devres_free(dr);
+		return NULL;
+	}
+	dr->vaddr = vaddr;
+	dr->dma_handle = *handle;
+	dr->size = size;
+	devres_add(dev, dr);
+	return vaddr;
+}
+
 static inline bool is_ipv4_pkt(struct sk_buff *skb)
 {
 	return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
@@ -1661,6 +1725,10 @@  fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 #endif
 
 #ifdef CONFIG_M532x
+	/*
+	 * Hacky flush of all caches instead of using the DMA API for the TSO
+	 * headers.
+	 */
 	flush_cache_all();
 #endif
 	rxq = fep->rx_queue[queue_id];
@@ -3288,10 +3356,9 @@  static void fec_enet_free_queue(struct net_device *ndev)
 	for (i = 0; i < fep->num_tx_queues; i++)
 		if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) {
 			txq = fep->tx_queue[i];
-			dma_free_coherent(&fep->pdev->dev,
-					  txq->bd.ring_size * TSO_HEADER_SIZE,
-					  txq->tso_hdrs,
-					  txq->tso_hdrs_dma);
+			fec_dma_free(&fep->pdev->dev,
+				     txq->bd.ring_size * TSO_HEADER_SIZE,
+				     txq->tso_hdrs, txq->tso_hdrs_dma);
 		}
 
 	for (i = 0; i < fep->num_rx_queues; i++)
@@ -3321,10 +3388,9 @@  static int fec_enet_alloc_queue(struct net_device *ndev)
 		txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
 		txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
 
-		txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
+		txq->tso_hdrs = fec_dma_alloc(&fep->pdev->dev,
 					txq->bd.ring_size * TSO_HEADER_SIZE,
-					&txq->tso_hdrs_dma,
-					GFP_KERNEL);
+					&txq->tso_hdrs_dma, GFP_KERNEL);
 		if (!txq->tso_hdrs) {
 			ret = -ENOMEM;
 			goto alloc_failed;
@@ -4043,8 +4109,8 @@  static int fec_enet_init(struct net_device *ndev)
 	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
 
 	/* Allocate memory for buffer descriptors. */
-	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
-				       GFP_KERNEL);
+	cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma,
+				  GFP_KERNEL);
 	if (!cbd_base) {
 		ret = -ENOMEM;
 		goto free_queue_mem;
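
The comment this patch adds at fec_enet_rx_queue() points at the eventual
proper fix: syncing just the affected buffers through the streaming DMA API
instead of flushing every cache. A hedged sketch of what that could look like
for the TSO header ring (hypothetical, not part of this series):

/* hypothetical replacement for the flush_cache_all() band-aid: after the
 * CPU writes a TSO header, push just this queue's header ring back to
 * memory before the device reads it */
dma_sync_single_for_device(&fep->pdev->dev, txq->tso_hdrs_dma,
			   txq->bd.ring_size * TSO_HEADER_SIZE,
			   DMA_BIDIRECTIONAL);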