Message ID | 20231009074121.219686-6-hch@lst.de (mailing list archive)
---|---
State | Superseded
Delegated to: | Netdev Maintainers
Series | [1/6] dma-direct: add dependencies to CONFIG_DMA_GLOBAL_POOL
On 2023-10-09 08:41, Christoph Hellwig wrote:
> The coldfire platforms can't properly implement dma_alloc_coherent and
> currently just return noncoherent memory from dma_alloc_coherent.
>
> The fec driver then works around this with a flush of all caches in the
> receive path. Make this hack a little less bad by using the explicit
> dma_alloc_noncoherent API and documenting the hacky cache flushes so
> that the DMA API level hack can be removed.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/net/ethernet/freescale/fec_main.c | 84 ++++++++++++++++++++---
>  1 file changed, 75 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 77c8e9cfb44562..aa032ea0ebf0c2 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -406,6 +406,70 @@ static void fec_dump(struct net_device *ndev)
>  	} while (bdp != txq->bd.base);
>  }
>
> +/*
> + * Coldfire does not support DMA coherent allocations, and has historically used
> + * a band-aid with a manual flush in fec_enet_rx_queue.
> + */
> +#ifdef CONFIG_COLDFIRE

It looks a bit odd that this ends up applying to all of Coldfire, while
the associated cache flush only applies to the M532x platform, which
implies that we'd now be relying on the non-coherent allocation actually
being coherent on other Coldfire platforms.

Would it work to do something like this to make sure dma-direct does the
right thing on such platforms (which presumably don't have caches?), and
then reduce the scope of this FEC hack accordingly, to clean things up
even better?

diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index b826e9c677b2..1851fa3fe077 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -27,6 +27,7 @@ config COLDFIRE
 	select CPU_HAS_NO_BITFIELDS
 	select CPU_HAS_NO_CAS
 	select CPU_HAS_NO_MULDIV64
+	select DMA_DEFAULT_COHERENT if !MMU && !M523x
 	select GENERIC_CSUM
 	select GPIOLIB
 	select HAVE_LEGACY_CLK

Thanks,
Robin.
On Mon, Oct 09, 2023 at 11:29:12AM +0100, Robin Murphy wrote:
> It looks a bit odd that this ends up applying to all of Coldfire, while
> the associated cache flush only applies to the M532x platform, which
> implies that we'd now be relying on the non-coherent allocation actually
> being coherent on other Coldfire platforms.
>
> Would it work to do something like this to make sure dma-direct does the
> right thing on such platforms (which presumably don't have caches?), and
> then reduce the scope of this FEC hack accordingly, to clean things up
> even better?

Probably. Actually Greg commented something along those lines last time,
and mentioned something about instruction-only versus combined
instruction and data caches.

> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> index b826e9c677b2..1851fa3fe077 100644
> --- a/arch/m68k/Kconfig.cpu
> +++ b/arch/m68k/Kconfig.cpu
> @@ -27,6 +27,7 @@ config COLDFIRE
>  	select CPU_HAS_NO_BITFIELDS
>  	select CPU_HAS_NO_CAS
>  	select CPU_HAS_NO_MULDIV64
> +	select DMA_DEFAULT_COHERENT if !MMU && !M523x

Although it would probably make more sense to simply not select
CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE and CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
for these platforms, and not build the non-coherent code at all. This
should also include all coldfire platforms with mmu (M54xx/M548x/M5441x).
Then again, for many of the coldfire platforms the Kconfig allows
selecting CACHE_WRITETHRU/CACHE_COPYBACK, which looks related.

Greg, any chance you could help out with the caching modes on coldfire
and legacy m68knommu?
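[Editorial note: to make that alternative concrete, a minimal, untested
Kconfig sketch of the direction described above. The CACHE_D/CACHE_BOTH
symbols in the condition only come up later in the thread, where the
condition is also refined (HAVE_CACHE_CB turns out to matter too), and
where these selects would actually live is an assumption.]

# Untested sketch, not a posted patch: build the dma-direct sync hooks
# only for ColdFire configurations that can actually cache data, so the
# cache-less parts get inherently coherent allocations without a
# per-platform DMA_DEFAULT_COHERENT whitelist.
config COLDFIRE
	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if CACHE_D || CACHE_BOTH
	select ARCH_HAS_SYNC_DMA_FOR_CPU if CACHE_D || CACHE_BOTH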
Hi Robin,

On 9/10/23 20:29, Robin Murphy wrote:
> On 2023-10-09 08:41, Christoph Hellwig wrote:
>> The coldfire platforms can't properly implement dma_alloc_coherent and
>> currently just return noncoherent memory from dma_alloc_coherent.
>> [...]
>
> It looks a bit odd that this ends up applying to all of Coldfire, while
> the associated cache flush only applies to the M532x platform, which
> implies that we'd now be relying on the non-coherent allocation actually
> being coherent on other Coldfire platforms.
>
> Would it work to do something like this to make sure dma-direct does the
> right thing on such platforms (which presumably don't have caches?), and
> then reduce the scope of this FEC hack accordingly, to clean things up
> even better?
>
> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> index b826e9c677b2..1851fa3fe077 100644
> --- a/arch/m68k/Kconfig.cpu
> +++ b/arch/m68k/Kconfig.cpu
> @@ -27,6 +27,7 @@ config COLDFIRE
>  	select CPU_HAS_NO_BITFIELDS
>  	select CPU_HAS_NO_CAS
>  	select CPU_HAS_NO_MULDIV64
> +	select DMA_DEFAULT_COHERENT if !MMU && !M523x

That should be M532x.

I am pretty sure the code as-is today is broken for the case of using
the split cache arrangement (so both instruction and data cache) on any
of the version 2 cores too (denoted by the HAVE_CACHE_SPLIT option).
But that has probably not been picked up because the default on those
has always been instruction cache only.

The reason for the special case for the M532x series is that it is a
version 3 core and they have a unified instruction and data cache. The
532x series is the only version 3 core that Linux supports that has the
FEC hardware module.

Regards
Greg
Hi Christoph,

On 9/10/23 22:58, Christoph Hellwig wrote:
> On Mon, Oct 09, 2023 at 11:29:12AM +0100, Robin Murphy wrote:
>> It looks a bit odd that this ends up applying to all of Coldfire, while
>> the associated cache flush only applies to the M532x platform, [...]
>
> Probably. Actually Greg commented something along those lines last time,
> and mentioned something about instruction-only versus combined
> instruction and data caches.

I just elaborated on that point a little in response to Robin's email.

> Although it would probably make more sense to simply not select
> CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE and CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
> for these platforms, and not build the non-coherent code at all. This
> should also include all coldfire platforms with mmu (M54xx/M548x/M5441x).
> Then again, for many of the coldfire platforms the Kconfig allows
> selecting CACHE_WRITETHRU/CACHE_COPYBACK, which looks related.
>
> Greg, any chance you could help out with the caching modes on coldfire
> and legacy m68knommu?

Sure, yep. I am not aware that the legacy 68000 or 68328 had any caches
at all.

The cache modes change a bit throughout the various ColdFire family
series, but can be broken down roughly into 2 groups.

1. Version 2 cores (so everything named 52xx). Early members (5206,
   5206e, 5272) had instruction cache only. Later members (5208,
   5271/5275, 523x, etc) had a selectable instruction or data or both
   cache arrangement. Kconfig lets you select which you want - the
   default is instruction cache only.

2. Version 3 and 4 cores (so everything named 53xx and 54xx). They have
   a unified instruction and data cache. Data caching can be selected
   to be write-through (the default) or write-back. Some of the
   version 4 cores also have an MMU.

The M532x hack in the fec driver is there to deal with its unified
cache, and it is the only ColdFire version 3 or 4 SoC that has the fec
hardware module (thus no others listed out there). I suspect that
selecting data cache on the version 2 cores that have it would break
the fec driver too.

Regards
Greg
On Wed, Oct 11, 2023 at 12:20:57AM +1000, Greg Ungerer wrote:
> That should be M532x.
>
> I am pretty sure the code as-is today is broken for the case of using
> the split cache arrangement (so both instruction and data cache) on any
> of the version 2 cores too (denoted by the HAVE_CACHE_SPLIT option).
> But that has probably not been picked up because the default on those
> has always been instruction cache only.
>
> The reason for the special case for the M532x series is that it is a
> version 3 core and they have a unified instruction and data cache. The
> 532x series is the only version 3 core that Linux supports that has the
> FEC hardware module.

So which config options should we check for supporting coherent
allocations, and which for needing the hack in fec?

Here are my guesses based on the above:

In m68k, support coherent allocations with no extra work if:

  CONFIG_COLDFIRE is set and neither CONFIG_CACHE_D nor CONFIG_CACHE_BOTH
  is set.

In the fec driver, do the alloc_noncoherent and global cache flush hack
if:

  CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)

?
On 11/10/23 15:52, Christoph Hellwig wrote:
> On Wed, Oct 11, 2023 at 12:20:57AM +1000, Greg Ungerer wrote:
>> [...]
>
> So which config options should we check for supporting coherent
> allocations, and which for needing the hack in fec?
>
> Here are my guesses based on the above:
>
> In m68k, support coherent allocations with no extra work if:
>
>   CONFIG_COLDFIRE is set and neither CONFIG_CACHE_D nor
>   CONFIG_CACHE_BOTH is set.

I think this needs to be: CONFIG_COLDFIRE is set and none of
CONFIG_HAVE_CACHE_CB, CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.

> In the fec driver, do the alloc_noncoherent and global cache flush hack
> if:
>
>   CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)

And then this becomes:

  CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D ||
  CONFIG_CACHE_BOTH)
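[Editorial note: expressed as a preprocessor guard, the corrected
condition could look like the sketch below. The FEC_NONCOHERENT_DMA
name is purely illustrative, and this is not necessarily the shape the
final patch took.]

/*
 * Illustrative guard only (FEC_NONCOHERENT_DMA is a made-up name):
 * take the dma_alloc_noncoherent() + flush_cache_all() path only when
 * a ColdFire configuration actually caches data.
 */
#if defined(CONFIG_COLDFIRE) && \
    (defined(CONFIG_HAVE_CACHE_CB) || defined(CONFIG_CACHE_D) || \
     defined(CONFIG_CACHE_BOTH))
#define FEC_NONCOHERENT_DMA
#endif

#ifdef FEC_NONCOHERENT_DMA
/* non-coherent fec_dma_alloc()/fec_dma_free() variants go here */
#else
/* plain dma_alloc_coherent()-based variants go here */
#endif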
Hi Greg,

On 12/10/23 02:09, Greg Ungerer wrote:
> I think this needs to be: CONFIG_COLDFIRE is set and none of
> CONFIG_HAVE_CACHE_CB, CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.
>
>> In the fec driver, do the alloc_noncoherent and global cache flush hack
>> if:
>>
>>   CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
>
> And then this becomes:
>
>   CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D ||
>   CONFIG_CACHE_BOTH)

You appear to have dropped a '!' there ...

Cheers,

	Michael
Hi Michael,

On 12/10/23 04:21, Michael Schmitz wrote:
> Hi Greg,
>
> On 12/10/23 02:09, Greg Ungerer wrote:
>> I think this needs to be: CONFIG_COLDFIRE is set and none of
>> CONFIG_HAVE_CACHE_CB, CONFIG_CACHE_D or CONFIG_CACHE_BOTH are set.
>>
>>> In the fec driver, do the alloc_noncoherent and global cache flush
>>> hack if:
>>>
>>>   CONFIG_COLDFIRE && (CONFIG_CACHE_D || CONFIG_CACHE_BOTH)
>>
>> And then this becomes:
>>
>>   CONFIG_COLDFIRE && (CONFIG_HAVE_CACHE_CB || CONFIG_CACHE_D ||
>>   CONFIG_CACHE_BOTH)
>
> You appear to have dropped a '!' there ...

Not sure I follow. This is the opposite of the case above. The
noncoherent alloc and cache flush should be performed if ColdFire and
any of CONFIG_HAVE_CACHE_CB, CONFIG_CACHE_D or CONFIG_CACHE_BOTH are
set - since that means there is data caching involved.

Regards
Greg
On Thu, Oct 12, 2023 at 11:25:00PM +1000, Greg Ungerer wrote:
> Not sure I follow. This is the opposite of the case above. The
> noncoherent alloc and cache flush should be performed if ColdFire and
> any of CONFIG_HAVE_CACHE_CB, CONFIG_CACHE_D or CONFIG_CACHE_BOTH are
> set - since that means there is data caching involved.

FYI, this is what I ended up with this morning:

http://git.infradead.org/users/hch/misc.git/commitdiff/ea7c8c5ca3f158f88594f4f1c9a52735115f9aca

Whole branch:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/dma-coherent-deps
Hi Greg,

On 13/10/23 02:25, Greg Ungerer wrote:
> Hi Michael,
>
> On 12/10/23 04:21, Michael Schmitz wrote:
>> You appear to have dropped a '!' there ...
>
> Not sure I follow. This is the opposite of the case above. The
> noncoherent alloc and cache flush should be performed if ColdFire and
> any of CONFIG_HAVE_CACHE_CB, CONFIG_CACHE_D or CONFIG_CACHE_BOTH are
> set - since that means there is data caching involved.

Now I see - I had read that definition as corresponding to the 'select
coherent allocations on m68k' case, not the latter 'use non-coherent
allocations in the fec driver' case. Your definitions are correct as
written (and for all I can see, Christoph's implementation is correct,
too).

Apologies for the noise - I blame jet lag for getting my wires
crossed ...

Cheers,

	Michael
Hi Christoph,

On 13/10/23 00:00, Christoph Hellwig wrote:
> On Thu, Oct 12, 2023 at 11:25:00PM +1000, Greg Ungerer wrote:
>> Not sure I follow. This is the opposite of the case above. [...]
>
> FYI, this is what I ended up with this morning:
>
> http://git.infradead.org/users/hch/misc.git/commitdiff/ea7c8c5ca3f158f88594f4f1c9a52735115f9aca

That looks nice.

> Whole branch:
>
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/dma-coherent-deps

I'll try and give this some testing next week.

Thanks
Greg
Hi Greg,

On Tue, Oct 10, 2023 at 4:45 PM Greg Ungerer <gerg@linux-m68k.org> wrote:
> On 9/10/23 22:58, Christoph Hellwig wrote:
>> [...]
>>
>> Greg, any chance you could help out with the caching modes on coldfire
>> and legacy m68knommu?
>
> Sure, yep. I am not aware that the legacy 68000 or 68328 had any caches
> at all.

68000 (and derivatives like 68*328) does not have any cache.
68360 (which is no longer supported by Linux) is based on CPU32, and
does not seem to have any caches, although the documentation does
mention the use of "external cache memories".

Gr{oetje,eeting}s,

						Geert
On Mon, Oct 16, 2023 at 11:12 AM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> On Tue, Oct 10, 2023 at 4:45 PM Greg Ungerer <gerg@linux-m68k.org> wrote:
>> Sure, yep. I am not aware that the legacy 68000 or 68328 had any caches
>> at all.
>
> 68000 (and derivatives like 68*328) does not have any cache.
> 68360 (which is no longer supported by Linux) is based on CPU32, and
> does not seem to have any caches, although the documentation does
> mention the use of "external cache memories".

As M68K selects NO_DMA if !MMU && !COLDFIRE anyway, this doesn't matter
for legacy^Wclassic m68knommu.

Gr{oetje,eeting}s,

						Geert
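[Editorial note: for reference, the select Geert is pointing at,
reconstructed here from his wording; its exact placement in
arch/m68k/Kconfig is an assumption.]

# Reconstructed from the remark above, placement not re-verified:
# cache-less classic m68knommu builds no DMA support at all, so the
# coherent-vs-noncoherent question only arises on ColdFire.
config M68K
	select NO_DMA if !MMU && !COLDFIRE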
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 77c8e9cfb44562..aa032ea0ebf0c2 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -406,6 +406,70 @@ static void fec_dump(struct net_device *ndev)
 	} while (bdp != txq->bd.base);
 }
 
+/*
+ * Coldfire does not support DMA coherent allocations, and has historically used
+ * a band-aid with a manual flush in fec_enet_rx_queue.
+ */
+#ifdef CONFIG_COLDFIRE
+static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp)
+{
+	return dma_alloc_noncoherent(dev, size, handle, DMA_BIDIRECTIONAL, gfp);
+}
+
+static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t handle)
+{
+	dma_free_noncoherent(dev, size, cpu_addr, handle, DMA_BIDIRECTIONAL);
+}
+#else /* CONFIG_COLDFIRE */
+static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp)
+{
+	return dma_alloc_coherent(dev, size, handle, gfp);
+}
+
+static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t handle)
+{
+	dma_free_coherent(dev, size, cpu_addr, handle);
+}
+#endif /* !CONFIG_COLDFIRE */
+
+struct fec_dma_devres {
+	size_t		size;
+	void		*vaddr;
+	dma_addr_t	dma_handle;
+};
+
+static void fec_dmam_release(struct device *dev, void *res)
+{
+	struct fec_dma_devres *this = res;
+
+	fec_dma_free(dev, this->size, this->vaddr, this->dma_handle);
+}
+
+static void *fec_dmam_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp)
+{
+	struct fec_dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(fec_dmam_release, sizeof(*dr), gfp);
+	if (!dr)
+		return NULL;
+	vaddr = fec_dma_alloc(dev, size, handle, gfp);
+	if (!vaddr) {
+		devres_free(dr);
+		return NULL;
+	}
+	dr->vaddr = vaddr;
+	dr->dma_handle = *handle;
+	dr->size = size;
+	devres_add(dev, dr);
+	return vaddr;
+}
+
 static inline bool is_ipv4_pkt(struct sk_buff *skb)
 {
 	return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
@@ -1661,6 +1725,10 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 #endif
 
 #ifdef CONFIG_M532x
+	/*
+	 * Hacky flush of all caches instead of using the DMA API for the TSO
+	 * headers.
+	 */
 	flush_cache_all();
 #endif
 	rxq = fep->rx_queue[queue_id];
@@ -3288,10 +3356,9 @@ static void fec_enet_free_queue(struct net_device *ndev)
 	for (i = 0; i < fep->num_tx_queues; i++)
 		if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) {
 			txq = fep->tx_queue[i];
-			dma_free_coherent(&fep->pdev->dev,
-					  txq->bd.ring_size * TSO_HEADER_SIZE,
-					  txq->tso_hdrs,
-					  txq->tso_hdrs_dma);
+			fec_dma_free(&fep->pdev->dev,
+				     txq->bd.ring_size * TSO_HEADER_SIZE,
+				     txq->tso_hdrs, txq->tso_hdrs_dma);
 		}
 
 	for (i = 0; i < fep->num_rx_queues; i++)
@@ -3321,10 +3388,9 @@ static int fec_enet_alloc_queue(struct net_device *ndev)
 		txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
 		txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
 
-		txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
+		txq->tso_hdrs = fec_dma_alloc(&fep->pdev->dev,
 					txq->bd.ring_size * TSO_HEADER_SIZE,
-					&txq->tso_hdrs_dma,
-					GFP_KERNEL);
+					&txq->tso_hdrs_dma, GFP_KERNEL);
 		if (!txq->tso_hdrs) {
 			ret = -ENOMEM;
 			goto alloc_failed;
@@ -4043,8 +4109,8 @@ static int fec_enet_init(struct net_device *ndev)
 	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
 
 	/* Allocate memory for buffer descriptors. */
-	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
-				       GFP_KERNEL);
+	cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma,
+				  GFP_KERNEL);
 	if (!cbd_base) {
 		ret = -ENOMEM;
 		goto free_queue_mem;
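[Editorial note on the devres part of the patch: fec_dmam_alloc()
reimplements the managed semantics of dmam_alloc_coherent() on top of
the new fec_dma_alloc() wrappers, so the buffer is released by
fec_dmam_release() when the device is unbound. A minimal hypothetical
caller - example_probe() is not from the patch - could look like this.]

/*
 * Hypothetical caller, for illustration only: no explicit free is
 * needed on the error or remove paths, because fec_dmam_alloc()
 * registered fec_dmam_release() with the devres core via devres_add().
 */
static int example_probe(struct platform_device *pdev)
{
	dma_addr_t bd_dma;
	void *cbd_base;

	cbd_base = fec_dmam_alloc(&pdev->dev, PAGE_SIZE, &bd_dma, GFP_KERNEL);
	if (!cbd_base)
		return -ENOMEM;

	/* ... hand bd_dma to the controller, use cbd_base from the CPU ... */
	return 0;	/* buffer is freed automatically on driver detach */
}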
The coldfire platforms can't properly implement dma_alloc_coherent and
currently just return noncoherent memory from dma_alloc_coherent.

The fec driver then works around this with a flush of all caches in the
receive path. Make this hack a little less bad by using the explicit
dma_alloc_noncoherent API and documenting the hacky cache flushes so
that the DMA API level hack can be removed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/net/ethernet/freescale/fec_main.c | 84 ++++++++++++++++++++---
 1 file changed, 75 insertions(+), 9 deletions(-)