diff mbox series

[PoC] arm: dma-mapping: direct: Apply dma_pfn_offset only when it is valid

Message ID 20200114164332.3164-1-peter.ujfalusi@ti.com (mailing list archive)
State New, archived
Headers show
Series [PoC] arm: dma-mapping: direct: Apply dma_pfn_offset only when it is valid | expand

Commit Message

Peter Ujfalusi Jan. 14, 2020, 4:43 p.m. UTC
The dma_pfn_offset should only be applied to an address which is within the
dma-ranges range. Any address outside should have offset as 0.

This is a proof of concept patch which works on k2g where we have
dma-ranges = <0x80000000 0x8 0x00000000 0x80000000>;
for the SoC.

Without this patch everything which tries to set DMA_BIT_MASK(32) or less
fails -> DMA and peripherals with built in DMA (SD with ADMA) will not
probe or fall back to PIO mode.

With this patch EDMA probes, SD's ADMA is working.
Audio and dma-test is working just fine with EDMA, mmc accesses with ADMA
also operational.

The patch does not tried to address the incomplete handling of dma-ranges
from DT and it is not fixing/updating arch code or drivers which uses
dma_pfn_offset.
Neither provides fallback support for kernel setting only dma_pfn_offset to
arbitrary number without paddr/dma_addr/size.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
---
Hi Christoph, Robin,

I know it is a bit more complicated, but with this patch k2g is working fine...

I wanted to test the concept I was describing and a patch speaks better than
words.

Kind regards,
Peter

 arch/arm/include/asm/dma-mapping.h | 25 ++++++++++++++++++++--
 drivers/of/device.c                |  7 ++++++-
 include/linux/device.h             |  8 ++++++++
 include/linux/dma-direct.h         | 33 ++++++++++++++++++++++++++++--
 kernel/dma/coherent.c              |  9 +++++---
 5 files changed, 74 insertions(+), 8 deletions(-)

Comments

Robin Murphy Jan. 14, 2020, 6:19 p.m. UTC | #1
On 14/01/2020 4:43 pm, Peter Ujfalusi wrote:
> The dma_pfn_offset should only be applied to an address which is within the
> dma-ranges range. Any address outside should have offset as 0.

No, that's wrong. If a non-empty dma-ranges is present, then addresses 
which do not fall within any specified range are invalid altogether.

The current long-term plan is indeed to try to move to some sort of 
internal "DMA range descriptor" in order to properly cope with the kind 
of esoteric integrations which have multiple disjoint windows, 
potentially even with different offsets, but as you point out there are 
still many hurdles between now and that becoming reality. So although 
this patch does represent the "right" thing, it's for entirely the wrong 
reason. AFAICT for your case it basically just works out as a very 
baroque way to hack dma_direct_supported() again - we shouldn't need a 
special case to map a bogus physical address to valid DMA address, we 
should be fixing the source of the bogus PA in the first place.

> This is a proof of concept patch which works on k2g where we have
> dma-ranges = <0x80000000 0x8 0x00000000 0x80000000>;
> for the SoC.

TBH it's probably extra-confusing that you're on Keystone 2, where 
technically this ends up closer-to-OK than most, since IIRC the 0-2GB 
MMIO region is the same on all 3(?) interconnect maps. Thus the 100% 
honest description would really be:

dma-ranges = <0x0 0x0 0x0 0x80000000>,
	     <0x80000000 0x8 0x00000000 0x80000000>;

but yeah, that would just go horribly wrong with Linux today. The 
subtelty that dma_map_resource() ignores the pfn_offset happens to be a 
"feature" in this regard ;)

Robin.

> Without this patch everything which tries to set DMA_BIT_MASK(32) or less
> fails -> DMA and peripherals with built in DMA (SD with ADMA) will not
> probe or fall back to PIO mode.
> 
> With this patch EDMA probes, SD's ADMA is working.
> Audio and dma-test is working just fine with EDMA, mmc accesses with ADMA
> also operational.
> 
> The patch does not tried to address the incomplete handling of dma-ranges
> from DT and it is not fixing/updating arch code or drivers which uses
> dma_pfn_offset.
> Neither provides fallback support for kernel setting only dma_pfn_offset to
> arbitrary number without paddr/dma_addr/size.
> 
> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
> ---
> Hi Christoph, Robin,
> 
> I know it is a bit more complicated, but with this patch k2g is working fine...
> 
> I wanted to test the concept I was describing and a patch speaks better than
> words.
> 
> Kind regards,
> Peter
> 
>   arch/arm/include/asm/dma-mapping.h | 25 ++++++++++++++++++++--
>   drivers/of/device.c                |  7 ++++++-
>   include/linux/device.h             |  8 ++++++++
>   include/linux/dma-direct.h         | 33 ++++++++++++++++++++++++++++--
>   kernel/dma/coherent.c              |  9 +++++---
>   5 files changed, 74 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> index bdd80ddbca34..9bff6ad2d8c8 100644
> --- a/arch/arm/include/asm/dma-mapping.h
> +++ b/arch/arm/include/asm/dma-mapping.h
> @@ -33,10 +33,31 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
>    * addresses. They must not be used by drivers.
>    */
>   #ifndef __arch_pfn_to_dma
> +
> +static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
> +						     phys_addr_t paddr)
> +{
> +	if (paddr >= dev->dma_ranges.paddr &&
> +	    paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
> +		return dev->dma_ranges.pfn_offset;
> +
> +	return 0;
> +}
> +
> +static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
> +						     dma_addr_t dma_addr)
> +{
> +	if (dma_addr >= dev->dma_ranges.dma_addr &&
> +	    dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
> +		return dev->dma_ranges.pfn_offset;
> +
> +	return 0;
> +}
> +
>   static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
>   {
>   	if (dev)
> -		pfn -= dev->dma_pfn_offset;
> +		pfn -= __phys_to_dma_pfn_offset(dev, __pfn_to_phys(pfn));
>   	return (dma_addr_t)__pfn_to_bus(pfn);
>   }
>   
> @@ -45,7 +66,7 @@ static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
>   	unsigned long pfn = __bus_to_pfn(addr);
>   
>   	if (dev)
> -		pfn += dev->dma_pfn_offset;
> +		pfn += __dma_to_phys_pfn_offset(dev, addr);
>   
>   	return pfn;
>   }
> diff --git a/drivers/of/device.c b/drivers/of/device.c
> index 27203bfd0b22..07a8cc1a7d7f 100644
> --- a/drivers/of/device.c
> +++ b/drivers/of/device.c
> @@ -105,7 +105,7 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
>   		if (!force_dma)
>   			return ret == -ENODEV ? 0 : ret;
>   
> -		dma_addr = offset = 0;
> +		dma_addr = offset = paddr = 0;
>   	} else {
>   		offset = PFN_DOWN(paddr - dma_addr);
>   
> @@ -144,6 +144,11 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
>   
>   	dev->dma_pfn_offset = offset;
>   
> +	dev->dma_ranges.paddr = paddr;
> +	dev->dma_ranges.dma_addr = dma_addr;
> +	dev->dma_ranges.size = size;
> +	dev->dma_ranges.pfn_offset = offset;
> +
>   	/*
>   	 * Limit coherent and dma mask based on size and default mask
>   	 * set by the driver.
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ce6db68c3f29..57006b51a989 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -293,6 +293,13 @@ struct device_dma_parameters {
>   	unsigned long segment_boundary_mask;
>   };
>   
> +struct dma_ranges {
> +	u64 paddr;
> +	u64 dma_addr;
> +	u64 size;
> +	unsigned long pfn_offset;
> +};
> +
>   /**
>    * struct device_connection - Device Connection Descriptor
>    * @fwnode: The device node of the connected device
> @@ -581,6 +588,7 @@ struct device {
>   					     allocations such descriptors. */
>   	u64		bus_dma_limit;	/* upstream dma constraint */
>   	unsigned long	dma_pfn_offset;
> +	struct dma_ranges dma_ranges;
>   
>   	struct device_dma_parameters *dma_parms;
>   
> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
> index 24b8684aa21d..4a46a15945ea 100644
> --- a/include/linux/dma-direct.h
> +++ b/include/linux/dma-direct.h
> @@ -11,18 +11,47 @@ extern unsigned int zone_dma_bits;
>   #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
>   #include <asm/dma-direct.h>
>   #else
> +
> +static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
> +						     phys_addr_t paddr)
> +{
> +	if (!dev)
> +		return 0;
> +
> +	if (paddr >= dev->dma_ranges.paddr &&
> +	    paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
> +		return dev->dma_ranges.pfn_offset
> +
> +	return 0;
> +}
> +
> +static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
> +						     dma_addr_t dma_addr)
> +{
> +	if (!dev)
> +		return 0;
> +
> +	if (dma_addr >= dev->dma_ranges.dma_addr &&
> +	    dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
> +		return dev->dma_ranges.pfn_offset
> +
> +	return 0;
> +}
> +
>   static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
>   {
>   	dma_addr_t dev_addr = (dma_addr_t)paddr;
> +	unsigned long offset = __phys_to_dma_pfn_offset(dev, paddr);
>   
> -	return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
> +	return dev_addr - ((dma_addr_t)offset << PAGE_SHIFT);
>   }
>   
>   static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
>   {
>   	phys_addr_t paddr = (phys_addr_t)dev_addr;
> +	unsigned long offset = __dma_to_phys_pfn_offset(dev, dev_addr);
>   
> -	return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
> +	return paddr + ((phys_addr_t)offset << PAGE_SHIFT);
>   }
>   #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
>   
> diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
> index 551b0eb7028a..7a68fd09f5d0 100644
> --- a/kernel/dma/coherent.c
> +++ b/kernel/dma/coherent.c
> @@ -31,10 +31,13 @@ static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *de
>   static inline dma_addr_t dma_get_device_base(struct device *dev,
>   					     struct dma_coherent_mem * mem)
>   {
> -	if (mem->use_dev_dma_pfn_offset)
> -		return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
> -	else
> +	if (mem->use_dev_dma_pfn_offset) {
> +		unsigned long offset = __phys_to_dma_pfn_offset(dev,
> +						__pfn_to_phys(mem->pfn_base));
> +		return (mem->pfn_base - offset) << PAGE_SHIFT;
> +	} else {
>   		return mem->device_base;
> +	}
>   }
>   
>   static int dma_init_coherent_memory(phys_addr_t phys_addr,
>
Peter Ujfalusi Jan. 15, 2020, 11:50 a.m. UTC | #2
On 14/01/2020 20.19, Robin Murphy wrote:
> On 14/01/2020 4:43 pm, Peter Ujfalusi wrote:
>> The dma_pfn_offset should only be applied to an address which is
>> within the
>> dma-ranges range. Any address outside should have offset as 0.
> 
> No, that's wrong. If a non-empty dma-ranges is present, then addresses
> which do not fall within any specified range are invalid altogether.

It is not explicitly stated by the specification, but can be interpreted
like that and from a pow it does make sense to treat things like that.

> The current long-term plan is indeed to try to move to some sort of
> internal "DMA range descriptor" in order to properly cope with the kind
> of esoteric integrations which have multiple disjoint windows,
> potentially even with different offsets, but as you point out there are
> still many hurdles between now and that becoming reality. So although
> this patch does represent the "right" thing, it's for entirely the wrong
> reason. AFAICT for your case it basically just works out as a very
> baroque way to hack dma_direct_supported() again - we shouldn't need a
> special case to map a bogus physical address to valid DMA address, we
> should be fixing the source of the bogus PA in the first place.

DMA_BIT_MASK(32) is pretty clear: The DMA can handle addresses within
32bit space. DMA_BIT_MASK(24) is also clear: The DMA can handle
addresses within 24bit space.

dma-ranges does not change that. The DMA can still address the same
space. What dma-ranges will tell is that a physical address range 'X'
can be accessed on the bus under range 'Y'.
For the DMA within the bus the physical address within 'X' does not
matter. What matters is the matching address within 'Y'

We should do dma_pfn_offset conversion _only_ for the range it applies
to. Outside of it is not valid to apply it. The dma API will check
(without applying dma_pfn_offset) addresses outside of any range (only
one currently in Linux) and if it is not OK for the mask then it will fail.

> 
>> This is a proof of concept patch which works on k2g where we have
>> dma-ranges = <0x80000000 0x8 0x00000000 0x80000000>;
>> for the SoC.
> 
> TBH it's probably extra-confusing that you're on Keystone 2, where
> technically this ends up closer-to-OK than most, since IIRC the 0-2GB
> MMIO region is the same on all 3(?) interconnect maps. Thus the 100%
> honest description would really be:
> 
> dma-ranges = <0x0 0x0 0x0 0x80000000>,
>          <0x80000000 0x8 0x00000000 0x80000000>;
> 
> but yeah, that would just go horribly wrong with Linux today.

It does ;) This was the first thing I have tried.

> The
> subtelty that dma_map_resource() ignores the pfn_offset happens to be a
> "feature" in this regard ;)

Right, but Keystone 2 is broken since 5.3-rc3 by commit
ad3c7b18c5b362be5dbd0f2c0bcf1fd5fd659315.

Can you propose a fix which we can use until things get sorted out?

Thanks,
- Péter

> 
> Robin.
> 
>> Without this patch everything which tries to set DMA_BIT_MASK(32) or less
>> fails -> DMA and peripherals with built in DMA (SD with ADMA) will not
>> probe or fall back to PIO mode.
>>
>> With this patch EDMA probes, SD's ADMA is working.
>> Audio and dma-test is working just fine with EDMA, mmc accesses with ADMA
>> also operational.
>>
>> The patch does not tried to address the incomplete handling of dma-ranges
>> from DT and it is not fixing/updating arch code or drivers which uses
>> dma_pfn_offset.
>> Neither provides fallback support for kernel setting only
>> dma_pfn_offset to
>> arbitrary number without paddr/dma_addr/size.
>>
>> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
>> ---
>> Hi Christoph, Robin,
>>
>> I know it is a bit more complicated, but with this patch k2g is
>> working fine...
>>
>> I wanted to test the concept I was describing and a patch speaks
>> better than
>> words.
>>
>> Kind regards,
>> Peter
>>
>>   arch/arm/include/asm/dma-mapping.h | 25 ++++++++++++++++++++--
>>   drivers/of/device.c                |  7 ++++++-
>>   include/linux/device.h             |  8 ++++++++
>>   include/linux/dma-direct.h         | 33 ++++++++++++++++++++++++++++--
>>   kernel/dma/coherent.c              |  9 +++++---
>>   5 files changed, 74 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/dma-mapping.h
>> b/arch/arm/include/asm/dma-mapping.h
>> index bdd80ddbca34..9bff6ad2d8c8 100644
>> --- a/arch/arm/include/asm/dma-mapping.h
>> +++ b/arch/arm/include/asm/dma-mapping.h
>> @@ -33,10 +33,31 @@ static inline const struct dma_map_ops
>> *get_arch_dma_ops(struct bus_type *bus)
>>    * addresses. They must not be used by drivers.
>>    */
>>   #ifndef __arch_pfn_to_dma
>> +
>> +static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
>> +                             phys_addr_t paddr)
>> +{
>> +    if (paddr >= dev->dma_ranges.paddr &&
>> +        paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
>> +        return dev->dma_ranges.pfn_offset;
>> +
>> +    return 0;
>> +}
>> +
>> +static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
>> +                             dma_addr_t dma_addr)
>> +{
>> +    if (dma_addr >= dev->dma_ranges.dma_addr &&
>> +        dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
>> +        return dev->dma_ranges.pfn_offset;
>> +
>> +    return 0;
>> +}
>> +
>>   static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned
>> long pfn)
>>   {
>>       if (dev)
>> -        pfn -= dev->dma_pfn_offset;
>> +        pfn -= __phys_to_dma_pfn_offset(dev, __pfn_to_phys(pfn));
>>       return (dma_addr_t)__pfn_to_bus(pfn);
>>   }
>>   @@ -45,7 +66,7 @@ static inline unsigned long dma_to_pfn(struct
>> device *dev, dma_addr_t addr)
>>       unsigned long pfn = __bus_to_pfn(addr);
>>         if (dev)
>> -        pfn += dev->dma_pfn_offset;
>> +        pfn += __dma_to_phys_pfn_offset(dev, addr);
>>         return pfn;
>>   }
>> diff --git a/drivers/of/device.c b/drivers/of/device.c
>> index 27203bfd0b22..07a8cc1a7d7f 100644
>> --- a/drivers/of/device.c
>> +++ b/drivers/of/device.c
>> @@ -105,7 +105,7 @@ int of_dma_configure(struct device *dev, struct
>> device_node *np, bool force_dma)
>>           if (!force_dma)
>>               return ret == -ENODEV ? 0 : ret;
>>   -        dma_addr = offset = 0;
>> +        dma_addr = offset = paddr = 0;
>>       } else {
>>           offset = PFN_DOWN(paddr - dma_addr);
>>   @@ -144,6 +144,11 @@ int of_dma_configure(struct device *dev, struct
>> device_node *np, bool force_dma)
>>         dev->dma_pfn_offset = offset;
>>   +    dev->dma_ranges.paddr = paddr;
>> +    dev->dma_ranges.dma_addr = dma_addr;
>> +    dev->dma_ranges.size = size;
>> +    dev->dma_ranges.pfn_offset = offset;
>> +
>>       /*
>>        * Limit coherent and dma mask based on size and default mask
>>        * set by the driver.
>> diff --git a/include/linux/device.h b/include/linux/device.h
>> index ce6db68c3f29..57006b51a989 100644
>> --- a/include/linux/device.h
>> +++ b/include/linux/device.h
>> @@ -293,6 +293,13 @@ struct device_dma_parameters {
>>       unsigned long segment_boundary_mask;
>>   };
>>   +struct dma_ranges {
>> +    u64 paddr;
>> +    u64 dma_addr;
>> +    u64 size;
>> +    unsigned long pfn_offset;
>> +};
>> +
>>   /**
>>    * struct device_connection - Device Connection Descriptor
>>    * @fwnode: The device node of the connected device
>> @@ -581,6 +588,7 @@ struct device {
>>                            allocations such descriptors. */
>>       u64        bus_dma_limit;    /* upstream dma constraint */
>>       unsigned long    dma_pfn_offset;
>> +    struct dma_ranges dma_ranges;
>>         struct device_dma_parameters *dma_parms;
>>   diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
>> index 24b8684aa21d..4a46a15945ea 100644
>> --- a/include/linux/dma-direct.h
>> +++ b/include/linux/dma-direct.h
>> @@ -11,18 +11,47 @@ extern unsigned int zone_dma_bits;
>>   #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
>>   #include <asm/dma-direct.h>
>>   #else
>> +
>> +static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
>> +                             phys_addr_t paddr)
>> +{
>> +    if (!dev)
>> +        return 0;
>> +
>> +    if (paddr >= dev->dma_ranges.paddr &&
>> +        paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
>> +        return dev->dma_ranges.pfn_offset
>> +
>> +    return 0;
>> +}
>> +
>> +static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
>> +                             dma_addr_t dma_addr)
>> +{
>> +    if (!dev)
>> +        return 0;
>> +
>> +    if (dma_addr >= dev->dma_ranges.dma_addr &&
>> +        dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
>> +        return dev->dma_ranges.pfn_offset
>> +
>> +    return 0;
>> +}
>> +
>>   static inline dma_addr_t __phys_to_dma(struct device *dev,
>> phys_addr_t paddr)
>>   {
>>       dma_addr_t dev_addr = (dma_addr_t)paddr;
>> +    unsigned long offset = __phys_to_dma_pfn_offset(dev, paddr);
>>   -    return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
>> +    return dev_addr - ((dma_addr_t)offset << PAGE_SHIFT);
>>   }
>>     static inline phys_addr_t __dma_to_phys(struct device *dev,
>> dma_addr_t dev_addr)
>>   {
>>       phys_addr_t paddr = (phys_addr_t)dev_addr;
>> +    unsigned long offset = __dma_to_phys_pfn_offset(dev, dev_addr);
>>   -    return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
>> +    return paddr + ((phys_addr_t)offset << PAGE_SHIFT);
>>   }
>>   #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
>>   diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
>> index 551b0eb7028a..7a68fd09f5d0 100644
>> --- a/kernel/dma/coherent.c
>> +++ b/kernel/dma/coherent.c
>> @@ -31,10 +31,13 @@ static inline struct dma_coherent_mem
>> *dev_get_coherent_memory(struct device *de
>>   static inline dma_addr_t dma_get_device_base(struct device *dev,
>>                            struct dma_coherent_mem * mem)
>>   {
>> -    if (mem->use_dev_dma_pfn_offset)
>> -        return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
>> -    else
>> +    if (mem->use_dev_dma_pfn_offset) {
>> +        unsigned long offset = __phys_to_dma_pfn_offset(dev,
>> +                        __pfn_to_phys(mem->pfn_base));
>> +        return (mem->pfn_base - offset) << PAGE_SHIFT;
>> +    } else {
>>           return mem->device_base;
>> +    }
>>   }
>>     static int dma_init_coherent_memory(phys_addr_t phys_addr,
>>

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Robin Murphy Jan. 16, 2020, 7:13 p.m. UTC | #3
On 15/01/2020 11:50 am, Peter Ujfalusi wrote:
> 
> 
> On 14/01/2020 20.19, Robin Murphy wrote:
>> On 14/01/2020 4:43 pm, Peter Ujfalusi wrote:
>>> The dma_pfn_offset should only be applied to an address which is
>>> within the
>>> dma-ranges range. Any address outside should have offset as 0.
>>
>> No, that's wrong. If a non-empty dma-ranges is present, then addresses
>> which do not fall within any specified range are invalid altogether.
> 
> It is not explicitly stated by the specification, but can be interpreted
> like that and from a pow it does make sense to treat things like that.

Yes, DTspec doesn't explicitly say so, but it does follow fairly 
logically from the definition of "ranges"/"dma-ranges" as a translation 
between address spaces that an address not matching any range cannot 
pass between those address spaces at all. Case in point being that an 
absent "ranges" property means "no translation at all" (sadly the ship 
sailed too long ago to treat "dma-ranges" similarly strictly, so we're 
stuck with the assumption that absent = empty in that direction)

>> The current long-term plan is indeed to try to move to some sort of
>> internal "DMA range descriptor" in order to properly cope with the kind
>> of esoteric integrations which have multiple disjoint windows,
>> potentially even with different offsets, but as you point out there are
>> still many hurdles between now and that becoming reality. So although
>> this patch does represent the "right" thing, it's for entirely the wrong
>> reason. AFAICT for your case it basically just works out as a very
>> baroque way to hack dma_direct_supported() again - we shouldn't need a
>> special case to map a bogus physical address to valid DMA address, we
>> should be fixing the source of the bogus PA in the first place.
> 
> DMA_BIT_MASK(32) is pretty clear: The DMA can handle addresses within
> 32bit space. DMA_BIT_MASK(24) is also clear: The DMA can handle
> addresses within 24bit space.

Careful there - DMA *masks* are about how wide an address the device may 
generate, but it's not necessarily true that the interconnect beyond 
will actually accept every possible address that that many bits can 
encode (see the aforementioned case of PCI host bridge windows, or the 
recent change of bus_dma_mask to a not-necessarily-power-of-two 
bus_dma_limit)...

> dma-ranges does not change that. The DMA can still address the same
> space.

...thus that assumption is incorrect. However it's not particularly 
important to the immediate problem at hand.

> What dma-ranges will tell is that a physical address range 'X'
> can be accessed on the bus under range 'Y'.
> For the DMA within the bus the physical address within 'X' does not
> matter. What matters is the matching address within 'Y'
> 
> We should do dma_pfn_offset conversion _only_ for the range it applies
> to. Outside of it is not valid to apply it.

That much is agreed. For a physical address A in Y, phys_to_dma(A) 
should return the corresponding DMA address A' in X. What you're 
proposing is that for address B not in Y, phys_to_dma(B) should just 
return B, but my point is that even doing that is wrong, because there 
is no possible DMA address corresponding to B, so there is no valid 
value to return at all.

Nobody's disputing that the current dma_direct_supported() 
implementation is broken for the case where ZONE_DMA itself is offset 
from PA 0; the more pressing question is why Christoph's diff, which was 
trying to take that into account, still didn't work.

Robin.

> The dma API will check
> (without applying dma_pfn_offset) addresses outside of any range (only
> one currently in Linux) and if it is not OK for the mask then it will fail.
> 
>>
>>> This is a proof of concept patch which works on k2g where we have
>>> dma-ranges = <0x80000000 0x8 0x00000000 0x80000000>;
>>> for the SoC.
>>
>> TBH it's probably extra-confusing that you're on Keystone 2, where
>> technically this ends up closer-to-OK than most, since IIRC the 0-2GB
>> MMIO region is the same on all 3(?) interconnect maps. Thus the 100%
>> honest description would really be:
>>
>> dma-ranges = <0x0 0x0 0x0 0x80000000>,
>>           <0x80000000 0x8 0x00000000 0x80000000>;
>>
>> but yeah, that would just go horribly wrong with Linux today.
> 
> It does ;) This was the first thing I have tried.
> 
>> The
>> subtelty that dma_map_resource() ignores the pfn_offset happens to be a
>> "feature" in this regard ;)
> 
> Right, but Keystone 2 is broken since 5.3-rc3 by commit
> ad3c7b18c5b362be5dbd0f2c0bcf1fd5fd659315.
> 
> Can you propose a fix which we can use until things get sorted out?
> 
> Thanks,
> - Péter
> 
>>
>> Robin.
>>
>>> Without this patch everything which tries to set DMA_BIT_MASK(32) or less
>>> fails -> DMA and peripherals with built in DMA (SD with ADMA) will not
>>> probe or fall back to PIO mode.
>>>
>>> With this patch EDMA probes, SD's ADMA is working.
>>> Audio and dma-test is working just fine with EDMA, mmc accesses with ADMA
>>> also operational.
>>>
>>> The patch does not tried to address the incomplete handling of dma-ranges
>>> from DT and it is not fixing/updating arch code or drivers which uses
>>> dma_pfn_offset.
>>> Neither provides fallback support for kernel setting only
>>> dma_pfn_offset to
>>> arbitrary number without paddr/dma_addr/size.
>>>
>>> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
>>> ---
>>> Hi Christoph, Robin,
>>>
>>> I know it is a bit more complicated, but with this patch k2g is
>>> working fine...
>>>
>>> I wanted to test the concept I was describing and a patch speaks
>>> better than
>>> words.
>>>
>>> Kind regards,
>>> Peter
>>>
>>>    arch/arm/include/asm/dma-mapping.h | 25 ++++++++++++++++++++--
>>>    drivers/of/device.c                |  7 ++++++-
>>>    include/linux/device.h             |  8 ++++++++
>>>    include/linux/dma-direct.h         | 33 ++++++++++++++++++++++++++++--
>>>    kernel/dma/coherent.c              |  9 +++++---
>>>    5 files changed, 74 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/arm/include/asm/dma-mapping.h
>>> b/arch/arm/include/asm/dma-mapping.h
>>> index bdd80ddbca34..9bff6ad2d8c8 100644
>>> --- a/arch/arm/include/asm/dma-mapping.h
>>> +++ b/arch/arm/include/asm/dma-mapping.h
>>> @@ -33,10 +33,31 @@ static inline const struct dma_map_ops
>>> *get_arch_dma_ops(struct bus_type *bus)
>>>     * addresses. They must not be used by drivers.
>>>     */
>>>    #ifndef __arch_pfn_to_dma
>>> +
>>> +static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
>>> +                             phys_addr_t paddr)
>>> +{
>>> +    if (paddr >= dev->dma_ranges.paddr &&
>>> +        paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
>>> +        return dev->dma_ranges.pfn_offset;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
>>> +                             dma_addr_t dma_addr)
>>> +{
>>> +    if (dma_addr >= dev->dma_ranges.dma_addr &&
>>> +        dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
>>> +        return dev->dma_ranges.pfn_offset;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>    static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned
>>> long pfn)
>>>    {
>>>        if (dev)
>>> -        pfn -= dev->dma_pfn_offset;
>>> +        pfn -= __phys_to_dma_pfn_offset(dev, __pfn_to_phys(pfn));
>>>        return (dma_addr_t)__pfn_to_bus(pfn);
>>>    }
>>>    @@ -45,7 +66,7 @@ static inline unsigned long dma_to_pfn(struct
>>> device *dev, dma_addr_t addr)
>>>        unsigned long pfn = __bus_to_pfn(addr);
>>>          if (dev)
>>> -        pfn += dev->dma_pfn_offset;
>>> +        pfn += __dma_to_phys_pfn_offset(dev, addr);
>>>          return pfn;
>>>    }
>>> diff --git a/drivers/of/device.c b/drivers/of/device.c
>>> index 27203bfd0b22..07a8cc1a7d7f 100644
>>> --- a/drivers/of/device.c
>>> +++ b/drivers/of/device.c
>>> @@ -105,7 +105,7 @@ int of_dma_configure(struct device *dev, struct
>>> device_node *np, bool force_dma)
>>>            if (!force_dma)
>>>                return ret == -ENODEV ? 0 : ret;
>>>    -        dma_addr = offset = 0;
>>> +        dma_addr = offset = paddr = 0;
>>>        } else {
>>>            offset = PFN_DOWN(paddr - dma_addr);
>>>    @@ -144,6 +144,11 @@ int of_dma_configure(struct device *dev, struct
>>> device_node *np, bool force_dma)
>>>          dev->dma_pfn_offset = offset;
>>>    +    dev->dma_ranges.paddr = paddr;
>>> +    dev->dma_ranges.dma_addr = dma_addr;
>>> +    dev->dma_ranges.size = size;
>>> +    dev->dma_ranges.pfn_offset = offset;
>>> +
>>>        /*
>>>         * Limit coherent and dma mask based on size and default mask
>>>         * set by the driver.
>>> diff --git a/include/linux/device.h b/include/linux/device.h
>>> index ce6db68c3f29..57006b51a989 100644
>>> --- a/include/linux/device.h
>>> +++ b/include/linux/device.h
>>> @@ -293,6 +293,13 @@ struct device_dma_parameters {
>>>        unsigned long segment_boundary_mask;
>>>    };
>>>    +struct dma_ranges {
>>> +    u64 paddr;
>>> +    u64 dma_addr;
>>> +    u64 size;
>>> +    unsigned long pfn_offset;
>>> +};
>>> +
>>>    /**
>>>     * struct device_connection - Device Connection Descriptor
>>>     * @fwnode: The device node of the connected device
>>> @@ -581,6 +588,7 @@ struct device {
>>>                             allocations such descriptors. */
>>>        u64        bus_dma_limit;    /* upstream dma constraint */
>>>        unsigned long    dma_pfn_offset;
>>> +    struct dma_ranges dma_ranges;
>>>          struct device_dma_parameters *dma_parms;
>>>    diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
>>> index 24b8684aa21d..4a46a15945ea 100644
>>> --- a/include/linux/dma-direct.h
>>> +++ b/include/linux/dma-direct.h
>>> @@ -11,18 +11,47 @@ extern unsigned int zone_dma_bits;
>>>    #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
>>>    #include <asm/dma-direct.h>
>>>    #else
>>> +
>>> +static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
>>> +                             phys_addr_t paddr)
>>> +{
>>> +    if (!dev)
>>> +        return 0;
>>> +
>>> +    if (paddr >= dev->dma_ranges.paddr &&
>>> +        paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
>>> +        return dev->dma_ranges.pfn_offset
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
>>> +                             dma_addr_t dma_addr)
>>> +{
>>> +    if (!dev)
>>> +        return 0;
>>> +
>>> +    if (dma_addr >= dev->dma_ranges.dma_addr &&
>>> +        dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
>>> +        return dev->dma_ranges.pfn_offset
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>    static inline dma_addr_t __phys_to_dma(struct device *dev,
>>> phys_addr_t paddr)
>>>    {
>>>        dma_addr_t dev_addr = (dma_addr_t)paddr;
>>> +    unsigned long offset = __phys_to_dma_pfn_offset(dev, paddr);
>>>    -    return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
>>> +    return dev_addr - ((dma_addr_t)offset << PAGE_SHIFT);
>>>    }
>>>      static inline phys_addr_t __dma_to_phys(struct device *dev,
>>> dma_addr_t dev_addr)
>>>    {
>>>        phys_addr_t paddr = (phys_addr_t)dev_addr;
>>> +    unsigned long offset = __dma_to_phys_pfn_offset(dev, dev_addr);
>>>    -    return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
>>> +    return paddr + ((phys_addr_t)offset << PAGE_SHIFT);
>>>    }
>>>    #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
>>>    diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
>>> index 551b0eb7028a..7a68fd09f5d0 100644
>>> --- a/kernel/dma/coherent.c
>>> +++ b/kernel/dma/coherent.c
>>> @@ -31,10 +31,13 @@ static inline struct dma_coherent_mem
>>> *dev_get_coherent_memory(struct device *de
>>>    static inline dma_addr_t dma_get_device_base(struct device *dev,
>>>                             struct dma_coherent_mem * mem)
>>>    {
>>> -    if (mem->use_dev_dma_pfn_offset)
>>> -        return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
>>> -    else
>>> +    if (mem->use_dev_dma_pfn_offset) {
>>> +        unsigned long offset = __phys_to_dma_pfn_offset(dev,
>>> +                        __pfn_to_phys(mem->pfn_base));
>>> +        return (mem->pfn_base - offset) << PAGE_SHIFT;
>>> +    } else {
>>>            return mem->device_base;
>>> +    }
>>>    }
>>>      static int dma_init_coherent_memory(phys_addr_t phys_addr,
>>>
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
>
Peter Ujfalusi Jan. 27, 2020, 2 p.m. UTC | #4
On 16/01/2020 21.13, Robin Murphy wrote:
> On 15/01/2020 11:50 am, Peter Ujfalusi wrote:
>>
>>
>> On 14/01/2020 20.19, Robin Murphy wrote:
>>> On 14/01/2020 4:43 pm, Peter Ujfalusi wrote:
>>>> The dma_pfn_offset should only be applied to an address which is
>>>> within the
>>>> dma-ranges range. Any address outside should have offset as 0.
>>>
>>> No, that's wrong. If a non-empty dma-ranges is present, then addresses
>>> which do not fall within any specified range are invalid altogether.
>>
>> It is not explicitly stated by the specification, but can be interpreted
>> like that and from a pow it does make sense to treat things like that.
> 
> Yes, DTspec doesn't explicitly say so, but it does follow fairly
> logically from the definition of "ranges"/"dma-ranges" as a translation
> between address spaces that an address not matching any range cannot
> pass between those address spaces at all. Case in point being that an
> absent "ranges" property means "no translation at all" (sadly the ship
> sailed too long ago to treat "dma-ranges" similarly strictly, so we're
> stuck with the assumption that absent = empty in that direction)

Agree.

> 
>>> The current long-term plan is indeed to try to move to some sort of
>>> internal "DMA range descriptor" in order to properly cope with the kind
>>> of esoteric integrations which have multiple disjoint windows,
>>> potentially even with different offsets, but as you point out there are
>>> still many hurdles between now and that becoming reality. So although
>>> this patch does represent the "right" thing, it's for entirely the wrong
>>> reason. AFAICT for your case it basically just works out as a very
>>> baroque way to hack dma_direct_supported() again - we shouldn't need a
>>> special case to map a bogus physical address to valid DMA address, we
>>> should be fixing the source of the bogus PA in the first place.
>>
>> DMA_BIT_MASK(32) is pretty clear: The DMA can handle addresses within
>> 32bit space. DMA_BIT_MASK(24) is also clear: The DMA can handle
>> addresses within 24bit space.
> 
> Careful there - DMA *masks* are about how wide an address the device may
> generate

Which is in a simplified view is what address range the DMA can address.
The DMA can not generate address beyond the mask.

> but it's not necessarily true that the interconnect beyond
> will actually accept every possible address that that many bits can
> encode

True.

> (see the aforementioned case of PCI host bridge windows, or the
> recent change of bus_dma_mask to a not-necessarily-power-of-two
> bus_dma_limit)...

I see.

>> dma-ranges does not change that. The DMA can still address the same
>> space.
> 
> ...thus that assumption is incorrect.

Hrm, why it is not correct?
The DMA can generate addresses up to 32 bits. Anything beyond that is
not accessible by DMA.
The interconnect makes part of a higher (not addressable memory space)
available within the 32 bits range, thus the DMA can address that.
We tell this via the dma-ranges.
I agree that for a correct dma-ranges for k2g should be:
dma-ranges = <0x0 0x0 0x0 0x80000000>,
             <0x80000000 0x8 0x00000000 0x80000000>;

> However it's not particularly
> important to the immediate problem at hand.
> 
>> What dma-ranges will tell is that a physical address range 'X'
>> can be accessed on the bus under range 'Y'.
>> For the DMA within the bus the physical address within 'X' does not
>> matter. What matters is the matching address within 'Y'
>>
>> We should do dma_pfn_offset conversion _only_ for the range it applies
>> to. Outside of it is not valid to apply it.
> 
> That much is agreed. For a physical address A in Y, phys_to_dma(A)
> should return the corresponding DMA address A' in X. What you're
> proposing is that for address B not in Y, phys_to_dma(B) should just
> return B, but my point is that even doing that is wrong, because there
> is no possible DMA address corresponding to B, so there is no valid
> value to return at all.

I agree on the phys_to_dma(). It should fail for addresses which does
not fall into any of the ranges.
It is just a that we in Linux don't have the concept atm for ranges, we
have only _one_ range which applies to every memory address.

I guess what I'm proposing is to _not_ apply the phys_to_dma() to the
DMA mask itself?

Or reverse check the dma-ranges against the dma_mask?

0x0 - 0x8000 0000 : direct mapped w/o pfn_offset
0x8000 0000 - 0xFFFF FFFF : mapped from 0x8 0000 0000 w/ pfn_offset

and this is the whole address range the DMA can address.

> Nobody's disputing that the current dma_direct_supported()
> implementation is broken for the case where ZONE_DMA itself is offset
> from PA 0; the more pressing question is why Christoph's diff, which was
> trying to take that into account, still didn't work.

I understand that this is a bit more complex than I interpret it, but
the k2g is broken and currently the simplest way to make it work is to
use the arm dma_ops in case the pfn_offset is not 0.
It will be easy to test dma-direct changes trying to address the issue
in hand, but will allow k2g to be usable at the same time.

A patch like I first proposed in:
https://lore.kernel.org/lkml/8eb68140-97b2-62ce-3e06-3761984aa5b1@ti.com/

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Christoph Hellwig Jan. 30, 2020, 7:53 a.m. UTC | #5
[skipping the DT bits, as I'm everything but an expert on that..]

On Mon, Jan 27, 2020 at 04:00:30PM +0200, Peter Ujfalusi wrote:
> I agree on the phys_to_dma(). It should fail for addresses which does
> not fall into any of the ranges.
> It is just a that we in Linux don't have the concept atm for ranges, we
> have only _one_ range which applies to every memory address.

what does atm here mean?

We have needed multi-range support for quite a while, as common broadcom
SOCs do need it.  So patches for that are welcome at least from the
DMA layer perspective (kinda similar to your pseudo code earlier)

> > Nobody's disputing that the current dma_direct_supported()
> > implementation is broken for the case where ZONE_DMA itself is offset
> > from PA 0; the more pressing question is why Christoph's diff, which was
> > trying to take that into account, still didn't work.
> 
> I understand that this is a bit more complex than I interpret it, but
> the k2g is broken and currently the simplest way to make it work is to
> use the arm dma_ops in case the pfn_offset is not 0.
> It will be easy to test dma-direct changes trying to address the issue
> in hand, but will allow k2g to be usable at the same time.

Well, using the legacy arm dma ops means we can't use swiotlb if there
is an offset, which is also wrong for lots of common cases, including
the Rpi 4.  I'm still curious why my patch didn't work, as I thought
it should.  We'll need to find the minimum change to make it work
for now without switching ops, even if it isn't the correct one, and
then work from there.
Peter Ujfalusi Jan. 30, 2020, 1:04 p.m. UTC | #6
On 30/01/2020 9.53, Christoph Hellwig wrote:
> [skipping the DT bits, as I'm everything but an expert on that..]
> 
> On Mon, Jan 27, 2020 at 04:00:30PM +0200, Peter Ujfalusi wrote:
>> I agree on the phys_to_dma(). It should fail for addresses which does
>> not fall into any of the ranges.
>> It is just a that we in Linux don't have the concept atm for ranges, we
>> have only _one_ range which applies to every memory address.
> 
> what does atm here mean?

struct device have only single dma_pfn_offset, one can not have multiple
ranges defined. If we have then only the first is taken and the physical
address and dma address is discarded, only the dma_pfn_offset is stored
and used.

> We have needed multi-range support for quite a while, as common broadcom
> SOCs do need it.  So patches for that are welcome at least from the
> DMA layer perspective (kinda similar to your pseudo code earlier)

But do they have dma_pfn_offset != 0?

>>> Nobody's disputing that the current dma_direct_supported()
>>> implementation is broken for the case where ZONE_DMA itself is offset
>>> from PA 0; the more pressing question is why Christoph's diff, which was
>>> trying to take that into account, still didn't work.
>>
>> I understand that this is a bit more complex than I interpret it, but
>> the k2g is broken and currently the simplest way to make it work is to
>> use the arm dma_ops in case the pfn_offset is not 0.
>> It will be easy to test dma-direct changes trying to address the issue
>> in hand, but will allow k2g to be usable at the same time.
> 
> Well, using the legacy arm dma ops means we can't use swiotlb if there
> is an offset, which is also wrong for lots of common cases, including
> the Rpi 4.  I'm still curious why my patch didn't work, as I thought
> it should.

The dma_pfn_offset is _still_ applied to the mask we are trying to set
(and validate) via dma-direct.

in dma_direct_supported:
mask == 0xffffffff // DMA_BIT_MASK(32)
dev->dma_pfn_offset == 0x780000 // Keystone 2
min_mask == 0xffffff

tmp_mask = __phys_to_dma(dev, min_mask);
tmp_mask == 0xff880ffffff

within __phys_to_dma() converts the min_mask to pfn and calls
pfn_to_dma() which does:
if (dev)
	pfn -= dev->dma_pfn_offset;

the returned pfn is then converted back to address.

the mask (0xffffffff) is well under the tmp_mask (0xff880ffffff) so
dma_direct_supported() will tell us that DMA is not supported for
DMA_BIT_MASK(32), which is not true, because DMA is supporting 32 bits.

> We'll need to find the minimum change to make it work
> for now without switching ops, even if it isn't the correct one, and
> then work from there.

Sure, but can we fix the regression by reverting to arm_ops for now only
if dma_pfn_offset is not 0? It used to work fine in the past at least it
appeared to work on K2 platforms.

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Christoph Hellwig Jan. 30, 2020, 4:40 p.m. UTC | #7
On Thu, Jan 30, 2020 at 03:04:37PM +0200, Peter Ujfalusi via iommu wrote:
> On 30/01/2020 9.53, Christoph Hellwig wrote:
> > [skipping the DT bits, as I'm everything but an expert on that..]
> > 
> > On Mon, Jan 27, 2020 at 04:00:30PM +0200, Peter Ujfalusi wrote:
> >> I agree on the phys_to_dma(). It should fail for addresses which does
> >> not fall into any of the ranges.
> >> It is just a that we in Linux don't have the concept atm for ranges, we
> >> have only _one_ range which applies to every memory address.
> > 
> > what does atm here mean?
> 
> struct device have only single dma_pfn_offset, one can not have multiple
> ranges defined. If we have then only the first is taken and the physical
> address and dma address is discarded, only the dma_pfn_offset is stored
> and used.
> 
> > We have needed multi-range support for quite a while, as common broadcom
> > SOCs do need it.  So patches for that are welcome at least from the
> > DMA layer perspective (kinda similar to your pseudo code earlier)
> 
> But do they have dma_pfn_offset != 0?

Well, with that I mean multiple ranges with different offsets.  Take
a look at arch/mips/bmips/dma.c:__phys_to_dma() and friends.  This
is an existing implementation for mips, but there are arm and arm64
SOCs using the same logic on the market as well, and we'll want to
support them eventually.

> The dma_pfn_offset is _still_ applied to the mask we are trying to set
> (and validate) via dma-direct.

And for the general case that is exactly the right thing to do, we
just need to deal with really odd ZONE_DMA placements like yours.

> > We'll need to find the minimum change to make it work
> > for now without switching ops, even if it isn't the correct one, and
> > then work from there.
> 
> Sure, but can we fix the regression by reverting to arm_ops for now only
> if dma_pfn_offset is not 0? It used to work fine in the past at least it
> appeared to work on K2 platforms.

But that will cause yet another regression in what we have just fixed
with using the generic direct ops, at which points it turns into who
screams louder.

For now I'm tempted to throw something like this in, which is a bit
of a hack, but actually 100% matches what various architectures have
historically done:


diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 6af7ae83c4ad..6ba9ee6e20bd 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -482,6 +482,9 @@ int dma_direct_supported(struct device *dev, u64 mask)
 {
 	u64 min_mask;
 
+	if (mask >= DMA_BIT_MASK(32))
+		return 1;
+
 	if (IS_ENABLED(CONFIG_ZONE_DMA))
 		min_mask = DMA_BIT_MASK(zone_dma_bits);
 	else
Peter Ujfalusi Jan. 31, 2020, 1:59 p.m. UTC | #8
Hi Christoph,

On 30/01/2020 18.40, Christoph Hellwig wrote:
> On Thu, Jan 30, 2020 at 03:04:37PM +0200, Peter Ujfalusi via iommu wrote:
>> On 30/01/2020 9.53, Christoph Hellwig wrote:
>>> [skipping the DT bits, as I'm everything but an expert on that..]
>>>
>>> On Mon, Jan 27, 2020 at 04:00:30PM +0200, Peter Ujfalusi wrote:
>>>> I agree on the phys_to_dma(). It should fail for addresses which does
>>>> not fall into any of the ranges.
>>>> It is just a that we in Linux don't have the concept atm for ranges, we
>>>> have only _one_ range which applies to every memory address.
>>>
>>> what does atm here mean?
>>
>> struct device have only single dma_pfn_offset, one can not have multiple
>> ranges defined. If we have then only the first is taken and the physical
>> address and dma address is discarded, only the dma_pfn_offset is stored
>> and used.
>>
>>> We have needed multi-range support for quite a while, as common broadcom
>>> SOCs do need it.  So patches for that are welcome at least from the
>>> DMA layer perspective (kinda similar to your pseudo code earlier)
>>
>> But do they have dma_pfn_offset != 0?
> 
> Well, with that I mean multiple ranges with different offsets.  Take
> a look at arch/mips/bmips/dma.c:__phys_to_dma() and friends.  This
> is an existing implementation for mips, but there are arm and arm64
> SOCs using the same logic on the market as well, and we'll want to
> support them eventually.

I see. My PoC patch was not too off then ;)
So the plan is to have a generic implementation for all of the
architecture, right?

>> The dma_pfn_offset is _still_ applied to the mask we are trying to set
>> (and validate) via dma-direct.
> 
> And for the general case that is exactly the right thing to do, we
> just need to deal with really odd ZONE_DMA placements like yours.

I'm still not convinced, the point of the DMA mask, at least how I see
it, to check that the dma address can be handled by the device (DMA,
peripheral with built in DMA, etc), it is not against physical address.
Doing phys_to_dma() on the mask from the dma_set_mask() is just wrong.

>>> We'll need to find the minimum change to make it work
>>> for now without switching ops, even if it isn't the correct one, and
>>> then work from there.
>>
>> Sure, but can we fix the regression by reverting to arm_ops for now only
>> if dma_pfn_offset is not 0? It used to work fine in the past at least it
>> appeared to work on K2 platforms.
> 
> But that will cause yet another regression in what we have just fixed
> with using the generic direct ops, at which points it turns into who
> screams louder.

Hehe, I see.
I genuinely curious why k2 platform worked just fine with LPAE (it needs
it), but guys had issues with LPAE on dra7/am5.
The fix for dra7/am5 broke k2.
As far as I can see the main (only) difference is that k2 have
dma_pfn_offset = 0x780000, while dra7/am5 have it 0 (really direct mapping).

> For now I'm tempted to throw something like this in, which is a bit
> of a hack, but actually 100% matches what various architectures have
> historically done:
> 
> 
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 6af7ae83c4ad..6ba9ee6e20bd 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -482,6 +482,9 @@ int dma_direct_supported(struct device *dev, u64 mask)
>  {
>  	u64 min_mask;
>  
> +	if (mask >= DMA_BIT_MASK(32))
> +		return 1;
> +

Right, so skipping phys_to_dma() for the mask and believing that it will
work..

It does: audio and dmatest memcpy tests are just fine with this, MMC
also probed with ADMA enabled.

As far as I can tell it works as well as falling back to the old arm ops
in case of LPAE && dma_pfn_offset != 0

Fwiw:
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>

Would yo be comfortable to send apply this patch to mainline with
Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE
configs")

So it gets picked for stable kernels as well?

>  	if (IS_ENABLED(CONFIG_ZONE_DMA))
>  		min_mask = DMA_BIT_MASK(zone_dma_bits);
>  	else
> 

Thank you,
- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Peter Ujfalusi Jan. 31, 2020, 2 p.m. UTC | #9
Hi Christoph,

On 30/01/2020 18.40, Christoph Hellwig wrote:
> On Thu, Jan 30, 2020 at 03:04:37PM +0200, Peter Ujfalusi via iommu wrote:
>> On 30/01/2020 9.53, Christoph Hellwig wrote:
>>> [skipping the DT bits, as I'm everything but an expert on that..]
>>>
>>> On Mon, Jan 27, 2020 at 04:00:30PM +0200, Peter Ujfalusi wrote:
>>>> I agree on the phys_to_dma(). It should fail for addresses which does
>>>> not fall into any of the ranges.
>>>> It is just a that we in Linux don't have the concept atm for ranges, we
>>>> have only _one_ range which applies to every memory address.
>>>
>>> what does atm here mean?
>>
>> struct device have only single dma_pfn_offset, one can not have multiple
>> ranges defined. If we have then only the first is taken and the physical
>> address and dma address is discarded, only the dma_pfn_offset is stored
>> and used.
>>
>>> We have needed multi-range support for quite a while, as common broadcom
>>> SOCs do need it.  So patches for that are welcome at least from the
>>> DMA layer perspective (kinda similar to your pseudo code earlier)
>>
>> But do they have dma_pfn_offset != 0?
> 
> Well, with that I mean multiple ranges with different offsets.  Take
> a look at arch/mips/bmips/dma.c:__phys_to_dma() and friends.  This
> is an existing implementation for mips, but there are arm and arm64
> SOCs using the same logic on the market as well, and we'll want to
> support them eventually.

I see. My PoC patch was not too off then ;)
So the plan is to have a generic implementation for all of the
architecture, right?

>> The dma_pfn_offset is _still_ applied to the mask we are trying to set
>> (and validate) via dma-direct.
> 
> And for the general case that is exactly the right thing to do, we
> just need to deal with really odd ZONE_DMA placements like yours.

I'm still not convinced, the point of the DMA mask, at least how I see
it, to check that the dma address can be handled by the device (DMA,
peripheral with built in DMA, etc), it is not against physical address.
Doing phys_to_dma() on the mask from the dma_set_mask() is just wrong.

>>> We'll need to find the minimum change to make it work
>>> for now without switching ops, even if it isn't the correct one, and
>>> then work from there.
>>
>> Sure, but can we fix the regression by reverting to arm_ops for now only
>> if dma_pfn_offset is not 0? It used to work fine in the past at least it
>> appeared to work on K2 platforms.
> 
> But that will cause yet another regression in what we have just fixed
> with using the generic direct ops, at which points it turns into who
> screams louder.

Hehe, I see.
I genuinely curious why k2 platform worked just fine with LPAE (it needs
it), but guys had issues with LPAE on dra7/am5.
The fix for dra7/am5 broke k2.
As far as I can see the main (only) difference is that k2 have
dma_pfn_offset = 0x780000, while dra7/am5 have it 0 (really direct mapping).

> For now I'm tempted to throw something like this in, which is a bit
> of a hack, but actually 100% matches what various architectures have
> historically done:
> 
> 
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 6af7ae83c4ad..6ba9ee6e20bd 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -482,6 +482,9 @@ int dma_direct_supported(struct device *dev, u64 mask)
>  {
>  	u64 min_mask;
>  
> +	if (mask >= DMA_BIT_MASK(32))
> +		return 1;
> +

Right, so skipping phys_to_dma() for the mask and believing that it will
work..

It does: audio and dmatest memcpy tests are just fine with this, MMC
also probed with ADMA enabled.

As far as I can tell it works as well as falling back to the old arm ops
in case of LPAE && dma_pfn_offset != 0

Fwiw:
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>

Would you be comfortable to send apply this patch to mainline with
Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE
configs")

So it gets picked for stable kernels as well?

>  	if (IS_ENABLED(CONFIG_ZONE_DMA))
>  		min_mask = DMA_BIT_MASK(zone_dma_bits);
>  	else
> 

Thank you,
- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Peter Ujfalusi Jan. 31, 2020, 2 p.m. UTC | #10
Hi Christoph,

On 30/01/2020 18.40, Christoph Hellwig wrote:
> On Thu, Jan 30, 2020 at 03:04:37PM +0200, Peter Ujfalusi via iommu wrote:
>> On 30/01/2020 9.53, Christoph Hellwig wrote:
>>> [skipping the DT bits, as I'm everything but an expert on that..]
>>>
>>> On Mon, Jan 27, 2020 at 04:00:30PM +0200, Peter Ujfalusi wrote:
>>>> I agree on the phys_to_dma(). It should fail for addresses which does
>>>> not fall into any of the ranges.
>>>> It is just a that we in Linux don't have the concept atm for ranges, we
>>>> have only _one_ range which applies to every memory address.
>>>
>>> what does atm here mean?
>>
>> struct device have only single dma_pfn_offset, one can not have multiple
>> ranges defined. If we have then only the first is taken and the physical
>> address and dma address is discarded, only the dma_pfn_offset is stored
>> and used.
>>
>>> We have needed multi-range support for quite a while, as common broadcom
>>> SOCs do need it.  So patches for that are welcome at least from the
>>> DMA layer perspective (kinda similar to your pseudo code earlier)
>>
>> But do they have dma_pfn_offset != 0?
> 
> Well, with that I mean multiple ranges with different offsets.  Take
> a look at arch/mips/bmips/dma.c:__phys_to_dma() and friends.  This
> is an existing implementation for mips, but there are arm and arm64
> SOCs using the same logic on the market as well, and we'll want to
> support them eventually.

I see. My PoC patch was not too off then ;)
So the plan is to have a generic implementation for all of the
architecture, right?

>> The dma_pfn_offset is _still_ applied to the mask we are trying to set
>> (and validate) via dma-direct.
> 
> And for the general case that is exactly the right thing to do, we
> just need to deal with really odd ZONE_DMA placements like yours.

I'm still not convinced, the point of the DMA mask, at least how I see
it, to check that the dma address can be handled by the device (DMA,
peripheral with built in DMA, etc), it is not against physical address.
Doing phys_to_dma() on the mask from the dma_set_mask() is just wrong.

>>> We'll need to find the minimum change to make it work
>>> for now without switching ops, even if it isn't the correct one, and
>>> then work from there.
>>
>> Sure, but can we fix the regression by reverting to arm_ops for now only
>> if dma_pfn_offset is not 0? It used to work fine in the past at least it
>> appeared to work on K2 platforms.
> 
> But that will cause yet another regression in what we have just fixed
> with using the generic direct ops, at which points it turns into who
> screams louder.

Hehe, I see.
I genuinely curious why k2 platform worked just fine with LPAE (it needs
it), but guys had issues with LPAE on dra7/am5.
The fix for dra7/am5 broke k2.
As far as I can see the main (only) difference is that k2 have
dma_pfn_offset = 0x780000, while dra7/am5 have it 0 (really direct mapping).

> For now I'm tempted to throw something like this in, which is a bit
> of a hack, but actually 100% matches what various architectures have
> historically done:
> 
> 
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 6af7ae83c4ad..6ba9ee6e20bd 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -482,6 +482,9 @@ int dma_direct_supported(struct device *dev, u64 mask)
>  {
>  	u64 min_mask;
>  
> +	if (mask >= DMA_BIT_MASK(32))
> +		return 1;
> +

Right, so skipping phys_to_dma() for the mask and believing that it will
work..

It does: audio and dmatest memcpy tests are just fine with this, MMC
also probed with ADMA enabled.

As far as I can tell it works as well as falling back to the old arm ops
in case of LPAE && dma_pfn_offset != 0

Fwiw:
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>

Would you be comfortable to send this patch for mainline with
Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE
configs")

So it gets picked for stable kernels as well?

>  	if (IS_ENABLED(CONFIG_ZONE_DMA))
>  		min_mask = DMA_BIT_MASK(zone_dma_bits);
>  	else
> 

Thank you,
- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Christoph Hellwig Feb. 3, 2020, 5:08 p.m. UTC | #11
On Fri, Jan 31, 2020 at 04:00:20PM +0200, Peter Ujfalusi wrote:
> I see. My PoC patch was not too off then ;)
> So the plan is to have a generic implementation for all of the
> architecture, right?

І don't know of a concrete plan, but that's defintively what I'd like
to see.

> >> The dma_pfn_offset is _still_ applied to the mask we are trying to set
> >> (and validate) via dma-direct.
> > 
> > And for the general case that is exactly the right thing to do, we
> > just need to deal with really odd ZONE_DMA placements like yours.
> 
> I'm still not convinced, the point of the DMA mask, at least how I see
> it, to check that the dma address can be handled by the device (DMA,
> peripheral with built in DMA, etc), it is not against physical address.
> Doing phys_to_dma() on the mask from the dma_set_mask() is just wrong.

We have a translation between the addresses that the device sees, and
those that the CPU sees.  The device can address N bits of address space
as seen from the device.  The addresses encoded in max_pfn,
zone_dma_bits or the harcoded 32 in the zone dma 32 case are CPU address.
So no, we can't blindly compare those.


> > But that will cause yet another regression in what we have just fixed
> > with using the generic direct ops, at which points it turns into who
> > screams louder.
> 
> Hehe, I see.
> I genuinely curious why k2 platform worked just fine with LPAE (it needs
> it), but guys had issues with LPAE on dra7/am5.
> The fix for dra7/am5 broke k2.
> As far as I can see the main (only) difference is that k2 have
> dma_pfn_offset = 0x780000, while dra7/am5 have it 0 (really direct mapping).

How much memory does the platform have?  Once you are above 32-bits worth
of address space devices with a 32-bit DMA mask can't address all the
memory.  Now if k2 for example only had less than 4G of memory, but at
addresses over 4G, and the offset compensates for the offset of the DRAM
it works without bounce buffering and thus didn't need swiotlb.  But any
platform that has DRAM that is not addressable will need swiotlb.

> >  	u64 min_mask;
> >  
> > +	if (mask >= DMA_BIT_MASK(32))
> > +		return 1;
> > +
> 
> Right, so skipping phys_to_dma() for the mask and believing that it will
> work..
> 
> It does: audio and dmatest memcpy tests are just fine with this, MMC
> also probed with ADMA enabled.
> 
> As far as I can tell it works as well as falling back to the old arm ops
> in case of LPAE && dma_pfn_offset != 0
> 
> Fwiw:
> Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
> 
> Would you be comfortable to send this patch for mainline with
> Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE
> configs")

That is the big question.  I don't feel overly comfortable as I've been
trying to get this right, but so far it seems like the least bad option.
I'll send out a proper patch with updated comments and will see what
people think.
Peter Ujfalusi Feb. 5, 2020, 10:19 a.m. UTC | #12
On 03/02/2020 19.08, Christoph Hellwig wrote:
> On Fri, Jan 31, 2020 at 04:00:20PM +0200, Peter Ujfalusi wrote:
>> I see. My PoC patch was not too off then ;)
>> So the plan is to have a generic implementation for all of the
>> architecture, right?
> 
> І don't know of a concrete plan, but that's defintively what I'd like
> to see.
> 
>>>> The dma_pfn_offset is _still_ applied to the mask we are trying to set
>>>> (and validate) via dma-direct.
>>>
>>> And for the general case that is exactly the right thing to do, we
>>> just need to deal with really odd ZONE_DMA placements like yours.
>>
>> I'm still not convinced, the point of the DMA mask, at least how I see
>> it, to check that the dma address can be handled by the device (DMA,
>> peripheral with built in DMA, etc), it is not against physical address.
>> Doing phys_to_dma() on the mask from the dma_set_mask() is just wrong.
> 
> We have a translation between the addresses that the device sees, and
> those that the CPU sees.  The device can address N bits of address space
> as seen from the device.  The addresses encoded in max_pfn,
> zone_dma_bits or the harcoded 32 in the zone dma 32 case are CPU address.
> So no, we can't blindly compare those.

Right, thanks for the explanation.

>>> But that will cause yet another regression in what we have just fixed
>>> with using the generic direct ops, at which points it turns into who
>>> screams louder.
>>
>> Hehe, I see.
>> I genuinely curious why k2 platform worked just fine with LPAE (it needs
>> it), but guys had issues with LPAE on dra7/am5.
>> The fix for dra7/am5 broke k2.
>> As far as I can see the main (only) difference is that k2 have
>> dma_pfn_offset = 0x780000, while dra7/am5 have it 0 (really direct mapping).
> 
> How much memory does the platform have?

The boards which is bootable in mainline have maximum of 2G, there might
be custom boards with more RAM, but I'm not aware of them.

> Once you are above 32-bits worth
> of address space devices with a 32-bit DMA mask can't address all the
> memory.  Now if k2 for example only had less than 4G of memory, but at
> addresses over 4G, and the offset compensates for the offset of the DRAM
> it works without bounce buffering and thus didn't need swiotlb.  But any
> platform that has DRAM that is not addressable will need swiotlb.

I see, since we have maximum of 2G, which is mirrored at 0x80000000 for
devices we never needed the assistance from swiotlb for bounce buffering
and that's why the arm ops worked fine.

> 
>>>  	u64 min_mask;
>>>  
>>> +	if (mask >= DMA_BIT_MASK(32))
>>> +		return 1;
>>> +
>>
>> Right, so skipping phys_to_dma() for the mask and believing that it will
>> work..
>>
>> It does: audio and dmatest memcpy tests are just fine with this, MMC
>> also probed with ADMA enabled.
>>
>> As far as I can tell it works as well as falling back to the old arm ops
>> in case of LPAE && dma_pfn_offset != 0
>>
>> Fwiw:
>> Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
>>
>> Would you be comfortable to send this patch for mainline with
>> Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE
>> configs")
> 
> That is the big question.  I don't feel overly comfortable as I've been
> trying to get this right, but so far it seems like the least bad option.
> I'll send out a proper patch with updated comments and will see what
> people think.

I understand and thank you for the patch, it makes k2 platform working
again!

- Péter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
diff mbox series

Patch

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index bdd80ddbca34..9bff6ad2d8c8 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -33,10 +33,31 @@  static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
  * addresses. They must not be used by drivers.
  */
 #ifndef __arch_pfn_to_dma
+
+static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
+						     phys_addr_t paddr)
+{
+	if (paddr >= dev->dma_ranges.paddr &&
+	    paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
+		return dev->dma_ranges.pfn_offset;
+
+	return 0;
+}
+
+static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
+						     dma_addr_t dma_addr)
+{
+	if (dma_addr >= dev->dma_ranges.dma_addr &&
+	    dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
+		return dev->dma_ranges.pfn_offset;
+
+	return 0;
+}
+
 static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
 {
 	if (dev)
-		pfn -= dev->dma_pfn_offset;
+		pfn -= __phys_to_dma_pfn_offset(dev, __pfn_to_phys(pfn));
 	return (dma_addr_t)__pfn_to_bus(pfn);
 }
 
@@ -45,7 +66,7 @@  static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
 	unsigned long pfn = __bus_to_pfn(addr);
 
 	if (dev)
-		pfn += dev->dma_pfn_offset;
+		pfn += __dma_to_phys_pfn_offset(dev, addr);
 
 	return pfn;
 }
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 27203bfd0b22..07a8cc1a7d7f 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -105,7 +105,7 @@  int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 		if (!force_dma)
 			return ret == -ENODEV ? 0 : ret;
 
-		dma_addr = offset = 0;
+		dma_addr = offset = paddr = 0;
 	} else {
 		offset = PFN_DOWN(paddr - dma_addr);
 
@@ -144,6 +144,11 @@  int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 
 	dev->dma_pfn_offset = offset;
 
+	dev->dma_ranges.paddr = paddr;
+	dev->dma_ranges.dma_addr = dma_addr;
+	dev->dma_ranges.size = size;
+	dev->dma_ranges.pfn_offset = offset;
+
 	/*
 	 * Limit coherent and dma mask based on size and default mask
 	 * set by the driver.
diff --git a/include/linux/device.h b/include/linux/device.h
index ce6db68c3f29..57006b51a989 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -293,6 +293,13 @@  struct device_dma_parameters {
 	unsigned long segment_boundary_mask;
 };
 
+struct dma_ranges {
+	u64 paddr;
+	u64 dma_addr;
+	u64 size;
+	unsigned long pfn_offset;
+};
+
 /**
  * struct device_connection - Device Connection Descriptor
  * @fwnode: The device node of the connected device
@@ -581,6 +588,7 @@  struct device {
 					     allocations such descriptors. */
 	u64		bus_dma_limit;	/* upstream dma constraint */
 	unsigned long	dma_pfn_offset;
+	struct dma_ranges dma_ranges;
 
 	struct device_dma_parameters *dma_parms;
 
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 24b8684aa21d..4a46a15945ea 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -11,18 +11,47 @@  extern unsigned int zone_dma_bits;
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
 #else
+
+static inline unsigned long __phys_to_dma_pfn_offset(struct device *dev,
+						     phys_addr_t paddr)
+{
+	if (!dev)
+		return 0;
+
+	if (paddr >= dev->dma_ranges.paddr &&
+	    paddr <= (dev->dma_ranges.paddr + dev->dma_ranges.size))
+		return dev->dma_ranges.pfn_offset
+
+	return 0;
+}
+
+static inline unsigned long __dma_to_phys_pfn_offset(struct device *dev,
+						     dma_addr_t dma_addr)
+{
+	if (!dev)
+		return 0;
+
+	if (dma_addr >= dev->dma_ranges.dma_addr &&
+	    dma_addr <= (dev->dma_ranges.dma_addr + dev->dma_ranges.size))
+		return dev->dma_ranges.pfn_offset
+
+	return 0;
+}
+
 static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	dma_addr_t dev_addr = (dma_addr_t)paddr;
+	unsigned long offset = __phys_to_dma_pfn_offset(dev, paddr);
 
-	return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+	return dev_addr - ((dma_addr_t)offset << PAGE_SHIFT);
 }
 
 static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 {
 	phys_addr_t paddr = (phys_addr_t)dev_addr;
+	unsigned long offset = __dma_to_phys_pfn_offset(dev, dev_addr);
 
-	return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+	return paddr + ((phys_addr_t)offset << PAGE_SHIFT);
 }
 #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
 
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 551b0eb7028a..7a68fd09f5d0 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -31,10 +31,13 @@  static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *de
 static inline dma_addr_t dma_get_device_base(struct device *dev,
 					     struct dma_coherent_mem * mem)
 {
-	if (mem->use_dev_dma_pfn_offset)
-		return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
-	else
+	if (mem->use_dev_dma_pfn_offset) {
+		unsigned long offset = __phys_to_dma_pfn_offset(dev,
+						__pfn_to_phys(mem->pfn_base));
+		return (mem->pfn_base - offset) << PAGE_SHIFT;
+	} else {
 		return mem->device_base;
+	}
 }
 
 static int dma_init_coherent_memory(phys_addr_t phys_addr,