diff mbox

[v3] pxa2xx_spi: fix memory corruption

Message ID 1310311099-24638-1-git-send-email-anarsoul@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vasily Khoruzhick July 10, 2011, 3:18 p.m. UTC
pxa2xx_spi_probe allocates struct driver_data and null_dma_buf
at same time via spi_alloc_master(), but then calculates
null_dma_buf pointer incorrectly, and it causes memory corruption
later if DMA usage is enabled.

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
---
v2: - add u8 __null_dma_buf[16] to the end of driver_data structure
    and use it as null_dma_buf after alignment.
    - use PTR_ALIGN instead of ALIGN
v3: - drop (u8 *) cast, use & operator instead, change array name
 drivers/spi/pxa2xx_spi.c |    9 +++++----
 1 files changed, 5 insertions(+), 4 deletions(-)

Comments

Vasily Khoruzhick July 14, 2011, 12:17 p.m. UTC | #1
On Sunday 10 July 2011 18:18:19 Vasily Khoruzhick wrote:
> pxa2xx_spi_probe allocates struct driver_data and null_dma_buf
> at same time via spi_alloc_master(), but then calculates
> null_dma_buf pointer incorrectly, and it causes memory corruption
> later if DMA usage is enabled.

Ping?

> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
> ---
> v2: - add u8 __null_dma_buf[16] to the end of driver_data structure
>     and use it as null_dma_buf after alignment.
>     - use PTR_ALIGN instead of ALIGN
> v3: - drop (u8 *) cast, use & operator instead, change array name
>  drivers/spi/pxa2xx_spi.c |    9 +++++----
>  1 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
> index dc25bee..b25fe27 100644
> --- a/drivers/spi/pxa2xx_spi.c
> +++ b/drivers/spi/pxa2xx_spi.c
> @@ -106,6 +106,7 @@ struct driver_data {
>  	int rx_channel;
>  	int tx_channel;
>  	u32 *null_dma_buf;
> +	u8 null_dma_buf_unaligned[16];
> 
>  	/* SSP register addresses */
>  	void __iomem *ioaddr;
> @@ -1543,8 +1544,8 @@ static int __devinit pxa2xx_spi_probe(struct
> platform_device *pdev) return -ENODEV;
>  	}
> 
> -	/* Allocate master with space for drv_data and null dma buffer */
> -	master = spi_alloc_master(dev, sizeof(struct driver_data) + 16);
> +	/* Allocate master with space for drv_data */
> +	master = spi_alloc_master(dev, sizeof(struct driver_data));
>  	if (!master) {
>  		dev_err(&pdev->dev, "cannot alloc spi_master\n");
>  		pxa_ssp_free(ssp);
> @@ -1569,8 +1570,8 @@ static int __devinit pxa2xx_spi_probe(struct
> platform_device *pdev) master->transfer = transfer;
> 
>  	drv_data->ssp_type = ssp->type;
> -	drv_data->null_dma_buf = (u32 *)ALIGN((u32)(drv_data +
> -						sizeof(struct driver_data)), 8);
> +	drv_data->null_dma_buf =
> +		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
> 
>  	drv_data->ioaddr = ssp->mmio_base;
>  	drv_data->ssdr_physical = ssp->phys_base + SSDR;
Marek Vasut July 14, 2011, 12:21 p.m. UTC | #2
> On Sunday 10 July 2011 18:18:19 Vasily Khoruzhick wrote:
> > pxa2xx_spi_probe allocates struct driver_data and null_dma_buf
> > at same time via spi_alloc_master(), but then calculates
> > null_dma_buf pointer incorrectly, and it causes memory corruption
> > later if DMA usage is enabled.
> 
> Ping?

Pong!

> 
> > Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
> > ---
> > v2: - add u8 __null_dma_buf[16] to the end of driver_data structure
> > and use it as null_dma_buf after alignment.
> > - use PTR_ALIGN instead of ALIGN
> > v3: - drop (u8 *) cast, use & operator instead, change array name
> > drivers/spi/pxa2xx_spi.c |       9 +++++----
> > 1 files changed, 5 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
> > index dc25bee..b25fe27 100644
> > --- a/drivers/spi/pxa2xx_spi.c
> > +++ b/drivers/spi/pxa2xx_spi.c
> > @@ -106,6 +106,7 @@ struct driver_data {
> >     int rx_channel;
> >     int tx_channel;
> >     u32 *null_dma_buf;
> > +    u8 null_dma_buf_unaligned[16];
> > 
> >     /* SSP register addresses */
> >     void __iomem *ioaddr;
> > @@ -1543,8 +1544,8 @@ static int __devinit pxa2xx_spi_probe(struct
> > platform_device *pdev) return -ENODEV;
> >     }
> > 
> > -    /* Allocate master with space for drv_data and null dma buffer */
> > -    master = spi_alloc_master(dev, sizeof(struct driver_data) + 16);
> > +    /* Allocate master with space for drv_data */
> > +    master = spi_alloc_master(dev, sizeof(struct driver_data));
> >     if (!master) {
> >         dev_err(&pdev->dev, "cannot alloc spi_master\n");
> >         pxa_ssp_free(ssp);
> > @@ -1569,8 +1570,8 @@ static int __devinit pxa2xx_spi_probe(struct
> > platform_device *pdev) master->transfer = transfer;
> > 
> >     drv_data->ssp_type = ssp->type;
> > -    drv_data->null_dma_buf = (u32 *)ALIGN((u32)(drv_data +
> > -                        sizeof(struct driver_data)), 8);
> > +    drv_data->null_dma_buf =
> > +        (u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
> > 
> >     drv_data->ioaddr = ssp->mmio_base;
> >     drv_data->ssdr_physical = ssp->phys_base + SSDR;
Grant Likely July 15, 2011, 2:53 a.m. UTC | #3
On Sun, Jul 10, 2011 at 06:18:19PM +0300, Vasily Khoruzhick wrote:
> pxa2xx_spi_probe allocates struct driver_data and null_dma_buf
> at same time via spi_alloc_master(), but then calculates
> null_dma_buf pointer incorrectly, and it causes memory corruption
> later if DMA usage is enabled.
> 
> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
> ---
> v2: - add u8 __null_dma_buf[16] to the end of driver_data structure
>     and use it as null_dma_buf after alignment.
>     - use PTR_ALIGN instead of ALIGN
> v3: - drop (u8 *) cast, use & operator instead, change array name
>  drivers/spi/pxa2xx_spi.c |    9 +++++----
>  1 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
> index dc25bee..b25fe27 100644
> --- a/drivers/spi/pxa2xx_spi.c
> +++ b/drivers/spi/pxa2xx_spi.c
> @@ -106,6 +106,7 @@ struct driver_data {
>  	int rx_channel;
>  	int tx_channel;
>  	u32 *null_dma_buf;
> +	u8 null_dma_buf_unaligned[16];

Don't dma buffers need to be cache-line aligned?  How large is the
actual transfer?  Using the __aligned() or __cacheline_aligned
attribute is the correct way to make sure you've got a data buffer
that can be used for DMA mixed with other stuff.  Then you don't need
to fool around with PTR_ALIGN or anything.

g.

>  
>  	/* SSP register addresses */
>  	void __iomem *ioaddr;
> @@ -1543,8 +1544,8 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
>  		return -ENODEV;
>  	}
>  
> -	/* Allocate master with space for drv_data and null dma buffer */
> -	master = spi_alloc_master(dev, sizeof(struct driver_data) + 16);
> +	/* Allocate master with space for drv_data */
> +	master = spi_alloc_master(dev, sizeof(struct driver_data));
>  	if (!master) {
>  		dev_err(&pdev->dev, "cannot alloc spi_master\n");
>  		pxa_ssp_free(ssp);
> @@ -1569,8 +1570,8 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
>  	master->transfer = transfer;
>  
>  	drv_data->ssp_type = ssp->type;
> -	drv_data->null_dma_buf = (u32 *)ALIGN((u32)(drv_data +
> -						sizeof(struct driver_data)), 8);
> +	drv_data->null_dma_buf =
> +		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
>  
>  	drv_data->ioaddr = ssp->mmio_base;
>  	drv_data->ssdr_physical = ssp->phys_base + SSDR;
> -- 
> 1.7.5.rc3
>
Russell King - ARM Linux July 15, 2011, 8:12 a.m. UTC | #4
On Thu, Jul 14, 2011 at 08:53:31PM -0600, Grant Likely wrote:
> > +	u8 null_dma_buf_unaligned[16];
> 
> Don't dma buffers need to be cache-line aligned?  How large is the
> actual transfer?  Using the __aligned() or __cacheline_aligned
> attribute is the correct way to make sure you've got a data buffer
> that can be used for DMA mixed with other stuff.  Then you don't need
> to fool around with PTR_ALIGN or anything.

Err, did you not read the whole patch?

> > +	drv_data->null_dma_buf =
> > +		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
Grant Likely July 15, 2011, 7:50 p.m. UTC | #5
On Fri, Jul 15, 2011 at 09:12:42AM +0100, Russell King - ARM Linux wrote:
> On Thu, Jul 14, 2011 at 08:53:31PM -0600, Grant Likely wrote:
> > > +	u8 null_dma_buf_unaligned[16];
> > 
> > Don't dma buffers need to be cache-line aligned?  How large is the
> > actual transfer?  Using the __aligned() or __cacheline_aligned
> > attribute is the correct way to make sure you've got a data buffer
> > that can be used for DMA mixed with other stuff.  Then you don't need
> > to fool around with PTR_ALIGN or anything.
> 
> Err, did you not read the whole patch?
> 
> > > +	drv_data->null_dma_buf =
> > > +		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);

I read a lot of patches yesterday.  I may very well have missed
something.  I still don't see what you're referring to though.  If
the __aligned() was used inside the structure definition, then there
would be no need to have both the null_dma_buf pointer and the
null_dma_buf_unaligned buffer.  It would just be a correctly aligned
null_dma_buf.

Plus, I was asking about whether it was valid to use the structure as
allocated in DMA operations since it may very well end up in the same
cache line as the allocated structure.  Firstly, that could mean DMA
and the cache referencing the same memory which could cause
corruption, and secondly on ARM isn't it a problem to have DMA buffers
in memory that is also cache mapped?

That said, I've not read the entire driver, so I haven't checked
null_dma_buffer is used in a real dma operation.

g.
Russell King - ARM Linux July 15, 2011, 8:24 p.m. UTC | #6
On Fri, Jul 15, 2011 at 01:50:03PM -0600, Grant Likely wrote:
> On Fri, Jul 15, 2011 at 09:12:42AM +0100, Russell King - ARM Linux wrote:
> > On Thu, Jul 14, 2011 at 08:53:31PM -0600, Grant Likely wrote:
> > > > +	u8 null_dma_buf_unaligned[16];
> > > 
> > > Don't dma buffers need to be cache-line aligned?  How large is the
> > > actual transfer?  Using the __aligned() or __cacheline_aligned
> > > attribute is the correct way to make sure you've got a data buffer
> > > that can be used for DMA mixed with other stuff.  Then you don't need
> > > to fool around with PTR_ALIGN or anything.
> > 
> > Err, did you not read the whole patch?
> > 
> > > > +	drv_data->null_dma_buf =
> > > > +		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
> 
> I read a lot of patches yesterday.  I may very well have missed
> something.  I still don't see what you're referring to though.  If
> the __aligned() was used inside the structure definition, then there
> would be no need to have both the null_dma_buf pointer and the
> null_dma_buf_unaligned buffer.  It would just be a correctly aligned
> null_dma_buf.

That depends on the alignment guarantees from kmalloc, which may not be
8 bytes - we have this:

#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
#define ARCH_SLAB_MINALIGN 8
#endif

so presumably on !AEABI or arches < ARMv5, kmalloc _can_ return less than
8 byte alignments.  Which makes using __aligned() in the definition useless.

> Plus, I was asking about whether it was valid to use the structure as
> allocated in DMA operations since it may very well end up in the same
> cache line as the allocated structure.  Firstly, that could mean DMA
> and the cache referencing the same memory which could cause
> corruption, and secondly on ARM isn't it a problem to have DMA buffers
> in memory that is also cache mapped?

For the second point, that depends on whether you're talking about the
coherent stuff or the streaming stuff.

The coherent DMA API has entirely different semantics to streaming DMA API.
The coherent DMA API allows for simultaneous access to the buffer by both
the DMA device and the host CPU.

The streaming DMA API only allows exclusive access by either the DMA device
or the host CPU.

Therefore, with the streaming DMA API, the only thing that's required is
to ensure that data is visible in some manner to the DMA device.  If the
DMA device can read from the CPU cache, then probably nothing's required.
If not, then the data must be evicted from as many levels of cache that
are necessary to make it visible.  Conversely, for DMA writes, what
matters is the visibility of the data to the host CPU.

That approach does not work with the coherent DMA API.  Take a network
driver TX ring.  Consider the effect of the following series of actions
to see why it won't work:

- host CPU reads a word from the DMA buffer.  This brings in a whole
  cache line.
- network device writes to the previous descriptor (which overlaps the
  just read cache line) to change its status
- host CPU updates the next descriptor and writes the cache line back
  XXX overwriting the network device's write to the previous descriptor.

So, coherent DMA is special because there's no exclusiveness there.
Grant Likely July 15, 2011, 9:31 p.m. UTC | #7
On Fri, Jul 15, 2011 at 09:24:21PM +0100, Russell King - ARM Linux wrote:
> On Fri, Jul 15, 2011 at 01:50:03PM -0600, Grant Likely wrote:
> > On Fri, Jul 15, 2011 at 09:12:42AM +0100, Russell King - ARM Linux wrote:
> > > On Thu, Jul 14, 2011 at 08:53:31PM -0600, Grant Likely wrote:
> > > > > +	u8 null_dma_buf_unaligned[16];
> > > > 
> > > > Don't dma buffers need to be cache-line aligned?  How large is the
> > > > actual transfer?  Using the __aligned() or __cacheline_aligned
> > > > attribute is the correct way to make sure you've got a data buffer
> > > > that can be used for DMA mixed with other stuff.  Then you don't need
> > > > to fool around with PTR_ALIGN or anything.
> > > 
> > > Err, did you not read the whole patch?
> > > 
> > > > > +	drv_data->null_dma_buf =
> > > > > +		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
> > 
> > I read a lot of patches yesterday.  I may very well have missed
> > something.  I still don't see what you're referring to though.  If
> > the __aligned() was used inside the structure definition, then there
> > would be no need to have both the null_dma_buf pointer and the
> > null_dma_buf_unaligned buffer.  It would just be a correctly aligned
> > null_dma_buf.
> 
> That depends on the alignment guarantees from kmalloc, which may not be
> 8 bytes - we have this:
> 
> #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
> #define ARCH_SLAB_MINALIGN 8
> #endif
> 
> so presumably on !AEABI or arches < ARMv5, kmalloc _can_ return less than
> 8 byte alignments.  Which makes using __aligned() in the definition useless.
> 
> > Plus, I was asking about whether it was valid to use the structure as
> > allocated in DMA operations since it may very well end up in the same
> > cache line as the allocated structure.  Firstly, that could mean DMA
> > and the cache referencing the same memory which could cause
> > corruption, and secondly on ARM isn't it a problem to have DMA buffers
> > in memory that is also cache mapped?
> 
> For the second point, that depends on whether you're talking about the
> coherent stuff or the streaming stuff.
> 
> The coherent DMA API has entirely different semantics to streaming DMA API.
> The coherent DMA API allows for simultaneous access to the buffer by both
> the DMA device and the host CPU.
> 
> The streaming DMA API only allows exclusive access by either the DMA device
> or the host CPU.
> 
> Therefore, with the streaming DMA API, the only thing that's required is
> to ensure that data is visible in some manner to the DMA device.  If the
> DMA device can read from the CPU cache, then probably nothing's required.
> If not, then the data must be evicted from as many levels of cache that
> are necessary to make it visible.  Conversely, for DMA writes, what
> matters is the visibility of the data to the host CPU.

... plus care must be taken not to accidentally reload the line into
cache after it has been pushed out for DMA, which is a risk on
structures with embedded DMA buffers if other non-DMA elements end up
in the same cache line.  This is the situation I was wondering about.

Of course, as you mention, if the DMA hw is cache-coherent, this isn't
a problem.

g.
Vasily Khoruzhick July 18, 2011, 7:56 a.m. UTC | #8
On Friday 15 July 2011 05:53:31 Grant Likely wrote:
> On Sun, Jul 10, 2011 at 06:18:19PM +0300, Vasily Khoruzhick wrote:
> > pxa2xx_spi_probe allocates struct driver_data and null_dma_buf
> > at same time via spi_alloc_master(), but then calculates
> > null_dma_buf pointer incorrectly, and it causes memory corruption
> > later if DMA usage is enabled.
> > 
> > Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
> > ---
> > v2: - add u8 __null_dma_buf[16] to the end of driver_data structure
> > 
> >     and use it as null_dma_buf after alignment.
> >     - use PTR_ALIGN instead of ALIGN
> > 
> > v3: - drop (u8 *) cast, use & operator instead, change array name
> > 
> >  drivers/spi/pxa2xx_spi.c |    9 +++++----
> >  1 files changed, 5 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
> > index dc25bee..b25fe27 100644
> > --- a/drivers/spi/pxa2xx_spi.c
> > +++ b/drivers/spi/pxa2xx_spi.c
> > @@ -106,6 +106,7 @@ struct driver_data {
> > 
> >  	int rx_channel;
> >  	int tx_channel;
> >  	u32 *null_dma_buf;
> > 
> > +	u8 null_dma_buf_unaligned[16];
> 
> Don't dma buffers need to be cache-line aligned? 

No, on PXA2xx they need to be 8-bytes aligned (according to PXA27x developer's 
manual)

> How large is the actual transfer?

Looks like 8 bytes, but I'm not sure, I'm not author of driver and did not dig 
deeply into its code. Just attempting to fix memory corruption.

> Using the __aligned() or __cacheline_aligned
> attribute is the correct way to make sure you've got a data buffer
> that can be used for DMA mixed with other stuff.  Then you don't need
> to fool around with PTR_ALIGN or anything.

Errr, it can't be applied to struct field, right? But driver needs per-device 
null_dma_buf (there's 3 SPI controllers on PXA2xx)

> g.

Regards
Vasily
Russell King - ARM Linux July 18, 2011, 10:10 a.m. UTC | #9
On Fri, Jul 15, 2011 at 03:31:06PM -0600, Grant Likely wrote:
> ... plus care must be taken not to accidentally reload the line into
> cache after it has been pushed out for DMA, which is a risk on
> structures with embedded DMA buffers if other non-DMA elements end up
> in the same cache line.  This is the situation I was wondering about.

I don't think it matters one bit in this case - we're not caring about
the actual data read/written in the DMA, we just need to do DMA to
keep the controller happy.  That's especially true as we don't set the
address increment bit in the DMA command register, so we end up accessing
the same RAM location time and time again for each DMA transfer.

So please, merge the patch - it fixes a serious memory-scribbling bug.
diff mbox

Patch

diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index dc25bee..b25fe27 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -106,6 +106,7 @@  struct driver_data {
 	int rx_channel;
 	int tx_channel;
 	u32 *null_dma_buf;
+	u8 null_dma_buf_unaligned[16];
 
 	/* SSP register addresses */
 	void __iomem *ioaddr;
@@ -1543,8 +1544,8 @@  static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	/* Allocate master with space for drv_data and null dma buffer */
-	master = spi_alloc_master(dev, sizeof(struct driver_data) + 16);
+	/* Allocate master with space for drv_data */
+	master = spi_alloc_master(dev, sizeof(struct driver_data));
 	if (!master) {
 		dev_err(&pdev->dev, "cannot alloc spi_master\n");
 		pxa_ssp_free(ssp);
@@ -1569,8 +1570,8 @@  static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
 	master->transfer = transfer;
 
 	drv_data->ssp_type = ssp->type;
-	drv_data->null_dma_buf = (u32 *)ALIGN((u32)(drv_data +
-						sizeof(struct driver_data)), 8);
+	drv_data->null_dma_buf =
+		(u32 *)PTR_ALIGN(&drv_data->null_dma_buf_unaligned, 8);
 
 	drv_data->ioaddr = ssp->mmio_base;
 	drv_data->ssdr_physical = ssp->phys_base + SSDR;