[v9,03/16] iommu/exynos: fix page table maintenance

Message ID 002701ce941a$eecebdb0$cc6c3910$@samsung.com (mailing list archive)
State New, archived

Commit Message

Cho KyongHo Aug. 8, 2013, 9:37 a.m. UTC
This prevents allocating lv2 page table for the lv1 page table entry
that already has 1MB page mapping. In addition, changed to BUG_ON
instead of returning -EADDRINUSE.

Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
---
 drivers/iommu/exynos-iommu.c |   68 ++++++++++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 28 deletions(-)

Comments

Tomasz Figa Aug. 8, 2013, 1:54 p.m. UTC | #1
On Thursday 08 of August 2013 18:37:43 Cho KyongHo wrote:
> This prevents allocating lv2 page table for the lv1 page table entry
  ^ What this is this this about? :)

> that already has 1MB page mapping. In addition, changed to BUG_ON
> instead of returning -EADDRINUSE.

The change mentioned in last sentence should be a separate patch.

> Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
> ---
>  drivers/iommu/exynos-iommu.c |   68
> ++++++++++++++++++++++++----------------- 1 files changed, 40
> insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
> index d545a25..d90e6fa 100644
> --- a/drivers/iommu/exynos-iommu.c
> +++ b/drivers/iommu/exynos-iommu.c
> @@ -52,11 +52,11 @@
>  #define lv2ent_large(pent) ((*(pent) & 3) == 1)
> 
>  #define section_phys(sent) (*(sent) & SECT_MASK)
> -#define section_offs(iova) ((iova) & 0xFFFFF)
> +#define section_offs(iova) ((iova) & ~SECT_MASK)
>  #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
> -#define lpage_offs(iova) ((iova) & 0xFFFF)
> +#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
>  #define spage_phys(pent) (*(pent) & SPAGE_MASK)
> -#define spage_offs(iova) ((iova) & 0xFFF)
> +#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
> 
>  #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
>  #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
> @@ -856,13 +856,15 @@ finish:
>  static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long
> iova, short *pgcounter)
>  {
> +	BUG_ON(lv1ent_section(sent));

Is this condition really a critical one, to the point that the system 
should not continue execution?
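
A brief sketch of the distinction in question (generic kernel semantics, not
code from this patch): BUG_ON() kills the offending context with an oops when
its condition is true, whereas WARN_ON() prints a stack trace, lets execution
continue, and returns the condition, so it can gate an error path. A
hypothetical guard, purely for illustration:

8<---
	if (WARN_ON(lv1ent_section(sent)))	/* hypothetical, for illustration */
		return ERR_PTR(-EADDRINUSE);
--->8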

> +
>  	if (lv1ent_fault(sent)) {
>  		unsigned long *pent;
> 
>  		pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
>  		BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
>  		if (!pent)
> -			return NULL;
> +			return ERR_PTR(-ENOMEM);
> 
>  		*sent = mk_lv1ent_page(__pa(pent));
>  		*pgcounter = NUM_LV2ENTRIES;
> @@ -875,15 +877,11 @@ static unsigned long *alloc_lv2entry(unsigned long
> *sent, unsigned long iova,
> 
>  static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short
> *pgcnt) {
> -	if (lv1ent_section(sent))
> -		return -EADDRINUSE;
> +	BUG_ON(lv1ent_section(sent));

Ditto.

>  	if (lv1ent_page(sent)) {
> -		if (*pgcnt != NUM_LV2ENTRIES)
> -			return -EADDRINUSE;
> -
> +		BUG_ON(*pgcnt != NUM_LV2ENTRIES);

Ditto.

>  		kfree(page_entry(sent, 0));
> -
>  		*pgcnt = 0;
>  	}
> 
> @@ -894,24 +892,24 @@ static int lv1set_section(unsigned long *sent,
> phys_addr_t paddr, short *pgcnt) return 0;
>  }
> 
> +static void clear_page_table(unsigned long *ent, int n)
> +{
> +	if (n > 0)
> +		memset(ent, 0, sizeof(*ent) * n);
> +}

I don't see the point of creating this function. It seems to be used only 
once, and with a constant n, so the check for n > 0 is unnecessary.

And even if there is a need for this change, it should be done in a separate 
patch, as this one is not about stylistic changes but about fixing page table 
maintenance (at least based on your commit message).

>  static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t
> size, short *pgcnt)
>  {
>  	if (size == SPAGE_SIZE) {
> -		if (!lv2ent_fault(pent))
> -			return -EADDRINUSE;
> -
> +		BUG_ON(!lv2ent_fault(pent));

Ditto.

>  		*pent = mk_lv2ent_spage(paddr);
>  		pgtable_flush(pent, pent + 1);
>  		*pgcnt -= 1;
>  	} else { /* size == LPAGE_SIZE */
>  		int i;
>  		for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
> -			if (!lv2ent_fault(pent)) {
> -				memset(pent, 0, sizeof(*pent) * i);
> -				return -EADDRINUSE;
> -			}
> -
> +			BUG_ON(!lv2ent_fault(pent));

Ditto.

>  			*pent = mk_lv2ent_lpage(paddr);
>  		}
>  		pgtable_flush(pent - SPAGES_PER_LPAGE, pent);
> @@ -944,17 +942,16 @@ static int exynos_iommu_map(struct iommu_domain
> *domain, unsigned long iova, pent = alloc_lv2entry(entry, iova,
>  					&priv->lv2entcnt[lv1ent_offset(iova)]);
> 
> -		if (!pent)
> -			ret = -ENOMEM;
> +		if (IS_ERR(pent))
> +			ret = PTR_ERR(pent);
>  		else
>  			ret = lv2set_page(pent, paddr, size,
>  					&priv->lv2entcnt[lv1ent_offset(iova)]);
>  	}
> 
> -	if (ret) {
> -		pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
> -							__func__, iova, size);
> -	}
> +	if (ret)
> +		pr_err("%s: Failed(%d) to map 0x%#x bytes @ %#lx\n",
> +			__func__, ret, size, iova);
> 
>  	spin_unlock_irqrestore(&priv->pgtablelock, flags);
> 
> @@ -968,6 +965,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain
> *domain, struct sysmmu_drvdata *data;
>  	unsigned long flags;
>  	unsigned long *ent;
> +	size_t err_pgsize;
> 
>  	BUG_ON(priv->pgtable == NULL);
> 
> @@ -976,7 +974,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain
> *domain, ent = section_entry(priv->pgtable, iova);
> 
>  	if (lv1ent_section(ent)) {
> -		BUG_ON(size < SECT_SIZE);
> +		if (WARN_ON(size < SECT_SIZE)) {
> +			err_pgsize = SECT_SIZE;
> +			goto err;
> +		}
> 
>  		*ent = 0;
>  		pgtable_flush(ent, ent + 1);
> @@ -1008,9 +1009,12 @@ static size_t exynos_iommu_unmap(struct
> iommu_domain *domain, }
> 
>  	/* lv1ent_large(ent) == true here */
> -	BUG_ON(size < LPAGE_SIZE);
> +	if (WARN_ON(size < LPAGE_SIZE)) {
> +		err_pgsize = LPAGE_SIZE;
> +		goto err;
> +	}
> 
> -	memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
> +	clear_page_table(ent, SPAGES_PER_LPAGE);

This seems to be the only use of the introduced clear_page_table() 
function. Is there a need to replace the memset with it?

>  	pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
> 
>  	size = LPAGE_SIZE;
> @@ -1023,8 +1027,16 @@ done:
>  		sysmmu_tlb_invalidate_entry(data->dev, iova);
>  	spin_unlock_irqrestore(&priv->lock, flags);
> 
> -
>  	return size;
> +err:
> +	spin_unlock_irqrestore(&priv->pgtablelock, flags);
> +
> +	pr_err("%s: Failed due to size(%#x) @ %#lx is"\
> +		" smaller than page size %#x\n",
> +		__func__, size, iova, err_pgsize);
> +
> +	return 0;
> +

nit: Stray blank line.

Best regards,
Tomasz

>  }
> 
>  static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain
> *domain,
Grant Grundler Aug. 8, 2013, 6:28 p.m. UTC | #2
Tomasz,

On Thu, Aug 8, 2013 at 6:54 AM, Tomasz Figa <t.figa@samsung.com> wrote:
...
>> +     BUG_ON(lv1ent_section(sent));
>
> Is this condition really a critical one, to the point that the system
> should not continue execution?
>
...
>>       if (lv1ent_page(sent)) {
>> -             if (*pgcnt != NUM_LV2ENTRIES)
>> -                     return -EADDRINUSE;
>> -
>> +             BUG_ON(*pgcnt != NUM_LV2ENTRIES);
>
> Ditto.

I can't speak to the previous BUG_ON(). I believe the EADDRINUSE
failures could be either WARN_ON or BUG_ON.   This condition is
clearly a bug in the generic IOMMU allocator and I think that's why
KyongHo Cho used BUG_ON.

Handing out duplicate addresses will generally lead to some sort of
data corruption or other fault depending on how robust the underlying
device drivers are written.  So my preference is a BUG_ON to
immediately flag this condition instead of hoping a device driver will
correctly handle the dma mapping failure (Some do, most currently
don't).

WARN_ON() + return -EADDRINUSE would be a good alternative.
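
For illustration only (this is not the submitted patch), a minimal sketch of
that WARN_ON()-plus-error-return alternative in lv1set_section(), assuming the
existing exynos-iommu helpers lv1ent_section(), lv1ent_page(), page_entry()
and NUM_LV2ENTRIES behave as in the driver:

8<---
static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
{
	/*
	 * An already-present 1MiB section here points at a bug in the caller
	 * (the generic IOMMU layer): warn loudly, but keep the system running
	 * and report the failure instead of stopping execution.
	 */
	if (WARN_ON(lv1ent_section(sent)))
		return -EADDRINUSE;

	if (lv1ent_page(sent)) {
		if (WARN_ON(*pgcnt != NUM_LV2ENTRIES))
			return -EADDRINUSE;
		kfree(page_entry(sent, 0));
		*pgcnt = 0;
	}

	/* ... write the section entry and flush it, as in the existing code ... */
	return 0;
}
--->8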

thanks,
grant
Cho KyongHo Aug. 9, 2013, 4:15 a.m. UTC | #3
On Thu, 08 Aug 2013 15:54:50 +0200, Tomasz Figa wrote:
> On Thursday 08 of August 2013 18:37:43 Cho KyongHo wrote:
> > This prevents allocating lv2 page table for the lv1 page table entry
>   ^ What this is this this about? :)
> 
As you might indicate, 'this' means this patch :)

> > that already has 1MB page mapping. In addition, changed to BUG_ON
> > instead of returning -EADDRINUSE.
> 
> The change mentioned in last sentence should be a separate patch.
> 
Ok :)

> > Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
> > ---
> >  drivers/iommu/exynos-iommu.c |   68
> > ++++++++++++++++++++++++----------------- 1 files changed, 40
> > insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
> > index d545a25..d90e6fa 100644
> > --- a/drivers/iommu/exynos-iommu.c
> > +++ b/drivers/iommu/exynos-iommu.c
> > @@ -52,11 +52,11 @@
> >  #define lv2ent_large(pent) ((*(pent) & 3) == 1)
> > 
> >  #define section_phys(sent) (*(sent) & SECT_MASK)
> > -#define section_offs(iova) ((iova) & 0xFFFFF)
> > +#define section_offs(iova) ((iova) & ~SECT_MASK)
> >  #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
> > -#define lpage_offs(iova) ((iova) & 0xFFFF)
> > +#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
> >  #define spage_phys(pent) (*(pent) & SPAGE_MASK)
> > -#define spage_offs(iova) ((iova) & 0xFFF)
> > +#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
> > 
> >  #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
> >  #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
> > @@ -856,13 +856,15 @@ finish:
> >  static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long
> > iova, short *pgcounter)
> >  {
> > +	BUG_ON(lv1ent_section(sent));
> 
> Is this condition really a critical one, to the point that the system 
> should not continue execution?
> 
I discussed this with Grant. He thought that creating a mapping on top of a
valid mapping is simply a BUG, and I finally agreed with him. Is there a case
where the condition in BUG_ON is intentionally true?

> > +
> >  	if (lv1ent_fault(sent)) {
> >  		unsigned long *pent;
> > 
> >  		pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
> >  		BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
> >  		if (!pent)
> > -			return NULL;
> > +			return ERR_PTR(-ENOMEM);
> > 
> >  		*sent = mk_lv1ent_page(__pa(pent));
> >  		*pgcounter = NUM_LV2ENTRIES;
> > @@ -875,15 +877,11 @@ static unsigned long *alloc_lv2entry(unsigned long
> > *sent, unsigned long iova,
> > 
> >  static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short
> > *pgcnt) {
> > -	if (lv1ent_section(sent))
> > -		return -EADDRINUSE;
> > +	BUG_ON(lv1ent_section(sent));
> 
> Ditto.
> 
> >  	if (lv1ent_page(sent)) {
> > -		if (*pgcnt != NUM_LV2ENTRIES)
> > -			return -EADDRINUSE;
> > -
> > +		BUG_ON(*pgcnt != NUM_LV2ENTRIES);
> 
> Ditto.
> 
> >  		kfree(page_entry(sent, 0));
> > -
> >  		*pgcnt = 0;
> >  	}
> > 
> > @@ -894,24 +892,24 @@ static int lv1set_section(unsigned long *sent,
> > phys_addr_t paddr, short *pgcnt) return 0;
> >  }
> > 
> > +static void clear_page_table(unsigned long *ent, int n)
> > +{
> > +	if (n > 0)
> > +		memset(ent, 0, sizeof(*ent) * n);
> > +}
> 
> I don't see the point of creating this function. It seems to be used only 
> once, and with a constant n, so the check for n > 0 is unnecessary.
> 
> And even if there is a need for this change, it should be done in a separate 
> patch, as this one is not about stylistic changes but about fixing page table 
> maintenance (at least based on your commit message).
> 

I know what you are concerned about.
It was introduced in the v8 patches to restore the previous fault entries before
returning -EADDRINUSE. It still remains even though "return -EADDRINUSE"
was changed into BUG_ON().
I also think that it needs to be removed.

> >  static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t
> > size, short *pgcnt)
> >  {
> >  	if (size == SPAGE_SIZE) {
> > -		if (!lv2ent_fault(pent))
> > -			return -EADDRINUSE;
> > -
> > +		BUG_ON(!lv2ent_fault(pent));
> 
> Ditto.
> 
> >  		*pent = mk_lv2ent_spage(paddr);
> >  		pgtable_flush(pent, pent + 1);
> >  		*pgcnt -= 1;
> >  	} else { /* size == LPAGE_SIZE */
> >  		int i;
> >  		for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
> > -			if (!lv2ent_fault(pent)) {
> > -				memset(pent, 0, sizeof(*pent) * i);
> > -				return -EADDRINUSE;
> > -			}
> > -
> > +			BUG_ON(!lv2ent_fault(pent));
> 
> Ditto.
> 
> >  			*pent = mk_lv2ent_lpage(paddr);
> >  		}
> >  		pgtable_flush(pent - SPAGES_PER_LPAGE, pent);
> > @@ -944,17 +942,16 @@ static int exynos_iommu_map(struct iommu_domain
> > *domain, unsigned long iova, pent = alloc_lv2entry(entry, iova,
> >  					&priv->lv2entcnt[lv1ent_offset(iova)]);
> > 
> > -		if (!pent)
> > -			ret = -ENOMEM;
> > +		if (IS_ERR(pent))
> > +			ret = PTR_ERR(pent);
> >  		else
> >  			ret = lv2set_page(pent, paddr, size,
> >  					&priv->lv2entcnt[lv1ent_offset(iova)]);
> >  	}
> > 
> > -	if (ret) {
> > -		pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
> > -							__func__, iova, size);
> > -	}
> > +	if (ret)
> > +		pr_err("%s: Failed(%d) to map 0x%#x bytes @ %#lx\n",
> > +			__func__, ret, size, iova);
> > 
> >  	spin_unlock_irqrestore(&priv->pgtablelock, flags);
> > 
> > @@ -968,6 +965,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain
> > *domain, struct sysmmu_drvdata *data;
> >  	unsigned long flags;
> >  	unsigned long *ent;
> > +	size_t err_pgsize;
> > 
> >  	BUG_ON(priv->pgtable == NULL);
> > 
> > @@ -976,7 +974,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain
> > *domain, ent = section_entry(priv->pgtable, iova);
> > 
> >  	if (lv1ent_section(ent)) {
> > -		BUG_ON(size < SECT_SIZE);
> > +		if (WARN_ON(size < SECT_SIZE)) {
> > +			err_pgsize = SECT_SIZE;
> > +			goto err;
> > +		}
> > 
> >  		*ent = 0;
> >  		pgtable_flush(ent, ent + 1);
> > @@ -1008,9 +1009,12 @@ static size_t exynos_iommu_unmap(struct
> > iommu_domain *domain, }
> > 
> >  	/* lv1ent_large(ent) == true here */
> > -	BUG_ON(size < LPAGE_SIZE);
> > +	if (WARN_ON(size < LPAGE_SIZE)) {
> > +		err_pgsize = LPAGE_SIZE;
> > +		goto err;
> > +	}
> > 
> > -	memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
> > +	clear_page_table(ent, SPAGES_PER_LPAGE);
> 
> This seems to be the only use of the introduced clear_page_table() 
> function. Is there a need to replace the memset with it?
> 

Agree.

> >  	pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
> > 
> >  	size = LPAGE_SIZE;
> > @@ -1023,8 +1027,16 @@ done:
> >  		sysmmu_tlb_invalidate_entry(data->dev, iova);
> >  	spin_unlock_irqrestore(&priv->lock, flags);
> > 
> > -
> >  	return size;
> > +err:
> > +	spin_unlock_irqrestore(&priv->pgtablelock, flags);
> > +
> > +	pr_err("%s: Failed due to size(%#x) @ %#lx is"\
> > +		" smaller than page size %#x\n",
> > +		__func__, size, iova, err_pgsize);
> > +
> > +	return 0;
> > +
> 
> nit: Stray blank line.
> 
Oh. I didn't catch that.

Thanks.

KyongHo

> Best regards,
> Tomasz
> 
> >  }
> > 
> >  static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain
> > *domain,
Tomasz Figa Aug. 9, 2013, 7:47 a.m. UTC | #4
Hi KyongHo,

On Friday 09 of August 2013 13:15:20 Cho KyongHo wrote:
> On Thu, 08 Aug 2013 15:54:50 +0200, Tomasz Figa wrote:
> > On Thursday 08 of August 2013 18:37:43 Cho KyongHo wrote:
> > > This prevents allocating lv2 page table for the lv1 page table entry
> > > 
> >   ^ What this is this this about? :)
> 
> As you might indicate, 'this' means this patch :)

Yep, I was just nitpicking, but I would still go with something along the 
following lines:

8<---
Currently if lv2 page table allocation is requested for a lv1 page table 
entry that already has 1MB page mapping, the driver returns -EADDRINUSE. 
However this case should not happen, unless there is a bug in the generic 
IOMMU allocator, so BUG_ON() is the right error handling here, which is 
implemented by this patch.
--->8

> > > that already has 1MB page mapping. In addition, changed to BUG_ON
> > > instead of returning -EADDRINUSE.
> > 
> > The change mentioned in last sentence should be a separate patch.
> 
> Ok :)

Well, actually I was confused by subject of this patch.

It looks like this change is actually the main part of it, and the only 
unrelated changes are using defined macros for the page masks and the 
introduction of the clear_page_table() function. Since we both agreed that 
the latter can be dropped, only the former should be separated from this patch.
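
For reference, a minimal sketch of the equivalence behind that page-mask
cleanup, assuming the 1 MiB section, 64 KiB large-page and 4 KiB small-page
sizes this driver uses, with the masks derived from those sizes as in the
driver:

8<---
/*
 * With power-of-two sizes, the offset-within-page mask is size - 1, i.e. the
 * complement of the address mask:
 *
 *   ~SECT_MASK  == SECT_SIZE  - 1 == 0xFFFFF   (1 MiB section)
 *   ~LPAGE_MASK == LPAGE_SIZE - 1 == 0xFFFF    (64 KiB large page)
 *   ~SPAGE_MASK == SPAGE_SIZE - 1 == 0xFFF     (4 KiB small page)
 *
 * so (iova & ~SECT_MASK) yields the same value as the old (iova & 0xFFFFF),
 * just without the hard-coded constant.
 */
#define SECT_SIZE		(1UL << 20)
#define SECT_MASK		(~(SECT_SIZE - 1))
#define section_offs(iova)	((iova) & ~SECT_MASK)
--->8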

Maybe you could use following patch subject:

iommu/exynos: fix handling of possible BUG cases in page table setup code

> > > Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
> > > ---
> > > 
> > >  drivers/iommu/exynos-iommu.c |   68
> > > 
> > > ++++++++++++++++++++++++----------------- 1 files changed, 40
> > > insertions(+), 28 deletions(-)
> > > 
> > > diff --git a/drivers/iommu/exynos-iommu.c
> > > b/drivers/iommu/exynos-iommu.c index d545a25..d90e6fa 100644
> > > --- a/drivers/iommu/exynos-iommu.c
> > > +++ b/drivers/iommu/exynos-iommu.c
> > > @@ -52,11 +52,11 @@
> > > 
> > >  #define lv2ent_large(pent) ((*(pent) & 3) == 1)
> > >  
> > >  #define section_phys(sent) (*(sent) & SECT_MASK)
> > > 
> > > -#define section_offs(iova) ((iova) & 0xFFFFF)
> > > +#define section_offs(iova) ((iova) & ~SECT_MASK)
> > > 
> > >  #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
> > > 
> > > -#define lpage_offs(iova) ((iova) & 0xFFFF)
> > > +#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
> > > 
> > >  #define spage_phys(pent) (*(pent) & SPAGE_MASK)
> > > 
> > > -#define spage_offs(iova) ((iova) & 0xFFF)
> > > +#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
> > > 
> > >  #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
> > >  #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
> > > 
> > > @@ -856,13 +856,15 @@ finish:
> > >  static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned
> > >  long
> > > 
> > > iova, short *pgcounter)
> > > 
> > >  {
> > > 
> > > +	BUG_ON(lv1ent_section(sent));
> > 
> > Is this condition really a critical one, to the point that the system
> > should not continue execution?
> 
> I discussed this with Grant. He thought that creating a mapping on top of a
> valid mapping is simply a BUG, and I finally agreed with him. Is there a case
> where the condition in BUG_ON is intentionally true?

Yes, he explained this as well. It's fine for me then.

> > > +
> > > 
> > >  	if (lv1ent_fault(sent)) {
> > >  	
> > >  		unsigned long *pent;
> > >  		
> > >  		pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
> > >  		BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
> > >  		if (!pent)
> > > 
> > > -			return NULL;
> > > +			return ERR_PTR(-ENOMEM);
> > > 
> > >  		*sent = mk_lv1ent_page(__pa(pent));
> > >  		*pgcounter = NUM_LV2ENTRIES;
> > > 
> > > @@ -875,15 +877,11 @@ static unsigned long *alloc_lv2entry(unsigned
> > > long *sent, unsigned long iova,
> > > 
> > >  static int lv1set_section(unsigned long *sent, phys_addr_t paddr,
> > >  short
> > > 
> > > *pgcnt) {
> > > -	if (lv1ent_section(sent))
> > > -		return -EADDRINUSE;
> > > +	BUG_ON(lv1ent_section(sent));
> > 
> > Ditto.
> > 
> > >  	if (lv1ent_page(sent)) {
> > > 
> > > -		if (*pgcnt != NUM_LV2ENTRIES)
> > > -			return -EADDRINUSE;
> > > -
> > > +		BUG_ON(*pgcnt != NUM_LV2ENTRIES);
> > 
> > Ditto.
> > 
> > >  		kfree(page_entry(sent, 0));
> > > 
> > > -
> > > 
> > >  		*pgcnt = 0;
> > >  	
> > >  	}
> > > 
> > > @@ -894,24 +892,24 @@ static int lv1set_section(unsigned long *sent,
> > > phys_addr_t paddr, short *pgcnt) return 0;
> > > 
> > >  }
> > > 
> > > +static void clear_page_table(unsigned long *ent, int n)
> > > +{
> > > +	if (n > 0)
> > > +		memset(ent, 0, sizeof(*ent) * n);
> > > +}
> > 
> > I don't see the point of creating this function. It seems to be used
> > only once, and with a constant n, so the check for n > 0
> > is unnecessary.
> > 
> > And even if there is a need for this change, it should be done in a
> > separate patch, as this one is not about stylistic changes but about
> > fixing page table maintenance (at least based on your commit
> > message).
> 
> I know what you are concerned about.
> It was introduced in the v8 patches to restore the previous fault entries before
> returning -EADDRINUSE. It still remains even though "return -EADDRINUSE"
> was changed into BUG_ON().
> I also think that it needs to be removed.

OK, thanks.

Best regards,
Tomasz
Cho KyongHo Aug. 9, 2013, 8:33 a.m. UTC | #5
On Fri, 09 Aug 2013 09:47:28 +0200, Tomasz Figa wrote:
> Hi KyongHo,
> 
> On Friday 09 of August 2013 13:15:20 Cho KyongHo wrote:
> > On Thu, 08 Aug 2013 15:54:50 +0200, Tomasz Figa wrote:
> > > On Thursday 08 of August 2013 18:37:43 Cho KyongHo wrote:
> > > > This prevents allocating lv2 page table for the lv1 page table entry
> > > > 
> > >   ^ What this is this this about? :)
> > 
> > As you might indicate, 'this' means this patch :)
> 
> Yep, I was just nitpicking, but I would still go with something along the 
> following lines:
> 
> 8<---
> Currently if lv2 page table allocation is requested for a lv1 page table 
> entry that already has 1MB page mapping, the driver returns -EADDRINUSE. 
> However this case should not happen, unless there is a bug in the generic 
> IOMMU allocator, so BUG_ON() is the right error handling here, which is 
> implemented by this patch.
> --->8
> 
> > > > that already has 1MB page mapping. In addition, changed to BUG_ON
> > > > instead of returning -EADDRINUSE.
> > > 
> > > The change mentioned in last sentence should be a separate patch.
> > 
> > Ok :)
> 
> Well, actually I was confused by subject of this patch.
> 
> It looks like this change is actually the main part of it, and the only 
> unrelated changes are using defined macros for the page masks and the 
> introduction of the clear_page_table() function. Since we both agreed that 
> the latter can be dropped, only the former should be separated from this patch.
> 
> Maybe you could use following patch subject:
> 
> iommu/exynos: fix handling of possible BUG cases in page table setup code
> 

Your title looks much better. Thanks.

Initially, this patch was created to handle -EADDRINUSE correctly, but it
has changed :). I forgot to change the subject.

> > > > Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
> > > > ---
> > > > 
> > > >  drivers/iommu/exynos-iommu.c |   68
> > > > 
> > > > ++++++++++++++++++++++++----------------- 1 files changed, 40
> > > > insertions(+), 28 deletions(-)
> > > > 
> > > > diff --git a/drivers/iommu/exynos-iommu.c
> > > > b/drivers/iommu/exynos-iommu.c index d545a25..d90e6fa 100644
> > > > --- a/drivers/iommu/exynos-iommu.c
> > > > +++ b/drivers/iommu/exynos-iommu.c
> > > > @@ -52,11 +52,11 @@
> > > > 
> > > >  #define lv2ent_large(pent) ((*(pent) & 3) == 1)
> > > >  
> > > >  #define section_phys(sent) (*(sent) & SECT_MASK)
> > > > 
> > > > -#define section_offs(iova) ((iova) & 0xFFFFF)
> > > > +#define section_offs(iova) ((iova) & ~SECT_MASK)
> > > > 
> > > >  #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
> > > > 
> > > > -#define lpage_offs(iova) ((iova) & 0xFFFF)
> > > > +#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
> > > > 
> > > >  #define spage_phys(pent) (*(pent) & SPAGE_MASK)
> > > > 
> > > > -#define spage_offs(iova) ((iova) & 0xFFF)
> > > > +#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
> > > > 
> > > >  #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
> > > >  #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
> > > > 
> > > > @@ -856,13 +856,15 @@ finish:
> > > >  static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned
> > > >  long
> > > > 
> > > > iova, short *pgcounter)
> > > > 
> > > >  {
> > > > 
> > > > +	BUG_ON(lv1ent_section(sent));
> > > 
> > > Is this condition really a critical one, to the point that the system
> > > should not continue execution?
> > 
> > I discussed this with Grant. He thought that creating a mapping on top of a
> > valid mapping is simply a BUG, and I finally agreed with him. Is there a case
> > where the condition in BUG_ON is intentionally true?
> 
> Yes, he explained this as well. It's fine for me then.
> 
> > > > +
> > > > 
> > > >  	if (lv1ent_fault(sent)) {
> > > >  	
> > > >  		unsigned long *pent;
> > > >  		
> > > >  		pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
> > > >  		BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
> > > >  		if (!pent)
> > > > 
> > > > -			return NULL;
> > > > +			return ERR_PTR(-ENOMEM);
> > > > 
> > > >  		*sent = mk_lv1ent_page(__pa(pent));
> > > >  		*pgcounter = NUM_LV2ENTRIES;
> > > > 
> > > > @@ -875,15 +877,11 @@ static unsigned long *alloc_lv2entry(unsigned
> > > > long *sent, unsigned long iova,
> > > > 
> > > >  static int lv1set_section(unsigned long *sent, phys_addr_t paddr,
> > > >  short
> > > > 
> > > > *pgcnt) {
> > > > -	if (lv1ent_section(sent))
> > > > -		return -EADDRINUSE;
> > > > +	BUG_ON(lv1ent_section(sent));
> > > 
> > > Ditto.
> > > 
> > > >  	if (lv1ent_page(sent)) {
> > > > 
> > > > -		if (*pgcnt != NUM_LV2ENTRIES)
> > > > -			return -EADDRINUSE;
> > > > -
> > > > +		BUG_ON(*pgcnt != NUM_LV2ENTRIES);
> > > 
> > > Ditto.
> > > 
> > > >  		kfree(page_entry(sent, 0));
> > > > 
> > > > -
> > > > 
> > > >  		*pgcnt = 0;
> > > >  	
> > > >  	}
> > > > 
> > > > @@ -894,24 +892,24 @@ static int lv1set_section(unsigned long *sent,
> > > > phys_addr_t paddr, short *pgcnt) return 0;
> > > > 
> > > >  }
> > > > 
> > > > +static void clear_page_table(unsigned long *ent, int n)
> > > > +{
> > > > +	if (n > 0)
> > > > +		memset(ent, 0, sizeof(*ent) * n);
> > > > +}
> > > 
> > > I don't see the point of creating this function. It seems to be used
> > > only once, and with a constant n, so the check for n > 0
> > > is unnecessary.
> > > 
> > > And even if there is a need for this change, it should be done in a
> > > separate patch, as this one is not about stylistic changes but about
> > > fixing page table maintenance (at least based on your commit
> > > message).
> > 
> > I know what you are concerned about.
> > It was introduced in the v8 patches to restore the previous fault entries before
> > returning -EADDRINUSE. It still remains even though "return -EADDRINUSE"
> > was changed into BUG_ON().
> > I also think that it needs to be removed.
> 
> OK, thanks.
> 
> Best regards,
> Tomasz
>
Joerg Roedel Aug. 14, 2013, 10:49 a.m. UTC | #6
On Thu, Aug 08, 2013 at 11:28:44AM -0700, Grant Grundler wrote:
> I can't speak to the previous BUG_ON(). I believe the EADDRINUSE
> failures could be either WARN_ON or BUG_ON.   This condition is
> clearly a bug in the generic IOMMU allocator and I think that's why
> KyongHo Cho used BUG_ON.
> 
> Handing out duplicate addresses will generally lead to some sort of
> data corruption or other fault depending on how robust the underlying
> device drivers are written.  So my preference is a BUG_ON to
> immediately flag this condition instead of hoping a device driver will
> correctly handle the dma mapping failure (Some do, most currently
> don't).
> 
> WARN_ON() + return -EADDRINUSE would be a good alternative.

Even if it is a real BUG condition, I don't think it is worth stopping
execution at this point. It makes debugging harder and the system less
reliable. I prefer to go with the WARN_ON and an error return value.


	Joerg
Grant Grundler Aug. 14, 2013, 8:54 p.m. UTC | #7
On Wed, Aug 14, 2013 at 3:49 AM, Joerg Roedel <joro@8bytes.org> wrote:
> On Thu, Aug 08, 2013 at 11:28:44AM -0700, Grant Grundler wrote:
>> I can't speak to the previous BUG_ON(). I believe the EADDRINUSE
>> failures could be either WARN_ON or BUG_ON.   This condition is
>> clearly a bug in the generic IOMMU allocator and I think that's why
>> KyongHo Cho used BUG_ON.
>>
>> Handing out duplicate addresses will generally lead to some sort of
>> data corruption or other fault depending on how robust the underlying
>> device drivers are written.  So my preference is a BUG_ON to
>> immediately flag this condition instead of hoping a device driver will
>> correctly handle the dma mapping failure (Some do, most currently
>> don't).
>>
>> WARN_ON() + return -EADDRINUSE would be a good alternative.
>
> Even if it is a real BUG condition, I don't think it is worth stopping
> execution at this point. It makes debugging harder and the system less
> reliable. I prefer to go with the WARN_ON and an error return value.

I'm ok with WARN_ON and an error return value. This is "valid"
behavior.  I expect this bug to never happen but if and when it does,
I want a clear symptom (e.g. WARN_ON) that it happened.

My concern is that historically, drivers did not get an error return
value on failure:
    ftp://193.166.3.4/pub/linux/kernel/v2.3/patch-html/patch-2.3.47/linux_Documentation_DMA-mapping.txt.html

or later:
    https://www.kernel.org/pub/linux/kernel/people/marcelo/linux-2.4/Documentation/DMA-mapping.txt

And thus, some drivers don't check or attempt to handle mapping
failures based on this existing code. Here is a recent example:
     http://comments.gmane.org/gmane.linux.network/272969

I hope very few or none of those exist since Neil Horman demonstrated
"dma debugging" can flag this behavior.

Just for fun, I'll include this link: (apparently 2003 was a good
year for DMA talks :)
     http://ols.fedoraproject.org/OLS/Reprints-2003/LinuxSymposium2003-2side.pdf
     (three talks on DMA issues)

thanks
grant
Cho KyongHo Aug. 16, 2013, 11:17 a.m. UTC | #8
On Wed, 14 Aug 2013 13:54:53 -0700, Grant Grundler wrote:
> On Wed, Aug 14, 2013 at 3:49 AM, Joerg Roedel <joro@8bytes.org> wrote:
> > On Thu, Aug 08, 2013 at 11:28:44AM -0700, Grant Grundler wrote:
> >> I can't speak to the previous BUG_ON(). I believe the EADDRINUSE
> >> failures could be either WARN_ON or BUG_ON.   This condition is
> >> clearly a bug in the generic IOMMU allocator and I think that's why
> >> KyongHo Cho used BUG_ON.
> >>
> >> Handing out duplicate addresses will generally lead to some sort of
> >> data corruption or other fault depending on how robust the underlying
> >> device drivers are written.  So my preference is a BUG_ON to
> >> immediately flag this condition instead of hoping a device driver will
> >> correctly handle the dma mapping failure (Some do, most currently
> >> don't).
> >>
> >> WARN_ON() + return -EADDRINUSE would be a good alternative.
> >
> > Even if it is a real BUG condition, I don't think it is worth stopping
> > execution at this point. It makes debugging harder and the system less
> > reliable. I prefer to go with the WARN_ON and an error return value.
> 
> I'm ok with WARN_ON and an error return value. This is "valid"
> behavior.  I expect this bug to never happen but if and when it does,
> I want a clear symptom (e.g. WARN_ON) that it happened.
> 

Ok.
Finally, everyone agrees that WARN_ON() is OK.
It will be helpful for the kernel code that uses the IOMMU API.

> My concern is that historically, drivers did not get an error return
> value on failure:
>     ftp://193.166.3.4/pub/linux/kernel/v2.3/patch-html/patch-2.3.47/linux_Documentation_DMA-mapping.txt.html
> 
> or later:
>     https://www.kernel.org/pub/linux/kernel/people/marcelo/linux-2.4/Documentation/DMA-mapping.txt
> 
> And thus, some drivers don't check or attempt to handle mapping
> failures based on this existing code. Here is a recent example:
>      http://comments.gmane.org/gmane.linux.network/272969
> 
> I hope very few or none of those exist since Neil Horman demonstrated
> "dma debugging" can flag this behavior.
> 
> Just for fun, I'll include this link: (apparently 2003 was a good
> year for DMA talks :)
>      http://ols.fedoraproject.org/OLS/Reprints-2003/LinuxSymposium2003-2side.pdf
>      (three talks on DMA issues)
> 

Thank you for the resources :)
Those will be helpful for the guys with me.

> thanks
> grant

Patch

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index d545a25..d90e6fa 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -52,11 +52,11 @@ 
 #define lv2ent_large(pent) ((*(pent) & 3) == 1)
 
 #define section_phys(sent) (*(sent) & SECT_MASK)
-#define section_offs(iova) ((iova) & 0xFFFFF)
+#define section_offs(iova) ((iova) & ~SECT_MASK)
 #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
-#define lpage_offs(iova) ((iova) & 0xFFFF)
+#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
 #define spage_phys(pent) (*(pent) & SPAGE_MASK)
-#define spage_offs(iova) ((iova) & 0xFFF)
+#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
 
 #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
 #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
@@ -856,13 +856,15 @@  finish:
 static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
 					short *pgcounter)
 {
+	BUG_ON(lv1ent_section(sent));
+
 	if (lv1ent_fault(sent)) {
 		unsigned long *pent;
 
 		pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
 		BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
 		if (!pent)
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 
 		*sent = mk_lv1ent_page(__pa(pent));
 		*pgcounter = NUM_LV2ENTRIES;
@@ -875,15 +877,11 @@  static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
 
 static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
 {
-	if (lv1ent_section(sent))
-		return -EADDRINUSE;
+	BUG_ON(lv1ent_section(sent));
 
 	if (lv1ent_page(sent)) {
-		if (*pgcnt != NUM_LV2ENTRIES)
-			return -EADDRINUSE;
-
+		BUG_ON(*pgcnt != NUM_LV2ENTRIES);
 		kfree(page_entry(sent, 0));
-
 		*pgcnt = 0;
 	}
 
@@ -894,24 +892,24 @@  static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
 	return 0;
 }
 
+static void clear_page_table(unsigned long *ent, int n)
+{
+	if (n > 0)
+		memset(ent, 0, sizeof(*ent) * n);
+}
+
 static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
 								short *pgcnt)
 {
 	if (size == SPAGE_SIZE) {
-		if (!lv2ent_fault(pent))
-			return -EADDRINUSE;
-
+		BUG_ON(!lv2ent_fault(pent));
 		*pent = mk_lv2ent_spage(paddr);
 		pgtable_flush(pent, pent + 1);
 		*pgcnt -= 1;
 	} else { /* size == LPAGE_SIZE */
 		int i;
 		for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
-			if (!lv2ent_fault(pent)) {
-				memset(pent, 0, sizeof(*pent) * i);
-				return -EADDRINUSE;
-			}
-
+			BUG_ON(!lv2ent_fault(pent));
 			*pent = mk_lv2ent_lpage(paddr);
 		}
 		pgtable_flush(pent - SPAGES_PER_LPAGE, pent);
@@ -944,17 +942,16 @@  static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova,
 		pent = alloc_lv2entry(entry, iova,
 					&priv->lv2entcnt[lv1ent_offset(iova)]);
 
-		if (!pent)
-			ret = -ENOMEM;
+		if (IS_ERR(pent))
+			ret = PTR_ERR(pent);
 		else
 			ret = lv2set_page(pent, paddr, size,
 					&priv->lv2entcnt[lv1ent_offset(iova)]);
 	}
 
-	if (ret) {
-		pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
-							__func__, iova, size);
-	}
+	if (ret)
+		pr_err("%s: Failed(%d) to map 0x%#x bytes @ %#lx\n",
+			__func__, ret, size, iova);
 
 	spin_unlock_irqrestore(&priv->pgtablelock, flags);
 
@@ -968,6 +965,7 @@  static size_t exynos_iommu_unmap(struct iommu_domain *domain,
 	struct sysmmu_drvdata *data;
 	unsigned long flags;
 	unsigned long *ent;
+	size_t err_pgsize;
 
 	BUG_ON(priv->pgtable == NULL);
 
@@ -976,7 +974,10 @@  static size_t exynos_iommu_unmap(struct iommu_domain *domain,
 	ent = section_entry(priv->pgtable, iova);
 
 	if (lv1ent_section(ent)) {
-		BUG_ON(size < SECT_SIZE);
+		if (WARN_ON(size < SECT_SIZE)) {
+			err_pgsize = SECT_SIZE;
+			goto err;
+		}
 
 		*ent = 0;
 		pgtable_flush(ent, ent + 1);
@@ -1008,9 +1009,12 @@  static size_t exynos_iommu_unmap(struct iommu_domain *domain,
 	}
 
 	/* lv1ent_large(ent) == true here */
-	BUG_ON(size < LPAGE_SIZE);
+	if (WARN_ON(size < LPAGE_SIZE)) {
+		err_pgsize = LPAGE_SIZE;
+		goto err;
+	}
 
-	memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
+	clear_page_table(ent, SPAGES_PER_LPAGE);
 	pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
 
 	size = LPAGE_SIZE;
@@ -1023,8 +1027,16 @@  done:
 		sysmmu_tlb_invalidate_entry(data->dev, iova);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-
 	return size;
+err:
+	spin_unlock_irqrestore(&priv->pgtablelock, flags);
+
+	pr_err("%s: Failed due to size(%#x) @ %#lx is"\
+		" smaller than page size %#x\n",
+		__func__, size, iova, err_pgsize);
+
+	return 0;
+
 }
 
 static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,