Message ID | 1571035101-4213-4-git-send-email-yong.wu@mediatek.com (mailing list archive)
State      | New, archived
Series     | Improve tlb range flush
On 14/10/2019 07:38, Yong Wu wrote:
> Use the iommu_gather mechanism to achieve the TLB range flush.
> Gather the iova range in "tlb_add_page", then flush the merged iova
> range in iotlb_sync.
>
> Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
> avoid retrying the lock since the spinlock has already been acquired.

I think this could probably be even simpler - once the actual
register-poking is all confined to mtk_iommu_tlb_sync(), you should be
able to get rid of the per-domain locking in map/unmap and just have a
single per-IOMMU lock to serialise syncs. The io-pgtable code itself
hasn't needed external locking for a while now.

Robin.

> Suggested-by: Tomasz Figa <tfiga@chromium.org>
> Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> ---
> 1) This is the special-case backtrace:
>
>  mtk_iommu_iotlb_sync+0x50/0xa0
>  mtk_iommu_tlb_flush_page_nosync+0x5c/0xd0
>  __arm_v7s_unmap+0x174/0x598
>  arm_v7s_unmap+0x30/0x48
>  mtk_iommu_unmap+0x50/0x78
>  __iommu_unmap+0xa4/0xf8
>
> 2) The check "if (gather->start == ULONG_MAX) return;" is also
> necessary. It happens when unmap only goes to _flush_walk and then
> enters this tlb_sync.
> ---
>  drivers/iommu/mtk_iommu.c | 29 +++++++++++++++++++++++++----
>  drivers/iommu/mtk_iommu.h |  1 +
>  2 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index 5f594d6..8712afc 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -234,7 +234,12 @@ static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
>  					    unsigned long iova, size_t granule,
>  					    void *cookie)
>  {
> -	mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
> +	struct mtk_iommu_data *data = cookie;
> +	struct iommu_domain *domain = &data->m4u_dom->domain;
> +
> +	data->is_in_tlb_gather_add_page = true;
> +	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
> +	data->is_in_tlb_gather_add_page = false;
>  }
>
>  static const struct iommu_flush_ops mtk_iommu_flush_ops = {
> @@ -453,12 +458,28 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
>  static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
>  				 struct iommu_iotlb_gather *gather)
>  {
> +	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
>  	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> +	bool is_in_gather = data->is_in_tlb_gather_add_page;
> +	size_t length = gather->end - gather->start;
>  	unsigned long flags;
>
> -	spin_lock_irqsave(&dom->pgtlock, flags);
> -	mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
> -	spin_unlock_irqrestore(&dom->pgtlock, flags);
> +	if (gather->start == ULONG_MAX)
> +		return;
> +
> +	/*
> +	 * Avoid acquiring the lock when in gather_add_page since the lock
> +	 * has already been held.
> +	 */
> +	if (!is_in_gather)
> +		spin_lock_irqsave(&dom->pgtlock, flags);
> +
> +	mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
> +				       false, data);
> +	mtk_iommu_tlb_sync(data);
> +
> +	if (!is_in_gather)
> +		spin_unlock_irqrestore(&dom->pgtlock, flags);
>  }
>
>  static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
> diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> index fc0f16e..d29af1d 100644
> --- a/drivers/iommu/mtk_iommu.h
> +++ b/drivers/iommu/mtk_iommu.h
> @@ -58,6 +58,7 @@ struct mtk_iommu_data {
>  	struct iommu_group		*m4u_group;
>  	bool				enable_4GB;
>  	bool				tlb_flush_active;
> +	bool				is_in_tlb_gather_add_page;
>
>  	struct iommu_device		iommu;
>  	const struct mtk_iommu_plat_data *plat_data;
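As an aside, a minimal sketch of the single-lock design Robin describes might
look as follows. The tlb_lock member and its placement are assumptions for
illustration only (the name is not from this thread); the function bodies reuse
the helpers already shown in the patch above.

/* Hypothetical new member in struct mtk_iommu_data, initialised at probe: */
	spinlock_t		tlb_lock; /* serialises TLB flush/sync ops */

static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
				 struct iommu_iotlb_gather *gather)
{
	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
	size_t length = gather->end - gather->start;
	unsigned long flags;

	/* Nothing was gathered, so there is nothing to flush. */
	if (gather->start == ULONG_MAX)
		return;

	/* One per-IOMMU lock serialises all syncs; map/unmap take no lock. */
	spin_lock_irqsave(&data->tlb_lock, flags);
	mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
				       false, data);
	mtk_iommu_tlb_sync(data);
	spin_unlock_irqrestore(&data->tlb_lock, flags);
}

With the per-domain pgtlock gone from map/unmap, the nested iotlb_sync
triggered from iommu_iotlb_gather_add_page no longer runs under a held lock,
so the is_in_tlb_gather_add_page flag becomes unnecessary.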
On Mon, 2019-10-14 at 15:21 +0100, Robin Murphy wrote:
> On 14/10/2019 07:38, Yong Wu wrote:
> > Use the iommu_gather mechanism to achieve the TLB range flush.
> > Gather the iova range in "tlb_add_page", then flush the merged iova
> > range in iotlb_sync.
> >
> > Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
> > avoid retrying the lock since the spinlock has already been acquired.
>
> I think this could probably be even simpler - once the actual
> register-poking is all confined to mtk_iommu_tlb_sync(), you should be
> able to get rid of the per-domain locking in map/unmap and just have a
> single per-IOMMU lock to serialise syncs. The io-pgtable code itself
> hasn't needed external locking for a while now.

This is much simpler! Thanks very much. I will try this.

The only concern is that there would then be no lock in iova_to_phys;
maybe use the new lock there instead.

> Robin.
>
> > [...]
On 15/10/2019 06:26, Yong Wu wrote:
> On Mon, 2019-10-14 at 15:21 +0100, Robin Murphy wrote:
>> On 14/10/2019 07:38, Yong Wu wrote:
>>> Use the iommu_gather mechanism to achieve the TLB range flush.
>>> Gather the iova range in "tlb_add_page", then flush the merged iova
>>> range in iotlb_sync.
>>>
>>> Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
>>> avoid retrying the lock since the spinlock has already been acquired.
>>
>> I think this could probably be even simpler - once the actual
>> register-poking is all confined to mtk_iommu_tlb_sync(), you should be
>> able to get rid of the per-domain locking in map/unmap and just have a
>> single per-IOMMU lock to serialise syncs. The io-pgtable code itself
>> hasn't needed external locking for a while now.
>
> This is much simpler! Thanks very much. I will try this.
>
> The only concern is that there would then be no lock in iova_to_phys;
> maybe use the new lock there instead.

iova_to_phys isn't issuing any syncs, so you don't need any locking
there - if anyone calls that in a way which races against the given
address being unmapped and remapped they can't expect a meaningful
result anyway.

Robin.

>>> [...]
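To make Robin's point concrete, a hedged sketch of a lockless iova_to_phys is
below. The dom->iop io_pgtable_ops pointer is an assumption about the driver's
existing per-domain structure; it is not shown in this thread.

/*
 * Sketch only: iova_to_phys is a pure page-table walk. It pokes no TLB
 * registers and issues no sync, so it needs no lock; a caller racing
 * against unmap gets a stale answer with or without one.
 */
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
					  dma_addr_t iova)
{
	struct mtk_iommu_domain *dom = to_mtk_domain(domain);

	return dom->iop->iova_to_phys(dom->iop, iova);
}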
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5f594d6..8712afc 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -234,7 +234,12 @@ static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
 					    unsigned long iova, size_t granule,
 					    void *cookie)
 {
-	mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+	struct mtk_iommu_data *data = cookie;
+	struct iommu_domain *domain = &data->m4u_dom->domain;
+
+	data->is_in_tlb_gather_add_page = true;
+	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+	data->is_in_tlb_gather_add_page = false;
 }
 
 static const struct iommu_flush_ops mtk_iommu_flush_ops = {
@@ -453,12 +458,28 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
 static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
 				 struct iommu_iotlb_gather *gather)
 {
+	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	bool is_in_gather = data->is_in_tlb_gather_add_page;
+	size_t length = gather->end - gather->start;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dom->pgtlock, flags);
-	mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
-	spin_unlock_irqrestore(&dom->pgtlock, flags);
+	if (gather->start == ULONG_MAX)
+		return;
+
+	/*
+	 * Avoid acquiring the lock when in gather_add_page since the lock
+	 * has already been held.
+	 */
+	if (!is_in_gather)
+		spin_lock_irqsave(&dom->pgtlock, flags);
+
+	mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
+				       false, data);
+	mtk_iommu_tlb_sync(data);
+
+	if (!is_in_gather)
+		spin_unlock_irqrestore(&dom->pgtlock, flags);
 }
 
 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index fc0f16e..d29af1d 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -58,6 +58,7 @@ struct mtk_iommu_data {
 	struct iommu_group		*m4u_group;
 	bool				enable_4GB;
 	bool				tlb_flush_active;
+	bool				is_in_tlb_gather_add_page;
 
 	struct iommu_device		iommu;
 	const struct mtk_iommu_plat_data *plat_data;
Use the iommu_gather mechanism to achieve the TLB range flush.
Gather the iova range in "tlb_add_page", then flush the merged iova
range in iotlb_sync.

Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
avoid retrying the lock since the spinlock has already been acquired.

Suggested-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
---
1) This is the special-case backtrace:

 mtk_iommu_iotlb_sync+0x50/0xa0
 mtk_iommu_tlb_flush_page_nosync+0x5c/0xd0
 __arm_v7s_unmap+0x174/0x598
 arm_v7s_unmap+0x30/0x48
 mtk_iommu_unmap+0x50/0x78
 __iommu_unmap+0xa4/0xf8

2) The check "if (gather->start == ULONG_MAX) return;" is also
necessary. It happens when unmap only goes to _flush_walk and then
enters this tlb_sync.
---
 drivers/iommu/mtk_iommu.c | 29 +++++++++++++++++++++++++----
 drivers/iommu/mtk_iommu.h |  1 +
 2 files changed, 26 insertions(+), 4 deletions(-)
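For reference, the core gather helpers around the time of this thread looked
roughly like the sketch below (an approximation of include/linux/iommu.h, not
a verbatim copy). It shows where both special cases in the notes above come
from: gather_init leaves start at ULONG_MAX until a page is added, and
gather_add_page can itself call back into the driver's iotlb_sync when a new
page cannot be merged - the nested path seen in the backtrace.

/* Roughly the 5.4-era core helpers (approximation, not verbatim). */
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
	/* start == ULONG_MAX marks "nothing gathered yet". */
	*gather = (struct iommu_iotlb_gather) {
		.start	= ULONG_MAX,
	};
}

static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
					       struct iommu_iotlb_gather *gather,
					       unsigned long iova, size_t size)
{
	unsigned long start = iova, end = start + size;

	/*
	 * If the new page is disjoint from the current range, or its page
	 * size differs, flush what was gathered so far and start over.
	 * This is the nested iotlb_sync call in the backtrace above.
	 */
	if (gather->pgsize != size ||
	    end < gather->start || start > gather->end) {
		if (gather->pgsize)
			iommu_tlb_sync(domain, gather);
		gather->pgsize = size;
	}

	/* Otherwise just grow the gathered range to cover the new page. */
	if (gather->start > start)
		gather->start = start;
	if (gather->end < end)
		gather->end = end;
}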