Message ID | 20200303115154.32263-1-maz@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | iommu/dma: Fix MSI reservation allocation | expand |
Hi Marc, On 3/3/20 12:51 PM, Marc Zyngier wrote: > The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page > structures doesn't match the way iommu_put_dma_cookie() frees them. > > The former performs a single allocation of all the required structures, > while the latter tries to free them one at a time. It doesn't quite > work for the main use case (the GICv3 ITS where the range is 64kB) > when the base ganule size is 4kB. > > This leads to a nice slab corruption on teardown, which is easily > observable by simply creating a VF on a SRIOV-capable device, and > tearing it down immediately (no need to even make use of it). > > Fix it by allocating iommu_dma_msi_page structures one at a time. > > Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions") Reviewed-by: Eric Auger <eric.auger@redhat.com> Thanks Eric > Signed-off-by: Marc Zyngier <maz@kernel.org> > Cc: Robin Murphy <robin.murphy@arm.com> > Cc: Joerg Roedel <jroedel@suse.de> > Cc: Eric Auger <eric.auger@redhat.com> > Cc: Will Deacon <will@kernel.org> > Cc: stable@vger.kernel.org > --- > drivers/iommu/dma-iommu.c | 36 ++++++++++++++++++++++++------------ > 1 file changed, 24 insertions(+), 12 deletions(-) > > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index a2e96a5fd9a7..01fa64856c12 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -171,25 +171,37 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, > phys_addr_t start, phys_addr_t end) > { > struct iova_domain *iovad = &cookie->iovad; > - struct iommu_dma_msi_page *msi_page; > - int i, num_pages; > + struct iommu_dma_msi_page *msi_page, *tmp; > + int i, num_pages, ret = 0; > + phys_addr_t base; > > - start -= iova_offset(iovad, start); > + base = start -= iova_offset(iovad, start); > num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); > > - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); > - if (!msi_page) > - return -ENOMEM; > - 
> for (i = 0; i < num_pages; i++) { > - msi_page[i].phys = start; > - msi_page[i].iova = start; > - INIT_LIST_HEAD(&msi_page[i].list); > - list_add(&msi_page[i].list, &cookie->msi_page_list); > + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); > + if (!msi_page) { > + ret = -ENOMEM; > + break; > + } > + msi_page->phys = start; > + msi_page->iova = start; > + INIT_LIST_HEAD(&msi_page->list); > + list_add(&msi_page->list, &cookie->msi_page_list); > start += iovad->granule; > } > > - return 0; > + if (ret) { > + list_for_each_entry_safe(msi_page, tmp, > + &cookie->msi_page_list, list) { > + if (msi_page->phys >= base && msi_page->phys < start) { > + list_del(&msi_page->list); > + kfree(msi_page); > + } > + } > + } > + > + return ret; > } > > static int iova_reserve_pci_windows(struct pci_dev *dev, >
On 03/03/2020 11:51 am, Marc Zyngier wrote: > The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page > structures doesn't match the way iommu_put_dma_cookie() frees them. > > The former performs a single allocation of all the required structures, > while the latter tries to free them one at a time. It doesn't quite > work for the main use case (the GICv3 ITS where the range is 64kB) > when the base ganule size is 4kB. > > This leads to a nice slab corruption on teardown, which is easily > observable by simply creating a VF on a SRIOV-capable device, and > tearing it down immediately (no need to even make use of it). > > Fix it by allocating iommu_dma_msi_page structures one at a time. Bleh, you know you're supposed to be using 64K pages on those things, right? :P > Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions") > Signed-off-by: Marc Zyngier <maz@kernel.org> > Cc: Robin Murphy <robin.murphy@arm.com> > Cc: Joerg Roedel <jroedel@suse.de> > Cc: Eric Auger <eric.auger@redhat.com> > Cc: Will Deacon <will@kernel.org> > Cc: stable@vger.kernel.org > --- > drivers/iommu/dma-iommu.c | 36 ++++++++++++++++++++++++------------ > 1 file changed, 24 insertions(+), 12 deletions(-) > > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index a2e96a5fd9a7..01fa64856c12 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -171,25 +171,37 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, > phys_addr_t start, phys_addr_t end) > { > struct iova_domain *iovad = &cookie->iovad; > - struct iommu_dma_msi_page *msi_page; > - int i, num_pages; > + struct iommu_dma_msi_page *msi_page, *tmp; > + int i, num_pages, ret = 0; > + phys_addr_t base; > > - start -= iova_offset(iovad, start); > + base = start -= iova_offset(iovad, start); > num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); > > - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); > - if (!msi_page) > - return 
-ENOMEM; > - > for (i = 0; i < num_pages; i++) { > - msi_page[i].phys = start; > - msi_page[i].iova = start; > - INIT_LIST_HEAD(&msi_page[i].list); > - list_add(&msi_page[i].list, &cookie->msi_page_list); > + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); > + if (!msi_page) { > + ret = -ENOMEM; I think we can just return here and skip the cleanup below - by the time we get here the cookie itself has already been allocated and initialised, so even if iommu_dma_init_domain() fails someone else has already accepted the responsibility of calling iommu_put_dma_cookie() at some point later, which will clean up properly. Cheers, Robin. > + break; > + } > + msi_page->phys = start; > + msi_page->iova = start; > + INIT_LIST_HEAD(&msi_page->list); > + list_add(&msi_page->list, &cookie->msi_page_list); > start += iovad->granule; > } > > - return 0; > + if (ret) { > + list_for_each_entry_safe(msi_page, tmp, > + &cookie->msi_page_list, list) { > + if (msi_page->phys >= base && msi_page->phys < start) { > + list_del(&msi_page->list); > + kfree(msi_page); > + } > + } > + } > + > + return ret; > } > > static int iova_reserve_pci_windows(struct pci_dev *dev, >
On 2020-03-03 17:23, Robin Murphy wrote: > On 03/03/2020 11:51 am, Marc Zyngier wrote: >> The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page >> structures doesn't match the way iommu_put_dma_cookie() frees them. >> >> The former performs a single allocation of all the required >> structures, >> while the latter tries to free them one at a time. It doesn't quite >> work for the main use case (the GICv3 ITS where the range is 64kB) >> when the base ganule size is 4kB. >> >> This leads to a nice slab corruption on teardown, which is easily >> observable by simply creating a VF on a SRIOV-capable device, and >> tearing it down immediately (no need to even make use of it). >> >> Fix it by allocating iommu_dma_msi_page structures one at a time. > > Bleh, you know you're supposed to be using 64K pages on those things, > right? :P lalalala... ;-) [...] >> + if (!msi_page) { >> + ret = -ENOMEM; > > I think we can just return here and skip the cleanup below - by the > time we get here the cookie itself has already been allocated and > initialised, so even if iommu_dma_init_domain() fails someone else has > already accepted the responsibility of calling iommu_put_dma_cookie() > at some point later, which will clean up properly. Ah, that's a very good point. I'll refresh the patch with a simplified error handling. Thanks, M.
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a2e96a5fd9a7..01fa64856c12 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -171,25 +171,37 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, phys_addr_t start, phys_addr_t end) { struct iova_domain *iovad = &cookie->iovad; - struct iommu_dma_msi_page *msi_page; - int i, num_pages; + struct iommu_dma_msi_page *msi_page, *tmp; + int i, num_pages, ret = 0; + phys_addr_t base; - start -= iova_offset(iovad, start); + base = start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) { + ret = -ENOMEM; + break; + } + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } - return 0; + if (ret) { + list_for_each_entry_safe(msi_page, tmp, + &cookie->msi_page_list, list) { + if (msi_page->phys >= base && msi_page->phys < start) { + list_del(&msi_page->list); + kfree(msi_page); + } + } + } + + return ret; } static int iova_reserve_pci_windows(struct pci_dev *dev,
The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them. The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS where the range is 64kB) when the base granule size is 4kB. This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on a SRIOV-capable device, and tearing it down immediately (no need to even make use of it). Fix it by allocating iommu_dma_msi_page structures one at a time. Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions") Signed-off-by: Marc Zyngier <maz@kernel.org> Cc: Robin Murphy <robin.murphy@arm.com> Cc: Joerg Roedel <jroedel@suse.de> Cc: Eric Auger <eric.auger@redhat.com> Cc: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org --- drivers/iommu/dma-iommu.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-)