@@ -3092,7 +3092,8 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
- phys_addr_t paddr, size_t page_size, int iommu_prot)
+ phys_addr_t paddr, size_t page_size, int iommu_prot,
+ gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
int prot = 0;
@@ -3106,9 +3107,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- mutex_lock(&domain->api_lock);
- ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL);
- mutex_unlock(&domain->api_lock);
+ ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
domain_flush_np_cache(domain, iova, page_size);
@@ -3119,16 +3118,11 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size)
{
struct protection_domain *domain = to_pdomain(dom);
- size_t unmap_size;
if (domain->mode == PAGE_MODE_NONE)
return 0;
- mutex_lock(&domain->api_lock);
- unmap_size = iommu_unmap_page(domain, iova, page_size);
- mutex_unlock(&domain->api_lock);
-
- return unmap_size;
+ return iommu_unmap_page(domain, iova, page_size);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -1777,7 +1777,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
@@ -1286,7 +1286,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -440,7 +440,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
if (!iova)
return DMA_MAPPING_ERROR;
- if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
+ if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
iommu_dma_free_iova(cookie, iova, size);
return DMA_MAPPING_ERROR;
}
@@ -641,7 +641,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
+ if (iommu_map_sg_atomic(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
< size)
goto out_free_sg;
@@ -1003,7 +1003,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
* We'll leave any physical concatenation to the IOMMU driver's
* implementation - it knows better than we do.
*/
- if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len)
+ if (iommu_map_sg_atomic(domain, iova, sg, nents, prot) < iova_len)
goto out_free_iova;
return __finalise_sg(dev, sg, nents, iova);
@@ -1078,7 +1078,7 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,
*/
static int exynos_iommu_map(struct iommu_domain *iommu_domain,
unsigned long l_iova, phys_addr_t paddr, size_t size,
- int prot)
+ int prot, gfp_t gfp)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_pte_t *entry;
@@ -5137,7 +5137,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
- size_t size, int iommu_prot)
+ size_t size, int iommu_prot, gfp_t gfp)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;
@@ -1584,8 +1584,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
return pgsize;
}
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+int __iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_ops *ops = domain->ops;
unsigned long orig_iova = iova;
@@ -1622,8 +1622,8 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
+ ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
- ret = ops->map(domain, iova, paddr, pgsize, prot);
if (ret)
break;
@@ -1643,8 +1643,22 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
+
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ might_sleep();
+ return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(iommu_map);
+int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(iommu_map_atomic);
+
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
bool sync)
@@ -1719,8 +1733,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
-size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
+size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp)
{
size_t len = 0, mapped = 0;
phys_addr_t start;
@@ -1731,7 +1746,9 @@ size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
phys_addr_t s_phys = sg_phys(sg);
if (len && s_phys != start + len) {
- ret = iommu_map(domain, iova + mapped, start, len, prot);
+ ret = __iommu_map(domain, iova + mapped, start,
+ len, prot, gfp);
+
if (ret)
goto out_err;
@@ -1759,8 +1776,22 @@ size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
return 0;
}
+
+size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot)
+{
+ might_sleep();
+ return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(iommu_map_sg);
+size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot)
+{
+ return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(iommu_map_sg_atomic);
+
int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
phys_addr_t paddr, u64 size, int prot)
{
@@ -707,7 +707,7 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain,
}
static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -508,7 +508,7 @@ static void msm_iommu_detach_dev(struct iommu_domain *domain,
}
static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t len, int prot)
+ phys_addr_t pa, size_t len, int prot, gfp_t gfp)
{
struct msm_priv *priv = to_msm_priv(domain);
unsigned long flags;
@@ -364,7 +364,7 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain,
}
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
@@ -303,7 +303,7 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain,
}
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT;
@@ -1109,7 +1109,7 @@ static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz)
}
static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
- phys_addr_t pa, size_t bytes, int prot)
+ phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -411,7 +411,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
}
static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
int ret;
unsigned long flags;
@@ -760,7 +760,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
}
static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -265,7 +265,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
}
static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
int flags = ZPCI_PTE_VALID, rc = 0;
@@ -190,7 +190,7 @@ static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova,
}
static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t bytes, int prot)
+ phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
{
struct gart_device *gart = gart_handle;
int ret;
@@ -641,7 +641,7 @@ static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova,
}
static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
@@ -198,7 +198,7 @@ struct iommu_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot);
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size);
void (*flush_iotlb_all)(struct iommu_domain *domain);
@@ -295,12 +295,17 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
+extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size);
extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg,unsigned int nents, int prot);
+extern size_t iommu_map_sg_atomic(struct iommu_domain *domain,
+ unsigned long iova, struct scatterlist *sg,
+ unsigned int nents, int prot);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
extern void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler, void *token);
@@ -469,6 +474,13 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
return -ENODEV;
}
+static inline int iommu_map_atomic(struct iommu_domain *domain,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int prot)
+{
+ return -ENODEV;
+}
+
static inline size_t iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
@@ -488,6 +500,13 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain,
return 0;
}
+static inline size_t iommu_map_sg_atomic(struct iommu_domain *domain,
+ unsigned long iova, struct scatterlist *sg,
+ unsigned int nents, int prot)
+{
+ return 0;
+}
+
static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
{
}
Add a gfp_t parameter to the iommu_ops::map function. Remove the needless locking in the AMD iommu driver. The iommu_ops::map function (or the iommu_map function which calls it) was always supposed to be sleepable (according to Joerg's comment in this thread: https://lore.kernel.org/patchwork/patch/977520/ ) and so should probably have had a "might_sleep()" since it was written. However currently the dma-iommu api can call iommu_map in an atomic context, which it shouldn't do. This doesn't cause any problems because any iommu driver which uses the dma-iommu api uses gfp_atomic in it's iommu_ops::map function. But doing this wastes the memory allocators atomic pools. We can remove the mutex lock from amd_iommu_map and amd_iommu_unmap. iommu_map doesn’t lock while mapping and so no two calls should touch the same iova range. The AMD driver already handles the page table page allocations without locks so we can safely remove the locks. Signed-off-by: Tom Murphy <tmurphy@arista.com> --- drivers/iommu/amd_iommu.c | 14 ++++------- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/dma-iommu.c | 6 ++--- drivers/iommu/exynos-iommu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/iommu.c | 43 +++++++++++++++++++++++++++++----- drivers/iommu/ipmmu-vmsa.c | 2 +- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/mtk_iommu_v1.c | 2 +- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/qcom_iommu.c | 2 +- drivers/iommu/rockchip-iommu.c | 2 +- drivers/iommu/s390-iommu.c | 2 +- drivers/iommu/tegra-gart.c | 2 +- drivers/iommu/tegra-smmu.c | 2 +- include/linux/iommu.h | 21 ++++++++++++++++- 18 files changed, 78 insertions(+), 34 deletions(-)