[04/13] drm/msm: add support for non-IOMMU systems

Message ID 1386459345-17731-5-git-send-email-robdclark@gmail.com
State Deferred

Commit Message

Rob Clark Dec. 7, 2013, 11:35 p.m. UTC
Add a VRAM carveout that is used for systems which do not have an IOMMU.

The VRAM carveout uses CMA.  The arch code must set up a CMA pool for the
device (preferably in highmem.. a 256m-512m VRAM pool in lowmem is not
cool).  The user can configure the VRAM pool size using the msm.vram
module param.
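
For illustration, a rough sketch of what the board-code side of that could
look like.  This is hypothetical: it assumes the dma_declare_contiguous()
CMA reservation API of this kernel generation, and the device and function
names are invented for the example:

  #include <linux/dma-contiguous.h>
  #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/sizes.h>

  /* Hypothetical board-code sketch, not part of this patch: reserve a
   * 256M CMA region for the DRM device at early boot (for example from
   * the machine descriptor's .reserve callback).  msm_drm_pdev is the
   * (hypothetical) platform_device for the DRM driver.  Passing 0 for
   * base and limit lets CMA choose the placement.
   */
  static void __init msm_reserve_vram(void)
  {
  	int ret = dma_declare_contiguous(&msm_drm_pdev.dev,
  			SZ_256M, 0, 0);
  	if (ret)
  		pr_warn("msm: VRAM carveout reservation failed: %d\n", ret);
  }

With the pool reserved, the driver then picks the actual carveout size at
load time via msm.vram (e.g. msm.vram=64m on the kernel command line).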

Technically, the abstraction of IOMMU behind msm_mmu is not strictly
needed, but it simplifies the GEM code a bit, and will be useful later
when I add support for a2xx devices with GPUMMU, so I decided to keep
this part.
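
For example, a later GPUMMU backend only has to fill in the same ops table
that msm_iommu.c provides below.  A hypothetical skeleton (only the
msm_gpummu_new() declaration in msm_mmu.h is real; the struct and internals
here are invented for illustration):

  #include "msm_drv.h"
  #include "msm_mmu.h"

  /* Hypothetical a2xx GPUMMU backend skeleton, not part of this patch:
   * a second MMU implementation slots in behind the same msm_mmu_funcs
   * vtable, with no changes needed in the GEM or GPU code.
   */
  struct msm_gpummu {
  	struct msm_mmu base;
  	struct msm_gpu *gpu;
  };
  #define to_msm_gpummu(x) container_of(x, struct msm_gpummu, base)

  static int msm_gpummu_map(struct msm_mmu *mmu, uint32_t iova,
  		struct sg_table *sgt, unsigned len, int prot)
  {
  	/* ...write PTEs for each sg entry into the gpummu pagetable... */
  	return 0;
  }

  /* .attach, .unmap and .destroy elided for brevity */

  static const struct msm_mmu_funcs gpummu_funcs = {
  	.map = msm_gpummu_map,
  	/* ... */
  };

  struct msm_mmu *msm_gpummu_new(struct drm_device *dev, struct msm_gpu *gpu)
  {
  	struct msm_gpummu *gpummu = kzalloc(sizeof(*gpummu), GFP_KERNEL);

  	if (!gpummu)
  		return ERR_PTR(-ENOMEM);

  	gpummu->gpu = gpu;
  	msm_mmu_init(&gpummu->base, dev, &gpummu_funcs);

  	return &gpummu->base;
  }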

It appears to be possible to configure the GPU to restrict access to
addresses within the VRAM pool, but this is not done yet.  So for now
the GPU will refuse to load if there is no MMU of any sort.  Once
address-based limits are supported and tested to confirm that we aren't
giving the GPU access to arbitrary memory, this restriction can be
lifted.
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/Makefile            |   1 +
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  15 ++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  13 ++-
 drivers/gpu/drm/msm/mdp4/mdp4_kms.c     |  29 +++---
 drivers/gpu/drm/msm/msm_drv.c           |  87 +++++++++-------
 drivers/gpu/drm/msm/msm_drv.h           |  21 ++--
 drivers/gpu/drm/msm/msm_gem.c           | 170 +++++++++++++++++---------------
 drivers/gpu/drm/msm/msm_gem.h           |   5 +
 drivers/gpu/drm/msm/msm_gpu.c           |  19 ++--
 drivers/gpu/drm/msm/msm_gpu.h           |   2 +-
 drivers/gpu/drm/msm/msm_iommu.c         | 148 +++++++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_mmu.h           |  47 +++++++++
 12 files changed, 410 insertions(+), 147 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/msm_iommu.c
 create mode 100644 drivers/gpu/drm/msm/msm_mmu.h

Patch

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index e5fa12b..ca62457 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -24,6 +24,7 @@  msm-y := \
 	msm_gem_prime.o \
 	msm_gem_submit.o \
 	msm_gpu.o \
+	msm_iommu.o \
 	msm_ringbuffer.o
 
 msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index d9e72a6..16fe15d 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -426,7 +426,20 @@  struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 	if (ret)
 		goto fail;
 
-	return &a3xx_gpu->base.base;
+	if (!gpu->mmu) {
+		/* TODO we think it is possible to configure the GPU to
+		 * restrict access to VRAM carveout.  But the required
+		 * registers are unknown.  For now just bail out and
+		 * limp along with just modesetting.  If it turns out
+		 * to not be possible to restrict access, then we must
+		 * implement a cmdstream validator.
+		 */
+		dev_err(dev->dev, "No memory protection without IOMMU\n");
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	return gpu;
 
 fail:
 	if (a3xx_gpu)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index d7bc51b..3f1c7b2 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -17,6 +17,7 @@ 
 
 #include "adreno_gpu.h"
 #include "msm_gem.h"
+#include "msm_mmu.h"
 
 struct adreno_info {
 	struct adreno_rev rev;
@@ -291,6 +292,7 @@  int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
 		struct adreno_rev rev)
 {
+	struct msm_mmu *mmu;
 	int i, ret;
 
 	/* identify gpu: */
@@ -338,10 +340,13 @@  int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	ret = msm_iommu_attach(drm, gpu->base.iommu,
-			iommu_ports, ARRAY_SIZE(iommu_ports));
-	if (ret)
-		return ret;
+	mmu = gpu->base.mmu;
+	if (mmu) {
+		ret = mmu->funcs->attach(mmu, iommu_ports,
+				ARRAY_SIZE(iommu_ports));
+		if (ret)
+			return ret;
+	}
 
 	gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs),
 			MSM_BO_UNCACHED);
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
index 8972ac3..bab8cbc 100644
--- a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
@@ -17,6 +17,7 @@ 
 
 
 #include "msm_drv.h"
+#include "msm_mmu.h"
 #include "mdp4_kms.h"
 
 static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev);
@@ -260,6 +261,7 @@  struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	struct mdp4_platform_config *config = mdp4_get_config(pdev);
 	struct mdp4_kms *mdp4_kms;
 	struct msm_kms *kms = NULL;
+	struct msm_mmu *mmu;
 	int ret;
 
 	mdp4_kms = kzalloc(sizeof(*mdp4_kms), GFP_KERNEL);
@@ -322,12 +324,6 @@  struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	clk_set_rate(mdp4_kms->clk, config->max_clk);
 	clk_set_rate(mdp4_kms->lut_clk, config->max_clk);
 
-	if (!config->iommu) {
-		dev_err(dev->dev, "no iommu\n");
-		ret = -ENXIO;
-		goto fail;
-	}
-
 	/* make sure things are off before attaching iommu (bootloader could
 	 * have left things on, in which case we'll start getting faults if
 	 * we don't disable):
@@ -337,12 +333,23 @@  struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 0);
 	mdelay(16);
 
-	ret = msm_iommu_attach(dev, config->iommu,
-			iommu_ports, ARRAY_SIZE(iommu_ports));
-	if (ret)
-		goto fail;
+	if (config->iommu) {
+		mmu = msm_iommu_new(dev, config->iommu);
+		if (IS_ERR(mmu)) {
+			ret = PTR_ERR(mmu);
+			goto fail;
+		}
+		ret = mmu->funcs->attach(mmu, iommu_ports,
+				ARRAY_SIZE(iommu_ports));
+		if (ret)
+			goto fail;
+	} else {
+		dev_info(dev->dev, "no iommu, fallback to phys "
+				"contig buffers for scanout\n");
+		mmu = NULL;
+	}
 
-	mdp4_kms->id = msm_register_iommu(dev, config->iommu);
+	mdp4_kms->id = msm_register_mmu(dev, mmu);
 	if (mdp4_kms->id < 0) {
 		ret = mdp4_kms->id;
 		dev_err(dev->dev, "failed to register mdp4 iommu: %d\n", ret);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 8653769..2e3d746 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -30,50 +30,19 @@  static const struct drm_mode_config_funcs mode_config_funcs = {
 	.output_poll_changed = msm_fb_output_poll_changed,
 };
 
-static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
-		unsigned long iova, int flags, void *arg)
-{
-	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
-	return 0;
-}
-
-int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu)
+int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu)
 {
 	struct msm_drm_private *priv = dev->dev_private;
-	int idx = priv->num_iommus++;
+	int idx = priv->num_mmus++;
 
-	if (WARN_ON(idx >= ARRAY_SIZE(priv->iommus)))
+	if (WARN_ON(idx >= ARRAY_SIZE(priv->mmus)))
 		return -EINVAL;
 
-	priv->iommus[idx] = iommu;
-
-	iommu_set_fault_handler(iommu, msm_fault_handler, dev);
-
-	/* need to iommu_attach_device() somewhere??  on resume?? */
+	priv->mmus[idx] = mmu;
 
 	return idx;
 }
 
-int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
-		const char **names, int cnt)
-{
-	int i, ret;
-
-	for (i = 0; i < cnt; i++) {
-		/* TODO maybe some day msm iommu won't require this hack: */
-		struct device *msm_iommu_get_ctx(const char *ctx_name);
-		struct device *ctx = msm_iommu_get_ctx(names[i]);
-		if (!ctx)
-			continue;
-		ret = iommu_attach_device(iommu, ctx);
-		if (ret) {
-			dev_warn(dev->dev, "could not attach iommu to %s", names[i]);
-			return ret;
-		}
-	}
-	return 0;
-}
-
 #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING
 static bool reglog = false;
 MODULE_PARM_DESC(reglog, "Enable register read/write logging");
@@ -82,6 +51,10 @@  module_param(reglog, bool, 0600);
 #define reglog 0
 #endif
 
+static char *vram = "16m";
+MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU)");
+module_param(vram, charp, 0);
+
 void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 		const char *dbgname)
 {
@@ -161,6 +134,14 @@  static int msm_unload(struct drm_device *dev)
 		mutex_unlock(&dev->struct_mutex);
 	}
 
+	if (priv->vram.paddr) {
+		DEFINE_DMA_ATTRS(attrs);
+		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+		drm_mm_takedown(&priv->vram.mm);
+		dma_free_attrs(dev->dev, priv->vram.size, NULL,
+				priv->vram.paddr, &attrs);
+	}
+
 	dev->dev_private = NULL;
 
 	kfree(priv);
@@ -191,6 +172,41 @@  static int msm_load(struct drm_device *dev, unsigned long flags)
 
 	drm_mode_config_init(dev);
 
+	/* if we have no IOMMU, then we need to use carveout allocator.
+	 * Grab the entire CMA chunk carved out in early startup in
+	 * mach-msm:
+	 */
+	if (!iommu_present(&platform_bus_type)) {
+		DEFINE_DMA_ATTRS(attrs);
+		unsigned long size;
+		void *p;
+
+		DBG("using %s VRAM carveout", vram);
+		size = memparse(vram, NULL);
+		priv->vram.size = size;
+
+		drm_mm_init(&priv->vram.mm, 0, (size >> PAGE_SHIFT) - 1);
+
+		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+
+		/* note that for no-kernel-mapping, the vaddr returned
+		 * is bogus, but non-null if allocation succeeded:
+		 */
+		p = dma_alloc_attrs(dev->dev, size,
+				&priv->vram.paddr, GFP_KERNEL, &attrs);
+		if (!p) {
+			dev_err(dev->dev, "failed to allocate VRAM\n");
+			priv->vram.paddr = 0;
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		dev_info(dev->dev, "VRAM: %08x->%08x\n",
+				(uint32_t)priv->vram.paddr,
+				(uint32_t)(priv->vram.paddr + size));
+	}
+
 	kms = mdp4_kms_init(dev);
 	if (IS_ERR(kms)) {
 		/*
@@ -778,6 +794,7 @@  static const struct dev_pm_ops msm_pm_ops = {
 
 static int msm_pdev_probe(struct platform_device *pdev)
 {
+	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 	return drm_platform_init(&msm_driver, pdev);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 8823a88..3f9ba33 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -55,6 +55,7 @@  static inline struct device *msm_iommu_get_ctx(const char *ctx_name)
 
 struct msm_kms;
 struct msm_gpu;
+struct msm_mmu;
 
 #define NUM_DOMAINS 2    /* one for KMS, then one per gpu core (?) */
 
@@ -87,9 +88,9 @@  struct msm_drm_private {
 	/* callbacks deferred until bo is inactive: */
 	struct list_head fence_cbs;
 
-	/* registered IOMMU domains: */
-	unsigned int num_iommus;
-	struct iommu_domain *iommus[NUM_DOMAINS];
+	/* registered MMUs: */
+	unsigned int num_mmus;
+	struct msm_mmu *mmus[NUM_DOMAINS];
 
 	unsigned int num_planes;
 	struct drm_plane *planes[8];
@@ -105,6 +106,16 @@  struct msm_drm_private {
 
 	unsigned int num_connectors;
 	struct drm_connector *connectors[8];
+
+	/* VRAM carveout, used when no IOMMU: */
+	struct {
+		unsigned long size;
+		dma_addr_t paddr;
+		/* NOTE: mm managed at the page level, size is in # of pages
+		 * and position mm_node->start is in # of pages:
+		 */
+		struct drm_mm mm;
+	} vram;
 };
 
 struct msm_format {
@@ -155,9 +166,7 @@  struct msm_kms {
 
 struct msm_kms *mdp4_kms_init(struct drm_device *dev);
 
-int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu);
-int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
-		const char **names, int cnt);
+int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu);
 
 int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence,
 		struct timespec *timeout);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index e587d25..d8d60c9 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -22,7 +22,45 @@ 
 #include "msm_drv.h"
 #include "msm_gem.h"
 #include "msm_gpu.h"
+#include "msm_mmu.h"
 
+static dma_addr_t physaddr(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	struct msm_drm_private *priv = obj->dev->dev_private;
+	return (((dma_addr_t)msm_obj->vram_node->start) << PAGE_SHIFT) +
+			priv->vram.paddr;
+}
+
+/* allocate pages from VRAM carveout, used when no IOMMU: */
+static struct page **get_pages_vram(struct drm_gem_object *obj,
+		int npages)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	struct msm_drm_private *priv = obj->dev->dev_private;
+	dma_addr_t paddr;
+	struct page **p;
+	int ret, i;
+
+	p = drm_malloc_ab(npages, sizeof(struct page *));
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node,
+			npages, 0, DRM_MM_SEARCH_DEFAULT);
+	if (ret) {
+		drm_free_large(p);
+		return ERR_PTR(ret);
+	}
+
+	paddr = physaddr(obj);
+	for (i = 0; i < npages; i++) {
+		p[i] = phys_to_page(paddr);
+		paddr += PAGE_SIZE;
+	}
+
+	return p;
+}
 
 /* called with dev->struct_mutex held */
 static struct page **get_pages(struct drm_gem_object *obj)
@@ -31,9 +69,14 @@  static struct page **get_pages(struct drm_gem_object *obj)
 
 	if (!msm_obj->pages) {
 		struct drm_device *dev = obj->dev;
-		struct page **p = drm_gem_get_pages(obj, 0);
+		struct page **p;
 		int npages = obj->size >> PAGE_SHIFT;
 
+		if (iommu_present(&platform_bus_type))
+			p = drm_gem_get_pages(obj, 0);
+		else
+			p = get_pages_vram(obj, npages);
+
 		if (IS_ERR(p)) {
 			dev_err(dev->dev, "could not get pages: %ld\n",
 					PTR_ERR(p));
@@ -73,7 +116,11 @@  static void put_pages(struct drm_gem_object *obj)
 		sg_free_table(msm_obj->sgt);
 		kfree(msm_obj->sgt);
 
-		drm_gem_put_pages(obj, msm_obj->pages, true, false);
+		if (iommu_present(&platform_bus_type))
+			drm_gem_put_pages(obj, msm_obj->pages, true, false);
+		else
+			drm_mm_remove_node(msm_obj->vram_node);
+
 		msm_obj->pages = NULL;
 	}
 }
@@ -138,7 +185,6 @@  int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
-	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	struct drm_device *dev = obj->dev;
 	struct page **pages;
 	unsigned long pfn;
@@ -163,7 +209,7 @@  int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	pgoff = ((unsigned long)vmf->virtual_address -
 			vma->vm_start) >> PAGE_SHIFT;
 
-	pfn = page_to_pfn(msm_obj->pages[pgoff]);
+	pfn = page_to_pfn(pages[pgoff]);
 
 	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
 			pfn, pfn << PAGE_SHIFT);
@@ -219,67 +265,6 @@  uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj)
 	return offset;
 }
 
-/* helpers for dealing w/ iommu: */
-static int map_range(struct iommu_domain *domain, unsigned int iova,
-		struct sg_table *sgt, unsigned int len, int prot)
-{
-	struct scatterlist *sg;
-	unsigned int da = iova;
-	unsigned int i, j;
-	int ret;
-
-	if (!domain || !sgt)
-		return -EINVAL;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		u32 pa = sg_phys(sg) - sg->offset;
-		size_t bytes = sg->length + sg->offset;
-
-		VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes);
-
-		ret = iommu_map(domain, da, pa, bytes, prot);
-		if (ret)
-			goto fail;
-
-		da += bytes;
-	}
-
-	return 0;
-
-fail:
-	da = iova;
-
-	for_each_sg(sgt->sgl, sg, i, j) {
-		size_t bytes = sg->length + sg->offset;
-		iommu_unmap(domain, da, bytes);
-		da += bytes;
-	}
-	return ret;
-}
-
-static void unmap_range(struct iommu_domain *domain, unsigned int iova,
-		struct sg_table *sgt, unsigned int len)
-{
-	struct scatterlist *sg;
-	unsigned int da = iova;
-	int i;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		size_t bytes = sg->length + sg->offset;
-		size_t unmapped;
-
-		unmapped = iommu_unmap(domain, da, bytes);
-		if (unmapped < bytes)
-			break;
-
-		VERB("unmap[%d]: %08x(%x)", i, iova, bytes);
-
-		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
-
-		da += bytes;
-	}
-}
-
 /* should be called under struct_mutex.. although it can be called
  * from atomic context without struct_mutex to acquire an extra
  * iova ref if you know one is already held.
@@ -295,15 +280,20 @@  int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
 
 	if (!msm_obj->domain[id].iova) {
 		struct msm_drm_private *priv = obj->dev->dev_private;
-		uint32_t offset = (uint32_t)mmap_offset(obj);
-		struct page **pages;
-		pages = get_pages(obj);
+		struct msm_mmu *mmu = priv->mmus[id];
+		struct page **pages = get_pages(obj);
+
 		if (IS_ERR(pages))
 			return PTR_ERR(pages);
-		// XXX ideally we would not map buffers writable when not needed...
-		ret = map_range(priv->iommus[id], offset, msm_obj->sgt,
-				obj->size, IOMMU_READ | IOMMU_WRITE);
-		msm_obj->domain[id].iova = offset;
+
+		if (iommu_present(&platform_bus_type)) {
+			uint32_t offset = (uint32_t)mmap_offset(obj);
+			ret = mmu->funcs->map(mmu, offset, msm_obj->sgt,
+					obj->size, IOMMU_READ | IOMMU_WRITE);
+			msm_obj->domain[id].iova = offset;
+		} else {
+			msm_obj->domain[id].iova = physaddr(obj);
+		}
 	}
 
 	if (!ret)
@@ -514,6 +504,7 @@  void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
 void msm_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = obj->dev->dev_private;
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	int id;
 
@@ -525,11 +516,10 @@  void msm_gem_free_object(struct drm_gem_object *obj)
 	list_del(&msm_obj->mm_list);
 
 	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
-		if (msm_obj->domain[id].iova) {
-			struct msm_drm_private *priv = obj->dev->dev_private;
+		struct msm_mmu *mmu = priv->mmus[id];
+		if (mmu && msm_obj->domain[id].iova) {
 			uint32_t offset = (uint32_t)mmap_offset(obj);
-			unmap_range(priv->iommus[id], offset,
-					msm_obj->sgt, obj->size);
+			mmu->funcs->unmap(mmu, offset, msm_obj->sgt, obj->size);
 		}
 	}
 
@@ -591,6 +581,7 @@  static int msm_gem_new_impl(struct drm_device *dev,
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_object *msm_obj;
+	unsigned sz;
 
 	switch (flags & MSM_BO_CACHE_MASK) {
 	case MSM_BO_UNCACHED:
@@ -603,10 +594,17 @@  static int msm_gem_new_impl(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	msm_obj = kzalloc(sizeof(*msm_obj), GFP_KERNEL);
+	sz = sizeof(*msm_obj);
+	if (!iommu_present(&platform_bus_type))
+		sz += sizeof(struct drm_mm_node);
+
+	msm_obj = kzalloc(sz, GFP_KERNEL);
 	if (!msm_obj)
 		return -ENOMEM;
 
+	if (!iommu_present(&platform_bus_type))
+		msm_obj->vram_node = (void *)&msm_obj[1];
+
 	msm_obj->flags = flags;
 
 	msm_obj->resv = &msm_obj->_resv;
@@ -623,7 +621,7 @@  static int msm_gem_new_impl(struct drm_device *dev,
 struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 		uint32_t size, uint32_t flags)
 {
-	struct drm_gem_object *obj;
+	struct drm_gem_object *obj = NULL;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -634,9 +632,13 @@  struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
-	ret = drm_gem_object_init(dev, obj, size);
-	if (ret)
-		goto fail;
+	if (iommu_present(&platform_bus_type)) {
+		ret = drm_gem_object_init(dev, obj, size);
+		if (ret)
+			goto fail;
+	} else {
+		drm_gem_private_object_init(dev, obj, size);
+	}
 
 	return obj;
 
@@ -654,6 +656,12 @@  struct drm_gem_object *msm_gem_import(struct drm_device *dev,
 	struct drm_gem_object *obj;
 	int ret, npages;
 
+	/* if we don't have IOMMU, don't bother pretending we can import: */
+	if (!iommu_present(&platform_bus_type)) {
+		dev_err(dev->dev, "cannot import without IOMMU\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	size = PAGE_ALIGN(size);
 
 	ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj);
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index f4f23a5..3246bb4 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -57,6 +57,11 @@  struct msm_gem_object {
 	/* normally (resv == &_resv) except for imported bo's */
 	struct reservation_object *resv;
 	struct reservation_object _resv;
+
+	/* For physically contiguous buffers.  Used when we don't have
+	 * an IOMMU.
+	 */
+	struct drm_mm_node *vram_node;
 };
 #define to_msm_bo(x) container_of(x, struct msm_gem_object, base)
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 71f105f..4ebce8b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -17,6 +17,7 @@ 
 
 #include "msm_gpu.h"
 #include "msm_gem.h"
+#include "msm_mmu.h"
 
 
 /*
@@ -353,6 +354,7 @@  int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
 		const char *name, const char *ioname, const char *irqname, int ringsz)
 {
+	struct iommu_domain *iommu;
 	int i, ret;
 
 	gpu->dev = drm;
@@ -418,13 +420,14 @@  int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	 * and have separate page tables per context.  For now, to keep things
 	 * simple and to get something working, just use a single address space:
 	 */
-	gpu->iommu = iommu_domain_alloc(&platform_bus_type);
-	if (!gpu->iommu) {
-		dev_err(drm->dev, "failed to allocate IOMMU\n");
-		ret = -ENOMEM;
-		goto fail;
+	iommu = iommu_domain_alloc(&platform_bus_type);
+	if (iommu) {
+		dev_info(drm->dev, "%s: using IOMMU\n", name);
+		gpu->mmu = msm_iommu_new(drm, iommu);
+	} else {
+		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
 	}
-	gpu->id = msm_register_iommu(drm, gpu->iommu);
+	gpu->id = msm_register_mmu(drm, gpu->mmu);
 
 	/* Create ringbuffer: */
 	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
@@ -464,6 +467,6 @@  void msm_gpu_cleanup(struct msm_gpu *gpu)
 		msm_ringbuffer_destroy(gpu->rb);
 	}
 
-	if (gpu->iommu)
-		iommu_domain_free(gpu->iommu);
+	if (gpu->mmu)
+		gpu->mmu->funcs->destroy(gpu->mmu);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 08d0842..458db8c 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -78,7 +78,7 @@  struct msm_gpu {
 	void __iomem *mmio;
 	int irq;
 
-	struct iommu_domain *iommu;
+	struct msm_mmu *mmu;
 	int id;
 
 	/* Power Control: */
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
new file mode 100644
index 0000000..014a3fd
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -0,0 +1,148 @@ 
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+#include "msm_mmu.h"
+
+struct msm_iommu {
+	struct msm_mmu base;
+	struct iommu_domain *domain;
+};
+#define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+
+static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
+		unsigned long iova, int flags, void *arg)
+{
+	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
+	return 0;
+}
+
+static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
+{
+	struct drm_device *dev = mmu->dev;
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	int i, ret;
+
+	for (i = 0; i < cnt; i++) {
+		struct device *msm_iommu_get_ctx(const char *ctx_name);
+		struct device *ctx = msm_iommu_get_ctx(names[i]);
+		if (!ctx)
+			continue;
+		ret = iommu_attach_device(iommu->domain, ctx);
+		if (ret) {
+			dev_warn(dev->dev, "could not attach iommu to %s", names[i]);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova,
+		struct sg_table *sgt, unsigned len, int prot)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	struct iommu_domain *domain = iommu->domain;
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	unsigned int i, j;
+	int ret;
+
+	if (!domain || !sgt)
+		return -EINVAL;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		u32 pa = sg_phys(sg) - sg->offset;
+		size_t bytes = sg->length + sg->offset;
+
+		VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes);
+
+		ret = iommu_map(domain, da, pa, bytes, prot);
+		if (ret)
+			goto fail;
+
+		da += bytes;
+	}
+
+	return 0;
+
+fail:
+	da = iova;
+
+	for_each_sg(sgt->sgl, sg, i, j) {
+		size_t bytes = sg->length + sg->offset;
+		iommu_unmap(domain, da, bytes);
+		da += bytes;
+	}
+	return ret;
+}
+
+static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova,
+		struct sg_table *sgt, unsigned len)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	struct iommu_domain *domain = iommu->domain;
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	int i;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes = sg->length + sg->offset;
+		size_t unmapped;
+
+		unmapped = iommu_unmap(domain, da, bytes);
+		if (unmapped < bytes)
+			return unmapped;
+
+		VERB("unmap[%d]: %08x(%x)", i, iova, bytes);
+
+		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
+
+		da += bytes;
+	}
+
+	return 0;
+}
+
+static void msm_iommu_destroy(struct msm_mmu *mmu)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+	iommu_domain_free(iommu->domain);
+	kfree(iommu);
+}
+
+static const struct msm_mmu_funcs funcs = {
+		.attach = msm_iommu_attach,
+		.map = msm_iommu_map,
+		.unmap = msm_iommu_unmap,
+		.destroy = msm_iommu_destroy,
+};
+
+struct msm_mmu *msm_iommu_new(struct drm_device *dev, struct iommu_domain *domain)
+{
+	struct msm_iommu *iommu;
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return ERR_PTR(-ENOMEM);
+
+	iommu->domain = domain;
+	msm_mmu_init(&iommu->base, dev, &funcs);
+	iommu_set_fault_handler(domain, msm_fault_handler, dev);
+
+	return &iommu->base;
+}
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
new file mode 100644
index 0000000..0303244
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -0,0 +1,47 @@ 
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_MMU_H__
+#define __MSM_MMU_H__
+
+#include <linux/iommu.h>
+
+struct msm_mmu_funcs {
+	int (*attach)(struct msm_mmu *mmu, const char **names, int cnt);
+	int (*map)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt,
+			unsigned len, int prot);
+	int (*unmap)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt,
+			unsigned len);
+	void (*destroy)(struct msm_mmu *mmu);
+};
+
+struct msm_mmu {
+	const struct msm_mmu_funcs *funcs;
+	struct drm_device *dev;
+};
+
+static inline void msm_mmu_init(struct msm_mmu *mmu, struct drm_device *dev,
+		const struct msm_mmu_funcs *funcs)
+{
+	mmu->dev = dev;
+	mmu->funcs = funcs;
+}
+
+struct msm_mmu *msm_iommu_new(struct drm_device *dev, struct iommu_domain *domain);
+struct msm_mmu *msm_gpummu_new(struct drm_device *dev, struct msm_gpu *gpu);
+
+#endif /* __MSM_MMU_H__ */