Message ID | 1453908108-12869-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jan 27, 2016 at 03:21:48PM +0000, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > This enables mapping via CPU using the proper DRM mmap API for > better debug (Valgrind) and implementation symmetry in the > driver. > > v2: > * Use normal mutex, skip domain management and pin pages. (Chris Wilson) > * No need to drop struct mutex over vma_insert_pfn. I think we still neeed that on first fault: - userspac calls set_domain(GTT) - kernel does nothing since there's no binding - userspace starts accessing gtt mmap - kernel faults, but doesn't update domain/flush cpu caches -> BOOM Needs more testcases I'd say ;-) -Daniel > * Turn off write-combine set up by the DRM core. > * Commit message. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 98 ++++++++++++++++++++++++++++++++++++++--- > include/uapi/drm/i915_drm.h | 3 ++ > 2 files changed, 95 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index a1e2832047e2..a690cee31f20 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1954,6 +1954,62 @@ out: > return i915_gem_ret_to_vm_ret(dev_priv, ret); > } > > +static int > +i915_gem_cpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > +{ > + struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); > + struct drm_device *dev = obj->base.dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); > + pgoff_t page_offset; > + struct page *page; > + unsigned long pgprot; > + int ret; > + > + /* We don't use vmf->pgoff since that has the fake offset */ > + page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> > + PAGE_SHIFT; > + > + trace_i915_gem_object_fault(obj, page_offset, true, write); > + > + intel_runtime_pm_get(dev_priv); > + > + ret = 
mutex_lock_interruptible(&dev->struct_mutex); > + if (ret) > + goto out; > + > + ret = i915_gem_object_get_pages(obj); > + if (ret) > + goto out_unlock; > + > + i915_gem_object_pin_pages(obj); > + > + page = i915_gem_object_get_page(obj, page_offset); > + if (!page) { > + ret = -EFAULT; > + goto out_unpin; > + } > + > + ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, > + page_to_pfn(page)); > + if (ret == 0) { > + /* DRM core sets up WC by default and we want WB. */ > + pgprot = pgprot_val(vm_get_page_prot(vma->vm_flags)); > + pgprot &= ~_PAGE_CACHE_MASK; > + pgprot |= cachemode2protval(_PAGE_CACHE_MODE_WB); > + vma->vm_page_prot = __pgprot(pgprot); > + } > + > +out_unpin: > + i915_gem_object_unpin_pages(obj); > +out_unlock: > + mutex_unlock(&dev->struct_mutex); > +out: > + intel_runtime_pm_put(dev_priv); > + > + return i915_gem_ret_to_vm_ret(dev_priv, ret); > +} > + > /** > * i915_gem_release_mmap - remove physical page mappings > * @obj: obj in question > @@ -2078,11 +2134,18 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) > drm_gem_free_mmap_offset(&obj->base); > } > > -int > -i915_gem_mmap_gtt(struct drm_file *file, > - struct drm_device *dev, > - uint32_t handle, > - uint64_t *offset) > +static const struct vm_operations_struct i915_gem_cpu_vm_ops = { > + .fault = i915_gem_cpu_fault, > + .open = drm_gem_vm_open, > + .close = drm_gem_vm_close, > +}; > + > +static int > +i915_gem_mmap(struct drm_file *file, > + struct drm_device *dev, > + uint32_t handle, > + uint32_t flags, > + uint64_t *offset) > { > struct drm_i915_gem_object *obj; > int ret; > @@ -2103,10 +2166,23 @@ i915_gem_mmap_gtt(struct drm_file *file, > goto out; > } > > + if (!obj->base.filp && (flags & I915_MMAP2_CPU)) { > + DRM_DEBUG("Attempting to mmap non-shm based object via CPU!\n"); > + ret = -EINVAL; > + goto out; > + } > + > ret = i915_gem_object_create_mmap_offset(obj); > if (ret) > goto out; > > + if (flags & I915_MMAP2_CPU) { > + ret = 
drm_vma_node_set_vm_ops(&obj->base.vma_node, > + &i915_gem_cpu_vm_ops); > + if (ret) > + goto out; > + } > + > *offset = drm_vma_node_offset_addr(&obj->base.vma_node); > > out: > @@ -2116,6 +2192,15 @@ unlock: > return ret; > } > > +int > +i915_gem_mmap_gtt(struct drm_file *file, > + struct drm_device *dev, > + uint32_t handle, > + uint64_t *offset) > +{ > + return i915_gem_mmap(file, dev, handle, 0, offset); > +} > + > /** > * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing > * @dev: DRM device > @@ -2137,7 +2222,8 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, > { > struct drm_i915_gem_mmap_gtt *args = data; > > - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); > + return i915_gem_mmap(file, dev, args->handle, args->flags, > + &args->offset); > } > > /* Immediately discard the backing storage */ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 6a19371391fa..359a36d604bb 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -528,6 +528,9 @@ struct drm_i915_gem_mmap_gtt { > * This is a fixed-size type for 32/64 compatibility. > */ > __u64 offset; > + > +#define I915_MMAP2_CPU 0x1 > + __u64 flags; > }; > > struct drm_i915_gem_set_domain { > -- > 1.9.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 27/01/16 15:51, Daniel Vetter wrote: > On Wed, Jan 27, 2016 at 03:21:48PM +0000, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> This enables mapping via CPU using the proper DRM mmap API for >> better debug (Valgrind) and implementation symmetry in the >> driver. >> >> v2: >> * Use normal mutex, skip domain management and pin pages. (Chris Wilson) >> * No need to drop struct mutex over vma_insert_pfn. > > I think we still neeed that on first fault: > > - userspac calls set_domain(GTT) > - kernel does nothing since there's no binding > - userspace starts accessing gtt mmap > - kernel faults, but doesn't update domain/flush cpu caches > -> BOOM Boom what? :) (seriously I don't follow) And as an aside, you would merge this in general or see value in it? > Needs more testcases I'd say ;-) Oh it has no dedicated ones, just gem_mmap_gtt hacked up to always pass the new flag in (so request CPU mmap via the mmap_gtt ioctl). Regards, Tvrtko > -Daniel > >> * Turn off write-combine set up by the DRM core. >> * Commit message. 
>> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> --- >> drivers/gpu/drm/i915/i915_gem.c | 98 ++++++++++++++++++++++++++++++++++++++--- >> include/uapi/drm/i915_drm.h | 3 ++ >> 2 files changed, 95 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index a1e2832047e2..a690cee31f20 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -1954,6 +1954,62 @@ out: >> return i915_gem_ret_to_vm_ret(dev_priv, ret); >> } >> >> +static int >> +i915_gem_cpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) >> +{ >> + struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); >> + struct drm_device *dev = obj->base.dev; >> + struct drm_i915_private *dev_priv = dev->dev_private; >> + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); >> + pgoff_t page_offset; >> + struct page *page; >> + unsigned long pgprot; >> + int ret; >> + >> + /* We don't use vmf->pgoff since that has the fake offset */ >> + page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> >> + PAGE_SHIFT; >> + >> + trace_i915_gem_object_fault(obj, page_offset, true, write); >> + >> + intel_runtime_pm_get(dev_priv); >> + >> + ret = mutex_lock_interruptible(&dev->struct_mutex); >> + if (ret) >> + goto out; >> + >> + ret = i915_gem_object_get_pages(obj); >> + if (ret) >> + goto out_unlock; >> + >> + i915_gem_object_pin_pages(obj); >> + >> + page = i915_gem_object_get_page(obj, page_offset); >> + if (!page) { >> + ret = -EFAULT; >> + goto out_unpin; >> + } >> + >> + ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, >> + page_to_pfn(page)); >> + if (ret == 0) { >> + /* DRM core sets up WC by default and we want WB. 
*/ >> + pgprot = pgprot_val(vm_get_page_prot(vma->vm_flags)); >> + pgprot &= ~_PAGE_CACHE_MASK; >> + pgprot |= cachemode2protval(_PAGE_CACHE_MODE_WB); >> + vma->vm_page_prot = __pgprot(pgprot); >> + } >> + >> +out_unpin: >> + i915_gem_object_unpin_pages(obj); >> +out_unlock: >> + mutex_unlock(&dev->struct_mutex); >> +out: >> + intel_runtime_pm_put(dev_priv); >> + >> + return i915_gem_ret_to_vm_ret(dev_priv, ret); >> +} >> + >> /** >> * i915_gem_release_mmap - remove physical page mappings >> * @obj: obj in question >> @@ -2078,11 +2134,18 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) >> drm_gem_free_mmap_offset(&obj->base); >> } >> >> -int >> -i915_gem_mmap_gtt(struct drm_file *file, >> - struct drm_device *dev, >> - uint32_t handle, >> - uint64_t *offset) >> +static const struct vm_operations_struct i915_gem_cpu_vm_ops = { >> + .fault = i915_gem_cpu_fault, >> + .open = drm_gem_vm_open, >> + .close = drm_gem_vm_close, >> +}; >> + >> +static int >> +i915_gem_mmap(struct drm_file *file, >> + struct drm_device *dev, >> + uint32_t handle, >> + uint32_t flags, >> + uint64_t *offset) >> { >> struct drm_i915_gem_object *obj; >> int ret; >> @@ -2103,10 +2166,23 @@ i915_gem_mmap_gtt(struct drm_file *file, >> goto out; >> } >> >> + if (!obj->base.filp && (flags & I915_MMAP2_CPU)) { >> + DRM_DEBUG("Attempting to mmap non-shm based object via CPU!\n"); >> + ret = -EINVAL; >> + goto out; >> + } >> + >> ret = i915_gem_object_create_mmap_offset(obj); >> if (ret) >> goto out; >> >> + if (flags & I915_MMAP2_CPU) { >> + ret = drm_vma_node_set_vm_ops(&obj->base.vma_node, >> + &i915_gem_cpu_vm_ops); >> + if (ret) >> + goto out; >> + } >> + >> *offset = drm_vma_node_offset_addr(&obj->base.vma_node); >> >> out: >> @@ -2116,6 +2192,15 @@ unlock: >> return ret; >> } >> >> +int >> +i915_gem_mmap_gtt(struct drm_file *file, >> + struct drm_device *dev, >> + uint32_t handle, >> + uint64_t *offset) >> +{ >> + return i915_gem_mmap(file, dev, handle, 0, 
offset); >> +} >> + >> /** >> * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing >> * @dev: DRM device >> @@ -2137,7 +2222,8 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, >> { >> struct drm_i915_gem_mmap_gtt *args = data; >> >> - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); >> + return i915_gem_mmap(file, dev, args->handle, args->flags, >> + &args->offset); >> } >> >> /* Immediately discard the backing storage */ >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 6a19371391fa..359a36d604bb 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -528,6 +528,9 @@ struct drm_i915_gem_mmap_gtt { >> * This is a fixed-size type for 32/64 compatibility. >> */ >> __u64 offset; >> + >> +#define I915_MMAP2_CPU 0x1 >> + __u64 flags; >> }; >> >> struct drm_i915_gem_set_domain { >> -- >> 1.9.1 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx >
On Wed, Jan 27, 2016 at 04:01:26PM +0000, Tvrtko Ursulin wrote: > > On 27/01/16 15:51, Daniel Vetter wrote: > >On Wed, Jan 27, 2016 at 03:21:48PM +0000, Tvrtko Ursulin wrote: > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >>This enables mapping via CPU using the proper DRM mmap API for > >>better debug (Valgrind) and implementation symmetry in the > >>driver. > >> > >>v2: > >> * Use normal mutex, skip domain management and pin pages. (Chris Wilson) > >> * No need to drop struct mutex over vma_insert_pfn. > > > >I think we still neeed that on first fault: > > > >- userspac calls set_domain(GTT) > >- kernel does nothing since there's no binding > >- userspace starts accessing gtt mmap > >- kernel faults, but doesn't update domain/flush cpu caches > >-> BOOM > > Boom what? :) (seriously I don't follow) I screwed up the example, this one explains why we need the set_domain for gtt mmaps. For cpu mmaps I think we can get away with it, since we never optimize away a set_domain(CPU). The problem is just the asymmetry in how we treat set_domain(GTT) when there's no gtt mmapping. > And as an aside, you would merge this in general or see value in it? I think the idea makes sense, seems to still lack justification in the form of some open-source userspace wanting this ... -Daniel
On Wed, Jan 27, 2016 at 04:51:02PM +0100, Daniel Vetter wrote: > On Wed, Jan 27, 2016 at 03:21:48PM +0000, Tvrtko Ursulin wrote: > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > > > This enables mapping via CPU using the proper DRM mmap API for > > better debug (Valgrind) and implementation symmetry in the > > driver. > > > > v2: > > * Use normal mutex, skip domain management and pin pages. (Chris Wilson) > > * No need to drop struct mutex over vma_insert_pfn. > > I think we still neeed that on first fault: > > - userspac calls set_domain(GTT) > - kernel does nothing since there's no binding For GPU mmaps it would be set_domain(CPU). For WC/GTT, we already closed that hole for precisely this reason. However, I can accept it if the kernel shrinks between pointer accesses, even though I don't like it. pagefault-of-doom again. -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a1e2832047e2..a690cee31f20 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1954,6 +1954,62 @@ out: return i915_gem_ret_to_vm_ret(dev_priv, ret); } +static int +i915_gem_cpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); + pgoff_t page_offset; + struct page *page; + unsigned long pgprot; + int ret; + + /* We don't use vmf->pgoff since that has the fake offset */ + page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> + PAGE_SHIFT; + + trace_i915_gem_object_fault(obj, page_offset, true, write); + + intel_runtime_pm_get(dev_priv); + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto out_unlock; + + i915_gem_object_pin_pages(obj); + + page = i915_gem_object_get_page(obj, page_offset); + if (!page) { + ret = -EFAULT; + goto out_unpin; + } + + ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, + page_to_pfn(page)); + if (ret == 0) { + /* DRM core sets up WC by default and we want WB. 
*/ + pgprot = pgprot_val(vm_get_page_prot(vma->vm_flags)); + pgprot &= ~_PAGE_CACHE_MASK; + pgprot |= cachemode2protval(_PAGE_CACHE_MODE_WB); + vma->vm_page_prot = __pgprot(pgprot); + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_unlock: + mutex_unlock(&dev->struct_mutex); +out: + intel_runtime_pm_put(dev_priv); + + return i915_gem_ret_to_vm_ret(dev_priv, ret); +} + /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -2078,11 +2134,18 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) drm_gem_free_mmap_offset(&obj->base); } -int -i915_gem_mmap_gtt(struct drm_file *file, - struct drm_device *dev, - uint32_t handle, - uint64_t *offset) +static const struct vm_operations_struct i915_gem_cpu_vm_ops = { + .fault = i915_gem_cpu_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static int +i915_gem_mmap(struct drm_file *file, + struct drm_device *dev, + uint32_t handle, + uint32_t flags, + uint64_t *offset) { struct drm_i915_gem_object *obj; int ret; @@ -2103,10 +2166,23 @@ i915_gem_mmap_gtt(struct drm_file *file, goto out; } + if (!obj->base.filp && (flags & I915_MMAP2_CPU)) { + DRM_DEBUG("Attempting to mmap non-shm based object via CPU!\n"); + ret = -EINVAL; + goto out; + } + ret = i915_gem_object_create_mmap_offset(obj); if (ret) goto out; + if (flags & I915_MMAP2_CPU) { + ret = drm_vma_node_set_vm_ops(&obj->base.vma_node, + &i915_gem_cpu_vm_ops); + if (ret) + goto out; + } + *offset = drm_vma_node_offset_addr(&obj->base.vma_node); out: @@ -2116,6 +2192,15 @@ unlock: return ret; } +int +i915_gem_mmap_gtt(struct drm_file *file, + struct drm_device *dev, + uint32_t handle, + uint64_t *offset) +{ + return i915_gem_mmap(file, dev, handle, 0, offset); +} + /** * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device @@ -2137,7 +2222,8 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_mmap_gtt *args = data; - return 
i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); + return i915_gem_mmap(file, dev, args->handle, args->flags, + &args->offset); } /* Immediately discard the backing storage */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6a19371391fa..359a36d604bb 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -528,6 +528,9 @@ struct drm_i915_gem_mmap_gtt { * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; + +#define I915_MMAP2_CPU 0x1 + __u64 flags; }; struct drm_i915_gem_set_domain {