From patchwork Thu Apr 25 09:19:48 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 10916473 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 70C3E14DB for ; Thu, 25 Apr 2019 09:38:28 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 59A8F28AA5 for ; Thu, 25 Apr 2019 09:38:28 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4C43528B8C; Thu, 25 Apr 2019 09:38:28 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 44F0528B90 for ; Thu, 25 Apr 2019 09:38:24 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id B626C8925E; Thu, 25 Apr 2019 09:38:21 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 37D6089255 for ; Thu, 25 Apr 2019 09:38:12 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 16355011-1500050 for multiple; Thu, 25 Apr 2019 10:20:09 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org 
Date: Thu, 25 Apr 2019 10:19:48 +0100 Message-Id: <20190425092004.9995-29-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190425092004.9995-1-chris@chris-wilson.co.uk> References: <20190425092004.9995-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 29/45] drm/i915: Move mmap and friends to its own file X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Matthew Auld Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP Continuing the decluttering of i915_gem.c, now the turn of do_mmap and the faulthandlers Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_mman.c | 505 ++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 56 ++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 7 + .../drm/i915/gem/selftests/i915_gem_mman.c | 503 ++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 561 +----------------- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_object.c | 487 --------------- .../drm/i915/selftests/i915_live_selftests.h | 1 + 10 files changed, 1088 insertions(+), 1036 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_mman.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 84277b29389c..b86af182b1ac 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -88,6 +88,7 @@ i915-y += $(gt-y) obj-y += gem/ gem-y += \ gem/i915_gem_object.o \ + gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_shmem.o diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c new file mode 100644 index 000000000000..1bcc6e1091e9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -0,0 +1,505 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2016 Intel Corporation + */ + +#include +#include + +#include "i915_gem_ioctls.h" +#include "i915_gem_object.h" + +#include "../i915_gem_gtt.h" +#include "../i915_vma.h" +#include "../i915_drv.h" +#include "../intel_drv.h" + +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, + unsigned long addr, unsigned long size) +{ + if (vma->vm_file != filp) + return false; + + return vma->vm_start == addr && + (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size); +} + +/** + * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address + * it is mapped to. + * @dev: drm device + * @data: ioctl data blob + * @file: drm file + * + * While the mapping holds a reference on the contents of the object, it doesn't + * imply a ref on the object itself. + * + * IMPORTANT: + * + * DRM driver writers who look a this function as an example for how to do GEM + * mmap support, please don't implement mmap support like here. The modern way + * to implement DRM mmap support is with an mmap offset ioctl (like + * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. + * That way debug tooling like valgrind will understand what's going on, hiding + * the mmap call in a driver private ioctl will break that. The i915 driver only + * does cpu mmaps this way because we didn't know better. 
+ */ +int +i915_gem_mmap_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_mmap *args = data; + struct drm_i915_gem_object *obj; + unsigned long addr; + + if (args->flags & ~(I915_MMAP_WC)) + return -EINVAL; + + if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + /* prime objects have no backing filp to GEM mmap + * pages from. + */ + if (!obj->base.filp) { + addr = -ENXIO; + goto err; + } + + if (range_overflows(args->offset, args->size, (u64)obj->base.size)) { + addr = -EINVAL; + goto err; + } + + addr = vm_mmap(obj->base.filp, 0, args->size, + PROT_READ | PROT_WRITE, MAP_SHARED, + args->offset); + if (IS_ERR_VALUE(addr)) + goto err; + + if (args->flags & I915_MMAP_WC) { + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (down_write_killable(&mm->mmap_sem)) { + addr = -EINTR; + goto err; + } + vma = find_vma(mm, addr); + if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + else + addr = -ENOMEM; + up_write(&mm->mmap_sem); + if (IS_ERR_VALUE(addr)) + goto err; + + /* This may race, but that's ok, it only gets set */ + WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); + } + i915_gem_object_put(obj); + + args->addr_ptr = (u64)addr; + return 0; + +err: + i915_gem_object_put(obj); + return addr; +} + +static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; +} + +/** + * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps + * + * A history of the GTT mmap interface: + * + * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to + * aligned and suitable for fencing, and still fit into the available + * mappable space left by the pinned display objects. 
A classic problem + * we called the page-fault-of-doom where we would ping-pong between + * two objects that could not fit inside the GTT and so the memcpy + * would page one object in at the expense of the other between every + * single byte. + * + * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none + * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the + * object is too large for the available space (or simply too large + * for the mappable aperture!), a view is created instead and faulted + * into userspace. (This view is aligned and sized appropriately for + * fenced access.) + * + * 2 - Recognise WC as a separate cache domain so that we can flush the + * delayed writes via GTT before performing direct access via WC. + * + * 3 - Remove implicit set-domain(GTT) and synchronisation on initial + * pagefault; swapin remains transparent. + * + * Restrictions: + * + * * snoopable objects cannot be accessed via the GTT. It can cause machine + * hangs on some architectures, corruption on others. An attempt to service + * a GTT page fault from a snoopable object will generate a SIGBUS. + * + * * the object must be able to fit into RAM (physical memory, though not + * limited to the mappable aperture). + * + * + * Caveats: + * + * * a new GTT page fault will synchronize rendering from the GPU and flush + * all data to system memory. Subsequent access will not be synchronized. + * + * * all mappings are revoked on runtime device suspend. + * + * * there are only 8, 16 or 32 fence registers to share between all users + * (older machines require fence register for display and blitter access + * as well). Contention of the fence registers will cause the previous users + * to be unmapped and any new access will generate new page faults. + * + * * running out of memory while servicing a fault may generate a SIGBUS, + * rather than the expected SIGSEGV. 
+ */ +int i915_gem_mmap_gtt_version(void) +{ + return 3; +} + +static inline struct i915_ggtt_view +compute_partial_view(const struct drm_i915_gem_object *obj, + pgoff_t page_offset, + unsigned int chunk) +{ + struct i915_ggtt_view view; + + if (i915_gem_object_is_tiled(obj)) + chunk = roundup(chunk, tile_row_pages(obj)); + + view.type = I915_GGTT_VIEW_PARTIAL; + view.partial.offset = rounddown(page_offset, chunk); + view.partial.size = + min_t(unsigned int, chunk, + (obj->base.size >> PAGE_SHIFT) - view.partial.offset); + + /* If the partial covers the entire object, just create a normal VMA. */ + if (chunk >= obj->base.size >> PAGE_SHIFT) + view.type = I915_GGTT_VIEW_NORMAL; + + return view; +} + +/** + * i915_gem_fault - fault a page into the GTT + * @vmf: fault info + * + * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped + * from userspace. The fault handler takes care of binding the object to + * the GTT (if needed), allocating and programming a fence register (again, + * only if needed based on whether the old reg is still valid or the object + * is tiled) and inserting a new PTE into the faulting process. + * + * Note that the faulting process may involve evicting existing objects + * from the GTT and/or fence registers to make room. So performance may + * suffer if the GTT working set is large or there are few fence registers + * left. + * + * The current feature set supported by i915_gem_fault() and thus GTT mmaps + * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 
+ */ +vm_fault_t i915_gem_fault(struct vm_fault *vmf) +{ +#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) + struct vm_area_struct *area = vmf->vma; + struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *i915 = to_i915(dev); + struct i915_ggtt *ggtt = &i915->ggtt; + bool write = area->vm_flags & VM_WRITE; + intel_wakeref_t wakeref; + struct i915_vma *vma; + pgoff_t page_offset; + int srcu; + int ret; + + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) + return VM_FAULT_SIGBUS; + + /* We don't use vmf->pgoff since that has the fake offset */ + page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + + trace_i915_gem_object_fault(obj, page_offset, true, write); + + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err; + + wakeref = intel_runtime_pm_get(i915); + + srcu = i915_reset_trylock(i915); + if (srcu < 0) { + ret = srcu; + goto err_rpm; + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_reset; + + /* Access to snoopable pages through the GTT is incoherent. */ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unlock; + } + + /* Now pin it into the GTT as needed */ + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); + if (IS_ERR(vma)) { + /* Use a partial view if it is bigger than available space */ + struct i915_ggtt_view view = + compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); + unsigned int flags; + + flags = PIN_MAPPABLE; + if (view.type == I915_GGTT_VIEW_NORMAL) + flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ + + /* + * Userspace is now writing through an untracked VMA, abandon + * all hope that the hardware is able to track future writes. 
+ */ + obj->frontbuffer_ggtt_origin = ORIGIN_CPU; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + if (IS_ERR(vma) && !view.type) { + flags = PIN_MAPPABLE; + view.type = I915_GGTT_VIEW_PARTIAL; + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + } + } + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unlock; + } + + ret = i915_vma_pin_fence(vma); + if (ret) + goto err_unpin; + + /* Finally, remap it using the new GTT offset */ + ret = remap_io_mapping(area, + area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), + (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, + min_t(u64, vma->size, area->vm_end - area->vm_start), + &ggtt->iomap); + if (ret) + goto err_fence; + + /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(i915); + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) + list_add(&obj->userfault_link, &i915->mm.userfault_list); + GEM_BUG_ON(!obj->userfault_count); + + i915_vma_set_ggtt_write(vma); + +err_fence: + i915_vma_unpin_fence(vma); +err_unpin: + __i915_vma_unpin(vma); +err_unlock: + mutex_unlock(&dev->struct_mutex); +err_reset: + i915_reset_unlock(i915, srcu); +err_rpm: + intel_runtime_pm_put(i915, wakeref); + i915_gem_object_unpin_pages(obj); +err: + switch (ret) { + case -EIO: + /* + * We eat errors when the gpu is terminally wedged to avoid + * userspace unduly crashing (gl has no provisions for mmaps to + * fail). But any other -EIO isn't ours (e.g. swap in failure) + * and so needs to be reported. + */ + if (!i915_terminally_wedged(i915)) + return VM_FAULT_SIGBUS; + /* else: fall through */ + case -EAGAIN: + /* + * EAGAIN means the gpu is hung and we'll wait for the error + * handler to reset everything when re-faulting in + * i915_mutex_lock_interruptible. + */ + case 0: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. 
+ */ + return VM_FAULT_NOPAGE; + case -ENOMEM: + return VM_FAULT_OOM; + case -ENOSPC: + case -EFAULT: + return VM_FAULT_SIGBUS; + default: + WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); + return VM_FAULT_SIGBUS; + } +} + +void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + GEM_BUG_ON(!obj->userfault_count); + + obj->userfault_count = 0; + list_del(&obj->userfault_link); + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + for_each_ggtt_vma(vma, obj) + i915_vma_unset_userfault(vma); +} + +/** + * i915_gem_object_release_mmap - remove physical page mappings + * @obj: obj in question + * + * Preserve the reservation of the mmapping with the DRM core code, but + * relinquish ownership of the pages back to the system. + * + * It is vital that we remove the page mapping if we have mapped a tiled + * object through the GTT and then lose the fence register due to + * resource pressure. Similarly if the object has been moved out of the + * aperture, than pages mapped into userspace must be revoked. Removing the + * mapping will then trigger a page fault on the next user access, allowing + * fixup by i915_gem_fault(). + */ +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + /* Serialisation between user GTT access and our code depends upon + * revoking the CPU's PTE whilst the mutex is held. The next user + * pagefault then has to wait until we release the mutex. + * + * Note that RPM complicates somewhat by adding an additional + * requirement that operations to the GGTT be made holding the RPM + * wakeref. 
+ */ + lockdep_assert_held(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (!obj->userfault_count) + goto out; + + __i915_gem_object_release_mmap(obj); + + /* Ensure that the CPU's PTE are revoked and there are not outstanding + * memory transactions from userspace before we return. The TLB + * flushing implied above by changing the PTE above *should* be + * sufficient, an extra barrier here just provides us with a bit + * of paranoid documentation about our requirement to serialise + * memory writes before touching registers / GSM. + */ + wmb(); + +out: + intel_runtime_pm_put(i915, wakeref); +} + +static int create_mmap_offset(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + int err; + + err = drm_gem_create_mmap_offset(&obj->base); + if (likely(!err)) + return 0; + + /* Attempt to reap some mmap space from dead objects */ + do { + err = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (err) + break; + + i915_gem_drain_freed_objects(i915); + err = drm_gem_create_mmap_offset(&obj->base); + if (!err) + break; + + } while (flush_delayed_work(&i915->gem.retire_work)); + + return err; +} + +int +i915_gem_mmap_gtt(struct drm_file *file, + struct drm_device *dev, + u32 handle, + u64 *offset) +{ + struct drm_i915_gem_object *obj; + int ret; + + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; + + ret = create_mmap_offset(obj); + if (ret == 0) + *offset = drm_vma_node_offset_addr(&obj->base.vma_node); + + i915_gem_object_put(obj); + return ret; +} + +/** + * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing + * @dev: DRM device + * @data: GTT mapping ioctl data + * @file: GEM object info + * + * Simply returns the fake offset to userspace so it can mmap it. + * The mmap call will end up in drm_gem_mmap(), which will set things + * up so we can get faults in the handler above. 
+ * + * The fault handler will take care of binding the object into the GTT + * (since it may have been evicted to make room for something), allocating + * a fence register, and mapping the appropriate aperture address into + * userspace. + */ +int +i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_mmap_gtt *args = data; + + return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_mman.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 86e7e88817af..005e17b3acce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -26,6 +26,7 @@ #include "../i915_drv.h" #include "../i915_globals.h" +#include "../i915_gem_clflush.h" #include "../intel_frontbuffer.h" static struct i915_global_object { @@ -357,6 +358,61 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) i915_gem_object_put(obj); } +static inline enum fb_op_origin +fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) +{ + return (domain == I915_GEM_DOMAIN_GTT ? 
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU); +} + +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + +void +i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, + unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + + if (!(obj->write_domain & flush_domains)) + return; + + switch (obj->write_domain) { + case I915_GEM_DOMAIN_GTT: + i915_gem_flush_ggtt_writes(dev_priv); + + intel_fb_obj_flush(obj, + fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + + for_each_ggtt_vma(vma, obj) { + if (vma->iomap) + continue; + + i915_vma_unset_ggtt_write(vma); + } + break; + + case I915_GEM_DOMAIN_WC: + wmb(); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + + case I915_GEM_DOMAIN_RENDER: + if (gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; + break; + } + + obj->write_domain = 0; +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2e963a593245..9bf8155d27de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -346,6 +346,13 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } +void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); + +void +i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, + unsigned int flush_domains); + static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
new file mode 100644 index 000000000000..031e7ad875e3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -0,0 +1,503 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include + +#include "gt/intel_gt_pm.h" +#include "i915_selftest.h" +#include "selftests/huge_gem_object.h" +#include "selftests/igt_flush_test.h" + +struct tile { + unsigned int width; + unsigned int height; + unsigned int stride; + unsigned int size; + unsigned int tiling; + unsigned int swizzle; +}; + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct tile *tile, u64 v) +{ + u64 x, y; + + if (tile->tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, tile->stride, &x); + v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; + + if (tile->tiling == I915_TILING_X) { + v += y * tile->width; + v += div64_u64_rem(x, tile->width, &x) << tile->size; + v += x; + } else if (tile->width == 128) { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 512; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } else { + const unsigned int ytile_span = 32; + const unsigned int ytile_height = 256; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } + + switch (tile->swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static int check_partial_mapping(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const 
unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; + + if (igt_timeout(end_time, + "%s: timed out before tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; + + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + for_each_prime_number_from(page, 1, npages) { + struct i915_ggtt_view view = + compute_partial_view(obj, page, MIN_CHUNK_PAGES); + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; + + GEM_BUG_ON(view.partial.size > nreal); + cond_resched(); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", + err); + return err; + } + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + return PTR_ERR(io); + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + continue; + + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, 
stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + if (err) + return err; + + i915_vma_destroy(vma); + } + + return 0; +} + +static int igt_partial_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + int tiling; + int err; + + /* We want to check the page mapping and fencing of a large object + * mmapped through the GTT. The object we create is larger than can + * possibly be mmaped as a whole, and so we must use partial GGTT vma. + * We then check that a write through each partial GGTT vma ends up + * in the right set of pages within the object, and with the expected + * tiling, which we verify by manual swizzling. 
+ */ + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (1) { + IGT_TIMEOUT(end); + struct tile tile; + + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + tile.tiling = I915_TILING_NONE; + + err = check_partial_mapping(obj, &tile, end); + if (err && err != -EINTR) + goto out_unlock; + } + + for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { + IGT_TIMEOUT(end); + unsigned int max_pitch; + unsigned int pitch; + struct tile tile; + + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + /* + * The swizzling pattern is actually unknown as it + * varies based on physical address of each page. + * See i915_gem_detect_bit_6_swizzle(). 
+ */ + break; + + tile.tiling = tiling; + switch (tiling) { + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN); + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (INTEL_GEN(i915) <= 2) { + tile.height = 16; + tile.width = 128; + tile.size = 11; + } else if (tile.tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile.height = 32; + tile.width = 128; + tile.size = 12; + } else { + tile.height = 8; + tile.width = 512; + tile.size = 12; + } + + if (INTEL_GEN(i915) < 4) + max_pitch = 8192 / tile.width; + else if (INTEL_GEN(i915) < 7) + max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; + else + max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + + for (pitch = max_pitch; pitch; pitch >>= 1) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + + if (pitch > 2 && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch - 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + + if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch + 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + + if (INTEL_GEN(i915) >= 4) { + for_each_prime_number(pitch, max_pitch) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + +next_tiling: ; + } + +out_unlock: + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int 
make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = i915_request_create(i915->engine[RCS0]->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + i915_request_add(rq); + + __i915_gem_object_release_unless_active(obj); + i915_vma_unpin(vma); + + return err; +} + +static bool assert_mmap_offset(struct drm_i915_private *i915, + unsigned long size, + int expected) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = create_mmap_offset(obj); + i915_gem_object_put(obj); + + return err == expected; +} + +static void disable_retire_worker(struct drm_i915_private *i915) +{ + i915_gem_shrinker_unregister(i915); + + intel_gt_pm_get(i915); + + cancel_delayed_work_sync(&i915->gem.retire_work); + cancel_delayed_work_sync(&i915->gem.idle_work); +} + +static void restore_retire_worker(struct drm_i915_private *i915) +{ + intel_gt_pm_put(i915); + + mutex_lock(&i915->drm.struct_mutex); + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_shrinker_register(i915); +} + +static int igt_mmap_offset_exhaustion(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; + struct drm_i915_gem_object *obj; + struct drm_mm_node resv, *hole; + u64 hole_start, hole_end; + int loop, err; + + /* Disable background reaper */ + disable_retire_worker(i915); + GEM_BUG_ON(!i915->gt.awake); + + /* Trim the device mmap space to only a page */ + memset(&resv, 0, sizeof(resv)); + drm_mm_for_each_hole(hole, mm, 
hole_start, hole_end) { + resv.start = hole_start; + resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + err = drm_mm_reserve_node(mm, &resv); + if (err) { + pr_err("Failed to trim VMA manager, err=%d\n", err); + goto out_park; + } + break; + } + + /* Just fits! */ + if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { + pr_err("Unable to insert object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Too large */ + if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Fill the hole, further allocation attempts should then fail */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = create_mmap_offset(obj); + if (err) { + pr_err("Unable to insert object into reclaimed hole\n"); + goto err_obj; + } + + if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); + err = -EINVAL; + goto err_obj; + } + + i915_gem_object_put(obj); + + /* Now fill with busy dead objects that we expect to reap */ + for (loop = 0; loop < 3; loop++) { + if (i915_terminally_wedged(i915)) + break; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = make_obj_busy(obj); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("[loop %d] Failed to busy the object\n", loop); + goto err_obj; + } + + /* NB we rely on the _active_ reference to access obj now */ + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + err = create_mmap_offset(obj); + if (err) { + pr_err("[loop %d] create_mmap_offset failed with err=%d\n", + loop, err); + goto out; + } + } + +out: + drm_mm_remove_node(&resv); +out_park: + restore_retire_worker(i915); + return err; +err_obj: + i915_gem_object_put(obj); + goto out; +} 
+ +int i915_gem_mman_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_partial_tiling), + SUBTEST(igt_mmap_offset_exhaustion), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ac9723466488..c4893dd9489f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2878,7 +2878,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 flags); int i915_gem_object_unbind(struct drm_i915_gem_object *obj); -void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0549b73f01ff..78d99841cf76 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -380,12 +380,6 @@ i915_gem_dumb_create(struct drm_file *file, &args->size, &args->handle); } -static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_level == I915_CACHE_NONE || - obj->cache_level == I915_CACHE_WT); -} - /** * Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -405,13 +399,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, &args->size, &args->handle); } -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? 
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { intel_wakeref_t wakeref; @@ -451,47 +438,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) } } -static void -flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *vma; - - if (!(obj->write_domain & flush_domains)) - return; - - switch (obj->write_domain) { - case I915_GEM_DOMAIN_GTT: - i915_gem_flush_ggtt_writes(dev_priv); - - intel_fb_obj_flush(obj, - fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); - - for_each_ggtt_vma(vma, obj) { - if (vma->iomap) - continue; - - i915_vma_unset_ggtt_write(vma); - } - break; - - case I915_GEM_DOMAIN_WC: - wmb(); - break; - - case I915_GEM_DOMAIN_CPU: - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - break; - - case I915_GEM_DOMAIN_RENDER: - if (gpu_write_needs_clflush(obj)) - obj->cache_dirty = true; - break; - } - - obj->write_domain = 0; -} - /* * Pins the specified object's pages and synchronizes the object with * GPU accesses. Sets needs_clflush to non-zero if the caller should @@ -528,7 +474,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, goto out; } - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -580,7 +526,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, goto out; } - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). 
@@ -1183,6 +1129,13 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) spin_unlock(&i915->mm.obj_lock); } +static inline enum fb_op_origin +fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) +{ + return (domain == I915_GEM_DOMAIN_GTT ? + obj->frontbuffer_ggtt_origin : ORIGIN_CPU); +} + /** * Called when user space prepares to use an object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. @@ -1326,420 +1279,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, return 0; } -static inline bool -__vma_matches(struct vm_area_struct *vma, struct file *filp, - unsigned long addr, unsigned long size) -{ - if (vma->vm_file != filp) - return false; - - return vma->vm_start == addr && - (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size); -} - -/** - * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address - * it is mapped to. - * @dev: drm device - * @data: ioctl data blob - * @file: drm file - * - * While the mapping holds a reference on the contents of the object, it doesn't - * imply a ref on the object itself. - * - * IMPORTANT: - * - * DRM driver writers who look a this function as an example for how to do GEM - * mmap support, please don't implement mmap support like here. The modern way - * to implement DRM mmap support is with an mmap offset ioctl (like - * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. - * That way debug tooling like valgrind will understand what's going on, hiding - * the mmap call in a driver private ioctl will break that. The i915 driver only - * does cpu mmaps this way because we didn't know better. 
- */ -int -i915_gem_mmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_mmap *args = data; - struct drm_i915_gem_object *obj; - unsigned long addr; - - if (args->flags & ~(I915_MMAP_WC)) - return -EINVAL; - - if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) - return -ENODEV; - - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) - return -ENOENT; - - /* prime objects have no backing filp to GEM mmap - * pages from. - */ - if (!obj->base.filp) { - addr = -ENXIO; - goto err; - } - - if (range_overflows(args->offset, args->size, (u64)obj->base.size)) { - addr = -EINVAL; - goto err; - } - - addr = vm_mmap(obj->base.filp, 0, args->size, - PROT_READ | PROT_WRITE, MAP_SHARED, - args->offset); - if (IS_ERR_VALUE(addr)) - goto err; - - if (args->flags & I915_MMAP_WC) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (down_write_killable(&mm->mmap_sem)) { - addr = -EINTR; - goto err; - } - vma = find_vma(mm, addr); - if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) - vma->vm_page_prot = - pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - else - addr = -ENOMEM; - up_write(&mm->mmap_sem); - if (IS_ERR_VALUE(addr)) - goto err; - - /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); - } - i915_gem_object_put(obj); - - args->addr_ptr = (u64)addr; - return 0; - -err: - i915_gem_object_put(obj); - return addr; -} - -static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; -} - -/** - * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps - * - * A history of the GTT mmap interface: - * - * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to - * aligned and suitable for fencing, and still fit into the available - * mappable space left by the pinned display objects. 
A classic problem - * we called the page-fault-of-doom where we would ping-pong between - * two objects that could not fit inside the GTT and so the memcpy - * would page one object in at the expense of the other between every - * single byte. - * - * 1 - Objects can be any size, and have any compatible fencing (X Y, or none - * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the - * object is too large for the available space (or simply too large - * for the mappable aperture!), a view is created instead and faulted - * into userspace. (This view is aligned and sized appropriately for - * fenced access.) - * - * 2 - Recognise WC as a separate cache domain so that we can flush the - * delayed writes via GTT before performing direct access via WC. - * - * 3 - Remove implicit set-domain(GTT) and synchronisation on initial - * pagefault; swapin remains transparent. - * - * Restrictions: - * - * * snoopable objects cannot be accessed via the GTT. It can cause machine - * hangs on some architectures, corruption on others. An attempt to service - * a GTT page fault from a snoopable object will generate a SIGBUS. - * - * * the object must be able to fit into RAM (physical memory, though no - * limited to the mappable aperture). - * - * - * Caveats: - * - * * a new GTT page fault will synchronize rendering from the GPU and flush - * all data to system memory. Subsequent access will not be synchronized. - * - * * all mappings are revoked on runtime device suspend. - * - * * there are only 8, 16 or 32 fence registers to share between all users - * (older machines require fence register for display and blitter access - * as well). Contention of the fence registers will cause the previous users - * to be unmapped and any new access will generate new page faults. - * - * * running out of memory while servicing a fault may generate a SIGBUS, - * rather than the expected SIGSEGV. 
- */ -int i915_gem_mmap_gtt_version(void) -{ - return 3; -} - -static inline struct i915_ggtt_view -compute_partial_view(const struct drm_i915_gem_object *obj, - pgoff_t page_offset, - unsigned int chunk) -{ - struct i915_ggtt_view view; - - if (i915_gem_object_is_tiled(obj)) - chunk = roundup(chunk, tile_row_pages(obj)); - - view.type = I915_GGTT_VIEW_PARTIAL; - view.partial.offset = rounddown(page_offset, chunk); - view.partial.size = - min_t(unsigned int, chunk, - (obj->base.size >> PAGE_SHIFT) - view.partial.offset); - - /* If the partial covers the entire object, just create a normal VMA. */ - if (chunk >= obj->base.size >> PAGE_SHIFT) - view.type = I915_GGTT_VIEW_NORMAL; - - return view; -} - -/** - * i915_gem_fault - fault a page into the GTT - * @vmf: fault info - * - * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped - * from userspace. The fault handler takes care of binding the object to - * the GTT (if needed), allocating and programming a fence register (again, - * only if needed based on whether the old reg is still valid or the object - * is tiled) and inserting a new PTE into the faulting process. - * - * Note that the faulting process may involve evicting existing objects - * from the GTT and/or fence registers to make room. So performance may - * suffer if the GTT working set is large or there are few fence registers - * left. - * - * The current feature set supported by i915_gem_fault() and thus GTT mmaps - * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 
- */ -vm_fault_t i915_gem_fault(struct vm_fault *vmf) -{ -#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) - struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - bool write = area->vm_flags & VM_WRITE; - intel_wakeref_t wakeref; - struct i915_vma *vma; - pgoff_t page_offset; - int srcu; - int ret; - - /* Sanity check that we allow writing into this object */ - if (i915_gem_object_is_readonly(obj) && write) - return VM_FAULT_SIGBUS; - - /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; - - trace_i915_gem_object_fault(obj, page_offset, true, write); - - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err; - - wakeref = intel_runtime_pm_get(dev_priv); - - srcu = i915_reset_trylock(dev_priv); - if (srcu < 0) { - ret = srcu; - goto err_rpm; - } - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { - ret = -EFAULT; - goto err_unlock; - } - - /* Now pin it into the GTT as needed */ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); - if (IS_ERR(vma)) { - /* Use a partial view if it is bigger than available space */ - struct i915_ggtt_view view = - compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); - unsigned int flags; - - flags = PIN_MAPPABLE; - if (view.type == I915_GGTT_VIEW_NORMAL) - flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ - - /* - * Userspace is now writing through an untracked VMA, abandon - * all hope that the hardware is able to track future writes. 
- */ - obj->frontbuffer_ggtt_origin = ORIGIN_CPU; - - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma) && !view.type) { - flags = PIN_MAPPABLE; - view.type = I915_GGTT_VIEW_PARTIAL; - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - } - } - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_unlock; - } - - ret = i915_vma_pin_fence(vma); - if (ret) - goto err_unpin; - - /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); - if (ret) - goto err_fence; - - /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(dev_priv); - if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - GEM_BUG_ON(!obj->userfault_count); - - i915_vma_set_ggtt_write(vma); - -err_fence: - i915_vma_unpin_fence(vma); -err_unpin: - __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); -err_reset: - i915_reset_unlock(dev_priv, srcu); -err_rpm: - intel_runtime_pm_put(dev_priv, wakeref); - i915_gem_object_unpin_pages(obj); -err: - switch (ret) { - case -EIO: - /* - * We eat errors when the gpu is terminally wedged to avoid - * userspace unduly crashing (gl has no provisions for mmaps to - * fail). But any other -EIO isn't ours (e.g. swap in failure) - * and so needs to be reported. - */ - if (!i915_terminally_wedged(dev_priv)) - return VM_FAULT_SIGBUS; - /* else: fall through */ - case -EAGAIN: - /* - * EAGAIN means the gpu is hung and we'll wait for the error - * handler to reset everything when re-faulting in - * i915_mutex_lock_interruptible. - */ - case 0: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - /* - * EBUSY is ok: this just means that another thread - * already did the job. 
- */ - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - case -EFAULT: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); - return VM_FAULT_SIGBUS; - } -} - -static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - - GEM_BUG_ON(!obj->userfault_count); - - obj->userfault_count = 0; - list_del(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - - for_each_ggtt_vma(vma, obj) - i915_vma_unset_userfault(vma); -} - -/** - * i915_gem_release_mmap - remove physical page mappings - * @obj: obj in question - * - * Preserve the reservation of the mmapping with the DRM core code, but - * relinquish ownership of the pages back to the system. - * - * It is vital that we remove the page mapping if we have mapped a tiled - * object through the GTT and then lose the fence register due to - * resource pressure. Similarly if the object has been moved out of the - * aperture, than pages mapped into userspace must be revoked. Removing the - * mapping will then trigger a page fault on the next user access, allowing - * fixup by i915_gem_fault(). - */ -void -i915_gem_release_mmap(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - intel_wakeref_t wakeref; - - /* Serialisation between user GTT access and our code depends upon - * revoking the CPU's PTE whilst the mutex is held. The next user - * pagefault then has to wait until we release the mutex. - * - * Note that RPM complicates somewhat by adding an additional - * requirement that operations to the GGTT be made holding the RPM - * wakeref. 
- */ - lockdep_assert_held(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (!obj->userfault_count) - goto out; - - __i915_gem_object_release_mmap(obj); - - /* Ensure that the CPU's PTE are revoked and there are not outstanding - * memory transactions from userspace before we return. The TLB - * flushing implied above by changing the PTE above *should* be - * sufficient, an extra barrier here just provides us with a bit - * of paranoid documentation about our requirement to serialise - * memory writes before touching registers / GSM. - */ - wmb(); - -out: - intel_runtime_pm_put(i915, wakeref); -} - void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj, *on; @@ -1782,78 +1321,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) } } -static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int err; - - err = drm_gem_create_mmap_offset(&obj->base); - if (likely(!err)) - return 0; - - /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - i915_gem_drain_freed_objects(dev_priv); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; - - } while (flush_delayed_work(&dev_priv->gem.retire_work)); - - return err; -} - -int -i915_gem_mmap_gtt(struct drm_file *file, - struct drm_device *dev, - u32 handle, - u64 *offset) -{ - struct drm_i915_gem_object *obj; - int ret; - - obj = i915_gem_object_lookup(file, handle); - if (!obj) - return -ENOENT; - - ret = i915_gem_object_create_mmap_offset(obj); - if (ret == 0) - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - - i915_gem_object_put(obj); - return ret; -} - -/** - * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing - * @dev: DRM device - * @data: GTT mapping ioctl data - * @file: GEM object info - 
* - * Simply returns the fake offset to userspace so it can mmap it. - * The mmap call will end up in drm_gem_mmap(), which will set things - * up so we can get faults in the handler above. - * - * The fault handler will take care of binding the object into the GTT - * (since it may have been evicted to make room for something), allocating - * a fence register, and mapping the appropriate aperture address into - * userspace. - */ -int -i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_mmap_gtt *args = data; - - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); -} - bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; @@ -2057,7 +1524,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) * We manually flush the CPU domain so that we can override and * force the flush for the display, and perform it asyncrhonously. */ - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); if (obj->cache_dirty) i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->write_domain = 0; @@ -2111,7 +1578,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -2173,7 +1640,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -2282,7 +1749,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * then double check 
if the GTT mapping is still * valid for that pointer access. */ - i915_gem_release_mmap(obj); + i915_gem_object_release_mmap(obj); /* As we no longer need a fence for GTT access, * we can relinquish it now (and so prevent having @@ -2537,7 +2004,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 7d5cc9ccf6dd..86d6d92ccbc9 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -299,7 +299,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, i915_gem_object_unlock(obj); /* Force the fence to be reacquired for GTT access */ - i915_gem_release_mmap(obj); + i915_gem_object_release_mmap(obj); /* Try to preallocate memory required to save swizzling on put-pages */ if (i915_gem_object_needs_bit17_swizzle(obj)) { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 37f32c2eca19..a3dd2f1be95b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -89,491 +89,6 @@ static int igt_gem_huge(void *arg) return err; } -struct tile { - unsigned int width; - unsigned int height; - unsigned int stride; - unsigned int size; - unsigned int tiling; - unsigned int swizzle; -}; - -static u64 swizzle_bit(unsigned int bit, u64 offset) -{ - return (offset & BIT_ULL(bit)) >> (bit - 6); -} - -static u64 tiled_offset(const struct tile *tile, u64 v) -{ - u64 x, y; - - if (tile->tiling == I915_TILING_NONE) - return v; - - y = div64_u64_rem(v, tile->stride, &x); - v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; - - if 
(tile->tiling == I915_TILING_X) { - v += y * tile->width; - v += div64_u64_rem(x, tile->width, &x) << tile->size; - v += x; - } else if (tile->width == 128) { - const unsigned int ytile_span = 16; - const unsigned int ytile_height = 512; - - v += y * ytile_span; - v += div64_u64_rem(x, ytile_span, &x) * ytile_height; - v += x; - } else { - const unsigned int ytile_span = 32; - const unsigned int ytile_height = 256; - - v += y * ytile_span; - v += div64_u64_rem(x, ytile_span, &x) * ytile_height; - v += x; - } - - switch (tile->swizzle) { - case I915_BIT_6_SWIZZLE_9: - v ^= swizzle_bit(9, v); - break; - case I915_BIT_6_SWIZZLE_9_10: - v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); - break; - case I915_BIT_6_SWIZZLE_9_11: - v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); - break; - } - - return v; -} - -static int check_partial_mapping(struct drm_i915_gem_object *obj, - const struct tile *tile, - unsigned long end_time) -{ - const unsigned int nreal = obj->scratch / PAGE_SIZE; - const unsigned long npages = obj->base.size / PAGE_SIZE; - struct i915_vma *vma; - unsigned long page; - int err; - - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; - - err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); - if (err) { - pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", - tile->tiling, tile->stride, err); - return err; - } - - GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); - GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - - for_each_prime_number_from(page, 1, npages) { - struct i915_ggtt_view view = - compute_partial_view(obj, page, MIN_CHUNK_PAGES); - u32 __iomem *io; - struct page *p; - unsigned int n; - u64 offset; - u32 *cpu; - - GEM_BUG_ON(view.partial.size > nreal); - cond_resched(); - - err = i915_gem_object_set_to_gtt_domain(obj, 
true); - if (err) { - pr_err("Failed to flush to GTT write domain; err=%d\n", - err); - return err; - } - - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); - if (IS_ERR(vma)) { - pr_err("Failed to pin partial view: offset=%lu; err=%d\n", - page, (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } - - n = page - view.partial.offset; - GEM_BUG_ON(n >= view.partial.size); - - io = i915_vma_pin_iomap(vma); - i915_vma_unpin(vma); - if (IS_ERR(io)) { - pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", - page, (int)PTR_ERR(io)); - return PTR_ERR(io); - } - - iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); - i915_vma_unpin_iomap(vma); - - offset = tiled_offset(tile, page << PAGE_SHIFT); - if (offset >= obj->base.size) - continue; - - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); - - p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); - cpu = kmap(p) + offset_in_page(offset); - drm_clflush_virt_range(cpu, sizeof(*cpu)); - if (*cpu != (u32)page) { - pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", - page, n, - view.partial.offset, - view.partial.size, - vma->size >> PAGE_SHIFT, - tile->tiling ? tile_row_pages(obj) : 0, - vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, - offset >> PAGE_SHIFT, - (unsigned int)offset_in_page(offset), - offset, - (u32)page, *cpu); - err = -EINVAL; - } - *cpu = 0; - drm_clflush_virt_range(cpu, sizeof(*cpu)); - kunmap(p); - if (err) - return err; - - i915_vma_destroy(vma); - } - - return 0; -} - -static int igt_partial_tiling(void *arg) -{ - const unsigned int nreal = 1 << 12; /* largest tile row x2 */ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; - int tiling; - int err; - - /* We want to check the page mapping and fencing of a large object - * mmapped through the GTT. 
The object we create is larger than can - * possibly be mmaped as a whole, and so we must use partial GGTT vma. - * We then check that a write through each partial GGTT vma ends up - * in the right set of pages within the object, and with the expected - * tiling, which we verify by manual swizzling. - */ - - obj = huge_gem_object(i915, - nreal << PAGE_SHIFT, - (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) { - pr_err("Failed to allocate %u pages (%lu total), err=%d\n", - nreal, obj->base.size / PAGE_SIZE, err); - goto out; - } - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (1) { - IGT_TIMEOUT(end); - struct tile tile; - - tile.height = 1; - tile.width = 1; - tile.size = 0; - tile.stride = 0; - tile.swizzle = I915_BIT_6_SWIZZLE_NONE; - tile.tiling = I915_TILING_NONE; - - err = check_partial_mapping(obj, &tile, end); - if (err && err != -EINTR) - goto out_unlock; - } - - for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { - IGT_TIMEOUT(end); - unsigned int max_pitch; - unsigned int pitch; - struct tile tile; - - if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) - /* - * The swizzling pattern is actually unknown as it - * varies based on physical address of each page. - * See i915_gem_detect_bit_6_swizzle(). 
- */ - break; - - tile.tiling = tiling; - switch (tiling) { - case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; - break; - case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; - break; - } - - GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN); - if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || - tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) - continue; - - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; - - for (pitch = max_pitch; pitch; pitch >>= 1) { - tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); - if (err == -EINTR) - goto next_tiling; - if (err) - goto out_unlock; - - if (pitch > 2 && INTEL_GEN(i915) >= 4) { - tile.stride = tile.width * (pitch - 1); - err = check_partial_mapping(obj, &tile, end); - if (err == -EINTR) - goto next_tiling; - if (err) - goto out_unlock; - } - - if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { - tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); - if (err == -EINTR) - goto next_tiling; - if (err) - goto out_unlock; - } - } - - if (INTEL_GEN(i915) >= 4) { - for_each_prime_number(pitch, max_pitch) { - tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); - if (err == -EINTR) - goto next_tiling; - if (err) - goto out_unlock; - } - } - -next_tiling: ; - } - -out_unlock: - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_object_unpin_pages(obj); -out: - i915_gem_object_put(obj); - return err; -} - -static int 
make_obj_busy(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; - - rq = i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); - } - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - - i915_request_add(rq); - - __i915_gem_object_release_unless_active(obj); - i915_vma_unpin(vma); - - return err; -} - -static bool assert_mmap_offset(struct drm_i915_private *i915, - unsigned long size, - int expected) -{ - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_create_mmap_offset(obj); - i915_gem_object_put(obj); - - return err == expected; -} - -static void disable_retire_worker(struct drm_i915_private *i915) -{ - i915_gem_shrinker_unregister(i915); - - intel_gt_pm_get(i915); - - cancel_delayed_work_sync(&i915->gem.retire_work); - cancel_delayed_work_sync(&i915->gem.idle_work); -} - -static void restore_retire_worker(struct drm_i915_private *i915) -{ - intel_gt_pm_put(i915); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - i915_gem_shrinker_register(i915); -} - -static int igt_mmap_offset_exhaustion(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; - struct drm_i915_gem_object *obj; - struct drm_mm_node resv, *hole; - u64 hole_start, hole_end; - int loop, err; - - /* Disable background reaper */ - disable_retire_worker(i915); - GEM_BUG_ON(!i915->gt.awake); - - /* Trim the device mmap space to only a page */ - memset(&resv, 0, sizeof(resv)); - 
drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { - resv.start = hole_start; - resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ - err = drm_mm_reserve_node(mm, &resv); - if (err) { - pr_err("Failed to trim VMA manager, err=%d\n", err); - goto out_park; - } - break; - } - - /* Just fits! */ - if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { - pr_err("Unable to insert object into single page hole\n"); - err = -EINVAL; - goto out; - } - - /* Too large */ - if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { - pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); - err = -EINVAL; - goto out; - } - - /* Fill the hole, further allocation attempts should then fail */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out; - } - - err = i915_gem_object_create_mmap_offset(obj); - if (err) { - pr_err("Unable to insert object into reclaimed hole\n"); - goto err_obj; - } - - if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { - pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); - err = -EINVAL; - goto err_obj; - } - - i915_gem_object_put(obj); - - /* Now fill with busy dead objects that we expect to reap */ - for (loop = 0; loop < 3; loop++) { - intel_wakeref_t wakeref; - - if (i915_terminally_wedged(i915)) - break; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out; - } - - err = 0; - mutex_lock(&i915->drm.struct_mutex); - with_intel_runtime_pm(i915, wakeref) - err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); - if (err) { - pr_err("[loop %d] Failed to busy the object\n", loop); - goto err_obj; - } - - /* NB we rely on the _active_ reference to access obj now */ - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - err = i915_gem_object_create_mmap_offset(obj); - if (err) { - pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", - loop, err); - goto 
out; - } - } - -out: - drm_mm_remove_node(&resv); -out_park: - restore_retire_worker(i915); - return err; -err_obj: - i915_gem_object_put(obj); - goto out; -} - int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -596,8 +111,6 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_gem_huge), - SUBTEST(igt_partial_tiling), - SUBTEST(igt_mmap_offset_exhaustion), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 6d766925ad04..bbf387de8db3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -16,6 +16,7 @@ selftest(timelines, i915_timeline_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) +selftest(mman, i915_gem_mman_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests)