[v3] drm/i915: Split out i915_vma.c

Message ID	1478854795-18914-1-git-send-email-joonas.lahtinen@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> To: Intel graphics driver community testing & development <intel-gfx@lists.freedesktop.org> Date: Fri, 11 Nov 2016 10:59:55 +0200 Message-Id: <1478854795-18914-1-git-send-email-joonas.lahtinen@linux.intel.com> In-Reply-To: <1478768144-11087-1-git-send-email-joonas.lahtinen@linux.intel.com> References: <1478768144-11087-1-git-send-email-joonas.lahtinen@linux.intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH v3] drm/i915: Split out i915_vma.c Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

On 11/11/2016 08:59, Joonas Lahtinen wrote: > As a side product, had to split two other files; > - i915_gem_fence_reg.h > - i915_gem_object.h (only parts that needed immediate untanglement) > > I tried to move code in as big chunks as possible, to make review > easier. i915_vma_compare was moved to a header temporarily. > > v2: > - Use i915_gem_fence_reg.{c,h} > > v3: > - Rebased > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Acked-by: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > --- > drivers/gpu/drm/i915/Makefile | 3 +- > drivers/gpu/drm/i915/i915_drv.h | 385 +--------------- > drivers/gpu/drm/i915/i915_gem.c | 371 ---------------- > drivers/gpu/drm/i915/i915_gem_fence.c | 716 ------------------------------ > drivers/gpu/drm/i915/i915_gem_fence_reg.c | 716 ++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_fence_reg.h | 51 +++ > drivers/gpu/drm/i915/i915_gem_gtt.c | 277 +----------- > drivers/gpu/drm/i915/i915_gem_gtt.h | 225 +--------- > drivers/gpu/drm/i915/i915_gem_object.h | 337 ++++++++++++++ > drivers/gpu/drm/i915/i915_gem_request.h | 3 + > drivers/gpu/drm/i915/i915_vma.c | 650 +++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_vma.h | 342 ++++++++++++++ > 12 files changed, 2120 insertions(+), 1956 deletions(-) > delete mode 100644 drivers/gpu/drm/i915/i915_gem_fence.c > create mode 100644 drivers/gpu/drm/i915/i915_gem_fence_reg.c > create mode 100644 drivers/gpu/drm/i915/i915_gem_fence_reg.h > create mode 100644 drivers/gpu/drm/i915/i915_gem_object.h > create mode 100644 drivers/gpu/drm/i915/i915_vma.c > create mode 100644 drivers/gpu/drm/i915/i915_vma.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 0857e50..3dea46a 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -33,7 +33,7 @@ i915-y += i915_cmd_parser.o \ > i915_gem_dmabuf.o \ > i915_gem_evict.o 
\ > i915_gem_execbuffer.o \ > - i915_gem_fence.o \ > + i915_gem_fence_reg.o \ > i915_gem_gtt.o \ > i915_gem_internal.o \ > i915_gem.o \ > @@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \ > i915_gem_timeline.o \ > i915_gem_userptr.o \ > i915_trace_points.o \ > + i915_vma.o \ > intel_breadcrumbs.o \ > intel_engine_cs.o \ > intel_hangcheck.o \ > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 30777de..ccd0361 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -60,11 +60,15 @@ > #include "intel_ringbuffer.h" > > #include "i915_gem.h" > +#include "i915_gem_fence_reg.h" > +#include "i915_gem_object.h" > #include "i915_gem_gtt.h" > #include "i915_gem_render_state.h" > #include "i915_gem_request.h" > #include "i915_gem_timeline.h" > > +#include "i915_vma.h" > + > #include "intel_gvt.h" > > /* General customization: > @@ -459,23 +463,6 @@ struct intel_opregion { > struct intel_overlay; > struct intel_overlay_error_state; > > -struct drm_i915_fence_reg { > - struct list_head link; > - struct drm_i915_private *i915; > - struct i915_vma *vma; > - int pin_count; > - int id; > - /** > - * Whether the tiling parameters for the currently > - * associated fence register have changed. Note that > - * for the purposes of tracking tiling changes we also > - * treat the unfenced register, the register slot that > - * the object occupies whilst it executes a fenced > - * command (such as BLT on gen2/3), as a "fence". > - */ > - bool dirty; > -}; > - > struct sdvo_device_mapping { > u8 initialized; > u8 dvo_port; > @@ -2179,31 +2166,6 @@ enum hdmi_force_audio { > > #define I915_GTT_OFFSET_NONE ((u32)-1) > > -struct drm_i915_gem_object_ops { > - unsigned int flags; > -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 > -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 > - > - /* Interface between the GEM object and its backing storage. 
> - * get_pages() is called once prior to the use of the associated set > - * of pages before to binding them into the GTT, and put_pages() is > - * called after we no longer need them. As we expect there to be > - * associated cost with migrating pages between the backing storage > - * and making them available for the GPU (e.g. clflush), we may hold > - * onto the pages after they are no longer referenced by the GPU > - * in case they may be used again shortly (for example migrating the > - * pages to a different memory domain within the GTT). put_pages() > - * will therefore most likely be called when the object itself is > - * being released or under memory pressure (where we attempt to > - * reap pages for the shrinker). > - */ > - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); > - void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); > - > - int (*dmabuf_export)(struct drm_i915_gem_object *); > - void (*release)(struct drm_i915_gem_object *); > -}; > - > /* > * Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is > * considered to be the frontbuffer for the given plane interface-wise. This > @@ -2225,292 +2187,6 @@ struct drm_i915_gem_object_ops { > #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \ > (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) > > -struct drm_i915_gem_object { > - struct drm_gem_object base; > - > - const struct drm_i915_gem_object_ops *ops; > - > - /** List of VMAs backed by this object */ > - struct list_head vma_list; > - struct rb_root vma_tree; > - > - /** Stolen memory for this object, instead of being backed by shmem. */ > - struct drm_mm_node *stolen; > - struct list_head global_link; > - union { > - struct rcu_head rcu; > - struct llist_node freed; > - }; > - > - /** > - * Whether the object is currently in the GGTT mmap. 
> - */ > - struct list_head userfault_link; > - > - /** Used in execbuf to temporarily hold a ref */ > - struct list_head obj_exec_link; > - > - struct list_head batch_pool_link; > - > - unsigned long flags; > - > - /** > - * Have we taken a reference for the object for incomplete GPU > - * activity? > - */ > -#define I915_BO_ACTIVE_REF 0 > - > - /* > - * Is the object to be mapped as read-only to the GPU > - * Only honoured if hardware has relevant pte bit > - */ > - unsigned long gt_ro:1; > - unsigned int cache_level:3; > - unsigned int cache_dirty:1; > - > - atomic_t frontbuffer_bits; > - unsigned int frontbuffer_ggtt_origin; /* write once */ > - > - /** Current tiling stride for the object, if it's tiled. */ > - unsigned int tiling_and_stride; > -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ > -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) > -#define STRIDE_MASK (~TILING_MASK) > - > - /** Count of VMA actually bound by this object */ > - unsigned int bind_count; > - unsigned int active_count; > - unsigned int pin_display; > - > - struct { > - struct mutex lock; /* protects the pages and their use */ > - atomic_t pages_pin_count; > - > - struct sg_table *pages; > - void *mapping; > - > - struct i915_gem_object_page_iter { > - struct scatterlist *sg_pos; > - unsigned int sg_idx; /* in pages, but 32bit eek! */ > - > - struct radix_tree_root radix; > - struct mutex lock; /* protects this cache */ > - } get_page; > - > - /** > - * Advice: are the backing pages purgeable? > - */ > - unsigned int madv:2; > - > - /** > - * This is set if the object has been written to since the > - * pages were last acquired. > - */ > - bool dirty:1; > - > - /** > - * This is set if the object has been pinned due to unknown > - * swizzling. > - */ > - bool quirked:1; > - } mm; > - > - /** Breadcrumb of last rendering to the buffer. > - * There can only be one writer, but we allow for multiple readers. 
> - * If there is a writer that necessarily implies that all other > - * read requests are complete - but we may only be lazily clearing > - * the read requests. A read request is naturally the most recent > - * request on a ring, so we may have two different write and read > - * requests on one ring where the write request is older than the > - * read request. This allows for the CPU to read from an active > - * buffer by only waiting for the write to complete. > - */ > - struct reservation_object *resv; > - > - /** References from framebuffers, locks out tiling changes. */ > - unsigned long framebuffer_references; > - > - /** Record of address bit 17 of each page at last unbind. */ > - unsigned long *bit_17; > - > - struct i915_gem_userptr { > - uintptr_t ptr; > - unsigned read_only :1; > - > - struct i915_mm_struct *mm; > - struct i915_mmu_object *mmu_object; > - struct work_struct *work; > - } userptr; > - > - /** for phys allocated objects */ > - struct drm_dma_handle *phys_handle; > - > - struct reservation_object __builtin_resv; > -}; > - > -static inline struct drm_i915_gem_object * > -to_intel_bo(struct drm_gem_object *gem) > -{ > - /* Assert that to_intel_bo(NULL) == NULL */ > - BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); > - > - return container_of(gem, struct drm_i915_gem_object, base); > -} > - > -/** > - * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle > - * @filp: DRM file private date > - * @handle: userspace handle > - * > - * Returns: > - * > - * A pointer to the object named by the handle if such exists on @filp, NULL > - * otherwise. This object is only valid whilst under the RCU read lock, and > - * note carefully the object may be in the process of being destroyed. 
> - */ > -static inline struct drm_i915_gem_object * > -i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) > -{ > -#ifdef CONFIG_LOCKDEP > - WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); > -#endif > - return idr_find(&file->object_idr, handle); > -} > - > -static inline struct drm_i915_gem_object * > -i915_gem_object_lookup(struct drm_file *file, u32 handle) > -{ > - struct drm_i915_gem_object *obj; > - > - rcu_read_lock(); > - obj = i915_gem_object_lookup_rcu(file, handle); > - if (obj && !kref_get_unless_zero(&obj->base.refcount)) > - obj = NULL; > - rcu_read_unlock(); > - > - return obj; > -} > - > -__deprecated > -extern struct drm_gem_object * > -drm_gem_object_lookup(struct drm_file *file, u32 handle); > - > -__attribute__((nonnull)) > -static inline struct drm_i915_gem_object * > -i915_gem_object_get(struct drm_i915_gem_object *obj) > -{ > - drm_gem_object_reference(&obj->base); > - return obj; > -} > - > -__deprecated > -extern void drm_gem_object_reference(struct drm_gem_object *); > - > -__attribute__((nonnull)) > -static inline void > -i915_gem_object_put(struct drm_i915_gem_object *obj) > -{ > - __drm_gem_object_unreference(&obj->base); > -} > - > -__deprecated > -extern void drm_gem_object_unreference(struct drm_gem_object *); > - > -__deprecated > -extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); > - > -static inline bool > -i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) > -{ > - return atomic_read(&obj->base.refcount.refcount) == 0; > -} > - > -static inline bool > -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) > -{ > - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; > -} > - > -static inline bool > -i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) > -{ > - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; > -} > - > -static inline bool > -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) > -{ > - return 
obj->active_count; > -} > - > -static inline bool > -i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) > -{ > - return test_bit(I915_BO_ACTIVE_REF, &obj->flags); > -} > - > -static inline void > -i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) > -{ > - lockdep_assert_held(&obj->base.dev->struct_mutex); > - __set_bit(I915_BO_ACTIVE_REF, &obj->flags); > -} > - > -static inline void > -i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) > -{ > - lockdep_assert_held(&obj->base.dev->struct_mutex); > - __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); > -} > - > -void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); > - > -static inline unsigned int > -i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) > -{ > - return obj->tiling_and_stride & TILING_MASK; > -} > - > -static inline bool > -i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) > -{ > - return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; > -} > - > -static inline unsigned int > -i915_gem_object_get_stride(struct drm_i915_gem_object *obj) > -{ > - return obj->tiling_and_stride & STRIDE_MASK; > -} > - > -static inline struct intel_engine_cs * > -i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) > -{ > - struct intel_engine_cs *engine = NULL; > - struct dma_fence *fence; > - > - rcu_read_lock(); > - fence = reservation_object_get_excl_rcu(obj->resv); > - rcu_read_unlock(); > - > - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) > - engine = to_request(fence)->engine; > - dma_fence_put(fence); > - > - return engine; > -} > - > -static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) > -{ > - i915_gem_object_get(vma->obj); > - return vma; > -} > - > -static inline void i915_vma_put(struct i915_vma *vma) > -{ > - i915_gem_object_put(vma->obj); > -} > - > /* > * Optimised SGL iterator for GEM objects > */ > @@ -3222,13 +2898,6 @@ 
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > u64 alignment, > u64 flags); > > -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, > - u32 flags); > -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); > -int __must_check i915_vma_unbind(struct i915_vma *vma); > -void i915_vma_close(struct i915_vma *vma); > -void i915_vma_destroy(struct i915_vma *vma); > - > int i915_gem_object_unbind(struct drm_i915_gem_object *obj); > void i915_gem_release_mmap(struct drm_i915_gem_object *obj); > > @@ -3478,54 +3147,10 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, > return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); > } > > -/* i915_gem_fence.c */ > +/* i915_gem_fence_reg.c */ > int __must_check i915_vma_get_fence(struct i915_vma *vma); > int __must_check i915_vma_put_fence(struct i915_vma *vma); > > -/** > - * i915_vma_pin_fence - pin fencing state > - * @vma: vma to pin fencing for > - * > - * This pins the fencing state (whether tiled or untiled) to make sure the > - * vma (and its object) is ready to be used as a scanout target. Fencing > - * status must be synchronize first by calling i915_vma_get_fence(): > - * > - * The resulting fence pin reference must be released again with > - * i915_vma_unpin_fence(). > - * > - * Returns: > - * > - * True if the vma has a fence, false otherwise. > - */ > -static inline bool > -i915_vma_pin_fence(struct i915_vma *vma) > -{ > - lockdep_assert_held(&vma->vm->dev->struct_mutex); > - if (vma->fence) { > - vma->fence->pin_count++; > - return true; > - } else > - return false; > -} > - > -/** > - * i915_vma_unpin_fence - unpin fencing state > - * @vma: vma to unpin fencing for > - * > - * This releases the fence pin reference acquired through > - * i915_vma_pin_fence. It will handle both objects with and without an > - * attached fence correctly, callers do not need to distinguish this. 
> - */ > -static inline void > -i915_vma_unpin_fence(struct i915_vma *vma) > -{ > - lockdep_assert_held(&vma->vm->dev->struct_mutex); > - if (vma->fence) { > - GEM_BUG_ON(vma->fence->pin_count <= 0); > - vma->fence->pin_count--; > - } > -} > - > void i915_gem_restore_fences(struct drm_device *dev); > > void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 1c20edb..d51fb5d 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2919,117 +2919,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) > return ret; > } > > -static void __i915_vma_iounmap(struct i915_vma *vma) > -{ > - GEM_BUG_ON(i915_vma_is_pinned(vma)); > - > - if (vma->iomap == NULL) > - return; > - > - io_mapping_unmap(vma->iomap); > - vma->iomap = NULL; > -} > - > -int i915_vma_unbind(struct i915_vma *vma) > -{ > - struct drm_i915_gem_object *obj = vma->obj; > - unsigned long active; > - int ret; > - > - lockdep_assert_held(&obj->base.dev->struct_mutex); > - > - /* First wait upon any activity as retiring the request may > - * have side-effects such as unpinning or even unbinding this vma. > - */ > - active = i915_vma_get_active(vma); > - if (active) { > - int idx; > - > - /* When a closed VMA is retired, it is unbound - eek. > - * In order to prevent it from being recursively closed, > - * take a pin on the vma so that the second unbind is > - * aborted. > - * > - * Even more scary is that the retire callback may free > - * the object (last active vma). To prevent the explosion > - * we defer the actual object free to a worker that can > - * only proceed once it acquires the struct_mutex (which > - * we currently hold, therefore it cannot free this object > - * before we are finished). 
> - */ > - __i915_vma_pin(vma); > - > - for_each_active(active, idx) { > - ret = i915_gem_active_retire(&vma->last_read[idx], > - &vma->vm->dev->struct_mutex); > - if (ret) > - break; > - } > - > - __i915_vma_unpin(vma); > - if (ret) > - return ret; > - > - GEM_BUG_ON(i915_vma_is_active(vma)); > - } > - > - if (i915_vma_is_pinned(vma)) > - return -EBUSY; > - > - if (!drm_mm_node_allocated(&vma->node)) > - goto destroy; > - > - GEM_BUG_ON(obj->bind_count == 0); > - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); > - > - if (i915_vma_is_map_and_fenceable(vma)) { > - /* release the fence reg _after_ flushing */ > - ret = i915_vma_put_fence(vma); > - if (ret) > - return ret; > - > - /* Force a pagefault for domain tracking on next user access */ > - i915_gem_release_mmap(obj); > - > - __i915_vma_iounmap(vma); > - vma->flags &= ~I915_VMA_CAN_FENCE; > - } > - > - if (likely(!vma->vm->closed)) { > - trace_i915_vma_unbind(vma); > - vma->vm->unbind_vma(vma); > - } > - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); > - > - drm_mm_remove_node(&vma->node); > - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); > - > - if (vma->pages != obj->mm.pages) { > - GEM_BUG_ON(!vma->pages); > - sg_free_table(vma->pages); > - kfree(vma->pages); > - } > - vma->pages = NULL; > - > - /* Since the unbound list is global, only move to that list if > - * no more VMAs exist. */ > - if (--obj->bind_count == 0) > - list_move_tail(&obj->global_link, > - &to_i915(obj->base.dev)->mm.unbound_list); > - > - /* And finally now the object is completely decoupled from this vma, > - * we can drop its hold on the backing storage and allow it to be > - * reaped by the shrinker. 
> - */ > - i915_gem_object_unpin_pages(obj); > - > -destroy: > - if (unlikely(i915_vma_is_closed(vma))) > - i915_vma_destroy(vma); > - > - return 0; > -} > - > static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) > { > int ret, i; > @@ -3057,172 +2946,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) > return 0; > } > > -static bool i915_gem_valid_gtt_space(struct i915_vma *vma, > - unsigned long cache_level) > -{ > - struct drm_mm_node *gtt_space = &vma->node; > - struct drm_mm_node *other; > - > - /* > - * On some machines we have to be careful when putting differing types > - * of snoopable memory together to avoid the prefetcher crossing memory > - * domains and dying. During vm initialisation, we decide whether or not > - * these constraints apply and set the drm_mm.color_adjust > - * appropriately. > - */ > - if (vma->vm->mm.color_adjust == NULL) > - return true; > - > - if (!drm_mm_node_allocated(gtt_space)) > - return true; > - > - if (list_empty(&gtt_space->node_list)) > - return true; > - > - other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); > - if (other->allocated && !other->hole_follows && other->color != cache_level) > - return false; > - > - other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); > - if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) > - return false; > - > - return true; > -} > - > -/** > - * i915_vma_insert - finds a slot for the vma in its address space > - * @vma: the vma > - * @size: requested size in bytes (can be larger than the VMA) > - * @alignment: required alignment > - * @flags: mask of PIN_* flags to use > - * > - * First we try to allocate some free space that meets the requirements for > - * the VMA. Failiing that, if the flags permit, it will evict an old VMA, > - * preferrably the oldest idle entry to make room for the new VMA. 
> - * > - * Returns: > - * 0 on success, negative error code otherwise. > - */ > -static int > -i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > -{ > - struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); > - struct drm_i915_gem_object *obj = vma->obj; > - u64 start, end; > - int ret; > - > - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); > - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); > - > - size = max(size, vma->size); > - if (flags & PIN_MAPPABLE) > - size = i915_gem_get_ggtt_size(dev_priv, size, > - i915_gem_object_get_tiling(obj)); > - > - alignment = max(max(alignment, vma->display_alignment), > - i915_gem_get_ggtt_alignment(dev_priv, size, > - i915_gem_object_get_tiling(obj), > - flags & PIN_MAPPABLE)); > - > - start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > - > - end = vma->vm->total; > - if (flags & PIN_MAPPABLE) > - end = min_t(u64, end, dev_priv->ggtt.mappable_end); > - if (flags & PIN_ZONE_4G) > - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); > - > - /* If binding the object/GGTT view requires more space than the entire > - * aperture has, reject it early before evicting everything in a vain > - * attempt to find space. > - */ > - if (size > end) { > - DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", > - size, obj->base.size, > - flags & PIN_MAPPABLE ? 
"mappable" : "total", > - end); > - return -E2BIG; > - } > - > - ret = i915_gem_object_pin_pages(obj); > - if (ret) > - return ret; > - > - if (flags & PIN_OFFSET_FIXED) { > - u64 offset = flags & PIN_OFFSET_MASK; > - if (offset & (alignment - 1) || offset > end - size) { > - ret = -EINVAL; > - goto err_unpin; > - } > - > - vma->node.start = offset; > - vma->node.size = size; > - vma->node.color = obj->cache_level; > - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); > - if (ret) { > - ret = i915_gem_evict_for_vma(vma); > - if (ret == 0) > - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); > - if (ret) > - goto err_unpin; > - } > - } else { > - u32 search_flag, alloc_flag; > - > - if (flags & PIN_HIGH) { > - search_flag = DRM_MM_SEARCH_BELOW; > - alloc_flag = DRM_MM_CREATE_TOP; > - } else { > - search_flag = DRM_MM_SEARCH_DEFAULT; > - alloc_flag = DRM_MM_CREATE_DEFAULT; > - } > - > - /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, > - * so we know that we always have a minimum alignment of 4096. > - * The drm_mm range manager is optimised to return results > - * with zero alignment, so where possible use the optimal > - * path. 
> - */ > - if (alignment <= 4096) > - alignment = 0; > - > -search_free: > - ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, > - &vma->node, > - size, alignment, > - obj->cache_level, > - start, end, > - search_flag, > - alloc_flag); > - if (ret) { > - ret = i915_gem_evict_something(vma->vm, size, alignment, > - obj->cache_level, > - start, end, > - flags); > - if (ret == 0) > - goto search_free; > - > - goto err_unpin; > - } > - > - GEM_BUG_ON(vma->node.start < start); > - GEM_BUG_ON(vma->node.start + vma->node.size > end); > - } > - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); > - > - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); > - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > - obj->bind_count++; > - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); > - > - return 0; > - > -err_unpin: > - i915_gem_object_unpin_pages(obj); > - return ret; > -} > - > void i915_gem_clflush_object(struct drm_i915_gem_object *obj, > bool force) > { > @@ -3818,100 +3541,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) > return ret < 0 ? 
ret : 0; > } > > -static bool > -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > -{ > - if (!drm_mm_node_allocated(&vma->node)) > - return false; > - > - if (vma->node.size < size) > - return true; > - > - if (alignment && vma->node.start & (alignment - 1)) > - return true; > - > - if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) > - return true; > - > - if (flags & PIN_OFFSET_BIAS && > - vma->node.start < (flags & PIN_OFFSET_MASK)) > - return true; > - > - if (flags & PIN_OFFSET_FIXED && > - vma->node.start != (flags & PIN_OFFSET_MASK)) > - return true; > - > - return false; > -} > - > -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) > -{ > - struct drm_i915_gem_object *obj = vma->obj; > - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); > - bool mappable, fenceable; > - u32 fence_size, fence_alignment; > - > - fence_size = i915_gem_get_ggtt_size(dev_priv, > - vma->size, > - i915_gem_object_get_tiling(obj)); > - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, > - vma->size, > - i915_gem_object_get_tiling(obj), > - true); > - > - fenceable = (vma->node.size == fence_size && > - (vma->node.start & (fence_alignment - 1)) == 0); > - > - mappable = (vma->node.start + fence_size <= > - dev_priv->ggtt.mappable_end); > - > - /* > - * Explicitly disable for rotated VMA since the display does not > - * need the fence and the VMA is not accessible to other users. 
> - */ > - if (mappable && fenceable && > - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) > - vma->flags |= I915_VMA_CAN_FENCE; > - else > - vma->flags &= ~I915_VMA_CAN_FENCE; > -} > - > -int __i915_vma_do_pin(struct i915_vma *vma, > - u64 size, u64 alignment, u64 flags) > -{ > - unsigned int bound = vma->flags; > - int ret; > - > - lockdep_assert_held(&vma->vm->dev->struct_mutex); > - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); > - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); > - > - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { > - ret = -EBUSY; > - goto err; > - } > - > - if ((bound & I915_VMA_BIND_MASK) == 0) { > - ret = i915_vma_insert(vma, size, alignment, flags); > - if (ret) > - goto err; > - } > - > - ret = i915_vma_bind(vma, vma->obj->cache_level, flags); > - if (ret) > - goto err; > - > - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) > - __i915_vma_set_map_and_fenceable(vma); > - > - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); > - return 0; > - > -err: > - __i915_vma_unpin(vma); > - return ret; > -} > - > struct i915_vma * > i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > const struct i915_ggtt_view *view, > diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c > deleted file mode 100644 > index cd59dbc..0000000 > --- a/drivers/gpu/drm/i915/i915_gem_fence.c > +++ /dev/null > @@ -1,716 +0,0 @@ > -/* > - * Copyright © 2008-2015 Intel Corporation > - * > - * Permission is hereby granted, free of charge, to any person obtaining a > - * copy of this software and associated documentation files (the "Software"), > - * to deal in the Software without restriction, including without limitation > - * the rights to use, copy, modify, merge, publish, distribute, sublicense, > - * and/or sell copies of the Software, and to permit persons to whom the > - * Software is furnished to do so, subject to the following conditions: > - * > - * The above copyright notice and this 
permission notice (including the next > - * paragraph) shall be included in all copies or substantial portions of the > - * Software. > - * > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > - * IN THE SOFTWARE. > - */ > - > -#include <drm/drmP.h> > -#include <drm/i915_drm.h> > -#include "i915_drv.h" > - > -/** > - * DOC: fence register handling > - * > - * Important to avoid confusions: "fences" in the i915 driver are not execution > - * fences used to track command completion but hardware detiler objects which > - * wrap a given range of the global GTT. Each platform has only a fairly limited > - * set of these objects. > - * > - * Fences are used to detile GTT memory mappings. They're also connected to the > - * hardware frontbuffer render tracking and hence interact with frontbuffer > - * compression. Furthermore on older platforms fences are required for tiled > - * objects used by the display engine. They can also be used by the render > - * engine - they're required for blitter commands and are optional for render > - * commands. But on gen4+ both display (with the exception of fbc) and rendering > - * have their own tiling state bits and don't need fences. > - * > - * Also note that fences only support X and Y tiling and hence can't be used for > - * the fancier new tiling formats like W, Ys and Yf. > - * > - * Finally note that because fences are such a restricted resource they're > - * dynamically associated with objects. Furthermore fence state is committed to > - * the hardware lazily to avoid unnecessary stalls on gen2/3. 
Therefore code must > - * explicitly call i915_gem_object_get_fence() to synchronize fencing status > - * for cpu access. Also note that some code wants an unfenced view, for those > - * cases the fence can be removed forcefully with i915_gem_object_put_fence(). > - * > - * Internally these functions will synchronize with userspace access by removing > - * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. > - */ > - > -#define pipelined 0 > - > -static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, > - struct i915_vma *vma) > -{ > - i915_reg_t fence_reg_lo, fence_reg_hi; > - int fence_pitch_shift; > - u64 val; > - > - if (INTEL_INFO(fence->i915)->gen >= 6) { > - fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); > - fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); > - fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; > - > - } else { > - fence_reg_lo = FENCE_REG_965_LO(fence->id); > - fence_reg_hi = FENCE_REG_965_HI(fence->id); > - fence_pitch_shift = I965_FENCE_PITCH_SHIFT; > - } > - > - val = 0; > - if (vma) { > - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); > - bool is_y_tiled = tiling == I915_TILING_Y; > - unsigned int stride = i915_gem_object_get_stride(vma->obj); > - u32 row_size = stride * (is_y_tiled ? 32 : 8); > - u32 size = rounddown((u32)vma->node.size, row_size); > - > - val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; > - val |= vma->node.start & 0xfffff000; > - val |= (u64)((stride / 128) - 1) << fence_pitch_shift; > - if (is_y_tiled) > - val |= BIT(I965_FENCE_TILING_Y_SHIFT); > - val |= I965_FENCE_REG_VALID; > - } > - > - if (!pipelined) { > - struct drm_i915_private *dev_priv = fence->i915; > - > - /* To w/a incoherency with non-atomic 64-bit register updates, > - * we split the 64-bit update into two 32-bit writes. In order > - * for a partial fence not to be evaluated between writes, we > - * precede the update with write to turn off the fence register, > - * and only enable the fence as the last step. 
> - * > - * For extra levels of paranoia, we make sure each step lands > - * before applying the next step. > - */ > - I915_WRITE(fence_reg_lo, 0); > - POSTING_READ(fence_reg_lo); > - > - I915_WRITE(fence_reg_hi, upper_32_bits(val)); > - I915_WRITE(fence_reg_lo, lower_32_bits(val)); > - POSTING_READ(fence_reg_lo); > - } > -} > - > -static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, > - struct i915_vma *vma) > -{ > - u32 val; > - > - val = 0; > - if (vma) { > - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); > - bool is_y_tiled = tiling == I915_TILING_Y; > - unsigned int stride = i915_gem_object_get_stride(vma->obj); > - int pitch_val; > - int tile_width; > - > - WARN((vma->node.start & ~I915_FENCE_START_MASK) || > - !is_power_of_2(vma->node.size) || > - (vma->node.start & (vma->node.size - 1)), > - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", > - vma->node.start, > - i915_vma_is_map_and_fenceable(vma), > - vma->node.size); > - > - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) > - tile_width = 128; > - else > - tile_width = 512; > - > - /* Note: pitch better be a power of two tile widths */ > - pitch_val = stride / tile_width; > - pitch_val = ffs(pitch_val) - 1; > - > - val = vma->node.start; > - if (is_y_tiled) > - val |= BIT(I830_FENCE_TILING_Y_SHIFT); > - val |= I915_FENCE_SIZE_BITS(vma->node.size); > - val |= pitch_val << I830_FENCE_PITCH_SHIFT; > - val |= I830_FENCE_REG_VALID; > - } > - > - if (!pipelined) { > - struct drm_i915_private *dev_priv = fence->i915; > - i915_reg_t reg = FENCE_REG(fence->id); > - > - I915_WRITE(reg, val); > - POSTING_READ(reg); > - } > -} > - > -static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, > - struct i915_vma *vma) > -{ > - u32 val; > - > - val = 0; > - if (vma) { > - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); > - bool is_y_tiled = tiling == I915_TILING_Y; > - unsigned int stride = i915_gem_object_get_stride(vma->obj); > - u32 
pitch_val; > - > - WARN((vma->node.start & ~I830_FENCE_START_MASK) || > - !is_power_of_2(vma->node.size) || > - (vma->node.start & (vma->node.size - 1)), > - "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", > - vma->node.start, vma->node.size); > - > - pitch_val = stride / 128; > - pitch_val = ffs(pitch_val) - 1; > - > - val = vma->node.start; > - if (is_y_tiled) > - val |= BIT(I830_FENCE_TILING_Y_SHIFT); > - val |= I830_FENCE_SIZE_BITS(vma->node.size); > - val |= pitch_val << I830_FENCE_PITCH_SHIFT; > - val |= I830_FENCE_REG_VALID; > - } > - > - if (!pipelined) { > - struct drm_i915_private *dev_priv = fence->i915; > - i915_reg_t reg = FENCE_REG(fence->id); > - > - I915_WRITE(reg, val); > - POSTING_READ(reg); > - } > -} > - > -static void fence_write(struct drm_i915_fence_reg *fence, > - struct i915_vma *vma) > -{ > - /* Previous access through the fence register is marshalled by > - * the mb() inside the fault handlers (i915_gem_release_mmaps) > - * and explicitly managed for internal users. > - */ > - > - if (IS_GEN2(fence->i915)) > - i830_write_fence_reg(fence, vma); > - else if (IS_GEN3(fence->i915)) > - i915_write_fence_reg(fence, vma); > - else > - i965_write_fence_reg(fence, vma); > - > - /* Access through the fenced region afterwards is > - * ordered by the posting reads whilst writing the registers. 
> - */ > - > - fence->dirty = false; > -} > - > -static int fence_update(struct drm_i915_fence_reg *fence, > - struct i915_vma *vma) > -{ > - int ret; > - > - if (vma) { > - if (!i915_vma_is_map_and_fenceable(vma)) > - return -EINVAL; > - > - if (WARN(!i915_gem_object_get_stride(vma->obj) || > - !i915_gem_object_get_tiling(vma->obj), > - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", > - i915_gem_object_get_stride(vma->obj), > - i915_gem_object_get_tiling(vma->obj))) > - return -EINVAL; > - > - ret = i915_gem_active_retire(&vma->last_fence, > - &vma->obj->base.dev->struct_mutex); > - if (ret) > - return ret; > - } > - > - if (fence->vma) { > - ret = i915_gem_active_retire(&fence->vma->last_fence, > - &fence->vma->obj->base.dev->struct_mutex); > - if (ret) > - return ret; > - } > - > - if (fence->vma && fence->vma != vma) { > - /* Ensure that all userspace CPU access is completed before > - * stealing the fence. > - */ > - i915_gem_release_mmap(fence->vma->obj); > - > - fence->vma->fence = NULL; > - fence->vma = NULL; > - > - list_move(&fence->link, &fence->i915->mm.fence_list); > - } > - > - fence_write(fence, vma); > - > - if (vma) { > - if (fence->vma != vma) { > - vma->fence = fence; > - fence->vma = vma; > - } > - > - list_move_tail(&fence->link, &fence->i915->mm.fence_list); > - } > - > - return 0; > -} > - > -/** > - * i915_vma_put_fence - force-remove fence for a VMA > - * @vma: vma to map linearly (not through a fence reg) > - * > - * This function force-removes any fence from the given object, which is useful > - * if the kernel wants to do untiled GTT access. > - * > - * Returns: > - * > - * 0 on success, negative error code on failure. 
> - */ > -int > -i915_vma_put_fence(struct i915_vma *vma) > -{ > - struct drm_i915_fence_reg *fence = vma->fence; > - > - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); > - > - if (!fence) > - return 0; > - > - if (fence->pin_count) > - return -EBUSY; > - > - return fence_update(fence, NULL); > -} > - > -static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) > -{ > - struct drm_i915_fence_reg *fence; > - > - list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { > - if (fence->pin_count) > - continue; > - > - return fence; > - } > - > - /* Wait for completion of pending flips which consume fences */ > - if (intel_has_pending_fb_unpin(&dev_priv->drm)) > - return ERR_PTR(-EAGAIN); > - > - return ERR_PTR(-EDEADLK); > -} > - > -/** > - * i915_vma_get_fence - set up fencing for a vma > - * @vma: vma to map through a fence reg > - * > - * When mapping objects through the GTT, userspace wants to be able to write > - * to them without having to worry about swizzling if the object is tiled. > - * This function walks the fence regs looking for a free one for @obj, > - * stealing one if it can't find any. > - * > - * It then sets up the reg based on the object's properties: address, pitch > - * and tiling format. > - * > - * For an untiled surface, this removes any existing fence. > - * > - * Returns: > - * > - * 0 on success, negative error code on failure. > - */ > -int > -i915_vma_get_fence(struct i915_vma *vma) > -{ > - struct drm_i915_fence_reg *fence; > - struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; > - > - /* Note that we revoke fences on runtime suspend. Therefore the user > - * must keep the device awake whilst using the fence. > - */ > - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); > - > - /* Just update our place in the LRU if our fence is getting reused. 
*/ > - if (vma->fence) { > - fence = vma->fence; > - if (!fence->dirty) { > - list_move_tail(&fence->link, > - &fence->i915->mm.fence_list); > - return 0; > - } > - } else if (set) { > - fence = fence_find(to_i915(vma->vm->dev)); > - if (IS_ERR(fence)) > - return PTR_ERR(fence); > - } else > - return 0; > - > - return fence_update(fence, set); > -} > - > -/** > - * i915_gem_restore_fences - restore fence state > - * @dev: DRM device > - * > - * Restore the hw fence state to match the software tracking again, to be called > - * after a gpu reset and on resume. Note that on runtime suspend we only cancel > - * the fences, to be reacquired by the user later. > - */ > -void i915_gem_restore_fences(struct drm_device *dev) > -{ > - struct drm_i915_private *dev_priv = to_i915(dev); > - int i; > - > - for (i = 0; i < dev_priv->num_fence_regs; i++) { > - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; > - struct i915_vma *vma = reg->vma; > - > - /* > - * Commit delayed tiling changes if we have an object still > - * attached to the fence, otherwise just clear the fence. > - */ > - if (vma && !i915_gem_object_is_tiled(vma->obj)) { > - GEM_BUG_ON(!reg->dirty); > - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); > - > - list_move(&reg->link, &dev_priv->mm.fence_list); > - vma->fence = NULL; > - vma = NULL; > - } > - > - fence_write(reg, vma); > - reg->vma = vma; > - } > -} > - > -/** > - * DOC: tiling swizzling details > - * > - * The idea behind tiling is to increase cache hit rates by rearranging > - * pixel data so that a group of pixel accesses are in the same cacheline. > - * Performance improvement from doing this on the back/depth buffer are on > - * the order of 30%. > - * > - * Intel architectures make this somewhat more complicated, though, by > - * adjustments made to addressing of data when the memory is in interleaved > - * mode (matched pairs of DIMMS) to improve memory bandwidth. 
> - * For interleaved memory, the CPU sends every sequential 64 bytes > - * to an alternate memory channel so it can get the bandwidth from both. > - * > - * The GPU also rearranges its accesses for increased bandwidth to interleaved > - * memory, and it matches what the CPU does for non-tiled. However, when tiled > - * it does it a little differently, since one walks addresses not just in the > - * X direction but also Y. So, along with alternating channels when bit > - * 6 of the address flips, it also alternates when other bits flip -- Bits 9 > - * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) > - * are common to both the 915 and 965-class hardware. > - * > - * The CPU also sometimes XORs in higher bits as well, to improve > - * bandwidth doing strided access like we do so frequently in graphics. This > - * is called "Channel XOR Randomization" in the MCH documentation. The result > - * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address > - * decode. > - * > - * All of this bit 6 XORing has an effect on our memory management, > - * as we need to make sure that the 3d driver can correctly address object > - * contents. > - * > - * If we don't have interleaved memory, all tiling is safe and no swizzling is > - * required. > - * > - * When bit 17 is XORed in, we simply refuse to tile at all. Bit > - * 17 is not just a page offset, so as we page an object out and back in, > - * individual pages in it will have different bit 17 addresses, resulting in > - * each 64 bytes being swapped with its neighbor! 
> - * > - * Otherwise, if interleaved, we have to tell the 3d driver what the address > - * swizzling it needs to do is, since it's writing with the CPU to the pages > - * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the > - * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling > - * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order > - * to match what the GPU expects. > - */ > - > -/** > - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern > - * @dev: DRM device > - * > - * Detects bit 6 swizzling of address lookup between IGD access and CPU > - * access through main memory. > - */ > -void > -i915_gem_detect_bit_6_swizzle(struct drm_device *dev) > -{ > - struct drm_i915_private *dev_priv = to_i915(dev); > - uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; > - uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; > - > - if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { > - /* > - * On BDW+, swizzling is not used. We leave the CPU memory > - * controller in charge of optimizing memory accesses without > - * the extra address manipulation GPU side. > - * > - * VLV and CHV don't have GPU swizzling. > - */ > - swizzle_x = I915_BIT_6_SWIZZLE_NONE; > - swizzle_y = I915_BIT_6_SWIZZLE_NONE; > - } else if (INTEL_INFO(dev)->gen >= 6) { > - if (dev_priv->preserve_bios_swizzle) { > - if (I915_READ(DISP_ARB_CTL) & > - DISP_TILE_SURFACE_SWIZZLING) { > - swizzle_x = I915_BIT_6_SWIZZLE_9_10; > - swizzle_y = I915_BIT_6_SWIZZLE_9; > - } else { > - swizzle_x = I915_BIT_6_SWIZZLE_NONE; > - swizzle_y = I915_BIT_6_SWIZZLE_NONE; > - } > - } else { > - uint32_t dimm_c0, dimm_c1; > - dimm_c0 = I915_READ(MAD_DIMM_C0); > - dimm_c1 = I915_READ(MAD_DIMM_C1); > - dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; > - dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; > - /* Enable swizzling when the channels are populated > - * with identically sized dimms. 
We don't need to check > - * the 3rd channel because no cpu with gpu attached > - * ships in that configuration. Also, swizzling only > - * makes sense for 2 channels anyway. */ > - if (dimm_c0 == dimm_c1) { > - swizzle_x = I915_BIT_6_SWIZZLE_9_10; > - swizzle_y = I915_BIT_6_SWIZZLE_9; > - } else { > - swizzle_x = I915_BIT_6_SWIZZLE_NONE; > - swizzle_y = I915_BIT_6_SWIZZLE_NONE; > - } > - } > - } else if (IS_GEN5(dev_priv)) { > - /* On Ironlake whatever DRAM config, GPU always do > - * same swizzling setup. > - */ > - swizzle_x = I915_BIT_6_SWIZZLE_9_10; > - swizzle_y = I915_BIT_6_SWIZZLE_9; > - } else if (IS_GEN2(dev_priv)) { > - /* As far as we know, the 865 doesn't have these bit 6 > - * swizzling issues. > - */ > - swizzle_x = I915_BIT_6_SWIZZLE_NONE; > - swizzle_y = I915_BIT_6_SWIZZLE_NONE; > - } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) && > - !IS_G33(dev_priv))) { > - uint32_t dcc; > - > - /* On 9xx chipsets, channel interleave by the CPU is > - * determined by DCC. For single-channel, neither the CPU > - * nor the GPU do swizzling. For dual channel interleaved, > - * the GPU's interleave is bit 9 and 10 for X tiled, and bit > - * 9 for Y tiled. The CPU's interleave is independent, and > - * can be based on either bit 11 (haven't seen this yet) or > - * bit 17 (common). > - */ > - dcc = I915_READ(DCC); > - switch (dcc & DCC_ADDRESSING_MODE_MASK) { > - case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: > - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: > - swizzle_x = I915_BIT_6_SWIZZLE_NONE; > - swizzle_y = I915_BIT_6_SWIZZLE_NONE; > - break; > - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: > - if (dcc & DCC_CHANNEL_XOR_DISABLE) { > - /* This is the base swizzling by the GPU for > - * tiled buffers. > - */ > - swizzle_x = I915_BIT_6_SWIZZLE_9_10; > - swizzle_y = I915_BIT_6_SWIZZLE_9; > - } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { > - /* Bit 11 swizzling by the CPU in addition. 
*/ > - swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; > - swizzle_y = I915_BIT_6_SWIZZLE_9_11; > - } else { > - /* Bit 17 swizzling by the CPU in addition. */ > - swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; > - swizzle_y = I915_BIT_6_SWIZZLE_9_17; > - } > - break; > - } > - > - /* check for L-shaped memory aka modified enhanced addressing */ > - if (IS_GEN4(dev_priv) && > - !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { > - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; > - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; > - } > - > - if (dcc == 0xffffffff) { > - DRM_ERROR("Couldn't read from MCHBAR. " > - "Disabling tiling.\n"); > - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; > - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; > - } > - } else { > - /* The 965, G33, and newer, have a very flexible memory > - * configuration. It will enable dual-channel mode > - * (interleaving) on as much memory as it can, and the GPU > - * will additionally sometimes enable different bit 6 > - * swizzling for tiled objects from the CPU. > - * > - * Here's what I found on the G965: > - * slot fill memory size swizzling > - * 0A 0B 1A 1B 1-ch 2-ch > - * 512 0 0 0 512 0 O > - * 512 0 512 0 16 1008 X > - * 512 0 0 512 16 1008 X > - * 0 512 0 512 16 1008 X > - * 1024 1024 1024 0 2048 1024 O > - * > - * We could probably detect this based on either the DRB > - * matching, which was the case for the swizzling required in > - * the table above, or from the 1-ch value being less than > - * the minimum size of a rank. > - * > - * Reports indicate that the swizzling actually > - * varies depending upon page placement inside the > - * channels, i.e. we see swizzled pages where the > - * banks of memory are paired and unswizzled on the > - * uneven portion, so leave that as unknown. 
> - */ > - if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { > - swizzle_x = I915_BIT_6_SWIZZLE_9_10; > - swizzle_y = I915_BIT_6_SWIZZLE_9; > - } > - } > - > - if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || > - swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { > - /* Userspace likes to explode if it sees unknown swizzling, > - * so lie. We will finish the lie when reporting through > - * the get-tiling-ioctl by reporting the physical swizzle > - * mode as unknown instead. > - * > - * As we don't strictly know what the swizzling is, it may be > - * bit17 dependent, and so we need to also prevent the pages > - * from being moved. > - */ > - dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; > - swizzle_x = I915_BIT_6_SWIZZLE_NONE; > - swizzle_y = I915_BIT_6_SWIZZLE_NONE; > - } > - > - dev_priv->mm.bit_6_swizzle_x = swizzle_x; > - dev_priv->mm.bit_6_swizzle_y = swizzle_y; > -} > - > -/* > - * Swap every 64 bytes of this page around, to account for it having a new > - * bit 17 of its physical address and therefore being interpreted differently > - * by the GPU. > - */ > -static void > -i915_gem_swizzle_page(struct page *page) > -{ > - char temp[64]; > - char *vaddr; > - int i; > - > - vaddr = kmap(page); > - > - for (i = 0; i < PAGE_SIZE; i += 128) { > - memcpy(temp, &vaddr[i], 64); > - memcpy(&vaddr[i], &vaddr[i + 64], 64); > - memcpy(&vaddr[i + 64], temp, 64); > - } > - > - kunmap(page); > -} > - > -/** > - * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling > - * @obj: i915 GEM buffer object > - * @pages: the scattergather list of physical pages > - * > - * This function fixes up the swizzling in case any page frame number for this > - * object has changed in bit 17 since that state has been saved with > - * i915_gem_object_save_bit_17_swizzle(). > - * > - * This is called when pinning backing storage again, since the kernel is free > - * to move unpinned backing storage around (either by directly moving pages or > - * by swapping them out and back in again). 
> - */ > -void > -i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, > - struct sg_table *pages) > -{ > - struct sgt_iter sgt_iter; > - struct page *page; > - int i; > - > - if (obj->bit_17 == NULL) > - return; > - > - i = 0; > - for_each_sgt_page(page, sgt_iter, pages) { > - char new_bit_17 = page_to_phys(page) >> 17; > - if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { > - i915_gem_swizzle_page(page); > - set_page_dirty(page); > - } > - i++; > - } > -} > - > -/** > - * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling > - * @obj: i915 GEM buffer object > - * @pages: the scattergather list of physical pages > - * > - * This function saves the bit 17 of each page frame number so that swizzling > - * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must > - * be called before the backing storage can be unpinned. > - */ > -void > -i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, > - struct sg_table *pages) > -{ > - const unsigned int page_count = obj->base.size >> PAGE_SHIFT; > - struct sgt_iter sgt_iter; > - struct page *page; > - int i; > - > - if (obj->bit_17 == NULL) { > - obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count), > - sizeof(long), GFP_KERNEL); > - if (obj->bit_17 == NULL) { > - DRM_ERROR("Failed to allocate memory for bit 17 " > - "record\n"); > - return; > - } > - } > - > - i = 0; > - > - for_each_sgt_page(page, sgt_iter, pages) { > - if (page_to_phys(page) & (1 << 17)) > - __set_bit(i, obj->bit_17); > - else > - __clear_bit(i, obj->bit_17); > - i++; > - } > -} > diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c > new file mode 100644 > index 0000000..cd59dbc > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c > @@ -0,0 +1,716 @@ > +/* > + * Copyright © 2008-2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated 
documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include <drm/drmP.h> > +#include <drm/i915_drm.h> > +#include "i915_drv.h" > + > +/** > + * DOC: fence register handling > + * > + * Important to avoid confusions: "fences" in the i915 driver are not execution > + * fences used to track command completion but hardware detiler objects which > + * wrap a given range of the global GTT. Each platform has only a fairly limited > + * set of these objects. > + * > + * Fences are used to detile GTT memory mappings. They're also connected to the > + * hardware frontbuffer render tracking and hence interact with frontbuffer > + * compression. Furthermore on older platforms fences are required for tiled > + * objects used by the display engine. They can also be used by the render > + * engine - they're required for blitter commands and are optional for render > + * commands. 
But on gen4+ both display (with the exception of fbc) and rendering > + * have their own tiling state bits and don't need fences. > + * > + * Also note that fences only support X and Y tiling and hence can't be used for > + * the fancier new tiling formats like W, Ys and Yf. > + * > + * Finally note that because fences are such a restricted resource they're > + * dynamically associated with objects. Furthermore fence state is committed to > + * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must > + * explicitly call i915_vma_get_fence() to synchronize fencing status > + * for cpu access. Also note that some code wants an unfenced view, for those > + * cases the fence can be removed forcefully with i915_vma_put_fence(). > + * > + * Internally these functions will synchronize with userspace access by removing > + * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. > + */ > + > +#define pipelined 0 > + > +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, > + struct i915_vma *vma) > +{ > + i915_reg_t fence_reg_lo, fence_reg_hi; > + int fence_pitch_shift; > + u64 val; > + > + if (INTEL_INFO(fence->i915)->gen >= 6) { > + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); > + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); > + fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; > + > + } else { > + fence_reg_lo = FENCE_REG_965_LO(fence->id); > + fence_reg_hi = FENCE_REG_965_HI(fence->id); > + fence_pitch_shift = I965_FENCE_PITCH_SHIFT; > + } > + > + val = 0; > + if (vma) { > + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); > + bool is_y_tiled = tiling == I915_TILING_Y; > + unsigned int stride = i915_gem_object_get_stride(vma->obj); > + u32 row_size = stride * (is_y_tiled ? 
32 : 8); > + u32 size = rounddown((u32)vma->node.size, row_size); > + > + val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; > + val |= vma->node.start & 0xfffff000; > + val |= (u64)((stride / 128) - 1) << fence_pitch_shift; > + if (is_y_tiled) > + val |= BIT(I965_FENCE_TILING_Y_SHIFT); > + val |= I965_FENCE_REG_VALID; > + } > + > + if (!pipelined) { > + struct drm_i915_private *dev_priv = fence->i915; > + > + /* To w/a incoherency with non-atomic 64-bit register updates, > + * we split the 64-bit update into two 32-bit writes. In order > + * for a partial fence not to be evaluated between writes, we > + * precede the update with write to turn off the fence register, > + * and only enable the fence as the last step. > + * > + * For extra levels of paranoia, we make sure each step lands > + * before applying the next step. > + */ > + I915_WRITE(fence_reg_lo, 0); > + POSTING_READ(fence_reg_lo); > + > + I915_WRITE(fence_reg_hi, upper_32_bits(val)); > + I915_WRITE(fence_reg_lo, lower_32_bits(val)); > + POSTING_READ(fence_reg_lo); > + } > +} > + > +static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, > + struct i915_vma *vma) > +{ > + u32 val; > + > + val = 0; > + if (vma) { > + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); > + bool is_y_tiled = tiling == I915_TILING_Y; > + unsigned int stride = i915_gem_object_get_stride(vma->obj); > + int pitch_val; > + int tile_width; > + > + WARN((vma->node.start & ~I915_FENCE_START_MASK) || > + !is_power_of_2(vma->node.size) || > + (vma->node.start & (vma->node.size - 1)), > + "object 0x%08llx [fenceable? 
%d] not 1M or pot-size (0x%08llx) aligned\n", > + vma->node.start, > + i915_vma_is_map_and_fenceable(vma), > + vma->node.size); > + > + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) > + tile_width = 128; > + else > + tile_width = 512; > + > + /* Note: pitch better be a power of two tile widths */ > + pitch_val = stride / tile_width; > + pitch_val = ffs(pitch_val) - 1; > + > + val = vma->node.start; > + if (is_y_tiled) > + val |= BIT(I830_FENCE_TILING_Y_SHIFT); > + val |= I915_FENCE_SIZE_BITS(vma->node.size); > + val |= pitch_val << I830_FENCE_PITCH_SHIFT; > + val |= I830_FENCE_REG_VALID; > + } > + > + if (!pipelined) { > + struct drm_i915_private *dev_priv = fence->i915; > + i915_reg_t reg = FENCE_REG(fence->id); > + > + I915_WRITE(reg, val); > + POSTING_READ(reg); > + } > +} > + > +static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, > + struct i915_vma *vma) > +{ > + u32 val; > + > + val = 0; > + if (vma) { > + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); > + bool is_y_tiled = tiling == I915_TILING_Y; > + unsigned int stride = i915_gem_object_get_stride(vma->obj); > + u32 pitch_val; > + > + WARN((vma->node.start & ~I830_FENCE_START_MASK) || > + !is_power_of_2(vma->node.size) || > + (vma->node.start & (vma->node.size - 1)), > + "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", > + vma->node.start, vma->node.size); > + > + pitch_val = stride / 128; > + pitch_val = ffs(pitch_val) - 1; > + > + val = vma->node.start; > + if (is_y_tiled) > + val |= BIT(I830_FENCE_TILING_Y_SHIFT); > + val |= I830_FENCE_SIZE_BITS(vma->node.size); > + val |= pitch_val << I830_FENCE_PITCH_SHIFT; > + val |= I830_FENCE_REG_VALID; > + } > + > + if (!pipelined) { > + struct drm_i915_private *dev_priv = fence->i915; > + i915_reg_t reg = FENCE_REG(fence->id); > + > + I915_WRITE(reg, val); > + POSTING_READ(reg); > + } > +} > + > +static void fence_write(struct drm_i915_fence_reg *fence, > + struct i915_vma *vma) > +{ > + /* Previous access through the 
fence register is marshalled by > + * the mb() inside the fault handlers (i915_gem_release_mmaps) > + * and explicitly managed for internal users. > + */ > + > + if (IS_GEN2(fence->i915)) > + i830_write_fence_reg(fence, vma); > + else if (IS_GEN3(fence->i915)) > + i915_write_fence_reg(fence, vma); > + else > + i965_write_fence_reg(fence, vma); > + > + /* Access through the fenced region afterwards is > + * ordered by the posting reads whilst writing the registers. > + */ > + > + fence->dirty = false; > +} > + > +static int fence_update(struct drm_i915_fence_reg *fence, > + struct i915_vma *vma) > +{ > + int ret; > + > + if (vma) { > + if (!i915_vma_is_map_and_fenceable(vma)) > + return -EINVAL; > + > + if (WARN(!i915_gem_object_get_stride(vma->obj) || > + !i915_gem_object_get_tiling(vma->obj), > + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", > + i915_gem_object_get_stride(vma->obj), > + i915_gem_object_get_tiling(vma->obj))) > + return -EINVAL; > + > + ret = i915_gem_active_retire(&vma->last_fence, > + &vma->obj->base.dev->struct_mutex); > + if (ret) > + return ret; > + } > + > + if (fence->vma) { > + ret = i915_gem_active_retire(&fence->vma->last_fence, > + &fence->vma->obj->base.dev->struct_mutex); > + if (ret) > + return ret; > + } > + > + if (fence->vma && fence->vma != vma) { > + /* Ensure that all userspace CPU access is completed before > + * stealing the fence. 
> + */ > + i915_gem_release_mmap(fence->vma->obj); > + > + fence->vma->fence = NULL; > + fence->vma = NULL; > + > + list_move(&fence->link, &fence->i915->mm.fence_list); > + } > + > + fence_write(fence, vma); > + > + if (vma) { > + if (fence->vma != vma) { > + vma->fence = fence; > + fence->vma = vma; > + } > + > + list_move_tail(&fence->link, &fence->i915->mm.fence_list); > + } > + > + return 0; > +} > + > +/** > + * i915_vma_put_fence - force-remove fence for a VMA > + * @vma: vma to map linearly (not through a fence reg) > + * > + * This function force-removes any fence from the given object, which is useful > + * if the kernel wants to do untiled GTT access. > + * > + * Returns: > + * > + * 0 on success, negative error code on failure. > + */ > +int > +i915_vma_put_fence(struct i915_vma *vma) > +{ > + struct drm_i915_fence_reg *fence = vma->fence; > + > + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); > + > + if (!fence) > + return 0; > + > + if (fence->pin_count) > + return -EBUSY; > + > + return fence_update(fence, NULL); > +} > + > +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) > +{ > + struct drm_i915_fence_reg *fence; > + > + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { > + if (fence->pin_count) > + continue; > + > + return fence; > + } > + > + /* Wait for completion of pending flips which consume fences */ > + if (intel_has_pending_fb_unpin(&dev_priv->drm)) > + return ERR_PTR(-EAGAIN); > + > + return ERR_PTR(-EDEADLK); > +} > + > +/** > + * i915_vma_get_fence - set up fencing for a vma > + * @vma: vma to map through a fence reg > + * > + * When mapping objects through the GTT, userspace wants to be able to write > + * to them without having to worry about swizzling if the object is tiled. > + * This function walks the fence regs looking for a free one for @obj, > + * stealing one if it can't find any. 
> + * > + * It then sets up the reg based on the object's properties: address, pitch > + * and tiling format. > + * > + * For an untiled surface, this removes any existing fence. > + * > + * Returns: > + * > + * 0 on success, negative error code on failure. > + */ > +int > +i915_vma_get_fence(struct i915_vma *vma) > +{ > + struct drm_i915_fence_reg *fence; > + struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; > + > + /* Note that we revoke fences on runtime suspend. Therefore the user > + * must keep the device awake whilst using the fence. > + */ > + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); > + > + /* Just update our place in the LRU if our fence is getting reused. */ > + if (vma->fence) { > + fence = vma->fence; > + if (!fence->dirty) { > + list_move_tail(&fence->link, > + &fence->i915->mm.fence_list); > + return 0; > + } > + } else if (set) { > + fence = fence_find(to_i915(vma->vm->dev)); > + if (IS_ERR(fence)) > + return PTR_ERR(fence); > + } else > + return 0; > + > + return fence_update(fence, set); > +} > + > +/** > + * i915_gem_restore_fences - restore fence state > + * @dev: DRM device > + * > + * Restore the hw fence state to match the software tracking again, to be called > + * after a gpu reset and on resume. Note that on runtime suspend we only cancel > + * the fences, to be reacquired by the user later. > + */ > +void i915_gem_restore_fences(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = to_i915(dev); > + int i; > + > + for (i = 0; i < dev_priv->num_fence_regs; i++) { > + struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; > + struct i915_vma *vma = reg->vma; > + > + /* > + * Commit delayed tiling changes if we have an object still > + * attached to the fence, otherwise just clear the fence. 
> + */ > + if (vma && !i915_gem_object_is_tiled(vma->obj)) { > + GEM_BUG_ON(!reg->dirty); > + GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); > + > + list_move(&reg->link, &dev_priv->mm.fence_list); > + vma->fence = NULL; > + vma = NULL; > + } > + > + fence_write(reg, vma); > + reg->vma = vma; > + } > +} > + > +/** > + * DOC: tiling swizzling details > + * > + * The idea behind tiling is to increase cache hit rates by rearranging > + * pixel data so that a group of pixel accesses are in the same cacheline. > + * Performance improvement from doing this on the back/depth buffer are on > + * the order of 30%. > + * > + * Intel architectures make this somewhat more complicated, though, by > + * adjustments made to addressing of data when the memory is in interleaved > + * mode (matched pairs of DIMMS) to improve memory bandwidth. > + * For interleaved memory, the CPU sends every sequential 64 bytes > + * to an alternate memory channel so it can get the bandwidth from both. > + * > + * The GPU also rearranges its accesses for increased bandwidth to interleaved > + * memory, and it matches what the CPU does for non-tiled. However, when tiled > + * it does it a little differently, since one walks addresses not just in the > + * X direction but also Y. So, along with alternating channels when bit > + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 > + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) > + * are common to both the 915 and 965-class hardware. > + * > + * The CPU also sometimes XORs in higher bits as well, to improve > + * bandwidth doing strided access like we do so frequently in graphics. This > + * is called "Channel XOR Randomization" in the MCH documentation. The result > + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address > + * decode. 
> + * > + * All of this bit 6 XORing has an effect on our memory management, > + * as we need to make sure that the 3d driver can correctly address object > + * contents. > + * > + * If we don't have interleaved memory, all tiling is safe and no swizzling is > + * required. > + * > + * When bit 17 is XORed in, we simply refuse to tile at all. Bit > + * 17 is not just a page offset, so as we page an object out and back in, > + * individual pages in it will have different bit 17 addresses, resulting in > + * each 64 bytes being swapped with its neighbor! > + * > + * Otherwise, if interleaved, we have to tell the 3d driver what the address > + * swizzling it needs to do is, since it's writing with the CPU to the pages > + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the > + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling > + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order > + * to match what the GPU expects. > + */ > + > +/** > + * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern > + * @dev: DRM device > + * > + * Detects bit 6 swizzling of address lookup between IGD access and CPU > + * access through main memory. > + */ > +void > +i915_gem_detect_bit_6_swizzle(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = to_i915(dev); > + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; > + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; > + > + if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { > + /* > + * On BDW+, swizzling is not used. We leave the CPU memory > + * controller in charge of optimizing memory accesses without > + * the extra address manipulation GPU side. > + * > + * VLV and CHV don't have GPU swizzling. 
> + */ > + swizzle_x = I915_BIT_6_SWIZZLE_NONE; > + swizzle_y = I915_BIT_6_SWIZZLE_NONE; > + } else if (INTEL_INFO(dev)->gen >= 6) { > + if (dev_priv->preserve_bios_swizzle) { > + if (I915_READ(DISP_ARB_CTL) & > + DISP_TILE_SURFACE_SWIZZLING) { > + swizzle_x = I915_BIT_6_SWIZZLE_9_10; > + swizzle_y = I915_BIT_6_SWIZZLE_9; > + } else { > + swizzle_x = I915_BIT_6_SWIZZLE_NONE; > + swizzle_y = I915_BIT_6_SWIZZLE_NONE; > + } > + } else { > + uint32_t dimm_c0, dimm_c1; > + dimm_c0 = I915_READ(MAD_DIMM_C0); > + dimm_c1 = I915_READ(MAD_DIMM_C1); > + dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; > + dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; > + /* Enable swizzling when the channels are populated > + * with identically sized dimms. We don't need to check > + * the 3rd channel because no cpu with gpu attached > + * ships in that configuration. Also, swizzling only > + * makes sense for 2 channels anyway. */ > + if (dimm_c0 == dimm_c1) { > + swizzle_x = I915_BIT_6_SWIZZLE_9_10; > + swizzle_y = I915_BIT_6_SWIZZLE_9; > + } else { > + swizzle_x = I915_BIT_6_SWIZZLE_NONE; > + swizzle_y = I915_BIT_6_SWIZZLE_NONE; > + } > + } > + } else if (IS_GEN5(dev_priv)) { > + /* On Ironlake, whatever the DRAM config, the GPU always > + * uses the same swizzling setup. > + */ > + swizzle_x = I915_BIT_6_SWIZZLE_9_10; > + swizzle_y = I915_BIT_6_SWIZZLE_9; > + } else if (IS_GEN2(dev_priv)) { > + /* As far as we know, the 865 doesn't have these bit 6 > + * swizzling issues. > + */ > + swizzle_x = I915_BIT_6_SWIZZLE_NONE; > + swizzle_y = I915_BIT_6_SWIZZLE_NONE; > + } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) && > + !IS_G33(dev_priv))) { > + uint32_t dcc; > + > + /* On 9xx chipsets, channel interleave by the CPU is > + * determined by DCC. For single-channel, neither the CPU > + * nor the GPU do swizzling. For dual channel interleaved, > + * the GPU's interleave is bit 9 and 10 for X tiled, and bit > + * 9 for Y tiled. 
The CPU's interleave is independent, and > + * can be based on either bit 11 (haven't seen this yet) or > + * bit 17 (common). > + */ > + dcc = I915_READ(DCC); > + switch (dcc & DCC_ADDRESSING_MODE_MASK) { > + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: > + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: > + swizzle_x = I915_BIT_6_SWIZZLE_NONE; > + swizzle_y = I915_BIT_6_SWIZZLE_NONE; > + break; > + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: > + if (dcc & DCC_CHANNEL_XOR_DISABLE) { > + /* This is the base swizzling by the GPU for > + * tiled buffers. > + */ > + swizzle_x = I915_BIT_6_SWIZZLE_9_10; > + swizzle_y = I915_BIT_6_SWIZZLE_9; > + } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { > + /* Bit 11 swizzling by the CPU in addition. */ > + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; > + swizzle_y = I915_BIT_6_SWIZZLE_9_11; > + } else { > + /* Bit 17 swizzling by the CPU in addition. */ > + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; > + swizzle_y = I915_BIT_6_SWIZZLE_9_17; > + } > + break; > + } > + > + /* check for L-shaped memory aka modified enhanced addressing */ > + if (IS_GEN4(dev_priv) && > + !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { > + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; > + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; > + } > + > + if (dcc == 0xffffffff) { > + DRM_ERROR("Couldn't read from MCHBAR. " > + "Disabling tiling.\n"); > + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; > + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; > + } > + } else { > + /* The 965, G33, and newer, have a very flexible memory > + * configuration. It will enable dual-channel mode > + * (interleaving) on as much memory as it can, and the GPU > + * will additionally sometimes enable different bit 6 > + * swizzling for tiled objects from the CPU. 
> + * > + * Here's what I found on the G965: > + * slot fill memory size swizzling > + * 0A 0B 1A 1B 1-ch 2-ch > + * 512 0 0 0 512 0 O > + * 512 0 512 0 16 1008 X > + * 512 0 0 512 16 1008 X > + * 0 512 0 512 16 1008 X > + * 1024 1024 1024 0 2048 1024 O > + * > + * We could probably detect this based on either the DRB > + * matching, which was the case for the swizzling required in > + * the table above, or from the 1-ch value being less than > + * the minimum size of a rank. > + * > + * Reports indicate that the swizzling actually > + * varies depending upon page placement inside the > + * channels, i.e. we see swizzled pages where the > + * banks of memory are paired and unswizzled on the > + * uneven portion, so leave that as unknown. > + */ > + if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { > + swizzle_x = I915_BIT_6_SWIZZLE_9_10; > + swizzle_y = I915_BIT_6_SWIZZLE_9; > + } > + } > + > + if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || > + swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { > + /* Userspace likes to explode if it sees unknown swizzling, > + * so lie. We will finish the lie when reporting through > + * the get-tiling-ioctl by reporting the physical swizzle > + * mode as unknown instead. > + * > + * As we don't strictly know what the swizzling is, it may be > + * bit17 dependent, and so we need to also prevent the pages > + * from being moved. > + */ > + dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; > + swizzle_x = I915_BIT_6_SWIZZLE_NONE; > + swizzle_y = I915_BIT_6_SWIZZLE_NONE; > + } > + > + dev_priv->mm.bit_6_swizzle_x = swizzle_x; > + dev_priv->mm.bit_6_swizzle_y = swizzle_y; > +} > + > +/* > + * Swap every 64 bytes of this page around, to account for it having a new > + * bit 17 of its physical address and therefore being interpreted differently > + * by the GPU. 
> + */ > +static void > +i915_gem_swizzle_page(struct page *page) > +{ > + char temp[64]; > + char *vaddr; > + int i; > + > + vaddr = kmap(page); > + > + for (i = 0; i < PAGE_SIZE; i += 128) { > + memcpy(temp, &vaddr[i], 64); > + memcpy(&vaddr[i], &vaddr[i + 64], 64); > + memcpy(&vaddr[i + 64], temp, 64); > + } > + > + kunmap(page); > +} > + > +/** > + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling > + * @obj: i915 GEM buffer object > + * @pages: the scattergather list of physical pages > + * > + * This function fixes up the swizzling in case any page frame number for this > + * object has changed in bit 17 since that state has been saved with > + * i915_gem_object_save_bit_17_swizzle(). > + * > + * This is called when pinning backing storage again, since the kernel is free > + * to move unpinned backing storage around (either by directly moving pages or > + * by swapping them out and back in again). > + */ > +void > +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, > + struct sg_table *pages) > +{ > + struct sgt_iter sgt_iter; > + struct page *page; > + int i; > + > + if (obj->bit_17 == NULL) > + return; > + > + i = 0; > + for_each_sgt_page(page, sgt_iter, pages) { > + char new_bit_17 = page_to_phys(page) >> 17; > + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { > + i915_gem_swizzle_page(page); > + set_page_dirty(page); > + } > + i++; > + } > +} > + > +/** > + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling > + * @obj: i915 GEM buffer object > + * @pages: the scattergather list of physical pages > + * > + * This function saves the bit 17 of each page frame number so that swizzling > + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must > + * be called before the backing storage can be unpinned. 
> + */ > +void > +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, > + struct sg_table *pages) > +{ > + const unsigned int page_count = obj->base.size >> PAGE_SHIFT; > + struct sgt_iter sgt_iter; > + struct page *page; > + int i; > + > + if (obj->bit_17 == NULL) { > + obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count), > + sizeof(long), GFP_KERNEL); > + if (obj->bit_17 == NULL) { > + DRM_ERROR("Failed to allocate memory for bit 17 " > + "record\n"); > + return; > + } > + } > + > + i = 0; > + > + for_each_sgt_page(page, sgt_iter, pages) { > + if (page_to_phys(page) & (1 << 17)) > + __set_bit(i, obj->bit_17); > + else > + __clear_bit(i, obj->bit_17); > + i++; > + } > +} > diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h > new file mode 100644 > index 0000000..22c4a2d > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h > @@ -0,0 +1,51 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + */ > + > +#ifndef __I915_FENCE_REG_H__ > +#define __I915_FENCE_REG_H__ > + > +#include <linux/list.h> > + > +struct drm_i915_private; > +struct i915_vma; > + > +struct drm_i915_fence_reg { > + struct list_head link; > + struct drm_i915_private *i915; > + struct i915_vma *vma; > + int pin_count; > + int id; > + /** > + * Whether the tiling parameters for the currently > + * associated fence register have changed. Note that > + * for the purposes of tracking tiling changes we also > + * treat the unfenced register, the register slot that > + * the object occupies whilst it executes a fenced > + * command (such as BLT on gen2/3), as a "fence". > + */ > + bool dirty; > +}; > + > +#endif > + > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index a5fafa3..f60e5a7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -96,13 +96,6 @@ > * > */ > > -static inline struct i915_ggtt * > -i915_vm_to_ggtt(struct i915_address_space *vm) > -{ > - GEM_BUG_ON(!i915_is_ggtt(vm)); > - return container_of(vm, struct i915_ggtt, base); > -} > - > static int > i915_get_ggtt_vma_pages(struct i915_vma *vma); > > @@ -3348,176 +3341,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) > i915_ggtt_flush(dev_priv); > } > > -static void > -i915_vma_retire(struct i915_gem_active *active, > - struct drm_i915_gem_request *rq) > -{ > - const unsigned int idx = rq->engine->id; > - struct i915_vma *vma = > - container_of(active, struct i915_vma, last_read[idx]); > - struct drm_i915_gem_object *obj = vma->obj; > - > - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); > - > - i915_vma_clear_active(vma, idx); > - if 
(i915_vma_is_active(vma)) > - return; > - > - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) > - WARN_ON(i915_vma_unbind(vma)); > - > - GEM_BUG_ON(!i915_gem_object_is_active(obj)); > - if (--obj->active_count) > - return; > - > - /* Bump our place on the bound list to keep it roughly in LRU order > - * so that we don't steal from recently used but inactive objects > - * (unless we are forced to ofc!) > - */ > - if (obj->bind_count) > - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); > - > - obj->mm.dirty = true; /* be paranoid */ > - > - if (i915_gem_object_has_active_reference(obj)) { > - i915_gem_object_clear_active_reference(obj); > - i915_gem_object_put(obj); > - } > -} > - > -static void > -i915_ggtt_retire__write(struct i915_gem_active *active, > - struct drm_i915_gem_request *request) > -{ > - struct i915_vma *vma = > - container_of(active, struct i915_vma, last_write); > - > - intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); > -} > - > -void i915_vma_destroy(struct i915_vma *vma) > -{ > - GEM_BUG_ON(vma->node.allocated); > - GEM_BUG_ON(i915_vma_is_active(vma)); > - GEM_BUG_ON(!i915_vma_is_closed(vma)); > - GEM_BUG_ON(vma->fence); > - > - list_del(&vma->vm_link); > - if (!i915_vma_is_ggtt(vma)) > - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); > - > - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); > -} > - > -void i915_vma_close(struct i915_vma *vma) > -{ > - GEM_BUG_ON(i915_vma_is_closed(vma)); > - vma->flags |= I915_VMA_CLOSED; > - > - list_del(&vma->obj_link); > - rb_erase(&vma->obj_node, &vma->obj->vma_tree); > - > - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) > - WARN_ON(i915_vma_unbind(vma)); > -} > - > -static inline long vma_compare(struct i915_vma *vma, > - struct i915_address_space *vm, > - const struct i915_ggtt_view *view) > -{ > - GEM_BUG_ON(view && !i915_is_ggtt(vm)); > - > - if (vma->vm != vm) > - return vma->vm - vm; > - > - if 
(!view) > - return vma->ggtt_view.type; > - > - if (vma->ggtt_view.type != view->type) > - return vma->ggtt_view.type - view->type; > - > - return memcmp(&vma->ggtt_view.params, > - &view->params, > - sizeof(view->params)); > -} > - > -static struct i915_vma * > -__i915_vma_create(struct drm_i915_gem_object *obj, > - struct i915_address_space *vm, > - const struct i915_ggtt_view *view) > -{ > - struct i915_vma *vma; > - struct rb_node *rb, **p; > - int i; > - > - GEM_BUG_ON(vm->closed); > - > - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); > - if (vma == NULL) > - return ERR_PTR(-ENOMEM); > - > - INIT_LIST_HEAD(&vma->exec_list); > - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) > - init_request_active(&vma->last_read[i], i915_vma_retire); > - init_request_active(&vma->last_write, > - i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); > - init_request_active(&vma->last_fence, NULL); > - list_add(&vma->vm_link, &vm->unbound_list); > - vma->vm = vm; > - vma->obj = obj; > - vma->size = obj->base.size; > - > - if (view) { > - vma->ggtt_view = *view; > - if (view->type == I915_GGTT_VIEW_PARTIAL) { > - vma->size = view->params.partial.size; > - vma->size <<= PAGE_SHIFT; > - } else if (view->type == I915_GGTT_VIEW_ROTATED) { > - vma->size = > - intel_rotation_info_size(&view->params.rotated); > - vma->size <<= PAGE_SHIFT; > - } > - } > - > - if (i915_is_ggtt(vm)) { > - vma->flags |= I915_VMA_GGTT; > - list_add(&vma->obj_link, &obj->vma_list); > - } else { > - i915_ppgtt_get(i915_vm_to_ppgtt(vm)); > - list_add_tail(&vma->obj_link, &obj->vma_list); > - } > - > - rb = NULL; > - p = &obj->vma_tree.rb_node; > - while (*p) { > - struct i915_vma *pos; > - > - rb = *p; > - pos = rb_entry(rb, struct i915_vma, obj_node); > - if (vma_compare(pos, vm, view) < 0) > - p = &rb->rb_right; > - else > - p = &rb->rb_left; > - } > - rb_link_node(&vma->obj_node, rb, p); > - rb_insert_color(&vma->obj_node, &obj->vma_tree); > - > - return vma; > -} > - > -struct i915_vma 
* > -i915_vma_create(struct drm_i915_gem_object *obj, > - struct i915_address_space *vm, > - const struct i915_ggtt_view *view) > -{ > - lockdep_assert_held(&obj->base.dev->struct_mutex); > - GEM_BUG_ON(view && !i915_is_ggtt(vm)); > - GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); > - > - return __i915_vma_create(obj, vm, view); > -} > - > struct i915_vma * > i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > @@ -3530,7 +3353,7 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, > struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); > long cmp; > > - cmp = vma_compare(vma, vm, view); > + cmp = i915_vma_compare(vma, vm, view); > if (cmp == 0) > return vma; > > @@ -3555,7 +3378,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, > > vma = i915_gem_obj_to_vma(obj, vm, view); > if (!vma) { > - vma = __i915_vma_create(obj, vm, view); > + vma = i915_vma_create(obj, vm, view); > GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); > } > > @@ -3747,99 +3570,3 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) > return ret; > } > > -/** > - * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. > - * @vma: VMA to map > - * @cache_level: mapping cache level > - * @flags: flags like global or local mapping > - * > - * DMA addresses are taken from the scatter-gather table of this object (or of > - * this VMA in case of non-default GGTT views) and PTE entries set up. > - * Note that DMA addresses are also the only part of the SG table we care about. 
> - */ > -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, > - u32 flags) > -{ > - u32 bind_flags; > - u32 vma_flags; > - int ret; > - > - if (WARN_ON(flags == 0)) > - return -EINVAL; > - > - bind_flags = 0; > - if (flags & PIN_GLOBAL) > - bind_flags |= I915_VMA_GLOBAL_BIND; > - if (flags & PIN_USER) > - bind_flags |= I915_VMA_LOCAL_BIND; > - > - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); > - if (flags & PIN_UPDATE) > - bind_flags |= vma_flags; > - else > - bind_flags &= ~vma_flags; > - if (bind_flags == 0) > - return 0; > - > - if (vma_flags == 0 && vma->vm->allocate_va_range) { > - trace_i915_va_alloc(vma); > - ret = vma->vm->allocate_va_range(vma->vm, > - vma->node.start, > - vma->node.size); > - if (ret) > - return ret; > - } > - > - ret = vma->vm->bind_vma(vma, cache_level, bind_flags); > - if (ret) > - return ret; > - > - vma->flags |= bind_flags; > - return 0; > -} > - > -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) > -{ > - void __iomem *ptr; > - > - /* Access through the GTT requires the device to be awake. 
*/ > - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); > - > - lockdep_assert_held(&vma->vm->dev->struct_mutex); > - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) > - return IO_ERR_PTR(-ENODEV); > - > - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); > - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); > - > - ptr = vma->iomap; > - if (ptr == NULL) { > - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, > - vma->node.start, > - vma->node.size); > - if (ptr == NULL) > - return IO_ERR_PTR(-ENOMEM); > - > - vma->iomap = ptr; > - } > - > - __i915_vma_pin(vma); > - return ptr; > -} > - > -void i915_vma_unpin_and_release(struct i915_vma **p_vma) > -{ > - struct i915_vma *vma; > - struct drm_i915_gem_object *obj; > - > - vma = fetch_and_zero(p_vma); > - if (!vma) > - return; > - > - obj = vma->obj; > - > - i915_vma_unpin(vma); > - i915_vma_close(vma); > - > - __i915_gem_object_release_unless_active(obj); > -} > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index c23ef9d..57b5849 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -35,7 +35,9 @@ > #define __I915_GEM_GTT_H__ > > #include <linux/io-mapping.h> > +#include <linux/mm.h> > > +#include "i915_gem_timeline.h" > #include "i915_gem_request.h" > > #define I915_FENCE_REG_NONE -1 > @@ -138,6 +140,8 @@ typedef uint64_t gen8_ppgtt_pml4e_t; > #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) > #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) > > +struct sg_table; > + > enum i915_ggtt_view_type { > I915_GGTT_VIEW_NORMAL = 0, > I915_GGTT_VIEW_ROTATED, > @@ -168,135 +172,7 @@ extern const struct i915_ggtt_view i915_ggtt_view_rotated; > > enum i915_cache_level; > > -/** > - * A VMA represents a GEM BO that is bound into an address space. Therefore, a > - * VMA's presence cannot be guaranteed before binding, or after unbinding the > - * object into/from the address space. > - * > - * To make things as simple as possible (ie. 
no refcounting), a VMA's lifetime > - * will always be <= an objects lifetime. So object refcounting should cover us. > - */ > -struct i915_vma { > - struct drm_mm_node node; > - struct drm_i915_gem_object *obj; > - struct i915_address_space *vm; > - struct drm_i915_fence_reg *fence; > - struct sg_table *pages; > - void __iomem *iomap; > - u64 size; > - u64 display_alignment; > - > - unsigned int flags; > - /** > - * How many users have pinned this object in GTT space. The following > - * users can each hold at most one reference: pwrite/pread, execbuffer > - * (objects are not allowed multiple times for the same batchbuffer), > - * and the framebuffer code. When switching/pageflipping, the > - * framebuffer code has at most two buffers pinned per crtc. > - * > - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 > - * bits with absolutely no headroom. So use 4 bits. > - */ > -#define I915_VMA_PIN_MASK 0xf > -#define I915_VMA_PIN_OVERFLOW BIT(5) > - > - /** Flags and address space this VMA is bound to */ > -#define I915_VMA_GLOBAL_BIND BIT(6) > -#define I915_VMA_LOCAL_BIND BIT(7) > -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) > - > -#define I915_VMA_GGTT BIT(8) > -#define I915_VMA_CAN_FENCE BIT(9) > -#define I915_VMA_CLOSED BIT(10) > - > - unsigned int active; > - struct i915_gem_active last_read[I915_NUM_ENGINES]; > - struct i915_gem_active last_write; > - struct i915_gem_active last_fence; > - > - /** > - * Support different GGTT views into the same object. > - * This means there can be multiple VMA mappings per object and per VM. > - * i915_ggtt_view_type is used to distinguish between those entries. > - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also > - * assumed in GEM functions which take no ggtt view parameter. 
> - */ > - struct i915_ggtt_view ggtt_view; > - > - /** This object's place on the active/inactive lists */ > - struct list_head vm_link; > - > - struct list_head obj_link; /* Link in the object's VMA list */ > - struct rb_node obj_node; > - > - /** This vma's place in the batchbuffer or on the eviction list */ > - struct list_head exec_list; > - > - /** > - * Used for performing relocations during execbuffer insertion. > - */ > - struct hlist_node exec_node; > - unsigned long exec_handle; > - struct drm_i915_gem_exec_object2 *exec_entry; > -}; > - > -struct i915_vma * > -i915_vma_create(struct drm_i915_gem_object *obj, > - struct i915_address_space *vm, > - const struct i915_ggtt_view *view); > -void i915_vma_unpin_and_release(struct i915_vma **p_vma); > - > -static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) > -{ > - return vma->flags & I915_VMA_GGTT; > -} > - > -static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) > -{ > - return vma->flags & I915_VMA_CAN_FENCE; > -} > - > -static inline bool i915_vma_is_closed(const struct i915_vma *vma) > -{ > - return vma->flags & I915_VMA_CLOSED; > -} > - > -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) > -{ > - return vma->active; > -} > - > -static inline bool i915_vma_is_active(const struct i915_vma *vma) > -{ > - return i915_vma_get_active(vma); > -} > - > -static inline void i915_vma_set_active(struct i915_vma *vma, > - unsigned int engine) > -{ > - vma->active |= BIT(engine); > -} > - > -static inline void i915_vma_clear_active(struct i915_vma *vma, > - unsigned int engine) > -{ > - vma->active &= ~BIT(engine); > -} > - > -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, > - unsigned int engine) > -{ > - return vma->active & BIT(engine); > -} > - > -static inline u32 i915_ggtt_offset(const struct i915_vma *vma) > -{ > - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); > - GEM_BUG_ON(!vma->node.allocated); > - 
GEM_BUG_ON(upper_32_bits(vma->node.start)); > - GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); > - return lower_32_bits(vma->node.start); > -} > +struct i915_vma; > > struct i915_page_dma { > struct page *page; > @@ -606,6 +482,13 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) > px_dma(ppgtt->base.scratch_pd); > } > > +static inline struct i915_ggtt * > +i915_vm_to_ggtt(struct i915_address_space *vm) > +{ > + GEM_BUG_ON(!i915_is_ggtt(vm)); > + return container_of(vm, struct i915_ggtt, base); > +} > + > int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); > int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); > int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); > @@ -653,88 +536,4 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, > #define PIN_OFFSET_FIXED BIT(11) > #define PIN_OFFSET_MASK (~4095) > > -int __i915_vma_do_pin(struct i915_vma *vma, > - u64 size, u64 alignment, u64 flags); > -static inline int __must_check > -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > -{ > - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); > - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); > - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); > - > - /* Pin early to prevent the shrinker/eviction logic from destroying > - * our vma as we insert and bind. 
> - */ > - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) > - return 0; > - > - return __i915_vma_do_pin(vma, size, alignment, flags); > -} > - > -static inline int i915_vma_pin_count(const struct i915_vma *vma) > -{ > - return vma->flags & I915_VMA_PIN_MASK; > -} > - > -static inline bool i915_vma_is_pinned(const struct i915_vma *vma) > -{ > - return i915_vma_pin_count(vma); > -} > - > -static inline void __i915_vma_pin(struct i915_vma *vma) > -{ > - vma->flags++; > - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); > -} > - > -static inline void __i915_vma_unpin(struct i915_vma *vma) > -{ > - GEM_BUG_ON(!i915_vma_is_pinned(vma)); > - vma->flags--; > -} > - > -static inline void i915_vma_unpin(struct i915_vma *vma) > -{ > - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); > - __i915_vma_unpin(vma); > -} > - > -/** > - * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture > - * @vma: VMA to iomap > - * > - * The passed in VMA has to be pinned in the global GTT mappable region. > - * An extra pinning of the VMA is acquired for the return iomapping, > - * the caller must call i915_vma_unpin_iomap to relinquish the pinning > - * after the iomapping is no longer required. > - * > - * Callers must hold the struct_mutex. > - * > - * Returns a valid iomapped pointer or ERR_PTR. > - */ > -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); > -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) > - > -/** > - * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap > - * @vma: VMA to unpin > - * > - * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). > - * > - * Callers must hold the struct_mutex. This function is only valid to be > - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). 
> - */ > -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) > -{ > - lockdep_assert_held(&vma->vm->dev->struct_mutex); > - GEM_BUG_ON(vma->iomap == NULL); > - i915_vma_unpin(vma); > -} > - > -static inline struct page *i915_vma_first_page(struct i915_vma *vma) > -{ > - GEM_BUG_ON(!vma->pages); > - return sg_page(vma->pages->sgl); > -} > - > #endif > diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h > new file mode 100644 > index 0000000..014f803 > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_gem_object.h > @@ -0,0 +1,337 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. 
> + * > + */ > + > +#ifndef __I915_GEM_OBJECT_H__ > +#define __I915_GEM_OBJECT_H__ > + > +#include <linux/reservation.h> > + > +#include <drm/drm_vma_manager.h> > +#include <drm/drm_gem.h> > +#include <drm/drmP.h> > + > +#include <drm/i915_drm.h> > + > +struct drm_i915_gem_object_ops { > + unsigned int flags; > +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 > +#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 > + > + /* Interface between the GEM object and its backing storage. > + * get_pages() is called once prior to the use of the associated set > + * of pages before to binding them into the GTT, and put_pages() is > + * called after we no longer need them. As we expect there to be > + * associated cost with migrating pages between the backing storage > + * and making them available for the GPU (e.g. clflush), we may hold > + * onto the pages after they are no longer referenced by the GPU > + * in case they may be used again shortly (for example migrating the > + * pages to a different memory domain within the GTT). put_pages() > + * will therefore most likely be called when the object itself is > + * being released or under memory pressure (where we attempt to > + * reap pages for the shrinker). > + */ > + struct sg_table *(*get_pages)(struct drm_i915_gem_object *); > + void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); > + > + int (*dmabuf_export)(struct drm_i915_gem_object *); > + void (*release)(struct drm_i915_gem_object *); > +}; > + > +struct drm_i915_gem_object { > + struct drm_gem_object base; > + > + const struct drm_i915_gem_object_ops *ops; > + > + /** List of VMAs backed by this object */ > + struct list_head vma_list; > + struct rb_root vma_tree; > + > + /** Stolen memory for this object, instead of being backed by shmem. */ > + struct drm_mm_node *stolen; > + struct list_head global_link; > + union { > + struct rcu_head rcu; > + struct llist_node freed; > + }; > + > + /** > + * Whether the object is currently in the GGTT mmap. 
> + */ > + struct list_head userfault_link; > + > + /** Used in execbuf to temporarily hold a ref */ > + struct list_head obj_exec_link; > + > + struct list_head batch_pool_link; > + > + unsigned long flags; > + > + /** > + * Have we taken a reference for the object for incomplete GPU > + * activity? > + */ > +#define I915_BO_ACTIVE_REF 0 > + > + /* > + * Is the object to be mapped as read-only to the GPU > + * Only honoured if hardware has relevant pte bit > + */ > + unsigned long gt_ro:1; > + unsigned int cache_level:3; > + unsigned int cache_dirty:1; > + > + atomic_t frontbuffer_bits; > + unsigned int frontbuffer_ggtt_origin; /* write once */ > + > + /** Current tiling stride for the object, if it's tiled. */ > + unsigned int tiling_and_stride; > +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ > +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) > +#define STRIDE_MASK (~TILING_MASK) > + > + /** Count of VMA actually bound by this object */ > + unsigned int bind_count; > + unsigned int active_count; > + unsigned int pin_display; > + > + struct { > + struct mutex lock; /* protects the pages and their use */ > + atomic_t pages_pin_count; > + > + struct sg_table *pages; > + void *mapping; > + > + struct i915_gem_object_page_iter { > + struct scatterlist *sg_pos; > + unsigned int sg_idx; /* in pages, but 32bit eek! */ > + > + struct radix_tree_root radix; > + struct mutex lock; /* protects this cache */ > + } get_page; > + > + /** > + * Advice: are the backing pages purgeable? > + */ > + unsigned int madv:2; > + > + /** > + * This is set if the object has been written to since the > + * pages were last acquired. > + */ > + bool dirty:1; > + > + /** > + * This is set if the object has been pinned due to unknown > + * swizzling. > + */ > + bool quirked:1; > + } mm; > + > + /** Breadcrumb of last rendering to the buffer. > + * There can only be one writer, but we allow for multiple readers. 
> + * If there is a writer that necessarily implies that all other > + * read requests are complete - but we may only be lazily clearing > + * the read requests. A read request is naturally the most recent > + * request on a ring, so we may have two different write and read > + * requests on one ring where the write request is older than the > + * read request. This allows for the CPU to read from an active > + * buffer by only waiting for the write to complete. > + */ > + struct reservation_object *resv; > + > + /** References from framebuffers, locks out tiling changes. */ > + unsigned long framebuffer_references; > + > + /** Record of address bit 17 of each page at last unbind. */ > + unsigned long *bit_17; > + > + struct i915_gem_userptr { > + uintptr_t ptr; > + unsigned read_only :1; > + > + struct i915_mm_struct *mm; > + struct i915_mmu_object *mmu_object; > + struct work_struct *work; > + } userptr; > + > + /** for phys allocated objects */ > + struct drm_dma_handle *phys_handle; > + > + struct reservation_object __builtin_resv; > +}; > + > +static inline struct drm_i915_gem_object * > +to_intel_bo(struct drm_gem_object *gem) > +{ > + /* Assert that to_intel_bo(NULL) == NULL */ > + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); > + > + return container_of(gem, struct drm_i915_gem_object, base); > +} > + > +/** > + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle > + * @filp: DRM file private date > + * @handle: userspace handle > + * > + * Returns: > + * > + * A pointer to the object named by the handle if such exists on @filp, NULL > + * otherwise. This object is only valid whilst under the RCU read lock, and > + * note carefully the object may be in the process of being destroyed. 
> + */ > +static inline struct drm_i915_gem_object * > +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) > +{ > +#ifdef CONFIG_LOCKDEP > + WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); > +#endif > + return idr_find(&file->object_idr, handle); > +} > + > +static inline struct drm_i915_gem_object * > +i915_gem_object_lookup(struct drm_file *file, u32 handle) > +{ > + struct drm_i915_gem_object *obj; > + > + rcu_read_lock(); > + obj = i915_gem_object_lookup_rcu(file, handle); > + if (obj && !kref_get_unless_zero(&obj->base.refcount)) > + obj = NULL; > + rcu_read_unlock(); > + > + return obj; > +} > + > +__deprecated > +extern struct drm_gem_object * > +drm_gem_object_lookup(struct drm_file *file, u32 handle); > + > +__attribute__((nonnull)) > +static inline struct drm_i915_gem_object * > +i915_gem_object_get(struct drm_i915_gem_object *obj) > +{ > + drm_gem_object_reference(&obj->base); > + return obj; > +} > + > +__deprecated > +extern void drm_gem_object_reference(struct drm_gem_object *); > + > +__attribute__((nonnull)) > +static inline void > +i915_gem_object_put(struct drm_i915_gem_object *obj) > +{ > + __drm_gem_object_unreference(&obj->base); > +} > + > +__deprecated > +extern void drm_gem_object_unreference(struct drm_gem_object *); > + > +__deprecated > +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); > + > +static inline bool > +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) > +{ > + return atomic_read(&obj->base.refcount.refcount) == 0; > +} > + > +static inline bool > +i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) > +{ > + return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; > +} > + > +static inline bool > +i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) > +{ > + return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; > +} > + > +static inline bool > +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) > +{ > + return 
obj->active_count; > +} > + > +static inline bool > +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) > +{ > + return test_bit(I915_BO_ACTIVE_REF, &obj->flags); > +} > + > +static inline void > +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) > +{ > + lockdep_assert_held(&obj->base.dev->struct_mutex); > + __set_bit(I915_BO_ACTIVE_REF, &obj->flags); > +} > + > +static inline void > +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) > +{ > + lockdep_assert_held(&obj->base.dev->struct_mutex); > + __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); > +} > + > +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); > + > +static inline unsigned int > +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) > +{ > + return obj->tiling_and_stride & TILING_MASK; > +} > + > +static inline bool > +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) > +{ > + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; > +} > + > +static inline unsigned int > +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) > +{ > + return obj->tiling_and_stride & STRIDE_MASK; > +} > + > +static inline struct intel_engine_cs * > +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) > +{ > + struct intel_engine_cs *engine = NULL; > + struct dma_fence *fence; > + > + rcu_read_lock(); > + fence = reservation_object_get_excl_rcu(obj->resv); > + rcu_read_unlock(); > + > + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) > + engine = to_request(fence)->engine; > + dma_fence_put(fence); > + > + return engine; > +} > + > +#endif > + > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > index 0f69fad..a56559e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.h > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -30,6 +30,9 @@ > #include "i915_gem.h" > #include "i915_sw_fence.h" > > +struct drm_file; > +struct 
drm_i915_gem_object; > + > struct intel_wait { > struct rb_node node; > struct task_struct *tsk; > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > new file mode 100644 > index 0000000..738ff3a > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -0,0 +1,650 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. 
> + * > + */ > + > +#include "i915_vma.h" > + > +#include "i915_drv.h" > +#include "intel_ringbuffer.h" > +#include "intel_frontbuffer.h" > + > +#include <drm/drm_gem.h> > + > +static void > +i915_vma_retire(struct i915_gem_active *active, > + struct drm_i915_gem_request *rq) > +{ > + const unsigned int idx = rq->engine->id; > + struct i915_vma *vma = > + container_of(active, struct i915_vma, last_read[idx]); > + struct drm_i915_gem_object *obj = vma->obj; > + > + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); > + > + i915_vma_clear_active(vma, idx); > + if (i915_vma_is_active(vma)) > + return; > + > + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) > + WARN_ON(i915_vma_unbind(vma)); > + > + GEM_BUG_ON(!i915_gem_object_is_active(obj)); > + if (--obj->active_count) > + return; > + > + /* Bump our place on the bound list to keep it roughly in LRU order > + * so that we don't steal from recently used but inactive objects > + * (unless we are forced to ofc!) > + */ > + if (obj->bind_count) > + list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); > + > + obj->mm.dirty = true; /* be paranoid */ > + > + if (i915_gem_object_has_active_reference(obj)) { > + i915_gem_object_clear_active_reference(obj); > + i915_gem_object_put(obj); > + } > +} > + > +static void > +i915_ggtt_retire__write(struct i915_gem_active *active, > + struct drm_i915_gem_request *request) > +{ > + struct i915_vma *vma = > + container_of(active, struct i915_vma, last_write); > + > + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); > +} Doesn't fit or work well in i915_gem_request.c ? 
> + > +static struct i915_vma * > +__i915_vma_create(struct drm_i915_gem_object *obj, > + struct i915_address_space *vm, > + const struct i915_ggtt_view *view) > +{ > + struct i915_vma *vma; > + struct rb_node *rb, **p; > + int i; > + > + GEM_BUG_ON(vm->closed); > + > + vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); > + if (vma == NULL) > + return ERR_PTR(-ENOMEM); > + > + INIT_LIST_HEAD(&vma->exec_list); > + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) > + init_request_active(&vma->last_read[i], i915_vma_retire); > + init_request_active(&vma->last_write, > + i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); > + init_request_active(&vma->last_fence, NULL); > + list_add(&vma->vm_link, &vm->unbound_list); > + vma->vm = vm; > + vma->obj = obj; > + vma->size = obj->base.size; > + > + if (view) { > + vma->ggtt_view = *view; > + if (view->type == I915_GGTT_VIEW_PARTIAL) { > + vma->size = view->params.partial.size; > + vma->size <<= PAGE_SHIFT; > + } else if (view->type == I915_GGTT_VIEW_ROTATED) { > + vma->size = > + intel_rotation_info_size(&view->params.rotated); > + vma->size <<= PAGE_SHIFT; > + } > + } > + > + if (i915_is_ggtt(vm)) { > + vma->flags |= I915_VMA_GGTT; > + list_add(&vma->obj_link, &obj->vma_list); > + } else { > + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); > + list_add_tail(&vma->obj_link, &obj->vma_list); > + } > + > + rb = NULL; > + p = &obj->vma_tree.rb_node; > + while (*p) { > + struct i915_vma *pos; > + > + rb = *p; > + pos = rb_entry(rb, struct i915_vma, obj_node); > + if (i915_vma_compare(pos, vm, view) < 0) > + p = &rb->rb_right; > + else > + p = &rb->rb_left; > + } > + rb_link_node(&vma->obj_node, rb, p); > + rb_insert_color(&vma->obj_node, &obj->vma_tree); > + > + return vma; > +} > + > +struct i915_vma * > +i915_vma_create(struct drm_i915_gem_object *obj, > + struct i915_address_space *vm, > + const struct i915_ggtt_view *view) > +{ > + lockdep_assert_held(&obj->base.dev->struct_mutex); > + GEM_BUG_ON(view && 
!i915_is_ggtt(vm)); > + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); > + > + return __i915_vma_create(obj, vm, view); > +} > + > +/** > + * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space. > + * @vma: VMA to map > + * @cache_level: mapping cache level > + * @flags: flags like global or local mapping > + * > + * DMA addresses are taken from the scatter-gather table of this object (or of > + * this VMA in case of non-default GGTT views) and PTE entries set up. > + * Note that DMA addresses are also the only part of the SG table we care about. > + */ > +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, > + u32 flags) > +{ > + u32 bind_flags; > + u32 vma_flags; > + int ret; > + > + if (WARN_ON(flags == 0)) > + return -EINVAL; > + > + bind_flags = 0; > + if (flags & PIN_GLOBAL) > + bind_flags |= I915_VMA_GLOBAL_BIND; > + if (flags & PIN_USER) > + bind_flags |= I915_VMA_LOCAL_BIND; > + > + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); > + if (flags & PIN_UPDATE) > + bind_flags |= vma_flags; > + else > + bind_flags &= ~vma_flags; > + if (bind_flags == 0) > + return 0; > + > + if (vma_flags == 0 && vma->vm->allocate_va_range) { > + trace_i915_va_alloc(vma); > + ret = vma->vm->allocate_va_range(vma->vm, > + vma->node.start, > + vma->node.size); > + if (ret) > + return ret; > + } > + > + ret = vma->vm->bind_vma(vma, cache_level, bind_flags); > + if (ret) > + return ret; > + > + vma->flags |= bind_flags; > + return 0; > +} > + > +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) > +{ > + void __iomem *ptr; > + > + /* Access through the GTT requires the device to be awake.
*/ > + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); > + > + lockdep_assert_held(&vma->vm->dev->struct_mutex); > + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) > + return IO_ERR_PTR(-ENODEV); > + > + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); > + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); > + > + ptr = vma->iomap; > + if (ptr == NULL) { > + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, > + vma->node.start, > + vma->node.size); > + if (ptr == NULL) > + return IO_ERR_PTR(-ENOMEM); > + > + vma->iomap = ptr; > + } > + > + __i915_vma_pin(vma); > + return ptr; > +} > + > +void i915_vma_unpin_and_release(struct i915_vma **p_vma) > +{ > + struct i915_vma *vma; > + struct drm_i915_gem_object *obj; > + > + vma = fetch_and_zero(p_vma); > + if (!vma) > + return; > + > + obj = vma->obj; > + > + i915_vma_unpin(vma); > + i915_vma_close(vma); > + > + __i915_gem_object_release_unless_active(obj); > +} > + > +bool > +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > +{ > + if (!drm_mm_node_allocated(&vma->node)) > + return false; > + > + if (vma->node.size < size) > + return true; > + > + if (alignment && vma->node.start & (alignment - 1)) > + return true; > + > + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) > + return true; > + > + if (flags & PIN_OFFSET_BIAS && > + vma->node.start < (flags & PIN_OFFSET_MASK)) > + return true; > + > + if (flags & PIN_OFFSET_FIXED && > + vma->node.start != (flags & PIN_OFFSET_MASK)) > + return true; > + > + return false; > +} > + > +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) > +{ > + struct drm_i915_gem_object *obj = vma->obj; > + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); > + bool mappable, fenceable; > + u32 fence_size, fence_alignment; > + > + fence_size = i915_gem_get_ggtt_size(dev_priv, > + vma->size, > + i915_gem_object_get_tiling(obj)); > + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, > + vma->size, > + 
i915_gem_object_get_tiling(obj), > + true); > + > + fenceable = (vma->node.size == fence_size && > + (vma->node.start & (fence_alignment - 1)) == 0); > + > + mappable = (vma->node.start + fence_size <= > + dev_priv->ggtt.mappable_end); > + > + /* > + * Explicitly disable for rotated VMA since the display does not > + * need the fence and the VMA is not accessible to other users. > + */ > + if (mappable && fenceable && > + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) > + vma->flags |= I915_VMA_CAN_FENCE; > + else > + vma->flags &= ~I915_VMA_CAN_FENCE; > +} > + > +bool i915_gem_valid_gtt_space(struct i915_vma *vma, > + unsigned long cache_level) > +{ > + struct drm_mm_node *gtt_space = &vma->node; > + struct drm_mm_node *other; > + > + /* > + * On some machines we have to be careful when putting differing types > + * of snoopable memory together to avoid the prefetcher crossing memory > + * domains and dying. During vm initialisation, we decide whether or not > + * these constraints apply and set the drm_mm.color_adjust > + * appropriately. > + */ > + if (vma->vm->mm.color_adjust == NULL) > + return true; > + > + if (!drm_mm_node_allocated(gtt_space)) > + return true; > + > + if (list_empty(&gtt_space->node_list)) > + return true; > + > + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); > + if (other->allocated && !other->hole_follows && other->color != cache_level) > + return false; > + > + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); > + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) > + return false; > + > + return true; > +} > + > +/** > + * i915_vma_insert - finds a slot for the vma in its address space > + * @vma: the vma > + * @size: requested size in bytes (can be larger than the VMA) > + * @alignment: required alignment > + * @flags: mask of PIN_* flags to use > + * > + * First we try to allocate some free space that meets the requirements for > + * the VMA. 
Failing that, if the flags permit, it will evict an old VMA, > + * preferably the oldest idle entry to make room for the new VMA. > + * > + * Returns: > + * 0 on success, negative error code otherwise. > + */ > +static int > +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > +{ > + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); > + struct drm_i915_gem_object *obj = vma->obj; > + u64 start, end; > + int ret; > + > + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); > + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); > + > + size = max(size, vma->size); > + if (flags & PIN_MAPPABLE) > + size = i915_gem_get_ggtt_size(dev_priv, size, > + i915_gem_object_get_tiling(obj)); > + > + alignment = max(max(alignment, vma->display_alignment), > + i915_gem_get_ggtt_alignment(dev_priv, size, > + i915_gem_object_get_tiling(obj), > + flags & PIN_MAPPABLE)); > + > + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > + > + end = vma->vm->total; > + if (flags & PIN_MAPPABLE) > + end = min_t(u64, end, dev_priv->ggtt.mappable_end); > + if (flags & PIN_ZONE_4G) > + end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); > + > + /* If binding the object/GGTT view requires more space than the entire > + * aperture has, reject it early before evicting everything in a vain > + * attempt to find space. > + */ > + if (size > end) { > + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", > + size, obj->base.size, > + flags & PIN_MAPPABLE ?
"mappable" : "total", > + end); > + return -E2BIG; > + } > + > + ret = i915_gem_object_pin_pages(obj); > + if (ret) > + return ret; > + > + if (flags & PIN_OFFSET_FIXED) { > + u64 offset = flags & PIN_OFFSET_MASK; > + if (offset & (alignment - 1) || offset > end - size) { > + ret = -EINVAL; > + goto err_unpin; > + } > + > + vma->node.start = offset; > + vma->node.size = size; > + vma->node.color = obj->cache_level; > + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma); > + if (ret == 0) > + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); > + if (ret) > + goto err_unpin; > + } > + } else { > + u32 search_flag, alloc_flag; > + > + if (flags & PIN_HIGH) { > + search_flag = DRM_MM_SEARCH_BELOW; > + alloc_flag = DRM_MM_CREATE_TOP; > + } else { > + search_flag = DRM_MM_SEARCH_DEFAULT; > + alloc_flag = DRM_MM_CREATE_DEFAULT; > + } > + > + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, > + * so we know that we always have a minimum alignment of 4096. > + * The drm_mm range manager is optimised to return results > + * with zero alignment, so where possible use the optimal > + * path. 
> + */ > + if (alignment <= 4096) > + alignment = 0; > + > +search_free: > + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, > + &vma->node, > + size, alignment, > + obj->cache_level, > + start, end, > + search_flag, > + alloc_flag); > + if (ret) { > + ret = i915_gem_evict_something(vma->vm, size, alignment, > + obj->cache_level, > + start, end, > + flags); > + if (ret == 0) > + goto search_free; > + > + goto err_unpin; > + } > + > + GEM_BUG_ON(vma->node.start < start); > + GEM_BUG_ON(vma->node.start + vma->node.size > end); > + } > + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); > + > + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); > + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > + obj->bind_count++; > + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); > + > + return 0; > + > +err_unpin: > + i915_gem_object_unpin_pages(obj); > + return ret; > +} > + > +int __i915_vma_do_pin(struct i915_vma *vma, > + u64 size, u64 alignment, u64 flags) > +{ > + unsigned int bound = vma->flags; > + int ret; > + > + lockdep_assert_held(&vma->vm->dev->struct_mutex); > + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); > + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); > + > + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { > + ret = -EBUSY; > + goto err; > + } > + > + if ((bound & I915_VMA_BIND_MASK) == 0) { > + ret = i915_vma_insert(vma, size, alignment, flags); > + if (ret) > + goto err; > + } > + > + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); > + if (ret) > + goto err; > + > + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) > + __i915_vma_set_map_and_fenceable(vma); > + > + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); > + return 0; > + > +err: > + __i915_vma_unpin(vma); > + return ret; > +} > + > +void i915_vma_destroy(struct i915_vma *vma) > +{ > + GEM_BUG_ON(vma->node.allocated); > + GEM_BUG_ON(i915_vma_is_active(vma)); > + GEM_BUG_ON(!i915_vma_is_closed(vma)); > + 
GEM_BUG_ON(vma->fence); > + > + list_del(&vma->vm_link); > + if (!i915_vma_is_ggtt(vma)) > + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); > + > + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); > +} > + > +void i915_vma_close(struct i915_vma *vma) > +{ > + GEM_BUG_ON(i915_vma_is_closed(vma)); > + vma->flags |= I915_VMA_CLOSED; > + > + list_del(&vma->obj_link); > + rb_erase(&vma->obj_node, &vma->obj->vma_tree); > + > + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) > + WARN_ON(i915_vma_unbind(vma)); > +} > + > +static void __i915_vma_iounmap(struct i915_vma *vma) > +{ > + GEM_BUG_ON(i915_vma_is_pinned(vma)); > + > + if (vma->iomap == NULL) > + return; > + > + io_mapping_unmap(vma->iomap); > + vma->iomap = NULL; > +} > + > +int i915_vma_unbind(struct i915_vma *vma) > +{ > + struct drm_i915_gem_object *obj = vma->obj; > + unsigned long active; > + int ret; > + > + lockdep_assert_held(&obj->base.dev->struct_mutex); > + > + /* First wait upon any activity as retiring the request may > + * have side-effects such as unpinning or even unbinding this vma. > + */ > + active = i915_vma_get_active(vma); > + if (active) { > + int idx; > + > + /* When a closed VMA is retired, it is unbound - eek. > + * In order to prevent it from being recursively closed, > + * take a pin on the vma so that the second unbind is > + * aborted. > + * > + * Even more scary is that the retire callback may free > + * the object (last active vma). To prevent the explosion > + * we defer the actual object free to a worker that can > + * only proceed once it acquires the struct_mutex (which > + * we currently hold, therefore it cannot free this object > + * before we are finished). 
> + */ > + __i915_vma_pin(vma); > + > + for_each_active(active, idx) { > + ret = i915_gem_active_retire(&vma->last_read[idx], > + &vma->vm->dev->struct_mutex); > + if (ret) > + break; > + } > + > + __i915_vma_unpin(vma); > + if (ret) > + return ret; > + > + GEM_BUG_ON(i915_vma_is_active(vma)); > + } > + > + if (i915_vma_is_pinned(vma)) > + return -EBUSY; > + > + if (!drm_mm_node_allocated(&vma->node)) > + goto destroy; > + > + GEM_BUG_ON(obj->bind_count == 0); > + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); > + > + if (i915_vma_is_map_and_fenceable(vma)) { > + /* release the fence reg _after_ flushing */ > + ret = i915_vma_put_fence(vma); > + if (ret) > + return ret; > + > + /* Force a pagefault for domain tracking on next user access */ > + i915_gem_release_mmap(obj); > + > + __i915_vma_iounmap(vma); > + vma->flags &= ~I915_VMA_CAN_FENCE; > + } > + > + if (likely(!vma->vm->closed)) { > + trace_i915_vma_unbind(vma); > + vma->vm->unbind_vma(vma); > + } > + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); > + > + drm_mm_remove_node(&vma->node); > + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); > + > + if (vma->pages != obj->mm.pages) { > + GEM_BUG_ON(!vma->pages); > + sg_free_table(vma->pages); > + kfree(vma->pages); > + } > + vma->pages = NULL; > + > + /* Since the unbound list is global, only move to that list if > + * no more VMAs exist. */ > + if (--obj->bind_count == 0) > + list_move_tail(&obj->global_link, > + &to_i915(obj->base.dev)->mm.unbound_list); > + > + /* And finally now the object is completely decoupled from this vma, > + * we can drop its hold on the backing storage and allow it to be > + * reaped by the shrinker. 
> + */ > + i915_gem_object_unpin_pages(obj); > + > +destroy: > + if (unlikely(i915_vma_is_closed(vma))) > + i915_vma_destroy(vma); > + > + return 0; > +} > + > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h > new file mode 100644 > index 0000000..d358b30 > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_vma.h > @@ -0,0 +1,342 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + */ > + > +#ifndef __I915_VMA_H__ > +#define __I915_VMA_H__ > + > +#include <linux/io-mapping.h> > + > +#include <drm/drm_mm.h> > + > +#include "i915_gem_gtt.h" > +#include "i915_gem_fence_reg.h" > +#include "i915_gem_object.h" > +#include "i915_gem_request.h" > + > + > +enum i915_cache_level; > + > +/** > + * A VMA represents a GEM BO that is bound into an address space. 
Therefore, a > + * VMA's presence cannot be guaranteed before binding, or after unbinding the > + * object into/from the address space. > + * > + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime > + * will always be <= an objects lifetime. So object refcounting should cover us. > + */ > +struct i915_vma { > + struct drm_mm_node node; > + struct drm_i915_gem_object *obj; > + struct i915_address_space *vm; > + struct drm_i915_fence_reg *fence; > + struct sg_table *pages; > + void __iomem *iomap; > + u64 size; > + u64 display_alignment; > + > + unsigned int flags; > + /** > + * How many users have pinned this object in GTT space. The following > + * users can each hold at most one reference: pwrite/pread, execbuffer > + * (objects are not allowed multiple times for the same batchbuffer), > + * and the framebuffer code. When switching/pageflipping, the > + * framebuffer code has at most two buffers pinned per crtc. > + * > + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 > + * bits with absolutely no headroom. So use 4 bits. > + */ > +#define I915_VMA_PIN_MASK 0xf > +#define I915_VMA_PIN_OVERFLOW BIT(5) > + > + /** Flags and address space this VMA is bound to */ > +#define I915_VMA_GLOBAL_BIND BIT(6) > +#define I915_VMA_LOCAL_BIND BIT(7) > +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) > + > +#define I915_VMA_GGTT BIT(8) > +#define I915_VMA_CAN_FENCE BIT(9) > +#define I915_VMA_CLOSED BIT(10) > + > + unsigned int active; > + struct i915_gem_active last_read[I915_NUM_ENGINES]; > + struct i915_gem_active last_write; > + struct i915_gem_active last_fence; > + > + /** > + * Support different GGTT views into the same object. > + * This means there can be multiple VMA mappings per object and per VM. > + * i915_ggtt_view_type is used to distinguish between those entries. 
> + * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also > + * assumed in GEM functions which take no ggtt view parameter. > + */ > + struct i915_ggtt_view ggtt_view; > + > + /** This object's place on the active/inactive lists */ > + struct list_head vm_link; > + > + struct list_head obj_link; /* Link in the object's VMA list */ > + struct rb_node obj_node; > + > + /** This vma's place in the batchbuffer or on the eviction list */ > + struct list_head exec_list; > + > + /** > + * Used for performing relocations during execbuffer insertion. > + */ > + struct hlist_node exec_node; > + unsigned long exec_handle; > + struct drm_i915_gem_exec_object2 *exec_entry; > +}; > + > +struct i915_vma * > +i915_vma_create(struct drm_i915_gem_object *obj, > + struct i915_address_space *vm, > + const struct i915_ggtt_view *view); > + > +static inline long > +i915_vma_compare(struct i915_vma *vma, > + struct i915_address_space *vm, > + const struct i915_ggtt_view *view) > +{ > + GEM_BUG_ON(view && !i915_vma_is_ggtt(vma)); > + > + if (vma->vm != vm) > + return vma->vm - vm; > + > + if (!view) > + return vma->ggtt_view.type; > + > + if (vma->ggtt_view.type != view->type) > + return vma->ggtt_view.type - view->type; > + > + return memcmp(&vma->ggtt_view.params, > + &view->params, > + sizeof(view->params)); > +} > + > +void i915_vma_unpin_and_release(struct i915_vma **p_vma); > + > +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) > +{ > + return vma->flags & I915_VMA_GGTT; > +} > + > +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) > +{ > + return vma->flags & I915_VMA_CAN_FENCE; > +} > + > +static inline bool i915_vma_is_closed(const struct i915_vma *vma) > +{ > + return vma->flags & I915_VMA_CLOSED; > +} > + > +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) > +{ > + return vma->active; > +} > + > +static inline bool i915_vma_is_active(const struct i915_vma *vma) > +{ > + return 
i915_vma_get_active(vma); > +} > + > +static inline void i915_vma_set_active(struct i915_vma *vma, > + unsigned int engine) > +{ > + vma->active |= BIT(engine); > +} > + > +static inline void i915_vma_clear_active(struct i915_vma *vma, > + unsigned int engine) > +{ > + vma->active &= ~BIT(engine); > +} > + > +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, > + unsigned int engine) > +{ > + return vma->active & BIT(engine); > +} > + > +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) > +{ > + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); > + GEM_BUG_ON(!vma->node.allocated); > + GEM_BUG_ON(upper_32_bits(vma->node.start)); > + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); > + return lower_32_bits(vma->node.start); > +} > + > +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) > +{ > + i915_gem_object_get(vma->obj); > + return vma; > +} > + > +static inline void i915_vma_put(struct i915_vma *vma) > +{ > + i915_gem_object_put(vma->obj); > +} > + > +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, > + u32 flags); > +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); > +bool > +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); > +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); > +int __must_check i915_vma_unbind(struct i915_vma *vma); > +void i915_vma_close(struct i915_vma *vma); > +void i915_vma_destroy(struct i915_vma *vma); > + > +int __i915_vma_do_pin(struct i915_vma *vma, > + u64 size, u64 alignment, u64 flags); > +static inline int __must_check > +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > +{ > + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); > + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); > + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); > + > + /* Pin early to prevent the shrinker/eviction logic from destroying > + * our vma as we insert and bind. 
> + */ > + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) > + return 0; > + > + return __i915_vma_do_pin(vma, size, alignment, flags); > +} > + > +static inline int i915_vma_pin_count(const struct i915_vma *vma) > +{ > + return vma->flags & I915_VMA_PIN_MASK; > +} > + > +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) > +{ > + return i915_vma_pin_count(vma); > +} > + > +static inline void __i915_vma_pin(struct i915_vma *vma) > +{ > + vma->flags++; > + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); > +} > + > +static inline void __i915_vma_unpin(struct i915_vma *vma) > +{ > + GEM_BUG_ON(!i915_vma_is_pinned(vma)); > + vma->flags--; > +} > + > +static inline void i915_vma_unpin(struct i915_vma *vma) > +{ > + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); > + __i915_vma_unpin(vma); > +} > + > +/** > + * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture > + * @vma: VMA to iomap > + * > + * The passed in VMA has to be pinned in the global GTT mappable region. > + * An extra pinning of the VMA is acquired for the return iomapping, > + * the caller must call i915_vma_unpin_iomap to relinquish the pinning > + * after the iomapping is no longer required. > + * > + * Callers must hold the struct_mutex. > + * > + * Returns a valid iomapped pointer or ERR_PTR. > + */ > +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); > +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) > + > +/** > + * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap > + * @vma: VMA to unpin > + * > + * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). > + * > + * Callers must hold the struct_mutex. This function is only valid to be > + * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). 
> + */ > +static inline void i915_vma_unpin_iomap(struct i915_vma *vma) > +{ > + lockdep_assert_held(&vma->vm->dev->struct_mutex); > + GEM_BUG_ON(vma->iomap == NULL); > + i915_vma_unpin(vma); > +} > + > +static inline struct page *i915_vma_first_page(struct i915_vma *vma) > +{ > + GEM_BUG_ON(!vma->pages); > + return sg_page(vma->pages->sgl); > +} > + > +/** > + * i915_vma_pin_fence - pin fencing state > + * @vma: vma to pin fencing for > + * > + * This pins the fencing state (whether tiled or untiled) to make sure the > + * vma (and its object) is ready to be used as a scanout target. Fencing > + * status must be synchronize first by calling i915_vma_get_fence(): > + * > + * The resulting fence pin reference must be released again with > + * i915_vma_unpin_fence(). > + * > + * Returns: > + * > + * True if the vma has a fence, false otherwise. > + */ > +static inline bool > +i915_vma_pin_fence(struct i915_vma *vma) > +{ > + lockdep_assert_held(&vma->vm->dev->struct_mutex); > + if (vma->fence) { > + vma->fence->pin_count++; > + return true; > + } else > + return false; > +} > + > +/** > + * i915_vma_unpin_fence - unpin fencing state > + * @vma: vma to unpin fencing for > + * > + * This releases the fence pin reference acquired through > + * i915_vma_pin_fence. It will handle both objects with and without an > + * attached fence correctly, callers do not need to distinguish this. > + */ > +static inline void > +i915_vma_unpin_fence(struct i915_vma *vma) > +{ > + lockdep_assert_held(&vma->vm->dev->struct_mutex); > + if (vma->fence) { > + GEM_BUG_ON(vma->fence->pin_count <= 0); > + vma->fence->pin_count--; > + } > +} > + > +#endif > + > Looks like code movement to me and I like the idea of more separation so based on that: Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0857e50..3dea46a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,7 +33,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ - i915_gem_fence.o \ + i915_gem_fence_reg.o \ i915_gem_gtt.o \ i915_gem_internal.o \ i915_gem.o \ @@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_trace_points.o \ + i915_vma.o \ intel_breadcrumbs.o \ intel_engine_cs.o \ intel_hangcheck.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 30777de..ccd0361 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -60,11 +60,15 @@ #include "intel_ringbuffer.h" #include "i915_gem.h" +#include "i915_gem_fence_reg.h" +#include "i915_gem_object.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_vma.h" + #include "intel_gvt.h" /* General customization: @@ -459,23 +463,6 @@ struct intel_opregion { struct intel_overlay; struct intel_overlay_error_state; -struct drm_i915_fence_reg { - struct list_head link; - struct drm_i915_private *i915; - struct i915_vma *vma; - int pin_count; - int id; - /** - * Whether the tiling parameters for the currently - * associated fence register have changed. Note that - * for the purposes of tracking tiling changes we also - * treat the unfenced register, the register slot that - * the object occupies whilst it executes a fenced - * command (such as BLT on gen2/3), as a "fence". 
- */ - bool dirty; -}; - struct sdvo_device_mapping { u8 initialized; u8 dvo_port; @@ -2179,31 +2166,6 @@ enum hdmi_force_audio { #define I915_GTT_OFFSET_NONE ((u32)-1) -struct drm_i915_gem_object_ops { - unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 - - /* Interface between the GEM object and its backing storage. - * get_pages() is called once prior to the use of the associated set - * of pages before to binding them into the GTT, and put_pages() is - * called after we no longer need them. As we expect there to be - * associated cost with migrating pages between the backing storage - * and making them available for the GPU (e.g. clflush), we may hold - * onto the pages after they are no longer referenced by the GPU - * in case they may be used again shortly (for example migrating the - * pages to a different memory domain within the GTT). put_pages() - * will therefore most likely be called when the object itself is - * being released or under memory pressure (where we attempt to - * reap pages for the shrinker). - */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); - void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); - - int (*dmabuf_export)(struct drm_i915_gem_object *); - void (*release)(struct drm_i915_gem_object *); -}; - /* * Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is * considered to be the frontbuffer for the given plane interface-wise. This @@ -2225,292 +2187,6 @@ struct drm_i915_gem_object_ops { #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \ (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) -struct drm_i915_gem_object { - struct drm_gem_object base; - - const struct drm_i915_gem_object_ops *ops; - - /** List of VMAs backed by this object */ - struct list_head vma_list; - struct rb_root vma_tree; - - /** Stolen memory for this object, instead of being backed by shmem. 
*/ - struct drm_mm_node *stolen; - struct list_head global_link; - union { - struct rcu_head rcu; - struct llist_node freed; - }; - - /** - * Whether the object is currently in the GGTT mmap. - */ - struct list_head userfault_link; - - /** Used in execbuf to temporarily hold a ref */ - struct list_head obj_exec_link; - - struct list_head batch_pool_link; - - unsigned long flags; - - /** - * Have we taken a reference for the object for incomplete GPU - * activity? - */ -#define I915_BO_ACTIVE_REF 0 - - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit - */ - unsigned long gt_ro:1; - unsigned int cache_level:3; - unsigned int cache_dirty:1; - - atomic_t frontbuffer_bits; - unsigned int frontbuffer_ggtt_origin; /* write once */ - - /** Current tiling stride for the object, if it's tiled. */ - unsigned int tiling_and_stride; -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) -#define STRIDE_MASK (~TILING_MASK) - - /** Count of VMA actually bound by this object */ - unsigned int bind_count; - unsigned int active_count; - unsigned int pin_display; - - struct { - struct mutex lock; /* protects the pages and their use */ - atomic_t pages_pin_count; - - struct sg_table *pages; - void *mapping; - - struct i915_gem_object_page_iter { - struct scatterlist *sg_pos; - unsigned int sg_idx; /* in pages, but 32bit eek! */ - - struct radix_tree_root radix; - struct mutex lock; /* protects this cache */ - } get_page; - - /** - * Advice: are the backing pages purgeable? - */ - unsigned int madv:2; - - /** - * This is set if the object has been written to since the - * pages were last acquired. - */ - bool dirty:1; - - /** - * This is set if the object has been pinned due to unknown - * swizzling. - */ - bool quirked:1; - } mm; - - /** Breadcrumb of last rendering to the buffer. - * There can only be one writer, but we allow for multiple readers. 
- * If there is a writer that necessarily implies that all other - * read requests are complete - but we may only be lazily clearing - * the read requests. A read request is naturally the most recent - * request on a ring, so we may have two different write and read - * requests on one ring where the write request is older than the - * read request. This allows for the CPU to read from an active - * buffer by only waiting for the write to complete. - */ - struct reservation_object *resv; - - /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; - - /** Record of address bit 17 of each page at last unbind. */ - unsigned long *bit_17; - - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; - - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; - - /** for phys allocated objects */ - struct drm_dma_handle *phys_handle; - - struct reservation_object __builtin_resv; -}; - -static inline struct drm_i915_gem_object * -to_intel_bo(struct drm_gem_object *gem) -{ - /* Assert that to_intel_bo(NULL) == NULL */ - BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); - - return container_of(gem, struct drm_i915_gem_object, base); -} - -/** - * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle - * @filp: DRM file private date - * @handle: userspace handle - * - * Returns: - * - * A pointer to the object named by the handle if such exists on @filp, NULL - * otherwise. This object is only valid whilst under the RCU read lock, and - * note carefully the object may be in the process of being destroyed. 
- */ -static inline struct drm_i915_gem_object * -i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) -{ -#ifdef CONFIG_LOCKDEP - WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); -#endif - return idr_find(&file->object_idr, handle); -} - -static inline struct drm_i915_gem_object * -i915_gem_object_lookup(struct drm_file *file, u32 handle) -{ - struct drm_i915_gem_object *obj; - - rcu_read_lock(); - obj = i915_gem_object_lookup_rcu(file, handle); - if (obj && !kref_get_unless_zero(&obj->base.refcount)) - obj = NULL; - rcu_read_unlock(); - - return obj; -} - -__deprecated -extern struct drm_gem_object * -drm_gem_object_lookup(struct drm_file *file, u32 handle); - -__attribute__((nonnull)) -static inline struct drm_i915_gem_object * -i915_gem_object_get(struct drm_i915_gem_object *obj) -{ - drm_gem_object_reference(&obj->base); - return obj; -} - -__deprecated -extern void drm_gem_object_reference(struct drm_gem_object *); - -__attribute__((nonnull)) -static inline void -i915_gem_object_put(struct drm_i915_gem_object *obj) -{ - __drm_gem_object_unreference(&obj->base); -} - -__deprecated -extern void drm_gem_object_unreference(struct drm_gem_object *); - -__deprecated -extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); - -static inline bool -i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) -{ - return atomic_read(&obj->base.refcount.refcount) == 0; -} - -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; -} - -static inline bool -i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) -{ - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; -} - -static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) -{ - return obj->active_count; -} - -static inline bool -i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) -{ - return 
test_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -static inline void -i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - __set_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -static inline void -i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); - -static inline unsigned int -i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) -{ - return obj->tiling_and_stride & TILING_MASK; -} - -static inline bool -i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) -{ - return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; -} - -static inline unsigned int -i915_gem_object_get_stride(struct drm_i915_gem_object *obj) -{ - return obj->tiling_and_stride & STRIDE_MASK; -} - -static inline struct intel_engine_cs * -i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) -{ - struct intel_engine_cs *engine = NULL; - struct dma_fence *fence; - - rcu_read_lock(); - fence = reservation_object_get_excl_rcu(obj->resv); - rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - - return engine; -} - -static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) -{ - i915_gem_object_get(vma->obj); - return vma; -} - -static inline void i915_vma_put(struct i915_vma *vma) -{ - i915_gem_object_put(vma->obj); -} - /* * Optimised SGL iterator for GEM objects */ @@ -3222,13 +2898,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags); -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); -int __must_check i915_vma_unbind(struct i915_vma *vma); -void 
i915_vma_close(struct i915_vma *vma); -void i915_vma_destroy(struct i915_vma *vma); - int i915_gem_object_unbind(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); @@ -3478,54 +3147,10 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); } -/* i915_gem_fence.c */ +/* i915_gem_fence_reg.c */ int __must_check i915_vma_get_fence(struct i915_vma *vma); int __must_check i915_vma_put_fence(struct i915_vma *vma); -/** - * i915_vma_pin_fence - pin fencing state - * @vma: vma to pin fencing for - * - * This pins the fencing state (whether tiled or untiled) to make sure the - * vma (and its object) is ready to be used as a scanout target. Fencing - * status must be synchronize first by calling i915_vma_get_fence(): - * - * The resulting fence pin reference must be released again with - * i915_vma_unpin_fence(). - * - * Returns: - * - * True if the vma has a fence, false otherwise. - */ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; -} - -/** - * i915_vma_unpin_fence - unpin fencing state - * @vma: vma to unpin fencing for - * - * This releases the fence pin reference acquired through - * i915_vma_pin_fence. It will handle both objects with and without an - * attached fence correctly, callers do not need to distinguish this. 
- */ -static inline void -i915_vma_unpin_fence(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } -} - void i915_gem_restore_fences(struct drm_device *dev); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1c20edb..d51fb5d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2919,117 +2919,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static void __i915_vma_iounmap(struct i915_vma *vma) -{ - GEM_BUG_ON(i915_vma_is_pinned(vma)); - - if (vma->iomap == NULL) - return; - - io_mapping_unmap(vma->iomap); - vma->iomap = NULL; -} - -int i915_vma_unbind(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - unsigned long active; - int ret; - - lockdep_assert_held(&obj->base.dev->struct_mutex); - - /* First wait upon any activity as retiring the request may - * have side-effects such as unpinning or even unbinding this vma. - */ - active = i915_vma_get_active(vma); - if (active) { - int idx; - - /* When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma). To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). 
- */ - __i915_vma_pin(vma); - - for_each_active(active, idx) { - ret = i915_gem_active_retire(&vma->last_read[idx], - &vma->vm->dev->struct_mutex); - if (ret) - break; - } - - __i915_vma_unpin(vma); - if (ret) - return ret; - - GEM_BUG_ON(i915_vma_is_active(vma)); - } - - if (i915_vma_is_pinned(vma)) - return -EBUSY; - - if (!drm_mm_node_allocated(&vma->node)) - goto destroy; - - GEM_BUG_ON(obj->bind_count == 0); - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - - if (i915_vma_is_map_and_fenceable(vma)) { - /* release the fence reg _after_ flushing */ - ret = i915_vma_put_fence(vma); - if (ret) - return ret; - - /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); - - __i915_vma_iounmap(vma); - vma->flags &= ~I915_VMA_CAN_FENCE; - } - - if (likely(!vma->vm->closed)) { - trace_i915_vma_unbind(vma); - vma->vm->unbind_vma(vma); - } - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - - if (vma->pages != obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. 
- */ - i915_gem_object_unpin_pages(obj); - -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - - return 0; -} - static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) { int ret, i; @@ -3057,172 +2946,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return 0; } -static bool i915_gem_valid_gtt_space(struct i915_vma *vma, - unsigned long cache_level) -{ - struct drm_mm_node *gtt_space = &vma->node; - struct drm_mm_node *other; - - /* - * On some machines we have to be careful when putting differing types - * of snoopable memory together to avoid the prefetcher crossing memory - * domains and dying. During vm initialisation, we decide whether or not - * these constraints apply and set the drm_mm.color_adjust - * appropriately. - */ - if (vma->vm->mm.color_adjust == NULL) - return true; - - if (!drm_mm_node_allocated(gtt_space)) - return true; - - if (list_empty(&gtt_space->node_list)) - return true; - - other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); - if (other->allocated && !other->hole_follows && other->color != cache_level) - return false; - - other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); - if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) - return false; - - return true; -} - -/** - * i915_vma_insert - finds a slot for the vma in its address space - * @vma: the vma - * @size: requested size in bytes (can be larger than the VMA) - * @alignment: required alignment - * @flags: mask of PIN_* flags to use - * - * First we try to allocate some free space that meets the requirements for - * the VMA. Failiing that, if the flags permit, it will evict an old VMA, - * preferrably the oldest idle entry to make room for the new VMA. - * - * Returns: - * 0 on success, negative error code otherwise. 
- */ -static int -i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); - struct drm_i915_gem_object *obj = vma->obj; - u64 start, end; - int ret; - - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - - size = max(size, vma->size); - if (flags & PIN_MAPPABLE) - size = i915_gem_get_ggtt_size(dev_priv, size, - i915_gem_object_get_tiling(obj)); - - alignment = max(max(alignment, vma->display_alignment), - i915_gem_get_ggtt_alignment(dev_priv, size, - i915_gem_object_get_tiling(obj), - flags & PIN_MAPPABLE)); - - start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - - end = vma->vm->total; - if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); - if (flags & PIN_ZONE_4G) - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - - /* If binding the object/GGTT view requires more space than the entire - * aperture has, reject it early before evicting everything in a vain - * attempt to find space. - */ - if (size > end) { - DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", - size, obj->base.size, - flags & PIN_MAPPABLE ? 
"mappable" : "total", - end); - return -E2BIG; - } - - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; - - if (flags & PIN_OFFSET_FIXED) { - u64 offset = flags & PIN_OFFSET_MASK; - if (offset & (alignment - 1) || offset > end - size) { - ret = -EINVAL; - goto err_unpin; - } - - vma->node.start = offset; - vma->node.size = size; - vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) { - ret = i915_gem_evict_for_vma(vma); - if (ret == 0) - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) - goto err_unpin; - } - } else { - u32 search_flag, alloc_flag; - - if (flags & PIN_HIGH) { - search_flag = DRM_MM_SEARCH_BELOW; - alloc_flag = DRM_MM_CREATE_TOP; - } else { - search_flag = DRM_MM_SEARCH_DEFAULT; - alloc_flag = DRM_MM_CREATE_DEFAULT; - } - - /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, - * so we know that we always have a minimum alignment of 4096. - * The drm_mm range manager is optimised to return results - * with zero alignment, so where possible use the optimal - * path. 
- */ - if (alignment <= 4096) - alignment = 0; - -search_free: - ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, - &vma->node, - size, alignment, - obj->cache_level, - start, end, - search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(vma->vm, size, alignment, - obj->cache_level, - start, end, - flags); - if (ret == 0) - goto search_free; - - goto err_unpin; - } - - GEM_BUG_ON(vma->node.start < start); - GEM_BUG_ON(vma->node.start + vma->node.size > end); - } - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - obj->bind_count++; - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); - - return 0; - -err_unpin: - i915_gem_object_unpin_pages(obj); - return ret; -} - void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force) { @@ -3818,100 +3541,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return ret < 0 ? 
ret : 0; } -static bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - if (!drm_mm_node_allocated(&vma->node)) - return false; - - if (vma->node.size < size) - return true; - - if (alignment && vma->node.start & (alignment - 1)) - return true; - - if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) - return true; - - if (flags & PIN_OFFSET_BIAS && - vma->node.start < (flags & PIN_OFFSET_MASK)) - return true; - - if (flags & PIN_OFFSET_FIXED && - vma->node.start != (flags & PIN_OFFSET_MASK)) - return true; - - return false; -} - -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - bool mappable, fenceable; - u32 fence_size, fence_alignment; - - fence_size = i915_gem_get_ggtt_size(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj)); - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - true); - - fenceable = (vma->node.size == fence_size && - (vma->node.start & (fence_alignment - 1)) == 0); - - mappable = (vma->node.start + fence_size <= - dev_priv->ggtt.mappable_end); - - /* - * Explicitly disable for rotated VMA since the display does not - * need the fence and the VMA is not accessible to other users. 
- */ - if (mappable && fenceable && - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) - vma->flags |= I915_VMA_CAN_FENCE; - else - vma->flags &= ~I915_VMA_CAN_FENCE; -} - -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) -{ - unsigned int bound = vma->flags; - int ret; - - lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err; - } - - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err; - } - - ret = i915_vma_bind(vma, vma->obj->cache_level, flags); - if (ret) - goto err; - - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); - - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - -err: - __i915_vma_unpin(vma); - return ret; -} - struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c deleted file mode 100644 index cd59dbc..0000000 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <drm/drmP.h> -#include <drm/i915_drm.h> -#include "i915_drv.h" - -/** - * DOC: fence register handling - * - * Important to avoid confusions: "fences" in the i915 driver are not execution - * fences used to track command completion but hardware detiler objects which - * wrap a given range of the global GTT. Each platform has only a fairly limited - * set of these objects. - * - * Fences are used to detile GTT memory mappings. They're also connected to the - * hardware frontbuffer render tracking and hence interact with frontbuffer - * compression. Furthermore on older platforms fences are required for tiled - * objects used by the display engine. They can also be used by the render - * engine - they're required for blitter commands and are optional for render - * commands. But on gen4+ both display (with the exception of fbc) and rendering - * have their own tiling state bits and don't need fences. - * - * Also note that fences only support X and Y tiling and hence can't be used for - * the fancier new tiling formats like W, Ys and Yf. - * - * Finally note that because fences are such a restricted resource they're - * dynamically associated with objects. Furthermore fence state is committed to - * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must - * explicitly call i915_gem_object_get_fence() to synchronize fencing status - * for cpu access. 
Also note that some code wants an unfenced view, for those - * cases the fence can be removed forcefully with i915_gem_object_put_fence(). - * - * Internally these functions will synchronize with userspace access by removing - * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. - */ - -#define pipelined 0 - -static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - i915_reg_t fence_reg_lo, fence_reg_hi; - int fence_pitch_shift; - u64 val; - - if (INTEL_INFO(fence->i915)->gen >= 6) { - fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); - fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); - fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; - - } else { - fence_reg_lo = FENCE_REG_965_LO(fence->id); - fence_reg_hi = FENCE_REG_965_HI(fence->id); - fence_pitch_shift = I965_FENCE_PITCH_SHIFT; - } - - val = 0; - if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; - unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 row_size = stride * (is_y_tiled ? 32 : 8); - u32 size = rounddown((u32)vma->node.size, row_size); - - val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; - val |= vma->node.start & 0xfffff000; - val |= (u64)((stride / 128) - 1) << fence_pitch_shift; - if (is_y_tiled) - val |= BIT(I965_FENCE_TILING_Y_SHIFT); - val |= I965_FENCE_REG_VALID; - } - - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - - /* To w/a incoherency with non-atomic 64-bit register updates, - * we split the 64-bit update into two 32-bit writes. In order - * for a partial fence not to be evaluated between writes, we - * precede the update with write to turn off the fence register, - * and only enable the fence as the last step. - * - * For extra levels of paranoia, we make sure each step lands - * before applying the next step. 
- */ - I915_WRITE(fence_reg_lo, 0); - POSTING_READ(fence_reg_lo); - - I915_WRITE(fence_reg_hi, upper_32_bits(val)); - I915_WRITE(fence_reg_lo, lower_32_bits(val)); - POSTING_READ(fence_reg_lo); - } -} - -static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - u32 val; - - val = 0; - if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; - unsigned int stride = i915_gem_object_get_stride(vma->obj); - int pitch_val; - int tile_width; - - WARN((vma->node.start & ~I915_FENCE_START_MASK) || - !is_power_of_2(vma->node.size) || - (vma->node.start & (vma->node.size - 1)), - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", - vma->node.start, - i915_vma_is_map_and_fenceable(vma), - vma->node.size); - - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) - tile_width = 128; - else - tile_width = 512; - - /* Note: pitch better be a power of two tile widths */ - pitch_val = stride / tile_width; - pitch_val = ffs(pitch_val) - 1; - - val = vma->node.start; - if (is_y_tiled) - val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I915_FENCE_SIZE_BITS(vma->node.size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } - - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - i915_reg_t reg = FENCE_REG(fence->id); - - I915_WRITE(reg, val); - POSTING_READ(reg); - } -} - -static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - u32 val; - - val = 0; - if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; - unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 pitch_val; - - WARN((vma->node.start & ~I830_FENCE_START_MASK) || - !is_power_of_2(vma->node.size) || - (vma->node.start & (vma->node.size - 1)), - "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", - vma->node.start, vma->node.size); - - 
pitch_val = stride / 128; - pitch_val = ffs(pitch_val) - 1; - - val = vma->node.start; - if (is_y_tiled) - val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I830_FENCE_SIZE_BITS(vma->node.size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } - - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - i915_reg_t reg = FENCE_REG(fence->id); - - I915_WRITE(reg, val); - POSTING_READ(reg); - } -} - -static void fence_write(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - /* Previous access through the fence register is marshalled by - * the mb() inside the fault handlers (i915_gem_release_mmaps) - * and explicitly managed for internal users. - */ - - if (IS_GEN2(fence->i915)) - i830_write_fence_reg(fence, vma); - else if (IS_GEN3(fence->i915)) - i915_write_fence_reg(fence, vma); - else - i965_write_fence_reg(fence, vma); - - /* Access through the fenced region afterwards is - * ordered by the posting reads whilst writing the registers. - */ - - fence->dirty = false; -} - -static int fence_update(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - int ret; - - if (vma) { - if (!i915_vma_is_map_and_fenceable(vma)) - return -EINVAL; - - if (WARN(!i915_gem_object_get_stride(vma->obj) || - !i915_gem_object_get_tiling(vma->obj), - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - i915_gem_object_get_stride(vma->obj), - i915_gem_object_get_tiling(vma->obj))) - return -EINVAL; - - ret = i915_gem_active_retire(&vma->last_fence, - &vma->obj->base.dev->struct_mutex); - if (ret) - return ret; - } - - if (fence->vma) { - ret = i915_gem_active_retire(&fence->vma->last_fence, - &fence->vma->obj->base.dev->struct_mutex); - if (ret) - return ret; - } - - if (fence->vma && fence->vma != vma) { - /* Ensure that all userspace CPU access is completed before - * stealing the fence. 
- */ - i915_gem_release_mmap(fence->vma->obj); - - fence->vma->fence = NULL; - fence->vma = NULL; - - list_move(&fence->link, &fence->i915->mm.fence_list); - } - - fence_write(fence, vma); - - if (vma) { - if (fence->vma != vma) { - vma->fence = fence; - fence->vma = vma; - } - - list_move_tail(&fence->link, &fence->i915->mm.fence_list); - } - - return 0; -} - -/** - * i915_vma_put_fence - force-remove fence for a VMA - * @vma: vma to map linearly (not through a fence reg) - * - * This function force-removes any fence from the given object, which is useful - * if the kernel wants to do untiled GTT access. - * - * Returns: - * - * 0 on success, negative error code on failure. - */ -int -i915_vma_put_fence(struct i915_vma *vma) -{ - struct drm_i915_fence_reg *fence = vma->fence; - - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - if (!fence) - return 0; - - if (fence->pin_count) - return -EBUSY; - - return fence_update(fence, NULL); -} - -static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) -{ - struct drm_i915_fence_reg *fence; - - list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { - if (fence->pin_count) - continue; - - return fence; - } - - /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(&dev_priv->drm)) - return ERR_PTR(-EAGAIN); - - return ERR_PTR(-EDEADLK); -} - -/** - * i915_vma_get_fence - set up fencing for a vma - * @vma: vma to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. - * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - * - * Returns: - * - * 0 on success, negative error code on failure. 
- */ -int -i915_vma_get_fence(struct i915_vma *vma) -{ - struct drm_i915_fence_reg *fence; - struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; - - /* Note that we revoke fences on runtime suspend. Therefore the user - * must keep the device awake whilst using the fence. - */ - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - /* Just update our place in the LRU if our fence is getting reused. */ - if (vma->fence) { - fence = vma->fence; - if (!fence->dirty) { - list_move_tail(&fence->link, - &fence->i915->mm.fence_list); - return 0; - } - } else if (set) { - fence = fence_find(to_i915(vma->vm->dev)); - if (IS_ERR(fence)) - return PTR_ERR(fence); - } else - return 0; - - return fence_update(fence, set); -} - -/** - * i915_gem_restore_fences - restore fence state - * @dev: DRM device - * - * Restore the hw fence state to match the software tracking again, to be called - * after a gpu reset and on resume. Note that on runtime suspend we only cancel - * the fences, to be reacquired by the user later. - */ -void i915_gem_restore_fences(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int i; - - for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - struct i915_vma *vma = reg->vma; - - /* - * Commit delayed tiling changes if we have an object still - * attached to the fence, otherwise just clear the fence. - */ - if (vma && !i915_gem_object_is_tiled(vma->obj)) { - GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); - - list_move(&reg->link, &dev_priv->mm.fence_list); - vma->fence = NULL; - vma = NULL; - } - - fence_write(reg, vma); - reg->vma = vma; - } -} - -/** - * DOC: tiling swizzling details - * - * The idea behind tiling is to increase cache hit rates by rearranging - * pixel data so that a group of pixel accesses are in the same cacheline. 
- * Performance improvement from doing this on the back/depth buffer are on - * the order of 30%. - * - * Intel architectures make this somewhat more complicated, though, by - * adjustments made to addressing of data when the memory is in interleaved - * mode (matched pairs of DIMMS) to improve memory bandwidth. - * For interleaved memory, the CPU sends every sequential 64 bytes - * to an alternate memory channel so it can get the bandwidth from both. - * - * The GPU also rearranges its accesses for increased bandwidth to interleaved - * memory, and it matches what the CPU does for non-tiled. However, when tiled - * it does it a little differently, since one walks addresses not just in the - * X direction but also Y. So, along with alternating channels when bit - * 6 of the address flips, it also alternates when other bits flip -- Bits 9 - * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) - * are common to both the 915 and 965-class hardware. - * - * The CPU also sometimes XORs in higher bits as well, to improve - * bandwidth doing strided access like we do so frequently in graphics. This - * is called "Channel XOR Randomization" in the MCH documentation. The result - * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address - * decode. - * - * All of this bit 6 XORing has an effect on our memory management, - * as we need to make sure that the 3d driver can correctly address object - * contents. - * - * If we don't have interleaved memory, all tiling is safe and no swizzling is - * required. - * - * When bit 17 is XORed in, we simply refuse to tile at all. Bit - * 17 is not just a page offset, so as we page an object out and back in, - * individual pages in it will have different bit 17 addresses, resulting in - * each 64 bytes being swapped with its neighbor! 
- * - * Otherwise, if interleaved, we have to tell the 3d driver what the address - * swizzling it needs to do is, since it's writing with the CPU to the pages - * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the - * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling - * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order - * to match what the GPU expects. - */ - -/** - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern - * @dev: DRM device - * - * Detects bit 6 swizzling of address lookup between IGD access and CPU - * access through main memory. - */ -void -i915_gem_detect_bit_6_swizzle(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - - if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { - /* - * On BDW+, swizzling is not used. We leave the CPU memory - * controller in charge of optimizing memory accesses without - * the extra address manipulation GPU side. - * - * VLV and CHV don't have GPU swizzling. - */ - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } else if (INTEL_INFO(dev)->gen >= 6) { - if (dev_priv->preserve_bios_swizzle) { - if (I915_READ(DISP_ARB_CTL) & - DISP_TILE_SURFACE_SWIZZLING) { - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else { - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } - } else { - uint32_t dimm_c0, dimm_c1; - dimm_c0 = I915_READ(MAD_DIMM_C0); - dimm_c1 = I915_READ(MAD_DIMM_C1); - dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; - dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; - /* Enable swizzling when the channels are populated - * with identically sized dimms. We don't need to check - * the 3rd channel because no cpu with gpu attached - * ships in that configuration. 
Also, swizzling only - * makes sense for 2 channels anyway. */ - if (dimm_c0 == dimm_c1) { - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else { - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } - } - } else if (IS_GEN5(dev_priv)) { - /* On Ironlake whatever DRAM config, GPU always do - * same swizzling setup. - */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else if (IS_GEN2(dev_priv)) { - /* As far as we know, the 865 doesn't have these bit 6 - * swizzling issues. - */ - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) && - !IS_G33(dev_priv))) { - uint32_t dcc; - - /* On 9xx chipsets, channel interleave by the CPU is - * determined by DCC. For single-channel, neither the CPU - * nor the GPU do swizzling. For dual channel interleaved, - * the GPU's interleave is bit 9 and 10 for X tiled, and bit - * 9 for Y tiled. The CPU's interleave is independent, and - * can be based on either bit 11 (haven't seen this yet) or - * bit 17 (common). - */ - dcc = I915_READ(DCC); - switch (dcc & DCC_ADDRESSING_MODE_MASK) { - case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - break; - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: - if (dcc & DCC_CHANNEL_XOR_DISABLE) { - /* This is the base swizzling by the GPU for - * tiled buffers. - */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { - /* Bit 11 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; - swizzle_y = I915_BIT_6_SWIZZLE_9_11; - } else { - /* Bit 17 swizzling by the CPU in addition. 
*/ - swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; - swizzle_y = I915_BIT_6_SWIZZLE_9_17; - } - break; - } - - /* check for L-shaped memory aka modified enhanced addressing */ - if (IS_GEN4(dev_priv) && - !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - } - - if (dcc == 0xffffffff) { - DRM_ERROR("Couldn't read from MCHBAR. " - "Disabling tiling.\n"); - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - } - } else { - /* The 965, G33, and newer, have a very flexible memory - * configuration. It will enable dual-channel mode - * (interleaving) on as much memory as it can, and the GPU - * will additionally sometimes enable different bit 6 - * swizzling for tiled objects from the CPU. - * - * Here's what I found on the G965: - * slot fill memory size swizzling - * 0A 0B 1A 1B 1-ch 2-ch - * 512 0 0 0 512 0 O - * 512 0 512 0 16 1008 X - * 512 0 0 512 16 1008 X - * 0 512 0 512 16 1008 X - * 1024 1024 1024 0 2048 1024 O - * - * We could probably detect this based on either the DRB - * matching, which was the case for the swizzling required in - * the table above, or from the 1-ch value being less than - * the minimum size of a rank. - * - * Reports indicate that the swizzling actually - * varies depending upon page placement inside the - * channels, i.e. we see swizzled pages where the - * banks of memory are paired and unswizzled on the - * uneven portion, so leave that as unknown. - */ - if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } - } - - if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || - swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { - /* Userspace likes to explode if it sees unknown swizzling, - * so lie. We will finish the lie when reporting through - * the get-tiling-ioctl by reporting the physical swizzle - * mode as unknown instead. 
- * - * As we don't strictly know what the swizzling is, it may be - * bit17 dependent, and so we need to also prevent the pages - * from being moved. - */ - dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } - - dev_priv->mm.bit_6_swizzle_x = swizzle_x; - dev_priv->mm.bit_6_swizzle_y = swizzle_y; -} - -/* - * Swap every 64 bytes of this page around, to account for it having a new - * bit 17 of its physical address and therefore being interpreted differently - * by the GPU. - */ -static void -i915_gem_swizzle_page(struct page *page) -{ - char temp[64]; - char *vaddr; - int i; - - vaddr = kmap(page); - - for (i = 0; i < PAGE_SIZE; i += 128) { - memcpy(temp, &vaddr[i], 64); - memcpy(&vaddr[i], &vaddr[i + 64], 64); - memcpy(&vaddr[i + 64], temp, 64); - } - - kunmap(page); -} - -/** - * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling - * @obj: i915 GEM buffer object - * @pages: the scattergather list of physical pages - * - * This function fixes up the swizzling in case any page frame number for this - * object has changed in bit 17 since that state has been saved with - * i915_gem_object_save_bit_17_swizzle(). - * - * This is called when pinning backing storage again, since the kernel is free - * to move unpinned backing storage around (either by directly moving pages or - * by swapping them out and back in again). 
- */ -void -i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - struct sgt_iter sgt_iter; - struct page *page; - int i; - - if (obj->bit_17 == NULL) - return; - - i = 0; - for_each_sgt_page(page, sgt_iter, pages) { - char new_bit_17 = page_to_phys(page) >> 17; - if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { - i915_gem_swizzle_page(page); - set_page_dirty(page); - } - i++; - } -} - -/** - * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling - * @obj: i915 GEM buffer object - * @pages: the scattergather list of physical pages - * - * This function saves the bit 17 of each page frame number so that swizzling - * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must - * be called before the backing storage can be unpinned. - */ -void -i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - const unsigned int page_count = obj->base.size >> PAGE_SHIFT; - struct sgt_iter sgt_iter; - struct page *page; - int i; - - if (obj->bit_17 == NULL) { - obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count), - sizeof(long), GFP_KERNEL); - if (obj->bit_17 == NULL) { - DRM_ERROR("Failed to allocate memory for bit 17 " - "record\n"); - return; - } - } - - i = 0; - - for_each_sgt_page(page, sgt_iter, pages) { - if (page_to_phys(page) & (1 << 17)) - __set_bit(i, obj->bit_17); - else - __clear_bit(i, obj->bit_17); - i++; - } -} diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c new file mode 100644 index 0000000..cd59dbc --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -0,0 +1,716 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <drm/drmP.h> +#include <drm/i915_drm.h> +#include "i915_drv.h" + +/** + * DOC: fence register handling + * + * Important to avoid confusions: "fences" in the i915 driver are not execution + * fences used to track command completion but hardware detiler objects which + * wrap a given range of the global GTT. Each platform has only a fairly limited + * set of these objects. + * + * Fences are used to detile GTT memory mappings. They're also connected to the + * hardware frontbuffer render tracking and hence interact with frontbuffer + * compression. Furthermore on older platforms fences are required for tiled + * objects used by the display engine. They can also be used by the render + * engine - they're required for blitter commands and are optional for render + * commands. But on gen4+ both display (with the exception of fbc) and rendering + * have their own tiling state bits and don't need fences. + * + * Also note that fences only support X and Y tiling and hence can't be used for + * the fancier new tiling formats like W, Ys and Yf. 
+ * + * Finally note that because fences are such a restricted resource they're + * dynamically associated with objects. Furthermore fence state is committed to + * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must + * explicitly call i915_vma_get_fence() to synchronize fencing status + * for cpu access. Also note that some code wants an unfenced view, for those + * cases the fence can be removed forcefully with i915_vma_put_fence(). + * + * Internally these functions will synchronize with userspace access by removing + * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. + */ + +#define pipelined 0 + +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + i915_reg_t fence_reg_lo, fence_reg_hi; + int fence_pitch_shift; + u64 val; + + if (INTEL_INFO(fence->i915)->gen >= 6) { + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); + fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; + + } else { + fence_reg_lo = FENCE_REG_965_LO(fence->id); + fence_reg_hi = FENCE_REG_965_HI(fence->id); + fence_pitch_shift = I965_FENCE_PITCH_SHIFT; + } + + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 row_size = stride * (is_y_tiled ? 32 : 8); + u32 size = rounddown((u32)vma->node.size, row_size); + + val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; + val |= vma->node.start & 0xfffff000; + val |= (u64)((stride / 128) - 1) << fence_pitch_shift; + if (is_y_tiled) + val |= BIT(I965_FENCE_TILING_Y_SHIFT); + val |= I965_FENCE_REG_VALID; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + + /* To w/a incoherency with non-atomic 64-bit register updates, + * we split the 64-bit update into two 32-bit writes. 
In order + * for a partial fence not to be evaluated between writes, we + * precede the update with write to turn off the fence register, + * and only enable the fence as the last step. + * + * For extra levels of paranoia, we make sure each step lands + * before applying the next step. + */ + I915_WRITE(fence_reg_lo, 0); + POSTING_READ(fence_reg_lo); + + I915_WRITE(fence_reg_hi, upper_32_bits(val)); + I915_WRITE(fence_reg_lo, lower_32_bits(val)); + POSTING_READ(fence_reg_lo); + } +} + +static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + u32 val; + + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + int pitch_val; + int tile_width; + + WARN((vma->node.start & ~I915_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", + vma->node.start, + i915_vma_is_map_and_fenceable(vma), + vma->node.size); + + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) + tile_width = 128; + else + tile_width = 512; + + /* Note: pitch better be a power of two tile widths */ + pitch_val = stride / tile_width; + pitch_val = ffs(pitch_val) - 1; + + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I915_FENCE_SIZE_BITS(vma->node.size); + val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); + + I915_WRITE(reg, val); + POSTING_READ(reg); + } +} + +static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + u32 val; + + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = 
i915_gem_object_get_stride(vma->obj); + u32 pitch_val; + + WARN((vma->node.start & ~I830_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", + vma->node.start, vma->node.size); + + pitch_val = stride / 128; + pitch_val = ffs(pitch_val) - 1; + + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I830_FENCE_SIZE_BITS(vma->node.size); + val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); + + I915_WRITE(reg, val); + POSTING_READ(reg); + } +} + +static void fence_write(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + /* Previous access through the fence register is marshalled by + * the mb() inside the fault handlers (i915_gem_release_mmaps) + * and explicitly managed for internal users. + */ + + if (IS_GEN2(fence->i915)) + i830_write_fence_reg(fence, vma); + else if (IS_GEN3(fence->i915)) + i915_write_fence_reg(fence, vma); + else + i965_write_fence_reg(fence, vma); + + /* Access through the fenced region afterwards is + * ordered by the posting reads whilst writing the registers. 
+ */ + + fence->dirty = false; +} + +static int fence_update(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + int ret; + + if (vma) { + if (!i915_vma_is_map_and_fenceable(vma)) + return -EINVAL; + + if (WARN(!i915_gem_object_get_stride(vma->obj) || + !i915_gem_object_get_tiling(vma->obj), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + i915_gem_object_get_stride(vma->obj), + i915_gem_object_get_tiling(vma->obj))) + return -EINVAL; + + ret = i915_gem_active_retire(&vma->last_fence, + &vma->obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + if (fence->vma) { + ret = i915_gem_active_retire(&fence->vma->last_fence, + &fence->vma->obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + if (fence->vma && fence->vma != vma) { + /* Ensure that all userspace CPU access is completed before + * stealing the fence. + */ + i915_gem_release_mmap(fence->vma->obj); + + fence->vma->fence = NULL; + fence->vma = NULL; + + list_move(&fence->link, &fence->i915->mm.fence_list); + } + + fence_write(fence, vma); + + if (vma) { + if (fence->vma != vma) { + vma->fence = fence; + fence->vma = vma; + } + + list_move_tail(&fence->link, &fence->i915->mm.fence_list); + } + + return 0; +} + +/** + * i915_vma_put_fence - force-remove fence for a VMA + * @vma: vma to map linearly (not through a fence reg) + * + * This function force-removes any fence from the given object, which is useful + * if the kernel wants to do untiled GTT access. + * + * Returns: + * + * 0 on success, negative error code on failure. 
+ */ +int +i915_vma_put_fence(struct i915_vma *vma) +{ + struct drm_i915_fence_reg *fence = vma->fence; + + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + + if (!fence) + return 0; + + if (fence->pin_count) + return -EBUSY; + + return fence_update(fence, NULL); +} + +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) +{ + struct drm_i915_fence_reg *fence; + + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + if (fence->pin_count) + continue; + + return fence; + } + + /* Wait for completion of pending flips which consume fences */ + if (intel_has_pending_fb_unpin(&dev_priv->drm)) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-EDEADLK); +} + +/** + * i915_vma_get_fence - set up fencing for a vma + * @vma: vma to map through a fence reg + * + * When mapping objects through the GTT, userspace wants to be able to write + * to them without having to worry about swizzling if the object is tiled. + * This function walks the fence regs looking for a free one for @vma, + * stealing one if it can't find any. + * + * It then sets up the reg based on the object's properties: address, pitch + * and tiling format. + * + * For an untiled surface, this removes any existing fence. + * + * Returns: + * + * 0 on success, negative error code on failure. 
*/ + if (vma->fence) { + fence = vma->fence; + if (!fence->dirty) { + list_move_tail(&fence->link, + &fence->i915->mm.fence_list); + return 0; + } + } else if (set) { + fence = fence_find(to_i915(vma->vm->dev)); + if (IS_ERR(fence)) + return PTR_ERR(fence); + } else + return 0; + + return fence_update(fence, set); +} + +/** + * i915_gem_restore_fences - restore fence state + * @dev: DRM device + * + * Restore the hw fence state to match the software tracking again, to be called + * after a gpu reset and on resume. Note that on runtime suspend we only cancel + * the fences, to be reacquired by the user later. + */ +void i915_gem_restore_fences(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + int i; + + for (i = 0; i < dev_priv->num_fence_regs; i++) { + struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; + struct i915_vma *vma = reg->vma; + + /* + * Commit delayed tiling changes if we have an object still + * attached to the fence, otherwise just clear the fence. + */ + if (vma && !i915_gem_object_is_tiled(vma->obj)) { + GEM_BUG_ON(!reg->dirty); + GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); + + list_move(&reg->link, &dev_priv->mm.fence_list); + vma->fence = NULL; + vma = NULL; + } + + fence_write(reg, vma); + reg->vma = vma; + } +} + +/** + * DOC: tiling swizzling details + * + * The idea behind tiling is to increase cache hit rates by rearranging + * pixel data so that a group of pixel accesses are in the same cacheline. + * Performance improvement from doing this on the back/depth buffer are on + * the order of 30%. + * + * Intel architectures make this somewhat more complicated, though, by + * adjustments made to addressing of data when the memory is in interleaved + * mode (matched pairs of DIMMS) to improve memory bandwidth. + * For interleaved memory, the CPU sends every sequential 64 bytes + * to an alternate memory channel so it can get the bandwidth from both. 
+ * + * The GPU also rearranges its accesses for increased bandwidth to interleaved + * memory, and it matches what the CPU does for non-tiled. However, when tiled + * it does it a little differently, since one walks addresses not just in the + * X direction but also Y. So, along with alternating channels when bit + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) + * are common to both the 915 and 965-class hardware. + * + * The CPU also sometimes XORs in higher bits as well, to improve + * bandwidth doing strided access like we do so frequently in graphics. This + * is called "Channel XOR Randomization" in the MCH documentation. The result + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address + * decode. + * + * All of this bit 6 XORing has an effect on our memory management, + * as we need to make sure that the 3d driver can correctly address object + * contents. + * + * If we don't have interleaved memory, all tiling is safe and no swizzling is + * required. + * + * When bit 17 is XORed in, we simply refuse to tile at all. Bit + * 17 is not just a page offset, so as we page an object out and back in, + * individual pages in it will have different bit 17 addresses, resulting in + * each 64 bytes being swapped with its neighbor! + * + * Otherwise, if interleaved, we have to tell the 3d driver what the address + * swizzling it needs to do is, since it's writing with the CPU to the pages + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order + * to match what the GPU expects. 
+ */ + +/** + * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @dev: DRM device + * + * Detects bit 6 swizzling of address lookup between IGD access and CPU + * access through main memory. + */ +void +i915_gem_detect_bit_6_swizzle(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + + if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { + /* + * On BDW+, swizzling is not used. We leave the CPU memory + * controller in charge of optimizing memory accesses without + * the extra address manipulation GPU side. + * + * VLV and CHV don't have GPU swizzling. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (INTEL_INFO(dev)->gen >= 6) { + if (dev_priv->preserve_bios_swizzle) { + if (I915_READ(DISP_ARB_CTL) & + DISP_TILE_SURFACE_SWIZZLING) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + } else { + uint32_t dimm_c0, dimm_c1; + dimm_c0 = I915_READ(MAD_DIMM_C0); + dimm_c1 = I915_READ(MAD_DIMM_C1); + dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + /* Enable swizzling when the channels are populated + * with identically sized dimms. We don't need to check + * the 3rd channel because no cpu with gpu attached + * ships in that configuration. Also, swizzling only + * makes sense for 2 channels anyway. */ + if (dimm_c0 == dimm_c1) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + } + } else if (IS_GEN5(dev_priv)) { + /* On Ironlake whatever DRAM config, GPU always do + * same swizzling setup. 
+ */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if (IS_GEN2(dev_priv)) { + /* As far as we know, the 865 doesn't have these bit 6 + * swizzling issues. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) && + !IS_G33(dev_priv))) { + uint32_t dcc; + + /* On 9xx chipsets, channel interleave by the CPU is + * determined by DCC. For single-channel, neither the CPU + * nor the GPU do swizzling. For dual channel interleaved, + * the GPU's interleave is bit 9 and 10 for X tiled, and bit + * 9 for Y tiled. The CPU's interleave is independent, and + * can be based on either bit 11 (haven't seen this yet) or + * bit 17 (common). + */ + dcc = I915_READ(DCC); + switch (dcc & DCC_ADDRESSING_MODE_MASK) { + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + break; + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: + if (dcc & DCC_CHANNEL_XOR_DISABLE) { + /* This is the base swizzling by the GPU for + * tiled buffers. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { + /* Bit 11 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; + swizzle_y = I915_BIT_6_SWIZZLE_9_11; + } else { + /* Bit 17 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; + } + break; + } + + /* check for L-shaped memory aka modified enhanced addressing */ + if (IS_GEN4(dev_priv) && + !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + + if (dcc == 0xffffffff) { + DRM_ERROR("Couldn't read from MCHBAR. 
" + "Disabling tiling.\n"); + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + } else { + /* The 965, G33, and newer, have a very flexible memory + * configuration. It will enable dual-channel mode + * (interleaving) on as much memory as it can, and the GPU + * will additionally sometimes enable different bit 6 + * swizzling for tiled objects from the CPU. + * + * Here's what I found on the G965: + * slot fill memory size swizzling + * 0A 0B 1A 1B 1-ch 2-ch + * 512 0 0 0 512 0 O + * 512 0 512 0 16 1008 X + * 512 0 0 512 16 1008 X + * 0 512 0 512 16 1008 X + * 1024 1024 1024 0 2048 1024 O + * + * We could probably detect this based on either the DRB + * matching, which was the case for the swizzling required in + * the table above, or from the 1-ch value being less than + * the minimum size of a rank. + * + * Reports indicate that the swizzling actually + * varies depending upon page placement inside the + * channels, i.e. we see swizzled pages where the + * banks of memory are paired and unswizzled on the + * uneven portion, so leave that as unknown. + */ + if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } + } + + if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || + swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { + /* Userspace likes to explode if it sees unknown swizzling, + * so lie. We will finish the lie when reporting through + * the get-tiling-ioctl by reporting the physical swizzle + * mode as unknown instead. + * + * As we don't strictly know what the swizzling is, it may be + * bit17 dependent, and so we need to also prevent the pages + * from being moved. 
+ */ + dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + + dev_priv->mm.bit_6_swizzle_x = swizzle_x; + dev_priv->mm.bit_6_swizzle_y = swizzle_y; +} + +/* + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. + */ +static void +i915_gem_swizzle_page(struct page *page) +{ + char temp[64]; + char *vaddr; + int i; + + vaddr = kmap(page); + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); +} + +/** + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling + * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages + * + * This function fixes up the swizzling in case any page frame number for this + * object has changed in bit 17 since that state has been saved with + * i915_gem_object_save_bit_17_swizzle(). + * + * This is called when pinning backing storage again, since the kernel is free + * to move unpinned backing storage around (either by directly moving pages or + * by swapping them out and back in again). 
+ */ +void +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + struct sgt_iter sgt_iter; + struct page *page; + int i; + + if (obj->bit_17 == NULL) + return; + + i = 0; + for_each_sgt_page(page, sgt_iter, pages) { + char new_bit_17 = page_to_phys(page) >> 17; + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { + i915_gem_swizzle_page(page); + set_page_dirty(page); + } + i++; + } +} + +/** + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling + * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages + * + * This function saves the bit 17 of each page frame number so that swizzling + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must + * be called before the backing storage can be unpinned. + */ +void +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + const unsigned int page_count = obj->base.size >> PAGE_SHIFT; + struct sgt_iter sgt_iter; + struct page *page; + int i; + + if (obj->bit_17 == NULL) { + obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count), + sizeof(long), GFP_KERNEL); + if (obj->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + i = 0; + + for_each_sgt_page(page, sgt_iter, pages) { + if (page_to_phys(page) & (1 << 17)) + __set_bit(i, obj->bit_17); + else + __clear_bit(i, obj->bit_17); + i++; + } +} diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h new file mode 100644 index 0000000..22c4a2d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, 
merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_FENCE_REG_H__ +#define __I915_FENCE_REG_H__ + +#include <linux/list.h> + +struct drm_i915_private; +struct i915_vma; + +struct drm_i915_fence_reg { + struct list_head link; + struct drm_i915_private *i915; + struct i915_vma *vma; + int pin_count; + int id; + /** + * Whether the tiling parameters for the currently + * associated fence register have changed. Note that + * for the purposes of tracking tiling changes we also + * treat the unfenced register, the register slot that + * the object occupies whilst it executes a fenced + * command (such as BLT on gen2/3), as a "fence". 
+ */ + bool dirty; +}; + +#endif + diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a5fafa3..f60e5a7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -96,13 +96,6 @@ * */ -static inline struct i915_ggtt * -i915_vm_to_ggtt(struct i915_address_space *vm) -{ - GEM_BUG_ON(!i915_is_ggtt(vm)); - return container_of(vm, struct i915_ggtt, base); -} - static int i915_get_ggtt_vma_pages(struct i915_vma *vma); @@ -3348,176 +3341,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } -static void -i915_vma_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *rq) -{ - const unsigned int idx = rq->engine->id; - struct i915_vma *vma = - container_of(active, struct i915_vma, last_read[idx]); - struct drm_i915_gem_object *obj = vma->obj; - - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); - - i915_vma_clear_active(vma, idx); - if (i915_vma_is_active(vma)) - return; - - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); - - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - if (--obj->active_count) - return; - - /* Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) 
- */ - if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); - - obj->mm.dirty = true; /* be paranoid */ - - if (i915_gem_object_has_active_reference(obj)) { - i915_gem_object_clear_active_reference(obj); - i915_gem_object_put(obj); - } -} - -static void -i915_ggtt_retire__write(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - struct i915_vma *vma = - container_of(active, struct i915_vma, last_write); - - intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); -} - -void i915_vma_destroy(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); - GEM_BUG_ON(vma->fence); - - list_del(&vma->vm_link); - if (!i915_vma_is_ggtt(vma)) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); -} - -void i915_vma_close(struct i915_vma *vma) -{ - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; - - list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); - - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); -} - -static inline long vma_compare(struct i915_vma *vma, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - - if (vma->vm != vm) - return vma->vm - vm; - - if (!view) - return vma->ggtt_view.type; - - if (vma->ggtt_view.type != view->type) - return vma->ggtt_view.type - view->type; - - return memcmp(&vma->ggtt_view.params, - &view->params, - sizeof(view->params)); -} - -static struct i915_vma * -__i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - struct rb_node *rb, **p; - int i; - - GEM_BUG_ON(vm->closed); - - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); - if (vma == NULL) - return ERR_PTR(-ENOMEM); - - 
INIT_LIST_HEAD(&vma->exec_list); - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) - init_request_active(&vma->last_read[i], i915_vma_retire); - init_request_active(&vma->last_write, - i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); - init_request_active(&vma->last_fence, NULL); - list_add(&vma->vm_link, &vm->unbound_list); - vma->vm = vm; - vma->obj = obj; - vma->size = obj->base.size; - - if (view) { - vma->ggtt_view = *view; - if (view->type == I915_GGTT_VIEW_PARTIAL) { - vma->size = view->params.partial.size; - vma->size <<= PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_ROTATED) { - vma->size = - intel_rotation_info_size(&view->params.rotated); - vma->size <<= PAGE_SHIFT; - } - } - - if (i915_is_ggtt(vm)) { - vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - i915_ppgtt_get(i915_vm_to_ppgtt(vm)); - list_add_tail(&vma->obj_link, &obj->vma_list); - } - - rb = NULL; - p = &obj->vma_tree.rb_node; - while (*p) { - struct i915_vma *pos; - - rb = *p; - pos = rb_entry(rb, struct i915_vma, obj_node); - if (vma_compare(pos, vm, view) < 0) - p = &rb->rb_right; - else - p = &rb->rb_left; - } - rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); - - return vma; -} - -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); - - return __i915_vma_create(obj, vm, view); -} - struct i915_vma * i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -3530,7 +3353,7 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; - cmp = vma_compare(vma, vm, view); + cmp = i915_vma_compare(vma, vm, view); if (cmp == 0) return vma; @@ -3555,7 +3378,7 @@ 
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, vma = i915_gem_obj_to_vma(obj, vm, view); if (!vma) { - vma = __i915_vma_create(obj, vm, view); + vma = i915_vma_create(obj, vm, view); GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); } @@ -3747,99 +3570,3 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) return ret; } -/** - * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. - * @vma: VMA to map - * @cache_level: mapping cache level - * @flags: flags like global or local mapping - * - * DMA addresses are taken from the scatter-gather table of this object (or of - * this VMA in case of non-default GGTT views) and PTE entries set up. - * Note that DMA addresses are also the only part of the SG table we care about. - */ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) -{ - u32 bind_flags; - u32 vma_flags; - int ret; - - if (WARN_ON(flags == 0)) - return -EINVAL; - - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; - - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (flags & PIN_UPDATE) - bind_flags |= vma_flags; - else - bind_flags &= ~vma_flags; - if (bind_flags == 0) - return 0; - - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - - ret = vma->vm->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; - - vma->flags |= bind_flags; - return 0; -} - -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) -{ - void __iomem *ptr; - - /* Access through the GTT requires the device to be awake. 
*/ - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); - - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); - - ptr = vma->iomap; - if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, - vma->node.start, - vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); - - vma->iomap = ptr; - } - - __i915_vma_pin(vma); - return ptr; -} - -void i915_vma_unpin_and_release(struct i915_vma **p_vma) -{ - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - - vma = fetch_and_zero(p_vma); - if (!vma) - return; - - obj = vma->obj; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); -} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c23ef9d..57b5849 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -35,7 +35,9 @@ #define __I915_GEM_GTT_H__ #include <linux/io-mapping.h> +#include <linux/mm.h> +#include "i915_gem_timeline.h" #include "i915_gem_request.h" #define I915_FENCE_REG_NONE -1 @@ -138,6 +140,8 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +struct sg_table; + enum i915_ggtt_view_type { I915_GGTT_VIEW_NORMAL = 0, I915_GGTT_VIEW_ROTATED, @@ -168,135 +172,7 @@ extern const struct i915_ggtt_view i915_ggtt_view_rotated; enum i915_cache_level; -/** - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. 
- */ -struct i915_vma { - struct drm_mm_node node; - struct drm_i915_gem_object *obj; - struct i915_address_space *vm; - struct drm_i915_fence_reg *fence; - struct sg_table *pages; - void __iomem *iomap; - u64 size; - u64 display_alignment; - - unsigned int flags; - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. - */ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) - - /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) - -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) - - unsigned int active; - struct i915_gem_active last_read[I915_NUM_ENGINES]; - struct i915_gem_active last_write; - struct i915_gem_active last_fence; - - /** - * Support different GGTT views into the same object. - * This means there can be multiple VMA mappings per object and per VM. - * i915_ggtt_view_type is used to distinguish between those entries. - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also - * assumed in GEM functions which take no ggtt view parameter. 
- */ - struct i915_ggtt_view ggtt_view; - - /** This object's place on the active/inactive lists */ - struct list_head vm_link; - - struct list_head obj_link; /* Link in the object's VMA list */ - struct rb_node obj_node; - - /** This vma's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; - - /** - * Used for performing relocations during execbuffer insertion. - */ - struct hlist_node exec_node; - unsigned long exec_handle; - struct drm_i915_gem_exec_object2 *exec_entry; -}; - -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); -void i915_vma_unpin_and_release(struct i915_vma **p_vma); - -static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_GGTT; -} - -static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_CAN_FENCE; -} - -static inline bool i915_vma_is_closed(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_CLOSED; -} - -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) -{ - return vma->active; -} - -static inline bool i915_vma_is_active(const struct i915_vma *vma) -{ - return i915_vma_get_active(vma); -} - -static inline void i915_vma_set_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active |= BIT(engine); -} - -static inline void i915_vma_clear_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active &= ~BIT(engine); -} - -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, - unsigned int engine) -{ - return vma->active & BIT(engine); -} - -static inline u32 i915_ggtt_offset(const struct i915_vma *vma) -{ - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON(!vma->node.allocated); - GEM_BUG_ON(upper_32_bits(vma->node.start)); - GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); - return lower_32_bits(vma->node.start); -} +struct i915_vma; struct 
i915_page_dma { struct page *page; @@ -606,6 +482,13 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } +static inline struct i915_ggtt * +i915_vm_to_ggtt(struct i915_address_space *vm) +{ + GEM_BUG_ON(!i915_is_ggtt(vm)); + return container_of(vm, struct i915_ggtt, base); +} + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); @@ -653,88 +536,4 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, #define PIN_OFFSET_FIXED BIT(11) #define PIN_OFFSET_MASK (~4095) -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. 
- */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) - return 0; - - return __i915_vma_do_pin(vma, size, alignment, flags); -} - -static inline int i915_vma_pin_count(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_PIN_MASK; -} - -static inline bool i915_vma_is_pinned(const struct i915_vma *vma) -{ - return i915_vma_pin_count(vma); -} - -static inline void __i915_vma_pin(struct i915_vma *vma) -{ - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); -} - -static inline void __i915_vma_unpin(struct i915_vma *vma) -{ - GEM_BUG_ON(!i915_vma_is_pinned(vma)); - vma->flags--; -} - -static inline void i915_vma_unpin(struct i915_vma *vma) -{ - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - __i915_vma_unpin(vma); -} - -/** - * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture - * @vma: VMA to iomap - * - * The passed in VMA has to be pinned in the global GTT mappable region. - * An extra pinning of the VMA is acquired for the return iomapping, - * the caller must call i915_vma_unpin_iomap to relinquish the pinning - * after the iomapping is no longer required. - * - * Callers must hold the struct_mutex. - * - * Returns a valid iomapped pointer or ERR_PTR. - */ -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) - -/** - * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap - * @vma: VMA to unpin - * - * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). - * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). 
- */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} - -static inline struct page *i915_vma_first_page(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - return sg_page(vma->pages->sgl); -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h new file mode 100644 index 0000000..014f803 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -0,0 +1,337 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __I915_GEM_OBJECT_H__ +#define __I915_GEM_OBJECT_H__ + +#include <linux/reservation.h> + +#include <drm/drm_vma_manager.h> +#include <drm/drm_gem.h> +#include <drm/drmP.h> + +#include <drm/i915_drm.h> + +struct drm_i915_gem_object_ops { + unsigned int flags; +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 +#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 + + /* Interface between the GEM object and its backing storage. + * get_pages() is called once prior to the use of the associated set + * of pages before binding them into the GTT, and put_pages() is + * called after we no longer need them. As we expect there to be + * associated cost with migrating pages between the backing storage + * and making them available for the GPU (e.g. clflush), we may hold + * onto the pages after they are no longer referenced by the GPU + * in case they may be used again shortly (for example migrating the + * pages to a different memory domain within the GTT). put_pages() + * will therefore most likely be called when the object itself is + * being released or under memory pressure (where we attempt to + * reap pages for the shrinker). + */ + struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + + int (*dmabuf_export)(struct drm_i915_gem_object *); + void (*release)(struct drm_i915_gem_object *); +}; + +struct drm_i915_gem_object { + struct drm_gem_object base; + + const struct drm_i915_gem_object_ops *ops; + + /** List of VMAs backed by this object */ + struct list_head vma_list; + struct rb_root vma_tree; + + /** Stolen memory for this object, instead of being backed by shmem. */ + struct drm_mm_node *stolen; + struct list_head global_link; + union { + struct rcu_head rcu; + struct llist_node freed; + }; + + /** + * Whether the object is currently in the GGTT mmap. 
+ */ + struct list_head userfault_link; + + /** Used in execbuf to temporarily hold a ref */ + struct list_head obj_exec_link; + + struct list_head batch_pool_link; + + unsigned long flags; + + /** + * Have we taken a reference for the object for incomplete GPU + * activity? + */ +#define I915_BO_ACTIVE_REF 0 + + /* + * Is the object to be mapped as read-only to the GPU + * Only honoured if hardware has relevant pte bit + */ + unsigned long gt_ro:1; + unsigned int cache_level:3; + unsigned int cache_dirty:1; + + atomic_t frontbuffer_bits; + unsigned int frontbuffer_ggtt_origin; /* write once */ + + /** Current tiling stride for the object, if it's tiled. */ + unsigned int tiling_and_stride; +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) +#define STRIDE_MASK (~TILING_MASK) + + /** Count of VMA actually bound by this object */ + unsigned int bind_count; + unsigned int active_count; + unsigned int pin_display; + + struct { + struct mutex lock; /* protects the pages and their use */ + atomic_t pages_pin_count; + + struct sg_table *pages; + void *mapping; + + struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ + } get_page; + + /** + * Advice: are the backing pages purgeable? + */ + unsigned int madv:2; + + /** + * This is set if the object has been written to since the + * pages were last acquired. + */ + bool dirty:1; + + /** + * This is set if the object has been pinned due to unknown + * swizzling. + */ + bool quirked:1; + } mm; + + /** Breadcrumb of last rendering to the buffer. + * There can only be one writer, but we allow for multiple readers. + * If there is a writer that necessarily implies that all other + * read requests are complete - but we may only be lazily clearing + * the read requests. 
A read request is naturally the most recent + * request on a ring, so we may have two different write and read + * requests on one ring where the write request is older than the + * read request. This allows for the CPU to read from an active + * buffer by only waiting for the write to complete. + */ + struct reservation_object *resv; + + /** References from framebuffers, locks out tiling changes. */ + unsigned long framebuffer_references; + + /** Record of address bit 17 of each page at last unbind. */ + unsigned long *bit_17; + + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + /** for phys allocated objects */ + struct drm_dma_handle *phys_handle; + + struct reservation_object __builtin_resv; +}; + +static inline struct drm_i915_gem_object * +to_intel_bo(struct drm_gem_object *gem) +{ + /* Assert that to_intel_bo(NULL) == NULL */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); + + return container_of(gem, struct drm_i915_gem_object, base); +} + +/** + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle + * @file: DRM file private data + * @handle: userspace handle + * + * Returns: + * + * A pointer to the object named by the handle if such exists on @file, NULL + * otherwise. This object is only valid whilst under the RCU read lock, and + * note carefully the object may be in the process of being destroyed. 
+ */ +static inline struct drm_i915_gem_object * +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); +#endif + return idr_find(&file->object_idr, handle); +} + +static inline struct drm_i915_gem_object * +i915_gem_object_lookup(struct drm_file *file, u32 handle) +{ + struct drm_i915_gem_object *obj; + + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, handle); + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + rcu_read_unlock(); + + return obj; +} + +__deprecated +extern struct drm_gem_object * +drm_gem_object_lookup(struct drm_file *file, u32 handle); + +__attribute__((nonnull)) +static inline struct drm_i915_gem_object * +i915_gem_object_get(struct drm_i915_gem_object *obj) +{ + drm_gem_object_reference(&obj->base); + return obj; +} + +__deprecated +extern void drm_gem_object_reference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put(struct drm_i915_gem_object *obj) +{ + __drm_gem_object_unreference(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference(struct drm_gem_object *); + +__deprecated +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); + +static inline bool +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) +{ + return atomic_read(&obj->base.refcount.refcount) == 0; +} + +static inline bool +i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; +} + +static inline bool +i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; +} + +static inline bool +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return obj->active_count; +} + +static inline bool +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) +{ + return 
test_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __set_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); + +static inline unsigned int +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & TILING_MASK; +} + +static inline bool +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; +} + +static inline unsigned int +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & STRIDE_MASK; +} + +static inline struct intel_engine_cs * +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) +{ + struct intel_engine_cs *engine = NULL; + struct dma_fence *fence; + + rcu_read_lock(); + fence = reservation_object_get_excl_rcu(obj->resv); + rcu_read_unlock(); + + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + dma_fence_put(fence); + + return engine; +} + +#endif + diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0f69fad..a56559e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,9 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +struct drm_file; +struct drm_i915_gem_object; + struct intel_wait { struct rb_node node; struct task_struct *tsk; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c new file mode 100644 index 0000000..738ff3a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -0,0 
+1,650 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "i915_vma.h" + +#include "i915_drv.h" +#include "intel_ringbuffer.h" +#include "intel_frontbuffer.h" + +#include <drm/drm_gem.h> + +static void +i915_vma_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) +{ + const unsigned int idx = rq->engine->id; + struct i915_vma *vma = + container_of(active, struct i915_vma, last_read[idx]); + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); + + i915_vma_clear_active(vma, idx); + if (i915_vma_is_active(vma)) + return; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) + WARN_ON(i915_vma_unbind(vma)); + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + if (--obj->active_count) + return; + + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + if (obj->bind_count) + list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + + obj->mm.dirty = true; /* be paranoid */ + + if (i915_gem_object_has_active_reference(obj)) { + i915_gem_object_clear_active_reference(obj); + i915_gem_object_put(obj); + } +} + +static void +i915_ggtt_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + struct i915_vma *vma = + container_of(active, struct i915_vma, last_write); + + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); +} + +static struct i915_vma * +__i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + struct rb_node *rb, **p; + int i; + + GEM_BUG_ON(vm->closed); + + vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); + if (vma == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&vma->exec_list); + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) + init_request_active(&vma->last_read[i], 
i915_vma_retire); + init_request_active(&vma->last_write, + i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); + init_request_active(&vma->last_fence, NULL); + list_add(&vma->vm_link, &vm->unbound_list); + vma->vm = vm; + vma->obj = obj; + vma->size = obj->base.size; + + if (view) { + vma->ggtt_view = *view; + if (view->type == I915_GGTT_VIEW_PARTIAL) { + vma->size = view->params.partial.size; + vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_ROTATED) { + vma->size = + intel_rotation_info_size(&view->params.rotated); + vma->size <<= PAGE_SHIFT; + } + } + + if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; + list_add(&vma->obj_link, &obj->vma_list); + } else { + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + list_add_tail(&vma->obj_link, &obj->vma_list); + } + + rb = NULL; + p = &obj->vma_tree.rb_node; + while (*p) { + struct i915_vma *pos; + + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + if (i915_vma_compare(pos, vm, view) < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma_tree); + + return vma; +} + +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); + + return __i915_vma_create(obj, vm, view); +} + +/** + * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space. + * @vma: VMA to map + * @cache_level: mapping cache level + * @flags: flags like global or local mapping + * + * DMA addresses are taken from the scatter-gather table of this object (or of + * this VMA in case of non-default GGTT views) and PTE entries set up. + * Note that DMA addresses are also the only part of the SG table we care about. 
+ */ +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, + u32 flags) +{ + u32 bind_flags; + u32 vma_flags; + int ret; + + if (WARN_ON(flags == 0)) + return -EINVAL; + + bind_flags = 0; + if (flags & PIN_GLOBAL) + bind_flags |= I915_VMA_GLOBAL_BIND; + if (flags & PIN_USER) + bind_flags |= I915_VMA_LOCAL_BIND; + + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + if (flags & PIN_UPDATE) + bind_flags |= vma_flags; + else + bind_flags &= ~vma_flags; + if (bind_flags == 0) + return 0; + + if (vma_flags == 0 && vma->vm->allocate_va_range) { + trace_i915_va_alloc(vma); + ret = vma->vm->allocate_va_range(vma->vm, + vma->node.start, + vma->node.size); + if (ret) + return ret; + } + + ret = vma->vm->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + + vma->flags |= bind_flags; + return 0; +} + +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) +{ + void __iomem *ptr; + + /* Access through the GTT requires the device to be awake. 
*/ + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) + return IO_ERR_PTR(-ENODEV); + + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); + + ptr = vma->iomap; + if (ptr == NULL) { + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, + vma->node.start, + vma->node.size); + if (ptr == NULL) + return IO_ERR_PTR(-ENOMEM); + + vma->iomap = ptr; + } + + __i915_vma_pin(vma); + return ptr; +} + +void i915_vma_unpin_and_release(struct i915_vma **p_vma) +{ + struct i915_vma *vma; + struct drm_i915_gem_object *obj; + + vma = fetch_and_zero(p_vma); + if (!vma) + return; + + obj = vma->obj; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_release_unless_active(obj); +} + +bool +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + if (!drm_mm_node_allocated(&vma->node)) + return false; + + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) + return true; + + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) + return true; + + if (flags & PIN_OFFSET_BIAS && + vma->node.start < (flags & PIN_OFFSET_MASK)) + return true; + + if (flags & PIN_OFFSET_FIXED && + vma->node.start != (flags & PIN_OFFSET_MASK)) + return true; + + return false; +} + +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + bool mappable, fenceable; + u32 fence_size, fence_alignment; + + fence_size = i915_gem_get_ggtt_size(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj)); + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj), + true); + + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); + + mappable = (vma->node.start + 
fence_size <= + dev_priv->ggtt.mappable_end); + + /* + * Explicitly disable for rotated VMA since the display does not + * need the fence and the VMA is not accessible to other users. + */ + if (mappable && fenceable && + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) + vma->flags |= I915_VMA_CAN_FENCE; + else + vma->flags &= ~I915_VMA_CAN_FENCE; +} + +bool i915_gem_valid_gtt_space(struct i915_vma *vma, + unsigned long cache_level) +{ + struct drm_mm_node *gtt_space = &vma->node; + struct drm_mm_node *other; + + /* + * On some machines we have to be careful when putting differing types + * of snoopable memory together to avoid the prefetcher crossing memory + * domains and dying. During vm initialisation, we decide whether or not + * these constraints apply and set the drm_mm.color_adjust + * appropriately. + */ + if (vma->vm->mm.color_adjust == NULL) + return true; + + if (!drm_mm_node_allocated(gtt_space)) + return true; + + if (list_empty(&gtt_space->node_list)) + return true; + + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); + if (other->allocated && !other->hole_follows && other->color != cache_level) + return false; + + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) + return false; + + return true; +} + +/** + * i915_vma_insert - finds a slot for the vma in its address space + * @vma: the vma + * @size: requested size in bytes (can be larger than the VMA) + * @alignment: required alignment + * @flags: mask of PIN_* flags to use + * + * First we try to allocate some free space that meets the requirements for + * the VMA. Failing that, if the flags permit, it will evict an old VMA, + * preferably the oldest idle entry to make room for the new VMA. + * + * Returns: + * 0 on success, negative error code otherwise. 
+ */ +static int +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); + struct drm_i915_gem_object *obj = vma->obj; + u64 start, end; + int ret; + + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + size = max(size, vma->size); + if (flags & PIN_MAPPABLE) + size = i915_gem_get_ggtt_size(dev_priv, size, + i915_gem_object_get_tiling(obj)); + + alignment = max(max(alignment, vma->display_alignment), + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), + flags & PIN_MAPPABLE)); + + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + + end = vma->vm->total; + if (flags & PIN_MAPPABLE) + end = min_t(u64, end, dev_priv->ggtt.mappable_end); + if (flags & PIN_ZONE_4G) + end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); + + /* If binding the object/GGTT view requires more space than the entire + * aperture has, reject it early before evicting everything in a vain + * attempt to find space. + */ + if (size > end) { + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", + size, obj->base.size, + flags & PIN_MAPPABLE ? 
"mappable" : "total", + end); + return -E2BIG; + } + + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + if (flags & PIN_OFFSET_FIXED) { + u64 offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1) || offset > end - size) { + ret = -EINVAL; + goto err_unpin; + } + + vma->node.start = offset; + vma->node.size = size; + vma->node.color = obj->cache_level; + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; + } + } else { + u32 search_flag, alloc_flag; + + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } + + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. 
+ */ + if (alignment <= 4096) + alignment = 0; + +search_free: + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, + size, alignment, + obj->cache_level, + start, end, + search_flag, + alloc_flag); + if (ret) { + ret = i915_gem_evict_something(vma->vm, size, alignment, + obj->cache_level, + start, end, + flags); + if (ret == 0) + goto search_free; + + goto err_unpin; + } + + GEM_BUG_ON(vma->node.start < start); + GEM_BUG_ON(vma->node.start + vma->node.size > end); + } + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); + + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->bind_count++; + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); + return ret; +} + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) +{ + unsigned int bound = vma->flags; + int ret; + + lockdep_assert_held(&vma->vm->dev->struct_mutex); + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; + } + + if ((bound & I915_VMA_BIND_MASK) == 0) { + ret = i915_vma_insert(vma, size, alignment, flags); + if (ret) + goto err; + } + + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); + if (ret) + goto err; + + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) + __i915_vma_set_map_and_fenceable(vma); + + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); + return 0; + +err: + __i915_vma_unpin(vma); + return ret; +} + +void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(!i915_vma_is_closed(vma)); + GEM_BUG_ON(vma->fence); + + list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + 
kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + +void i915_vma_close(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); + + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) + WARN_ON(i915_vma_unbind(vma)); +} + +static void __i915_vma_iounmap(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (vma->iomap == NULL) + return; + + io_mapping_unmap(vma->iomap); + vma->iomap = NULL; +} + +int i915_vma_unbind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + unsigned long active; + int ret; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. + */ + active = i915_vma_get_active(vma); + if (active) { + int idx; + + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. + * + * Even more scary is that the retire callback may free + * the object (last active vma). To prevent the explosion + * we defer the actual object free to a worker that can + * only proceed once it acquires the struct_mutex (which + * we currently hold, therefore it cannot free this object + * before we are finished). 
+ */ + __i915_vma_pin(vma); + + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + break; + } + + __i915_vma_unpin(vma); + if (ret) + return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); + } + + if (i915_vma_is_pinned(vma)) + return -EBUSY; + + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + if (i915_vma_is_map_and_fenceable(vma)) { + /* release the fence reg _after_ flushing */ + ret = i915_vma_put_fence(vma); + if (ret) + return ret; + + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + + __i915_vma_iounmap(vma); + vma->flags &= ~I915_VMA_CAN_FENCE; + } + + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + if (vma->pages != obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. 
+ */ + i915_gem_object_unpin_pages(obj); + +destroy: + if (unlikely(i915_vma_is_closed(vma))) + i915_vma_destroy(vma); + + return 0; +} + diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h new file mode 100644 index 0000000..d358b30 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -0,0 +1,342 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_VMA_H__ +#define __I915_VMA_H__ + +#include <linux/io-mapping.h> + +#include <drm/drm_mm.h> + +#include "i915_gem_gtt.h" +#include "i915_gem_fence_reg.h" +#include "i915_gem_object.h" +#include "i915_gem_request.h" + + +enum i915_cache_level; + +/** + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. 
+ * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + struct drm_i915_fence_reg *fence; + struct sg_table *pages; + void __iomem *iomap; + u64 size; + u64 display_alignment; + + unsigned int flags; + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, execbuffer + * (objects are not allowed multiple times for the same batchbuffer), + * and the framebuffer code. When switching/pageflipping, the + * framebuffer code has at most two buffers pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ +#define I915_VMA_PIN_MASK 0xf +#define I915_VMA_PIN_OVERFLOW BIT(5) + + /** Flags and address space this VMA is bound to */ +#define I915_VMA_GLOBAL_BIND BIT(6) +#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) + +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CAN_FENCE BIT(9) +#define I915_VMA_CLOSED BIT(10) + + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; + + /** + * Support different GGTT views into the same object. + * This means there can be multiple VMA mappings per object and per VM. + * i915_ggtt_view_type is used to distinguish between those entries. + * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also + * assumed in GEM functions which take no ggtt view parameter. 
+ */ + struct i915_ggtt_view ggtt_view; + + /** This object's place on the active/inactive lists */ + struct list_head vm_link; + + struct list_head obj_link; /* Link in the object's VMA list */ + struct rb_node obj_node; + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + + /** + * Used for performing relocations during execbuffer insertion. + */ + struct hlist_node exec_node; + unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; +}; + +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); + +static inline long +i915_vma_compare(struct i915_vma *vma, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + GEM_BUG_ON(view && !i915_vma_is_ggtt(vma)); /* NOTE(review): i915_vma_is_ggtt() is only defined further down in this header; confirm a declaration is visible before this use. */ + + if (vma->vm != vm) + return vma->vm - vm; + + if (!view) + return vma->ggtt_view.type; + + if (vma->ggtt_view.type != view->type) + return vma->ggtt_view.type - view->type; + + return memcmp(&vma->ggtt_view.params, + &view->params, + sizeof(view->params)); +} + +void i915_vma_unpin_and_release(struct i915_vma **p_vma); + +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT; +} + +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CAN_FENCE; +} + +static inline bool i915_vma_is_closed(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CLOSED; +} + +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) +{ + return vma->active; +} + +static inline bool i915_vma_is_active(const struct i915_vma *vma) +{ + return i915_vma_get_active(vma); +} + +static inline void i915_vma_set_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active |= BIT(engine); +} + +static inline void i915_vma_clear_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active &= ~BIT(engine); +} + +static inline bool 
i915_vma_has_active_engine(const struct i915_vma *vma, + unsigned int engine) +{ + return vma->active & BIT(engine); +} + +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(upper_32_bits(vma->node.start)); + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); + return lower_32_bits(vma->node.start); +} + +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) +{ + i915_gem_object_get(vma->obj); + return vma; +} + +static inline void i915_vma_put(struct i915_vma *vma) +{ + i915_gem_object_put(vma->obj); +} + +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, + u32 flags); +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); +bool +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +int __must_check i915_vma_unbind(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. 
+ */ + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + return 0; + + return __i915_vma_do_pin(vma, size, alignment, flags); +} + +static inline int i915_vma_pin_count(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_PIN_MASK; +} + +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) +{ + return i915_vma_pin_count(vma); +} + +static inline void __i915_vma_pin(struct i915_vma *vma) +{ + vma->flags++; + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); +} + +static inline void __i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + vma->flags--; +} + +static inline void i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + __i915_vma_unpin(vma); +} + +/** + * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture + * @vma: VMA to iomap + * + * The passed in VMA has to be pinned in the global GTT mappable region. + * An extra pinning of the VMA is acquired for the returned iomapping; + * the caller must call i915_vma_unpin_iomap() to relinquish the pinning + * after the iomapping is no longer required. + * + * Callers must hold the struct_mutex. + * + * Returns a valid iomapped pointer or ERR_PTR. + */ +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) + +/** + * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_pin_iomap + * @vma: VMA to unpin + * + * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). + * + * Callers must hold the struct_mutex. This function is only valid to be + * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). 
+ */ +static inline void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + GEM_BUG_ON(vma->iomap == NULL); + i915_vma_unpin(vma); +} + +static inline struct page *i915_vma_first_page(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + return sg_page(vma->pages->sgl); +} + +/** + * i915_vma_pin_fence - pin fencing state + * @vma: vma to pin fencing for + * + * This pins the fencing state (whether tiled or untiled) to make sure the + * vma (and its object) is ready to be used as a scanout target. Fencing + * status must be synchronized first by calling i915_vma_get_fence(). + * + * The resulting fence pin reference must be released again with + * i915_vma_unpin_fence(). + * + * Returns: + * + * True if the vma has a fence, false otherwise. + */ +static inline bool +i915_vma_pin_fence(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (vma->fence) { + vma->fence->pin_count++; + return true; + } else + return false; +} + +/** + * i915_vma_unpin_fence - unpin fencing state + * @vma: vma to unpin fencing for + * + * This releases the fence pin reference acquired through + * i915_vma_pin_fence(). It handles VMAs both with and without an + * attached fence; callers do not need to distinguish between the two. + */ +static inline void +i915_vma_unpin_fence(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (vma->fence) { + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; + } +} + +#endif +

[v3] drm/i915: Split out i915_vma.c

Commit Message

Comments

Patch