[05/33] drm/i915/gem: Prepare gen7 cmdparser for async execution

Message ID	20191212140459.1307617-5-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=0W9h=2C=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7AB022173E From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Thu, 12 Dec 2019 14:04:31 +0000 Message-Id: <20191212140459.1307617-5-chris@chris-wilson.co.uk> In-Reply-To: <20191212140459.1307617-1-chris@chris-wilson.co.uk> References: <20191212140459.1307617-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 05/33] drm/i915/gem: Prepare gen7 cmdparser for async execution Precedence: list Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/33] drm/i915: Use EAGAIN for trylock failures \| expand [01/33] drm/i915: Use EAGAIN for trylock failures [02/33] drm/i915: Eliminate the trylock for awaiting an earlier request [03/33] drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp [04/33] drm/i915/uc: Ignore maybe-unused debug-only i915 local [05/33] drm/i915/gem: Prepare gen7 cmdparser for async execution [06/33] drm/i915/gem: Asynchronous cmdparser [07/33] drm/i915/gt: Mark up ips_mchdev pointer access [08/33] drm/i915/gt: Mark context->state vma as active while pinned [09/33] drm/i915/gt: Mark ring->vma as active while pinned [10/33] drm/i915/selftests: Disable heartbeats around long queues [11/33] drm/i915/selftests: Impose a timeout for request submission [12/33] drm/i915/gt: Remove direct invocation of breadcrumb signaling [13/33] drm/i915: Tweak scheduler's kick_submission() [14/33] drm/i915: Flush idle barriers when waiting [15/33] drm/i915: Allow userspace to specify ringsize on construction [16/33] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions [17/33] drm/i915/gt: Teach veng to defer the context allocation [18/33] drm/i915: Drop GEM context as a direct link from i915_request [19/33] drm/i915: Push the use-semaphore marker onto the intel_context [20/33] drm/i915: Remove i915->kernel_context [21/33] drm/i915: Move i915_gem_init_contexts() earlier [22/33] drm/i915/uc: Use an internal buffer for firmware images [23/33] drm/i915/gt: Pull GT initialisation under intel_gt_init() [24/33] drm/i915/gt: Pull intel_gt_init_hw() into intel_gt_resume() [25/33] drm/i915/gt: Merge engine init/setup loops [26/33] drm/i915/gt: Expose engine properties via sysfs [27/33] drm/i915/gt: Expose engine->mmio_base via sysfs [28/33] drm/i915/gt: Expose timeslice duration to sysfs [29/33] drm/i915/gt: Expose busywait duration to sysfs [30/33] drm/i915/gt: Expose reset stop timeout via sysfs [31/33] drm/i915/gt: Expose preempt reset timeout via sysfs [32/33] drm/i915/gt: Expose heartbeat interval via sysfs [33/33] HAX: Use aliasing-ppgtt for gen7

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 13f88fc536c7..4e546b6fff8e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -228,6 +228,7 @@ struct i915_execbuffer { struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ + struct i915_vma *trampoline; /** trampoline used for chaining */ /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; @@ -1946,31 +1947,13 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) } static struct i915_vma * -shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +shadow_batch_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + unsigned int flags) { - struct i915_address_space *vm; struct i915_vma *vma; - u64 flags; int err; - /* - * PPGTT backed shadow buffers must be mapped RO, to prevent - * post-scan tampering - */ - if (CMDPARSER_USES_GGTT(eb->i915)) { - vm = &eb->engine->gt->ggtt->vm; - flags = PIN_GLOBAL; - } else { - vm = eb->context->vm; - if (!vm->has_read_only) { - DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); - return ERR_PTR(-EINVAL); - } - - i915_gem_object_set_readonly(obj); - flags = PIN_USER; - } - vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) return vma; @@ -1985,59 +1968,80 @@ shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) static int eb_parse(struct i915_execbuffer *eb) { struct intel_engine_pool_node *pool; - struct i915_vma *vma; + struct i915_vma *shadow, *trampoline; + unsigned int len; int err; if (!eb_use_cmdparser(eb)) return 0; - pool = intel_engine_get_pool(eb->engine, eb->batch_len); + len = eb->batch_len; + if (!CMDPARSER_USES_GGTT(eb->i915)) { + /* + * ppGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (!eb->context->vm->has_read_only) { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return -EINVAL; + } + } else { + len += I915_CMD_PARSER_TRAMPOLINE_SIZE; + } + + pool = intel_engine_get_pool(eb->engine, len); if (IS_ERR(pool)) return PTR_ERR(pool); - vma = shadow_batch_pin(eb, pool->obj); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); + shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); goto err; } + i915_gem_object_set_readonly(shadow->obj); + + trampoline = NULL; + if (CMDPARSER_USES_GGTT(eb->i915)) { + trampoline = shadow; + + shadow = shadow_batch_pin(pool->obj, + &eb->engine->gt->ggtt->vm, + PIN_GLOBAL); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); + shadow = trampoline; + goto err_shadow; + } + + eb->batch_flags |= I915_DISPATCH_SECURE; + } err = intel_engine_cmd_parser(eb->engine, eb->batch, eb->batch_start_offset, eb->batch_len, - vma); - if (err) { - /* - * Unsafe GGTT-backed buffers can still be submitted safely - * as non-secure. - * For PPGTT backing however, we have no choice but to forcibly - * reject unsafe buffers - */ - if (i915_vma_is_ggtt(vma) && err == -EACCES) - err = 0; - - goto err_unpin; - } + shadow, trampoline); + if (err) + goto err_trampoline; - eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->vma[eb->buffer_count] = i915_vma_get(shadow); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - vma->exec_flags = &eb->flags[eb->buffer_count]; + shadow->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->trampoline = trampoline; eb->batch_start_offset = 0; - eb->batch = vma; - - if (i915_vma_is_ggtt(vma)) - eb->batch_flags |= I915_DISPATCH_SECURE; - - /* eb->batch_len unchanged */ + eb->batch = shadow; - vma->private = pool; + shadow->private = pool; return 0; -err_unpin: - i915_vma_unpin(vma); +err_trampoline: + if (trampoline) + i915_vma_unpin(trampoline); +err_shadow: + i915_vma_unpin(shadow); err: intel_engine_pool_put(pool); return err; @@ -2089,6 +2093,16 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (eb->trampoline) { + GEM_BUG_ON(eb->batch_start_offset); + err = eb->engine->emit_bb_start(eb->request, + eb->trampoline->node.start + + eb->batch_len, + 0, 0); + if (err) + return err; + } + if (i915_gem_context_nopreempt(eb->gem_context)) eb->request->flags |= I915_REQUEST_NOPREEMPT; @@ -2470,6 +2484,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.trampoline = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { @@ -2667,6 +2682,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_vma: if (eb.exec) eb_release_vmas(&eb); + if (eb.trampoline) + i915_vma_unpin(eb.trampoline); mutex_unlock(&dev->struct_mutex); err_engine: eb_unpin_engine(&eb); diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index c68c0e033f30..51b8718513bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -340,4 +340,12 @@ static inline u64 gen8_noncanonical_addr(u64 address) return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); } +static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags) +{ + *cs++ = MI_BATCH_BUFFER_START | flags; + *cs++ = addr; + + return cs; +} + #endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index ddc8bf5175d9..34b0ea403a96 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1362,8 +1362,7 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -static unsigned long * -alloc_whitelist(struct drm_i915_private *i915, u32 batch_length) +static unsigned long *alloc_whitelist(u32 batch_length) { unsigned long *jmp; @@ -1373,9 +1372,6 @@ alloc_whitelist(struct drm_i915_private *i915, u32 batch_length) * reasonably cheap due to kmalloc caches. */ - if (CMDPARSER_USES_GGTT(i915)) - return NULL; - /* Prefer to report transient allocation failure rather than hit oom */ jmp = bitmap_zalloc(DIV_ROUND_UP(batch_length, sizeof(u32)), GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); @@ -1394,6 +1390,7 @@ alloc_whitelist(struct drm_i915_private *i915, u32 batch_length) * @batch_offset: byte offset in the batch at which execution starts * @batch_length: length of the commands in batch_obj * @shadow: validated copy of the batch buffer in question + * @trampoline: whether to emit a conditional trampoline at the end of the batch * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1406,7 +1403,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, u32 batch_offset, u32 batch_length, - struct i915_vma *shadow) + struct i915_vma *shadow, + bool trampoline) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; @@ -1430,8 +1428,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, return PTR_ERR(cmd); } - /* Defer failure until attempted use */ - jump_whitelist = alloc_whitelist(engine->i915, batch_length); + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1493,6 +1493,47 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, } } while (1); + if (trampoline) { + /* + * With the trampoline, the shadow is executed twice. + * + * 1 - starting at offset 0, in privileged mode + * 2 - starting at offset batch_len, as non-privileged + * + * Only if the batch is valid and safe to execute, do we + * allow the first privileged execution to proceed. If not, + * we terminate the first batch and use the second batchbuffer + * entry to chain to the original unsafe non-privileged batch, + * leaving it to the HW to validate. + */ + *batch_end = MI_BATCH_BUFFER_END; + + if (ret) { + /* Batch unsafe to execute with privileges, cancel! */ + cmd = page_mask_bits(shadow->obj->mm.mapping); + *cmd = MI_BATCH_BUFFER_END; + + /* If batch is unsafe but valid, jump to the original */ + if (ret == -EACCES) { + unsigned int flags; + + flags = MI_BATCH_NON_SECURE_I965; + if (IS_HASWELL(engine->i915)) + flags = MI_BATCH_NON_SECURE_HSW; + + GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7)); + __gen6_emit_bb_start(batch_end, + batch_addr, + flags); + + ret = 0; /* allow execution */ + } + } + + if (needs_clflush_after) + drm_clflush_virt_range(batch_end, 8); + } + if (needs_clflush_after) { void *ptr = page_mask_bits(shadow->obj->mm.mapping); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45f31197b390..0781b6326b8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1949,7 +1949,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, u32 batch_offset, u32 batch_length, - struct i915_vma *shadow); + struct i915_vma *shadow, + bool trampoline); +#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ static inline struct intel_device_info *

[05/33] drm/i915/gem: Prepare gen7 cmdparser for async execution

Commit Message

Patch