[RFC,25/29] drm/i915: gvt: vGPU execlist workload submission

Message ID 1453976511-27322-26-git-send-email-zhi.a.wang@intel.com (mailing list archive)
State New, archived

Commit Message

Wang, Zhi A Jan. 28, 2016, 10:21 a.m. UTC
This patch introduces the vGPU execlist workload submission logic.

In a virtualized environment, the guest submits workloads through the
virtual execlist submit port (ELSP). Each submission is wrapped into a
GVT workload, which is queued on the target ring, picked up by the GVT
workload scheduler, and executed on the host i915 later.
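
From the guest's point of view, one submission is four dwords written to
the ring's ELSP register. A minimal sketch of that sequence, assuming the
gen8 write order used by host i915 (element 1 first, upper dword before
lower); write_elsp() is a hypothetical stand-in for the guest driver's
MMIO write helper:

	/*
	 * Illustration only: guest-side ELSP submission. The dwords land
	 * in gvt_execlist_write_bundle.data[0..3] in arrival order, so
	 * descriptor 1 occupies data[0..1] and descriptor 0 occupies
	 * data[2..3], matching gvt_execlist_elsp_submit() below.
	 */
	static void guest_submit_execlist(u64 desc0, u64 desc1)
	{
		write_elsp(upper_32_bits(desc1));	/* data[0] */
		write_elsp(lower_32_bits(desc1));	/* data[1] */
		write_elsp(upper_32_bits(desc0));	/* data[2] */
		write_elsp(lower_32_bits(desc0));	/* data[3] */
	}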

Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c  | 264 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/execlist.h  |  53 +++++++
 drivers/gpu/drm/i915/gvt/gvt.h       |   4 +
 drivers/gpu/drm/i915/gvt/handlers.c  |  30 +++-
 drivers/gpu/drm/i915/gvt/scheduler.h |  59 ++++++++
 5 files changed, 405 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gvt/scheduler.h

Patch

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 4d49d00..8d9dd1e 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -312,6 +312,266 @@  static bool emulate_execlist_schedule_in(struct gvt_virtual_execlist_info *info,
 	return true;
 }
 
+static bool execlist_workload_complete(struct gvt_workload *workload)
+{
+	struct vgt_device *vgt = workload->vgt;
+	struct gvt_virtual_execlist_info *info =
+		&vgt->virtual_execlist_info[workload->ring_id];
+	struct gvt_workload *next_workload;
+	struct list_head *next = workload_q_head(vgt, workload->ring_id)->next;
+	bool lite_restore = false;
+
+	gvt_dbg_el("complete workload %p status %d", workload, workload->status);
+
+	if (workload->status)
+		goto out;
+
+	if (!list_empty(workload_q_head(vgt, workload->ring_id))) {
+		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
+
+		next_workload = container_of(next, struct gvt_workload, list);
+		this_desc = &workload->ctx_desc;
+		next_desc = &next_workload->ctx_desc;
+
+		lite_restore = same_context(this_desc, next_desc);
+	}
+
+	if (lite_restore) {
+		gvt_dbg_el("next workload context is same as current - no schedule-out");
+		goto out;
+	}
+
+	if (!emulate_execlist_ctx_schedule_out(info, &workload->ctx_desc)) {
+		kfree(workload);
+		return false;
+	}
+
+out:
+	gvt_destroy_mm(workload->shadow_mm);
+	kfree(workload);
+	return true;
+}
+
+void gvt_get_context_pdp_root_pointer(struct vgt_device *vgt,
+		struct execlist_ring_context *ring_context,
+		u32 pdp[8])
+{
+	struct gvt_execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
+	u32 v;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		hypervisor_read_va(vgt, &pdp_pair[i].val, &v, 4, 1);
+		pdp[7 - i] = v;
+	}
+}
+
+void gvt_set_context_pdp_root_pointer(struct vgt_device *vgt,
+		struct execlist_ring_context *ring_context,
+		u32 pdp[8])
+{
+	struct gvt_execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		pdp_pair[i].val = pdp[7 - i];
+}
+
+static struct execlist_ring_context *get_ring_context(struct vgt_device *vgt,
+		u32 lrca)
+{
+	struct execlist_ring_context *context;
+	u32 gma = (lrca + 1) << GTT_PAGE_SHIFT;
+
+	context = (struct execlist_ring_context *)
+		gvt_gma_to_va(vgt->gtt.ggtt_mm, gma);
+
+	return context;
+}
+
+static bool prepare_workload(struct gvt_workload *workload)
+{
+	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
+	struct gvt_mm *mm;
+	gtt_type_t root_entry_type;
+	int page_table_level;
+	u32 pdp[8];
+
+	if (desc->addressing_mode == 1) { /* legacy 32-bit */
+		page_table_level = 3;
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
+	} else if (desc->addressing_mode == 3) { /* legacy 64-bit */
+		page_table_level = 4;
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+	} else {
+		gvt_err("Advanced Context mode(SVM) is not supported!\n");
+		return false;
+	}
+
+	gvt_get_context_pdp_root_pointer(workload->vgt, workload->ring_context, pdp);
+
+	mm = gvt_create_mm(workload->vgt, GVT_MM_PPGTT, root_entry_type,
+			pdp, page_table_level, 0);
+	if (!mm) {
+		gvt_err("fail to create mm object.\n");
+		return false;
+	}
+
+	workload->shadow_mm = mm;
+
+	return true;
+}
+
+static bool submit_context(struct vgt_device *vgt, int ring_id,
+		struct execlist_ctx_descriptor_format *desc)
+{
+	struct list_head *q = workload_q_head(vgt, ring_id);
+	struct gvt_workload *last_workload = list_empty(q) ? NULL :
+			container_of(q->prev, struct gvt_workload, list);
+	struct gvt_workload *workload = NULL;
+
+	struct execlist_ring_context *ring_context = get_ring_context(
+			vgt, desc->lrca);
+
+	u32 head, tail, start, ctl;
+
+	if (!ring_context) {
+		gvt_err("invalid guest context LRCA: %x", desc->lrca);
+		return false;
+	}
+
+	hypervisor_read_va(vgt, &ring_context->ring_header.val,
+			&head, 4, 1);
+
+	hypervisor_read_va(vgt, &ring_context->ring_tail.val,
+			&tail, 4, 1);
+
+	head &= RB_HEAD_OFF_MASK;
+	tail &= RB_TAIL_OFF_MASK;
+
+	if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
+		gvt_dbg_el("ring id %d same workload as last workload", ring_id);
+		if (last_workload->dispatched) {
+			gvt_dbg_el("ring id %d last workload has been dispatched",
+					ring_id);
+			gvt_dbg_el("ctx head %x real head %lx",
+					head, last_workload->rb_tail);
+			/*
+			 * cannot use guest context head pointer here,
+			 * as it might not be updated at this time
+			 */
+			head = last_workload->rb_tail;
+		} else {
+			gvt_dbg_el("ring id %d merged into last workload", ring_id);
+			/*
+			 * If the last workload hasn't been dispatched yet
+			 * (scanned + shadowed) and the current submission
+			 * uses the same context, simply merge this
+			 * submission into the last workload.
+			 */
+			last_workload->rb_tail = tail;
+			return true;
+		}
+	}
+
+	gvt_dbg_el("ring id %d begin a new workload", ring_id);
+
+	workload = kzalloc(sizeof(*workload), GFP_KERNEL);
+	if (!workload) {
+		gvt_err("fail to allocate memory for workload");
+		return false;
+	}
+
+	/* record some ring buffer register values for scan and shadow */
+	hypervisor_read_va(vgt, &ring_context->rb_start.val,
+			&start, 4, 1);
+	hypervisor_read_va(vgt, &ring_context->rb_ctrl.val,
+			&ctl, 4, 1);
+
+	INIT_LIST_HEAD(&workload->list);
+
+	init_waitqueue_head(&workload->shadow_ctx_status_wq);
+	atomic_set(&workload->shadow_ctx_active, 0);
+
+	workload->vgt = vgt;
+	workload->ring_id = ring_id;
+	workload->ctx_desc = *desc;
+	workload->ring_context = ring_context;
+	workload->rb_head = head;
+	workload->rb_tail = tail;
+	workload->rb_start = start;
+	workload->rb_ctl = ctl;
+	workload->complete = execlist_workload_complete;
+	workload->status = -EINPROGRESS;
+
+	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x",
+			workload, ring_id, head, tail, start, ctl);
+
+	if (!prepare_workload(workload)) {
+		kfree(workload);
+		return false;
+	}
+
+	queue_workload(workload);
+
+	return true;
+}
+
+bool gvt_execlist_elsp_submit(struct vgt_device *vgt, int ring_id)
+{
+	struct gvt_virtual_execlist_info *info =
+		&vgt->virtual_execlist_info[ring_id];
+	struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
+	unsigned long valid_desc_bitmap = 0;
+	int i;
+
+	memset(valid_desc, 0, sizeof(valid_desc));
+
+	desc[0] = (struct execlist_ctx_descriptor_format *)&info->bundle.data[2];
+	desc[1] = (struct execlist_ctx_descriptor_format *)&info->bundle.data[0];
+
+	for (i = 0; i < 2; i++) {
+		if (!desc[i]->valid)
+			continue;
+
+		if (!desc[i]->privilege_access) {
+			gvt_err("[vgt %d] unexpected GGTT elsp submission", vgt->id);
+			return false;
+		}
+
+		/* TODO: add more guest context checks here. */
+		set_bit(i, &valid_desc_bitmap);
+		valid_desc[i] = *desc[i];
+	}
+
+	if (!valid_desc_bitmap) {
+		gvt_err("[vgt %d] no valid desc in a elsp submission",
+				vgt->id);
+		return false;
+	}
+
+	if (!test_bit(0, (void *)&valid_desc_bitmap) &&
+			test_bit(1, (void *)&valid_desc_bitmap)) {
+		gvt_err("[vgt %d] weird elsp submission, desc 0 is not valid",
+				vgt->id);
+		return false;
+	}
+
+	if (!emulate_execlist_schedule_in(info, valid_desc)) {
+		gvt_err("[vgt %d] fail to emulate execlist schedule-in", vgt->id);
+		return false;
+	}
+
+	/* submit workload */
+	for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
+		if (!submit_context(vgt, ring_id, &valid_desc[i])) {
+			gvt_err("[vgt %d] fail to schedule workload", vgt->id);
+			return false;
+		}
+	}
+	return true;
+}
+
 static bool init_virtual_execlist_info(struct vgt_device *vgt,
 		int ring_id, struct gvt_virtual_execlist_info *info)
 {
@@ -325,6 +585,8 @@  static bool init_virtual_execlist_info(struct vgt_device *vgt,
 	info->execlist[0].index = 0;
 	info->execlist[1].index = 1;
 
+	INIT_LIST_HEAD(&info->workload_q_head);
+
 	ctx_status_ptr_reg = execlist_ring_mmio(info->ring_id,
 			_EL_OFFSET_STATUS_PTR);
 
@@ -339,6 +601,8 @@  bool gvt_init_virtual_execlist_info(struct vgt_device *vgt)
 {
 	int i;
 
+	atomic_set(&vgt->running_workload_num, 0);
+
 	/* each ring has a virtual execlist engine */
 	for (i = 0; i < I915_NUM_RINGS; i++)
 		init_virtual_execlist_info(vgt,
diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h
index bcd5a9e..1b7b4e6 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.h
+++ b/drivers/gpu/drm/i915/gvt/execlist.h
@@ -107,20 +107,73 @@  struct execlist_context_status_format {
 	};
 };
 
+struct gvt_execlist_mmio_pair {
+	u32 addr;
+	u32 val;
+};
+
+/* The first 52 dwords in register state context */
+struct execlist_ring_context {
+	u32 nop1;
+	u32 lri_cmd_1;
+	struct gvt_execlist_mmio_pair ctx_ctrl;
+	struct gvt_execlist_mmio_pair ring_header;
+	struct gvt_execlist_mmio_pair ring_tail;
+	struct gvt_execlist_mmio_pair rb_start;
+	struct gvt_execlist_mmio_pair rb_ctrl;
+	struct gvt_execlist_mmio_pair bb_cur_head_UDW;
+	struct gvt_execlist_mmio_pair bb_cur_head_LDW;
+	struct gvt_execlist_mmio_pair bb_state;
+	struct gvt_execlist_mmio_pair second_bb_addr_UDW;
+	struct gvt_execlist_mmio_pair second_bb_addr_LDW;
+	struct gvt_execlist_mmio_pair second_bb_state;
+	struct gvt_execlist_mmio_pair bb_per_ctx_ptr;
+	struct gvt_execlist_mmio_pair rcs_indirect_ctx;
+	struct gvt_execlist_mmio_pair rcs_indirect_ctx_offset;
+	u32 nop2;
+	u32 nop3;
+	u32 nop4;
+	u32 lri_cmd_2;
+	struct gvt_execlist_mmio_pair ctx_timestamp;
+	struct gvt_execlist_mmio_pair pdp3_UDW;
+	struct gvt_execlist_mmio_pair pdp3_LDW;
+	struct gvt_execlist_mmio_pair pdp2_UDW;
+	struct gvt_execlist_mmio_pair pdp2_LDW;
+	struct gvt_execlist_mmio_pair pdp1_UDW;
+	struct gvt_execlist_mmio_pair pdp1_LDW;
+	struct gvt_execlist_mmio_pair pdp0_UDW;
+	struct gvt_execlist_mmio_pair pdp0_LDW;
+};
+
 struct gvt_execlist_state {
 	struct execlist_ctx_descriptor_format ctx[2];
 	u32 index;
 };
 
+struct gvt_execlist_write_bundle {
+	u32 data[4];
+	u32 index;
+};
+
 struct gvt_virtual_execlist_info {
 	struct gvt_execlist_state execlist[2];
 	struct gvt_execlist_state *running_execlist;
 	struct gvt_execlist_state *pending_execlist;
 	struct execlist_ctx_descriptor_format *running_context;
+	struct gvt_execlist_write_bundle bundle;
 	int ring_id;
 	struct vgt_device *vgt;
+	struct list_head workload_q_head;
 };
 
 bool gvt_init_virtual_execlist_info(struct vgt_device *vgt);
 
+void gvt_get_context_pdp_root_pointer(struct vgt_device *vgt,
+		struct execlist_ring_context *ring_context,
+		u32 pdp[8]);
+
+void gvt_set_context_pdp_root_pointer(struct vgt_device *vgt,
+		struct execlist_ring_context *ring_context,
+		u32 pdp[8]);
+
 #endif /*_GVT_EXECLIST_H_*/
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index f40788b..02e5a6e 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -43,6 +43,7 @@ 
 #include "edid.h"
 #include "display.h"
 #include "execlist.h"
+#include "scheduler.h"
 
 #define GVT_MAX_VGPU 8
 
@@ -151,6 +152,7 @@  struct vgt_device {
 	atomic_t active;
 	struct gvt_virtual_device_state state;
 	struct gvt_virtual_execlist_info virtual_execlist_info[I915_NUM_RINGS];
+	atomic_t running_workload_num;
 	struct gvt_statistics stat;
 	struct gvt_vgtt_info gtt;
 	void *hypervisor_data;
@@ -698,6 +700,8 @@  void gvt_emulate_display_events(struct pgt_device *pdev);
 bool gvt_setup_control_interface(struct pgt_device *pdev);
 void gvt_clean_control_interface(struct pgt_device *pdev);
 
+bool gvt_execlist_elsp_submit(struct vgt_device *vgt, int ring_id);
+
 #include "mpt.h"
 
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 638a295..356cfc4 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1100,6 +1100,26 @@  bool fpga_dbg_write(struct vgt_device *vgt, unsigned int reg,
         return gvt_default_mmio_write(vgt, reg, &v, bytes);
 }
 
+static bool elsp_write(struct vgt_device *vgt, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	int ring_id = gvt_render_mmio_to_ring_id(offset);
+	struct gvt_virtual_execlist_info *info =
+			&vgt->virtual_execlist_info[ring_id];
+	u32 data = *(u32 *)p_data;
+	bool rc = true;
+
+	info->bundle.data[info->bundle.index] = data;
+
+	if (info->bundle.index == 3)
+		rc = gvt_execlist_elsp_submit(vgt, ring_id);
+
+	++info->bundle.index;
+	info->bundle.index &= 0x3;
+
+	return rc;
+}
+
 struct gvt_reg_info gvt_general_reg_info[] = {
 	/* Interrupt registers - GT */
 	{_RING_IMR(RENDER_RING_BASE), 4, F_RDR, 0, D_ALL, NULL, gvt_reg_imr_handler},
@@ -1912,15 +1932,15 @@  struct gvt_reg_info gvt_broadwell_reg_info[] = {
 	{_REG_VCS2_TIMESTAMP, 8, F_PT, 0, D_BDW_PLUS, NULL, NULL},
 
 	{_REG_RCS_EXECLIST_SUBMITPORT, 4, F_VIRT, 0, D_BDW_PLUS,
-		mmio_not_allow_read, NULL},
+		mmio_not_allow_read, elsp_write},
 	{_REG_VCS_EXECLIST_SUBMITPORT, 4, F_VIRT, 0, D_BDW_PLUS,
-		mmio_not_allow_read, NULL},
+		mmio_not_allow_read, elsp_write},
 	{_REG_VECS_EXECLIST_SUBMITPORT, 4, F_VIRT, 0, D_BDW_PLUS,
-		mmio_not_allow_read, NULL},
+		mmio_not_allow_read, elsp_write},
 	{_REG_VCS2_EXECLIST_SUBMITPORT, 4, F_VIRT, 0, D_BDW_PLUS,
-		mmio_not_allow_read, NULL},
+		mmio_not_allow_read, elsp_write},
 	{_REG_BCS_EXECLIST_SUBMITPORT, 4, F_VIRT, 0, D_BDW_PLUS,
-		mmio_not_allow_read, NULL},
+		mmio_not_allow_read, elsp_write},
 
 	{_REG_RCS_EXECLIST_STATUS, 8, F_RDR, 0, D_BDW_PLUS, NULL,
 		mmio_not_allow_write},
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
new file mode 100644
index 0000000..dd24fda
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -0,0 +1,59 @@ 
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_SCHEDULER_H_
+#define _GVT_SCHEDULER_H_
+
+struct gvt_workload {
+	struct vgt_device *vgt;
+	int ring_id;
+
+	struct drm_i915_gem_request *req;
+
+	/* has this workload been dispatched to i915? */
+	bool dispatched;
+	int status;
+	atomic_t shadow_ctx_active;
+	wait_queue_head_t shadow_ctx_status_wq;
+
+	/* execlist context information */
+	struct execlist_ctx_descriptor_format ctx_desc;
+	struct execlist_ring_context *ring_context;
+	unsigned long rb_head, rb_tail, rb_ctl, rb_start;
+
+	struct gvt_mm *shadow_mm;
+
+	/* different submission models may need different complete handlers */
+	bool (*complete)(struct gvt_workload *);
+
+	struct list_head list;
+};
+
+#define workload_q_head(vgt, ring_id) \
+	(&(vgt->virtual_execlist_info[ring_id].workload_q_head))
+
+#define queue_workload(workload) \
+	list_add_tail(&workload->list, \
+	workload_q_head(workload->vgt, workload->ring_id))
+
+#endif
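
The scheduler that drains these queues arrives in a later patch. For
reference, a minimal sketch of the intended consumer, assuming the
scheduler dequeues one workload at a time, dispatches it to host i915,
records the result in workload->status, and then runs the
submission-model-specific complete() handler; dispatch_workload() is a
hypothetical stand-in, not a function from this series:

	static void process_workload_queue(struct vgt_device *vgt, int ring_id)
	{
		struct list_head *q = workload_q_head(vgt, ring_id);
		struct gvt_workload *workload;

		while (!list_empty(q)) {
			workload = container_of(q->next, struct gvt_workload, list);
			list_del(&workload->list);

			atomic_inc(&vgt->running_workload_num);
			workload->dispatched = true;

			/* run on host i915; result replaces -EINPROGRESS */
			workload->status = dispatch_workload(workload);

			/* emulates schedule-out and frees the workload */
			workload->complete(workload);
			atomic_dec(&vgt->running_workload_num);
		}
	}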