diff mbox

[RFC,3/8] drm/i915: Handle event stop and destroy for GPU commands submitted

Message ID 1436950306-14147-4-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com July 15, 2015, 8:51 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

This patch handles the event stop and destroy callbacks taking into account
the fact that there may be commands scheduled on GPU which may utilize the
destination buffer.

The event stop would just set the event state, and stop forwarding data to
userspace. From userspace perspective, for all purposes, the event sampling
is stopped. A subsequent event start (without event destroy) would start
forwarding samples again.

The event destroy releases the local copy of the dest buffer. But since it
is expected that the active reference of buffer is taken while inserting
commands, we can rest assured that buffer is freed up only after GPU is
done with it.
Still there is a need to schedule a worker from event destroy, because we
need to do some further stuff like freeing up request references.

The ideal solution here would be to have a callback when the last request
is finished on GPU, so that we can do this stuff there (WIP: Chris'
retire-notification mechanism). Till the time, a worker thread will do.

A subsequent event init would have to wait for previously submitted RPC
commands to complete or return -EBUSY. Currently, for the sake of
simplicity, we are returning -EBUSY if such a case is detected.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |   2 +
 drivers/gpu/drm/i915/i915_oa_perf.c | 104 +++++++++++++++++++++++++++++++-----
 2 files changed, 92 insertions(+), 14 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6984150..41a01bd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1679,6 +1679,7 @@  struct i915_gen_pmu_node {
 	struct list_head head;
 	struct drm_i915_gem_request *req;
 	u32 offset;
+	bool discard;
 	u32 ctx_id;
 };
 
@@ -2008,6 +2009,7 @@  struct drm_i915_private {
 		} buffer;
 		struct list_head node_list;
 		struct work_struct work_timer;
+		struct work_struct work_event_destroy;
 	} gen_pmu;
 
 	void (*insert_profile_cmd[I915_PROFILE_MAX])
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 350b560..107570e 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -441,6 +441,36 @@  int i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+/*
+ * Drop the gem request references held by the nodes on the gen_pmu
+ * node list, then unlink and free each node.
+ *
+ * NOTE(review): intended to run only from the event-destroy worker,
+ * after i915_gen_pmu_wait_gpu() has completed -- confirm it cannot run
+ * concurrently with forward_gen_pmu_snapshots_work().
+ */
+void i915_gen_pmu_release_request_ref(struct drm_i915_private *dev_priv)
+{
+	struct i915_gen_pmu_node *entry, *next;
+	struct drm_i915_gem_request *req;
+	unsigned long lock_flags;
+	int ret;
+
+	list_for_each_entry_safe
+		(entry, next, &dev_priv->gen_pmu.node_list, head) {
+		req = entry->req;
+		if (req) {
+			/*
+			 * struct_mutex is required to drop the request
+			 * reference. On interruption we bail out, leaving
+			 * the remaining nodes (and their request
+			 * references) on the list.
+			 */
+			ret = i915_mutex_lock_interruptible(dev_priv->dev);
+			if (ret)
+				break;
+			i915_gem_request_assign(&entry->req, NULL);
+			mutex_unlock(&dev_priv->dev->struct_mutex);
+		}
+
+		/*
+		 * This fn won't be running concurrently with forward snapshots
+		 * work fn. These are the only two places where list entries
+		 * will be deleted. So no need of protecting full loop?
+		 */
+		spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+		list_del(&entry->head);
+		spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+		kfree(entry);
+	}
+}
+
 static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
 				struct i915_gen_pmu_node *node)
 {
@@ -479,11 +509,19 @@  void forward_gen_pmu_snapshots_work(struct work_struct *__work)
 	unsigned long lock_flags;
 	int ret;
 
+	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+	if (dev_priv->gen_pmu.event_active == false) {
+		spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+		return;
+	}
+	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+
 	list_for_each_entry_safe
 		(entry, next, &dev_priv->gen_pmu.node_list, head) {
 		req = entry->req;
 		if (req && i915_gem_request_completed(req, true)) {
-			forward_one_gen_pmu_sample(dev_priv, entry);
+			if (!entry->discard)
+				forward_one_gen_pmu_sample(dev_priv, entry);
 			ret = i915_mutex_lock_interruptible(dev_priv->dev);
 			if (ret)
 				break;
@@ -667,18 +705,11 @@  out:
 
 static void gen_buffer_destroy(struct drm_i915_private *i915)
 {
-	unsigned long lock_flags;
-
+	/*
+	 * Only the local mapping, GGTT pin and gem reference are released
+	 * here. Clearing gen_pmu.buffer.obj/addr is deliberately deferred
+	 * to the event-destroy worker, so that a new event init keeps
+	 * seeing -EBUSY until the outstanding GPU commands are processed.
+	 */
 	mutex_lock(&i915->dev->struct_mutex);
 	vunmap(i915->gen_pmu.buffer.addr);
 	i915_gem_object_ggtt_unpin(i915->gen_pmu.buffer.obj);
 	drm_gem_object_unreference(&i915->gen_pmu.buffer.obj->base);
 	mutex_unlock(&i915->dev->struct_mutex);
-
-	spin_lock_irqsave(&i915->gen_pmu.lock, lock_flags);
-	i915->gen_pmu.buffer.obj = NULL;
-	i915->gen_pmu.buffer.addr = NULL;
-	spin_unlock_irqrestore(&i915->gen_pmu.lock, lock_flags);
 }
 
 static void i915_gen_event_destroy(struct perf_event *event)
@@ -688,10 +719,44 @@  static void i915_gen_event_destroy(struct perf_event *event)
 
 	WARN_ON(event->parent);
 
-	gen_buffer_destroy(i915);
+	/* Make sure the deferred snapshot-forwarding work isn't running */
+	cancel_work_sync(&i915->gen_pmu.work_timer);
 
 	BUG_ON(i915->gen_pmu.exclusive_event != event);
 	i915->gen_pmu.exclusive_event = NULL;
+
+	/* We can drop our local reference to the dest buffer here, since
+	 * an active reference of buffer would be taken while
+	 * inserting commands. So the buffer would be freed up only
+	 * after GPU is done with it.
+	 */
+	gen_buffer_destroy(i915);
+
+	/*
+	 * Waiting for the outstanding requests and dropping their
+	 * references can't be done here; defer it to the worker.
+	 */
+	schedule_work(&i915->gen_pmu.work_event_destroy);
+}
+
+/*
+ * Deferred part of event destroy: wait for the GPU to finish the
+ * commands targeting the destination buffer, drop the request
+ * references, then clear the buffer pointers so a new event init
+ * stops returning -EBUSY.
+ */
+void i915_gen_pmu_event_destroy_work(struct work_struct *__work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(__work, typeof(*dev_priv),
+			gen_pmu.work_event_destroy);
+	unsigned long lock_flags;
+	int ret;
+
+	/*
+	 * NOTE(review): on failure the node list keeps its request
+	 * references and nodes; confirm they are cleaned up elsewhere
+	 * (e.g. at pmu unregister).
+	 */
+	ret = i915_gen_pmu_wait_gpu(dev_priv);
+	if (ret)
+		goto out;
+
+	i915_gen_pmu_release_request_ref(dev_priv);
+
+out:
+	/*
+	 * Done at end, as this excludes a new event till we've done processing
+	 * the old one
+	 */
+	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+	dev_priv->gen_pmu.buffer.obj = NULL;
+	dev_priv->gen_pmu.buffer.addr = NULL;
+	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 }
 
 static int alloc_obj(struct drm_i915_private *dev_priv,
@@ -1412,6 +1477,7 @@  static int i915_gen_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	unsigned long lock_flags;
 	int ret = 0;
 
 	if (event->attr.type != event->pmu->type)
@@ -1420,8 +1486,12 @@  static int i915_gen_event_init(struct perf_event *event)
 	/* To avoid the complexity of having to accurately filter
 	 * data and marshal to the appropriate client
 	 * we currently only allow exclusive access */
-	if (dev_priv->gen_pmu.buffer.obj)
+	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+	if (dev_priv->gen_pmu.buffer.obj) {
+		spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 
 	/*
 	 * We need to check for CAP_SYS_ADMIN capability as we profile all
@@ -1464,16 +1534,18 @@  static void i915_gen_event_stop(struct perf_event *event, int flags)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	struct i915_gen_pmu_node *entry;
 	unsigned long lock_flags;
 
+	/*
+	 * Flush the samples collected so far to userspace before the
+	 * event is marked inactive (the forward work fn bails out once
+	 * event_active is false).
+	 */
+	hrtimer_cancel(&dev_priv->gen_pmu.timer);
+	gen_pmu_flush_snapshots(dev_priv);
+
 	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
 	dev_priv->gen_pmu.event_active = false;
-
+	/*
+	 * Snapshots still outstanding on the GPU must not be forwarded
+	 * to userspace once the event is stopped; mark them for discard.
+	 */
+	list_for_each_entry(entry, &dev_priv->gen_pmu.node_list, head)
+		entry->discard = true;
 	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 
-	hrtimer_cancel(&dev_priv->gen_pmu.timer);
-	gen_pmu_flush_snapshots(dev_priv);
-
 	event->hw.state = PERF_HES_STOPPED;
 }
 
@@ -1660,6 +1732,9 @@  void i915_gen_pmu_register(struct drm_device *dev)
 	i915->gen_pmu.timer.function = hrtimer_sample_gen;
 
 	INIT_WORK(&i915->gen_pmu.work_timer, forward_gen_pmu_snapshots_work);
+	INIT_WORK(&i915->gen_pmu.work_event_destroy,
+			i915_gen_pmu_event_destroy_work);
+
 	spin_lock_init(&i915->gen_pmu.lock);
 
 	i915->gen_pmu.pmu.capabilities  = PERF_PMU_CAP_IS_DEVICE;
@@ -1690,6 +1765,7 @@  void i915_gen_pmu_unregister(struct drm_device *dev)
 		return;
 
 	cancel_work_sync(&i915->gen_pmu.work_timer);
+	cancel_work_sync(&i915->gen_pmu.work_event_destroy);
 
 	perf_pmu_unregister(&i915->gen_pmu.pmu);
 	i915->gen_pmu.pmu.event_init = NULL;