[09/15] drm/i915: Extend i915 perf framework for collecting timestamps on all gpu engines

Message ID 1478251844-23509-10-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived

Commit Message

sourab.gupta@intel.com Nov. 4, 2016, 9:30 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

This patch extends the i915 perf framework to handle perf sample
collection for any given gpu engine. In particular, support for
collecting the timestamp sample type is added, which can be requested
for any engine.
With this, for RCS, timestamps and OA reports can be collected together
and provided to userspace in separate sample fields. For other engines,
the capability to collect timestamps is added.

Note that still only a single stream instance can be opened at any
given time, though that stream may now be opened on any gpu engine for
collection of timestamp samples.

So this patch doesn't yet add support for opening multiple concurrent
streams, though it lays the groundwork for that support to be added
subsequently. Part of this groundwork involves having separate command
stream buffers, per engine, for holding the samples generated, and
likewise for a few other data structures maintaining per-engine state.
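
For illustration only (not part of the patch), here is a rough
userspace sketch of how a timestamp-sampling stream might be opened on
a non-RCS engine. It assumes the DRM_I915_PERF_PROP_ENGINE property
introduced earlier in this series and the usual drm_i915_perf_open_param
layout; names and flags follow the existing uapi and error handling is
elided:

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  int open_vcs_ts_stream(int drm_fd)
  {
  	uint64_t properties[] = {
  		/* attach the stream to the VCS ring (CS mode) */
  		DRM_I915_PERF_PROP_ENGINE, I915_EXEC_BSD,
  		/* request a u64 timestamp in each sample record */
  		DRM_I915_PERF_PROP_SAMPLE_TS, 1,
  	};
  	struct drm_i915_perf_open_param param = {
  		.flags = I915_PERF_FLAG_FD_CLOEXEC,
  		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
  		.properties_ptr = (uintptr_t)properties,
  	};

  	/* on success, returns a stream fd to read() sample records from */
  	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
  }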

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  35 ++-
 drivers/gpu/drm/i915/i915_perf.c | 635 +++++++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_reg.h  |   2 +
 include/uapi/drm/i915_drm.h      |   7 +
 4 files changed, 445 insertions(+), 234 deletions(-)

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0f171f8..a05335a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1814,7 +1814,8 @@  struct i915_perf_stream_ops {
 	 * Routine to emit the commands in the command streamer associated
 	 * with the corresponding gpu engine.
 	 */
-	void (*command_stream_hook)(struct drm_i915_gem_request *req, u32 tag);
+	void (*command_stream_hook)(struct i915_perf_stream *stream,
+				struct drm_i915_gem_request *req, u32 tag);
 };
 
 enum i915_perf_stream_state {
@@ -1839,6 +1840,9 @@  struct i915_perf_stream {
 	/* Whether command stream based data collection is enabled */
 	bool cs_mode;
 
+	/* Whether the OA unit is in use */
+	bool using_oa;
+
 	const struct i915_perf_stream_ops *ops;
 };
 
@@ -1870,7 +1874,16 @@  struct i915_oa_ops {
 struct i915_perf_cs_data_node {
 	struct list_head link;
 	struct drm_i915_gem_request *request;
-	u32 offset;
+
+	/* Offsets into the GEM obj holding the data */
+	u32 start_offset;
+	u32 oa_offset;
+	u32 ts_offset;
+
+	/* buffer size corresponding to this entry */
+	u32 size;
+
+	/* Other metadata */
 	u32 ctx_id;
 	u32 pid;
 	u32 tag;
@@ -2189,14 +2202,14 @@  struct drm_i915_private {
 
 		spinlock_t hook_lock;
 
-		struct {
-			struct i915_perf_stream *exclusive_stream;
 
-			u32 specific_ctx_id;
+		struct hrtimer poll_check_timer;
+		struct i915_perf_stream *exclusive_stream;
+		wait_queue_head_t poll_wq[I915_NUM_ENGINES];
+		atomic_t pollin[I915_NUM_ENGINES];
 
-			struct hrtimer poll_check_timer;
-			wait_queue_head_t poll_wq;
-			atomic_t pollin;
+		struct {
+			u32 specific_ctx_id;
 
 			bool periodic;
 			int period_exponent;
@@ -2241,13 +2254,13 @@  struct drm_i915_private {
 			u8 *addr;
 #define I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW (1<<0)
 			u32 status;
-		} command_stream_buf;
+		} command_stream_buf[I915_NUM_ENGINES];
 
 		u32 last_ctx_id;
 		u32 last_pid;
 		u32 last_tag;
-		struct list_head node_list;
-		spinlock_t node_list_lock;
+		struct list_head node_list[I915_NUM_ENGINES];
+		spinlock_t node_list_lock[I915_NUM_ENGINES];
 	} perf;
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ca523b1..516fd54 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -250,12 +250,17 @@  static u32 i915_perf_stream_paranoid = true;
 /* For determining the behavior on overflow of command stream samples */
 #define CMD_STREAM_BUF_OVERFLOW_ALLOWED
 
-/* Data common to periodic and RCS based samples */
-struct oa_sample_data {
+#define OA_ADDR_ALIGN 64
+#define TS_ADDR_ALIGN 8
+#define I915_PERF_TS_SAMPLE_SIZE 8
+
+/* Data common to all samples (periodic OA / CS based OA / Timestamps) */
+struct sample_data {
 	u32 source;
 	u32 ctx_id;
 	u32 pid;
 	u32 tag;
+	u64 ts;
 	const u8 *report;
 };
 
@@ -313,6 +318,7 @@  static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 #define SAMPLE_CTX_ID		(1<<2)
 #define SAMPLE_PID		(1<<3)
 #define SAMPLE_TAG		(1<<4)
+#define SAMPLE_TS		(1<<5)
 
 struct perf_open_properties {
 	u32 sample_flags;
@@ -350,8 +356,9 @@  void i915_perf_command_stream_hook(struct drm_i915_gem_request *request,
 	mutex_lock(&dev_priv->perf.streams_lock);
 	list_for_each_entry(stream, &dev_priv->perf.streams, link) {
 		if ((stream->state == I915_PERF_STREAM_ENABLED) &&
-					stream->cs_mode)
-			stream->ops->command_stream_hook(request, tag);
+				stream->cs_mode &&
+				(stream->engine == engine->id))
+			stream->ops->command_stream_hook(stream, request, tag);
 	}
 	mutex_unlock(&dev_priv->perf.streams_lock);
 }
@@ -365,16 +372,15 @@  void i915_perf_command_stream_hook(struct drm_i915_gem_request *request,
  * eventually, when the request associated with new entry completes.
  */
 static void release_some_perf_entries(struct drm_i915_private *dev_priv,
-					u32 target_size)
+				enum intel_engine_id id, u32 target_size)
 {
 	struct i915_perf_cs_data_node *entry, *next;
-	u32 entry_size = dev_priv->perf.oa.oa_buffer.format_size;
 	u32 size = 0;
 
 	list_for_each_entry_safe
-		(entry, next, &dev_priv->perf.node_list, link) {
+		(entry, next, &dev_priv->perf.node_list[id], link) {
 
-		size += entry_size;
+		size += entry->size;
 		i915_gem_request_put(entry->request);
 		list_del(&entry->link);
 		kfree(entry);
@@ -392,43 +398,61 @@  static void release_some_perf_entries(struct drm_i915_private *dev_priv,
  * appropriate status flag is set, and function returns -ENOSPC.
  */
 static int insert_perf_entry(struct drm_i915_private *dev_priv,
+				struct i915_perf_stream *stream,
 				struct i915_perf_cs_data_node *entry)
 {
 	struct i915_perf_cs_data_node *first_entry, *last_entry;
-	int max_offset = dev_priv->perf.command_stream_buf.obj->base.size;
-	u32 entry_size = dev_priv->perf.oa.oa_buffer.format_size;
+	u32 sample_flags = stream->sample_flags;
+	enum intel_engine_id id = stream->engine;
+	int max_offset = dev_priv->perf.command_stream_buf[id].obj->base.size;
+	u32 offset, entry_size = 0;
+	bool sample_ts = false;
 	int ret = 0;
 
-	spin_lock(&dev_priv->perf.node_list_lock);
-	if (list_empty(&dev_priv->perf.node_list)) {
-		entry->offset = 0;
+	if (stream->sample_flags & SAMPLE_OA_REPORT)
+		entry_size += dev_priv->perf.oa.oa_buffer.format_size;
+	else if (sample_flags & SAMPLE_TS) {
+		/*
+		 * XXX: Since TS data can anyway be derived from the OA report,
+		 * there is no need to capture it separately for RCS if OA data
+		 * is already being captured.
+		 */
+		entry_size += I915_PERF_TS_SAMPLE_SIZE;
+		sample_ts = true;
+	}
+
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
+	if (list_empty(&dev_priv->perf.node_list[id])) {
+		offset = 0;
 		goto out;
 	}
 
-	first_entry = list_first_entry(&dev_priv->perf.node_list,
+	first_entry = list_first_entry(&dev_priv->perf.node_list[id],
 				       typeof(*first_entry), link);
-	last_entry = list_last_entry(&dev_priv->perf.node_list,
+	last_entry = list_last_entry(&dev_priv->perf.node_list[id],
 				     typeof(*last_entry), link);
 
-	if (last_entry->offset >= first_entry->offset) {
+	if (last_entry->start_offset >= first_entry->start_offset) {
 		/* Sufficient space available at the end of buffer? */
-		if (last_entry->offset + 2*entry_size < max_offset)
-			entry->offset = last_entry->offset + entry_size;
+		if (last_entry->start_offset + last_entry->size + entry_size
+							< max_offset)
+			offset = last_entry->start_offset + last_entry->size;
 		/*
 		 * Wraparound condition. Is sufficient space available at
 		 * beginning of buffer?
 		 */
-		else if (entry_size < first_entry->offset)
-			entry->offset = 0;
+		else if (entry_size < first_entry->start_offset)
+			offset = 0;
 		/* Insufficient space */
 		else {
 #ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED
-			u32 target_size = entry_size - first_entry->offset;
+			u32 target_size = entry_size -
+						first_entry->start_offset;
 
-			release_some_perf_entries(dev_priv, target_size);
-			entry->offset = 0;
+			release_some_perf_entries(dev_priv, id, target_size);
+			offset = 0;
 #else
-			dev_priv->perf.command_stream_buf.status |=
+			dev_priv->perf.command_stream_buf[id].status |=
 				I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW;
 			ret = -ENOSPC;
 			goto out_unlock;
@@ -436,19 +460,21 @@  static int insert_perf_entry(struct drm_i915_private *dev_priv,
 		}
 	} else {
 		/* Sufficient space available? */
-		if (last_entry->offset + 2*entry_size < first_entry->offset)
-			entry->offset = last_entry->offset + entry_size;
+		if (last_entry->start_offset + last_entry->size + entry_size
+						< first_entry->start_offset)
+			offset = last_entry->start_offset + last_entry->size;
 		/* Insufficient space */
 		else {
 #ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED
 			u32 target_size = entry_size -
-				(first_entry->offset - last_entry->offset -
-				entry_size);
+				(first_entry->start_offset -
+					last_entry->start_offset -
+					last_entry->size);
 
-			release_some_perf_entries(dev_priv, target_size);
-			entry->offset = last_entry->offset + entry_size;
+			release_some_perf_entries(dev_priv, id, target_size);
+			offset = last_entry->start_offset + last_entry->size;
 #else
-			dev_priv->perf.command_stream_buf.status |=
+			dev_priv->perf.command_stream_buf[id].status |=
 				I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW;
 			ret = -ENOSPC;
 			goto out_unlock;
@@ -457,45 +483,43 @@  static int insert_perf_entry(struct drm_i915_private *dev_priv,
 	}
 
 out:
-	list_add_tail(&entry->link, &dev_priv->perf.node_list);
+	entry->start_offset = offset;
+	entry->size = entry_size;
+	if (stream->sample_flags & SAMPLE_OA_REPORT) {
+		entry->oa_offset = offset;
+		/* Ensure 64 byte alignment of oa_offset */
+		entry->oa_offset = ALIGN(entry->oa_offset, OA_ADDR_ALIGN);
+		offset = entry->oa_offset +
+				dev_priv->perf.oa.oa_buffer.format_size;
+	}
+	if (sample_ts) {
+		entry->ts_offset = offset;
+		/* Ensure 8 byte alignment of ts_offset */
+		entry->ts_offset = ALIGN(entry->ts_offset, TS_ADDR_ALIGN);
+		offset = entry->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
+	}
+
+	list_add_tail(&entry->link, &dev_priv->perf.node_list[id]);
 #ifndef CMD_STREAM_BUF_OVERFLOW_ALLOWED
 out_unlock:
 #endif
-	spin_unlock(&dev_priv->perf.node_list_lock);
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
 	return ret;
 }
 
-static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req,
-						u32 tag)
+static int i915_ring_stream_capture_oa(struct drm_i915_gem_request *req,
+				u32 offset)
 {
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_ring *ring = req->ring;
-	struct i915_gem_context *ctx = req->ctx;
-	struct i915_perf_cs_data_node *entry;
 	u32 addr = 0;
 	int ret;
 
-	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-	if (entry == NULL) {
-		DRM_ERROR("alloc failed\n");
-		return;
-	}
-
-	ret = insert_perf_entry(dev_priv, entry);
-	if (ret)
-		goto out_free;
-
 	ret = intel_ring_begin(req, 4);
 	if (ret)
-		goto out;
-
-	entry->ctx_id = ctx->hw_id;
-	entry->pid = current->pid;
-	entry->tag = tag;
-	i915_gem_request_assign(&entry->request, req);
+		return ret;
 
-	addr = dev_priv->perf.command_stream_buf.vma->node.start +
-		entry->offset;
+	addr = dev_priv->perf.command_stream_buf[RCS].vma->node.start + offset;
 
 	/* addr should be 64 byte aligned */
 	BUG_ON(addr & 0x3f);
@@ -512,18 +536,124 @@  static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req,
 		intel_ring_emit(ring, MI_NOOP);
 	}
 	intel_ring_advance(ring);
-	i915_vma_move_to_active(dev_priv->perf.command_stream_buf.vma, req,
+	return 0;
+}
+
+static int i915_ring_stream_capture_ts(struct drm_i915_gem_request *req,
+						u32 offset)
+{
+	struct drm_i915_private *dev_priv = req->i915;
+	enum intel_engine_id id = req->engine->id;
+	struct intel_ring *ring = req->ring;
+	u32 addr = 0;
+	int ret;
+
+	ret = intel_ring_begin(req, 6);
+	if (ret)
+		return ret;
+
+	addr = dev_priv->perf.command_stream_buf[id].vma->node.start + offset;
+
+	if (id == RCS) {
+		if (INTEL_INFO(dev_priv)->gen >= 8)
+			intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
+		else
+			intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+		intel_ring_emit(ring,
+				PIPE_CONTROL_GLOBAL_GTT_IVB |
+				PIPE_CONTROL_TIMESTAMP_WRITE);
+		intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
+		intel_ring_emit(ring, 0);
+		if (INTEL_INFO(dev_priv)->gen >= 8) {
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, 0);
+		} else {
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, MI_NOOP);
+		}
+	} else {
+		uint32_t cmd;
+
+		cmd = MI_FLUSH_DW + 1;
+		if (INTEL_INFO(dev_priv)->gen >= 8)
+			cmd += 1;
+
+		cmd |= MI_FLUSH_DW_OP_STAMP;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, addr | MI_FLUSH_DW_USE_GTT);
+		if (INTEL_INFO(dev_priv)->gen >= 8) {
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, 0);
+		} else {
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, MI_NOOP);
+		}
+		intel_ring_emit(ring, MI_NOOP);
+	}
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static void i915_ring_stream_cs_hook(struct i915_perf_stream *stream,
+				struct drm_i915_gem_request *req, u32 tag)
+{
+	struct drm_i915_private *dev_priv = req->i915;
+	struct i915_gem_context *ctx = req->ctx;
+	enum intel_engine_id id = stream->engine;
+	u32 sample_flags = stream->sample_flags;
+	struct i915_perf_cs_data_node *entry;
+	int ret = 0;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		DRM_ERROR("alloc failed\n");
+		return;
+	}
+
+	ret = insert_perf_entry(dev_priv, stream, entry);
+	if (ret)
+		goto err_free;
+
+	entry->ctx_id = ctx->hw_id;
+	entry->pid = current->pid;
+	entry->tag = tag;
+	i915_gem_request_assign(&entry->request, req);
+
+	if (sample_flags & SAMPLE_OA_REPORT) {
+		ret = i915_ring_stream_capture_oa(req, entry->oa_offset);
+		if (ret)
+			goto err_unref;
+	} else if (sample_flags & SAMPLE_TS) {
+		/*
+		 * XXX: Since TS data can anyway be derived from the OA report,
+		 * there is no need to capture it separately for RCS if OA data
+		 * is already being captured.
+		 */
+		ret = i915_ring_stream_capture_ts(req, entry->ts_offset);
+		if (ret)
+			goto err_unref;
+	}
+
+
+	i915_vma_move_to_active(dev_priv->perf.command_stream_buf[id].vma, req,
 					EXEC_OBJECT_WRITE);
 	return;
-out:
-	spin_lock(&dev_priv->perf.node_list_lock);
+
+err_unref:
+	i915_gem_request_put(entry->request);
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
 	list_del(&entry->link);
-	spin_unlock(&dev_priv->perf.node_list_lock);
-out_free:
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
+err_free:
 	kfree(entry);
 }
 
-static int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
+static int i915_ring_stream_wait_gpu(struct drm_i915_private *dev_priv,
+				enum intel_engine_id id)
 {
 	struct i915_perf_cs_data_node *last_entry = NULL;
 	struct drm_i915_gem_request *req = NULL;
@@ -534,14 +664,14 @@  static int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
 	 * implicitly wait for the prior submitted requests. The refcount
 	 * of the requests is not decremented here.
 	 */
-	spin_lock(&dev_priv->perf.node_list_lock);
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
 
-	if (!list_empty(&dev_priv->perf.node_list)) {
-		last_entry = list_last_entry(&dev_priv->perf.node_list,
+	if (!list_empty(&dev_priv->perf.node_list[id])) {
+		last_entry = list_last_entry(&dev_priv->perf.node_list[id],
 			struct i915_perf_cs_data_node, link);
 		req = last_entry->request;
 	}
-	spin_unlock(&dev_priv->perf.node_list_lock);
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
 
 	if (!req)
 		return 0;
@@ -554,17 +684,18 @@  static int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-static void i915_oa_rcs_free_requests(struct drm_i915_private *dev_priv)
+static void i915_ring_stream_free_requests(struct drm_i915_private *dev_priv,
+				enum intel_engine_id id)
 {
 	struct i915_perf_cs_data_node *entry, *next;
 
 	list_for_each_entry_safe
-		(entry, next, &dev_priv->perf.node_list, link) {
+		(entry, next, &dev_priv->perf.node_list[id], link) {
 		i915_gem_request_put(entry->request);
 
-		spin_lock(&dev_priv->perf.node_list_lock);
+		spin_lock(&dev_priv->perf.node_list_lock[id]);
 		list_del(&entry->link);
-		spin_unlock(&dev_priv->perf.node_list_lock);
+		spin_unlock(&dev_priv->perf.node_list_lock[id]);
 		kfree(entry);
 	}
 }
@@ -708,11 +839,11 @@  static int append_oa_status(struct i915_perf_stream *stream,
 }
 
 /**
- * Copies single OA report into userspace read() buffer.
+ * Copies single sample into userspace read() buffer.
  */
-static int append_oa_sample(struct i915_perf_stream *stream,
+static int append_sample(struct i915_perf_stream *stream,
 			    char __user *buf, size_t count,
-			    size_t *offset, struct oa_sample_data *data)
+			    size_t *offset, struct sample_data *data)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -755,6 +886,12 @@  static int append_oa_sample(struct i915_perf_stream *stream,
 		buf += 4;
 	}
 
+	if (sample_flags & SAMPLE_TS) {
+		if (copy_to_user(buf, &data->ts, I915_PERF_TS_SAMPLE_SIZE))
+			return -EFAULT;
+		buf += I915_PERF_TS_SAMPLE_SIZE;
+	}
+
 	if (sample_flags & SAMPLE_OA_REPORT) {
 		if (copy_to_user(buf, data->report, report_size))
 			return -EFAULT;
@@ -772,7 +909,7 @@  static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	u32 sample_flags = stream->sample_flags;
-	struct oa_sample_data data = { 0 };
+	struct sample_data data = { 0 };
 
 	if (sample_flags & SAMPLE_OA_SOURCE_INFO) {
 		enum drm_i915_perf_oa_event_source source;
@@ -803,10 +940,15 @@  static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 	if (sample_flags & SAMPLE_TAG)
 		data.tag = dev_priv->perf.last_tag;
 
+	/* Derive timestamp from OA report, after scaling with the ts base */
+#warning "FIXME: append_oa_buffer_sample: derive the timestamp from OA report"
+	if (sample_flags & SAMPLE_TS)
+		data.ts = 0;
+
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
 
-	return append_oa_sample(stream, buf, count, offset, &data);
+	return append_sample(stream, buf, count, offset, &data);
 }
 
 /**
@@ -927,7 +1069,7 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * an invalid ID. It could be good to annotate these
 		 * reports with a _CTX_SWITCH_AWAY reason later.
 		 */
-		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
+		if (!dev_priv->perf.exclusive_stream->ctx ||
 		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
 		    dev_priv->perf.oa.oa_buffer.last_ctx_id == ctx_id) {
 
@@ -938,7 +1080,7 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			 * the switch-away reports with an invalid
 			 * context id to be recognisable by userspace.
 			 */
-			if (dev_priv->perf.oa.exclusive_stream->ctx &&
+			if (dev_priv->perf.exclusive_stream->ctx &&
 			    dev_priv->perf.oa.specific_ctx_id != ctx_id)
 				report32[2] = 0xffffffff;
 
@@ -1284,32 +1426,40 @@  static int gen7_oa_read(struct i915_perf_stream *stream,
 }
 
 /**
- * Copies a command stream OA report into userspace read() buffer, while also
- * forwarding the periodic OA reports with timestamp lower than CS report.
+ * Copy one command stream report into userspace read() buffer.
+ * For OA reports, also forward the periodic OA reports with timestamp
+ * lower than current CS OA sample.
  *
  * NB: some data may be successfully copied to the userspace buffer
  * even if an error is returned, and this is reflected in the
  * updated @read_state.
  */
-static int append_oa_rcs_sample(struct i915_perf_stream *stream,
+static int append_one_cs_sample(struct i915_perf_stream *stream,
 				char __user *buf, size_t count,
 				size_t *offset,
 				struct i915_perf_cs_data_node *node)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
-	struct oa_sample_data data = { 0 };
-	const u8 *report = dev_priv->perf.command_stream_buf.addr +
-				node->offset;
+	enum intel_engine_id id = stream->engine;
+	struct sample_data data = { 0 };
 	u32 sample_flags = stream->sample_flags;
-	u32 report_ts;
-	int ret;
+	int ret = 0;
 
-	/* First, append the periodic OA samples having lower timestamps */
-	report_ts = *(u32 *)(report + 4);
-	ret = dev_priv->perf.oa.ops.read(stream, buf, count, offset,
-					report_ts, U32_MAX);
-	if (ret)
-		return ret;
+	if (sample_flags & SAMPLE_OA_REPORT) {
+		const u8 *report = dev_priv->perf.command_stream_buf[id].addr +
+				   node->oa_offset;
+		u32 sample_ts = *(u32 *)(report + 4);
+
+		data.report = report;
+
+		/* First, append the periodic OA samples having lower
+		 * timestamp values
+		 */
+		ret = dev_priv->perf.oa.ops.read(stream, buf, count, offset,
+						sample_ts, U32_MAX);
+		if (ret)
+			return ret;
+	}
 
 	if (sample_flags & SAMPLE_OA_SOURCE_INFO)
 		data.source = I915_PERF_OA_EVENT_SOURCE_RCS;
@@ -1329,25 +1479,38 @@  static int append_oa_rcs_sample(struct i915_perf_stream *stream,
 		dev_priv->perf.last_tag = node->tag;
 	}
 
-	if (sample_flags & SAMPLE_OA_REPORT)
-		data.report = report;
+	if (sample_flags & SAMPLE_TS) {
+		/* For RCS, if OA samples are also being collected, derive the
+		 * timestamp from OA report, after scaling with the TS base.
+		 * Else, forward the timestamp collected via command stream.
+		 */
+#warning "FIXME: append_one_cs_sample: derive the timestamp from OA report"
+		if (sample_flags & SAMPLE_OA_REPORT)
+			data.ts = 0;
+		else
+			data.ts = *(u64 *)
+				(dev_priv->perf.command_stream_buf[id].addr +
+					node->ts_offset);
+	}
+
 
-	return append_oa_sample(stream, buf, count, offset, &data);
+	return append_sample(stream, buf, count, offset, &data);
 }
 
 /**
- * Copies all OA reports into userspace read() buffer. This includes command
- * stream as well as periodic OA reports.
+ * Copies all samples into userspace read() buffer. This includes command
+ * stream samples as well as periodic OA reports (if enabled).
  *
  * NB: some data may be successfully copied to the userspace buffer
  * even if an error is returned, and this is reflected in the
  * updated @read_state.
  */
-static int oa_rcs_append_reports(struct i915_perf_stream *stream,
+static int append_command_stream_samples(struct i915_perf_stream *stream,
 				char __user *buf, size_t count, size_t *offset)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct i915_perf_cs_data_node *entry, *next;
+	enum intel_engine_id id = stream->engine;
 	LIST_HEAD(free_list);
 	int ret = 0;
 #ifndef CMD_STREAM_BUF_OVERFLOW_ALLOWED
@@ -1364,24 +1527,24 @@  static int oa_rcs_append_reports(struct i915_perf_stream *stream,
 				~I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW;
 	}
 #endif
-	spin_lock(&dev_priv->perf.node_list_lock);
-	if (list_empty(&dev_priv->perf.node_list)) {
-		spin_unlock(&dev_priv->perf.node_list_lock);
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
+	if (list_empty(&dev_priv->perf.node_list[id])) {
+		spin_unlock(&dev_priv->perf.node_list_lock[id]);
 		goto pending_periodic;
 	}
 	list_for_each_entry_safe(entry, next,
-				 &dev_priv->perf.node_list, link) {
+				 &dev_priv->perf.node_list[id], link) {
 		if (!i915_gem_request_completed(entry->request))
 			break;
 		list_move_tail(&entry->link, &free_list);
 	}
-	spin_unlock(&dev_priv->perf.node_list_lock);
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
 
 	if (list_empty(&free_list))
 		goto pending_periodic;
 
 	list_for_each_entry_safe(entry, next, &free_list, link) {
-		ret = append_oa_rcs_sample(stream, buf, count, offset, entry);
+		ret = append_one_cs_sample(stream, buf, count, offset, entry);
 		if (ret)
 			break;
 
@@ -1391,14 +1554,15 @@  static int oa_rcs_append_reports(struct i915_perf_stream *stream,
 	}
 
 	/* Don't discard remaining entries, keep them for next read */
-	spin_lock(&dev_priv->perf.node_list_lock);
-	list_splice(&free_list, &dev_priv->perf.node_list);
-	spin_unlock(&dev_priv->perf.node_list_lock);
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
+	list_splice(&free_list, &dev_priv->perf.node_list[id]);
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
 
 	return ret;
 
 pending_periodic:
-	if (!dev_priv->perf.oa.n_pending_periodic_samples)
+	if (!((stream->sample_flags & SAMPLE_OA_REPORT) &&
+			dev_priv->perf.oa.n_pending_periodic_samples))
 		return 0;
 
 	ret = dev_priv->perf.oa.ops.read(stream, buf, count, offset,
@@ -1427,15 +1591,16 @@  static enum cs_buf_data_state command_stream_buf_state(
 				struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	enum intel_engine_id id = stream->engine;
 	struct i915_perf_cs_data_node *entry = NULL;
 	struct drm_i915_gem_request *request = NULL;
 
-	spin_lock(&dev_priv->perf.node_list_lock);
-	entry = list_first_entry_or_null(&dev_priv->perf.node_list,
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
+	entry = list_first_entry_or_null(&dev_priv->perf.node_list[id],
 			struct i915_perf_cs_data_node, link);
 	if (entry)
 		request = entry->request;
-	spin_unlock(&dev_priv->perf.node_list_lock);
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
 
 	if (!entry)
 		return CS_BUF_EMPTY;
@@ -1453,23 +1618,23 @@  static enum cs_buf_data_state command_stream_buf_state(
 static bool stream_have_data__unlocked(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
-	enum cs_buf_data_state cs_buf_state;
-	u32 num_samples, last_ts = 0;
-
-	/* Note: oa_buffer_num_samples() is ok to run unlocked as it just
-	 * performs mmio reads of the OA buffer head + tail pointers and
-	 * it's assumed we're handling some operation that implies the stream
-	 * can't be destroyed until completion (such as a read()) that ensures
-	 * the device + OA buffer can't disappear
-	 */
-	dev_priv->perf.oa.n_pending_periodic_samples = 0;
-	dev_priv->perf.oa.pending_periodic_ts = 0;
-	num_samples = dev_priv->perf.oa.ops.oa_buffer_num_samples(dev_priv,
-								&last_ts);
-	if (stream->cs_mode)
+	enum cs_buf_data_state cs_buf_state = CS_BUF_EMPTY;
+	u32 num_samples = 0, last_ts = 0;
+
+	if (stream->sample_flags & SAMPLE_OA_REPORT) {
+		/* Note: oa_buffer_num_samples() is ok to run unlocked as it
+		 * just performs mmio reads of the OA buffer head + tail
+		 * pointers and it's assumed we're handling some operation that
+		 * implies the stream can't be destroyed until completion (such
+		 * as a read()) that ensures the device + OA buffer can't
+		 * disappear
+		 */
+		dev_priv->perf.oa.n_pending_periodic_samples = 0;
+		dev_priv->perf.oa.pending_periodic_ts = 0;
+		num_samples = dev_priv->perf.oa.ops.oa_buffer_num_samples(
+							dev_priv, &last_ts);
+	} else if (stream->cs_mode)
 		cs_buf_state = command_stream_buf_state(stream);
-	else
-		cs_buf_state = CS_BUF_EMPTY;
 
 	/*
 	 * Note: We can safely forward the periodic OA samples in the case we
@@ -1481,9 +1646,13 @@  static bool stream_have_data__unlocked(struct i915_perf_stream *stream)
 	 */
 	switch (cs_buf_state) {
 	case CS_BUF_EMPTY:
-		dev_priv->perf.oa.n_pending_periodic_samples = num_samples;
-		dev_priv->perf.oa.pending_periodic_ts = last_ts;
-		return (num_samples != 0);
+		if (stream->sample_flags & SAMPLE_OA_REPORT) {
+			dev_priv->perf.oa.n_pending_periodic_samples =
+								num_samples;
+			dev_priv->perf.oa.pending_periodic_ts = last_ts;
+			return (num_samples != 0);
+		} else
+			return false;
 
 	case CS_BUF_HAVE_DATA:
 		return true;
@@ -1494,9 +1663,10 @@  static bool stream_have_data__unlocked(struct i915_perf_stream *stream)
 	}
 }
 
-static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
+static int i915_ring_stream_wait_unlocked(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	enum intel_engine_id id = stream->engine;
 	int ret;
 
 	/* We would wait indefinitly if periodic sampling is not enabled */
@@ -1504,25 +1674,25 @@  static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
 		return -EIO;
 
 	if (stream->cs_mode) {
-		ret = i915_oa_rcs_wait_gpu(dev_priv);
+		ret = i915_ring_stream_wait_gpu(dev_priv, id);
 		if (ret)
 			return ret;
 	}
 
-	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
+	return wait_event_interruptible(dev_priv->perf.poll_wq[id],
 					stream_have_data__unlocked(stream));
 }
 
-static void i915_oa_poll_wait(struct i915_perf_stream *stream,
+static void i915_ring_stream_poll_wait(struct i915_perf_stream *stream,
 			      struct file *file,
 			      poll_table *wait)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
+	poll_wait(file, &dev_priv->perf.poll_wq[stream->engine], wait);
 }
 
-static int i915_oa_read(struct i915_perf_stream *stream,
+static int i915_ring_stream_read(struct i915_perf_stream *stream,
 			char __user *buf,
 			size_t count,
 			size_t *offset)
@@ -1530,24 +1700,27 @@  static int i915_oa_read(struct i915_perf_stream *stream,
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
 	if (stream->cs_mode)
-		return oa_rcs_append_reports(stream, buf, count, offset);
-	else
+		return append_command_stream_samples(stream, buf, count, offset);
+	else if (stream->sample_flags & SAMPLE_OA_REPORT)
 		return dev_priv->perf.oa.ops.read(stream, buf, count, offset,
 						U32_MAX, U32_MAX);
+	else
+		return -EINVAL;
 }
 
 static void
-free_command_stream_buf(struct drm_i915_private *dev_priv)
+free_command_stream_buf(struct drm_i915_private *dev_priv,
+				enum intel_engine_id id)
 {
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	i915_gem_object_unpin_map(dev_priv->perf.command_stream_buf.obj);
-	__i915_vma_unpin(dev_priv->perf.command_stream_buf.vma);
-	i915_gem_object_put(dev_priv->perf.command_stream_buf.obj);
+	i915_gem_object_unpin_map(dev_priv->perf.command_stream_buf[id].obj);
+	__i915_vma_unpin(dev_priv->perf.command_stream_buf[id].vma);
+	i915_gem_object_put(dev_priv->perf.command_stream_buf[id].obj);
 
-	dev_priv->perf.command_stream_buf.obj = NULL;
-	dev_priv->perf.command_stream_buf.vma = NULL;
-	dev_priv->perf.command_stream_buf.addr = NULL;
+	dev_priv->perf.command_stream_buf[id].obj = NULL;
+	dev_priv->perf.command_stream_buf[id].vma = NULL;
+	dev_priv->perf.command_stream_buf[id].addr = NULL;
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
@@ -1569,16 +1742,13 @@  free_oa_buffer(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
+static void i915_ring_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
-
-	if (stream->cs_mode)
-		free_command_stream_buf(dev_priv);
+	BUG_ON(stream != dev_priv->perf.exclusive_stream);
 
-	if (dev_priv->perf.oa.oa_buffer.obj) {
+	if (stream->using_oa) {
 		dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
 
 		free_oa_buffer(dev_priv);
@@ -1587,7 +1757,10 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 		intel_runtime_pm_put(dev_priv);
 	}
 
-	dev_priv->perf.oa.exclusive_stream = NULL;
+	if (stream->cs_mode)
+		free_command_stream_buf(dev_priv, stream->engine);
+
+	dev_priv->perf.exclusive_stream = NULL;
 }
 
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
@@ -1620,9 +1793,7 @@  static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 	 */
 	memset(dev_priv->perf.oa.oa_buffer.addr, 0, SZ_16M);
 
-	/* Maybe make ->pollin per-stream state if we support multiple
-	 * concurrent streams in the future. */
-	atomic_set(&dev_priv->perf.oa.pollin, false);
+	atomic_set(&dev_priv->perf.pollin[RCS], false);
 }
 
 static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
@@ -1658,9 +1829,7 @@  static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 	 */
 	memset(dev_priv->perf.oa.oa_buffer.addr, 0, SZ_16M);
 
-	/* Maybe make ->pollin per-stream state if we support multiple
-	 * concurrent streams in the future. */
-	atomic_set(&dev_priv->perf.oa.pollin, false);
+	atomic_set(&dev_priv->perf.pollin[RCS], false);
 }
 
 static int alloc_obj(struct drm_i915_private *dev_priv,
@@ -1746,30 +1915,33 @@  static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-static int alloc_command_stream_buf(struct drm_i915_private *dev_priv)
+static int alloc_command_stream_buf(struct drm_i915_private *dev_priv,
+					enum intel_engine_id id)
 {
 	struct drm_i915_gem_object *bo;
 	struct i915_vma *vma;
 	u8 *obj_addr;
 	int ret;
 
-	BUG_ON(dev_priv->perf.command_stream_buf.obj);
+	BUG_ON(dev_priv->perf.command_stream_buf[id].obj);
 
 	ret = alloc_obj(dev_priv, &bo, &vma, &obj_addr);
 	if (ret)
 		return ret;
 
-	dev_priv->perf.command_stream_buf.obj = bo;
-	dev_priv->perf.command_stream_buf.vma = vma; 
-	dev_priv->perf.command_stream_buf.addr = obj_addr;
-	if (WARN_ON(!list_empty(&dev_priv->perf.node_list)))
-		INIT_LIST_HEAD(&dev_priv->perf.node_list);
+	dev_priv->perf.command_stream_buf[id].obj = bo;
+	dev_priv->perf.command_stream_buf[id].vma = vma;
+	dev_priv->perf.command_stream_buf[id].addr = obj_addr;
+	if (WARN_ON(!list_empty(&dev_priv->perf.node_list[id])))
+		INIT_LIST_HEAD(&dev_priv->perf.node_list[id]);
+
+	atomic_set(&dev_priv->perf.pollin[id], false);
 
 	DRM_DEBUG_DRIVER(
 		"command stream buf initialized, gtt offset = 0x%x, vaddr = %p",
 		 (unsigned int)
-		 dev_priv->perf.command_stream_buf.vma->node.start,
-		 dev_priv->perf.command_stream_buf.addr);
+		 dev_priv->perf.command_stream_buf[id].vma->node.start,
+		 dev_priv->perf.command_stream_buf[id].addr);
 
 	return 0;
 }
@@ -2031,14 +2203,14 @@  static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
 {
 	assert_spin_locked(&dev_priv->perf.hook_lock);
 
-	if (dev_priv->perf.oa.exclusive_stream->state !=
+	if (dev_priv->perf.exclusive_stream->state !=
 					I915_PERF_STREAM_DISABLED) {
 		unsigned long ctx_id = 0;
 
-		if (dev_priv->perf.oa.exclusive_stream->ctx)
+		if (dev_priv->perf.exclusive_stream->ctx)
 			ctx_id = dev_priv->perf.oa.specific_ctx_id;
 
-		if (dev_priv->perf.oa.exclusive_stream->ctx == NULL || ctx_id) {
+		if (dev_priv->perf.exclusive_stream->ctx == NULL || ctx_id) {
 			bool periodic = dev_priv->perf.oa.periodic;
 			u32 period_exponent = dev_priv->perf.oa.period_exponent;
 			u32 report_format = dev_priv->perf.oa.oa_buffer.format;
@@ -2103,14 +2275,15 @@  static void gen8_oa_enable(struct drm_i915_private *dev_priv)
 				   GEN8_OA_COUNTER_ENABLE);
 }
 
-static void i915_oa_stream_enable(struct i915_perf_stream *stream)
+static void i915_ring_stream_enable(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	dev_priv->perf.oa.ops.oa_enable(dev_priv);
+	if (stream->sample_flags & SAMPLE_OA_REPORT)
+		dev_priv->perf.oa.ops.oa_enable(dev_priv);
 
-	if (dev_priv->perf.oa.periodic)
-		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
+	if (stream->cs_mode || dev_priv->perf.oa.periodic)
+		hrtimer_start(&dev_priv->perf.poll_check_timer,
 			      ns_to_ktime(POLL_PERIOD),
 			      HRTIMER_MODE_REL_PINNED);
 }
@@ -2125,19 +2298,20 @@  static void gen8_oa_disable(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN8_OACONTROL, 0);
 }
 
-static void i915_oa_stream_disable(struct i915_perf_stream *stream)
+static void i915_ring_stream_disable(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	if (dev_priv->perf.oa.periodic)
-		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
+	if (stream->cs_mode || dev_priv->perf.oa.periodic)
+		hrtimer_cancel(&dev_priv->perf.poll_check_timer);
 
 	if (stream->cs_mode) {
-		i915_oa_rcs_wait_gpu(dev_priv);
-		i915_oa_rcs_free_requests(dev_priv);
+		i915_ring_stream_wait_gpu(dev_priv, stream->engine);
+		i915_ring_stream_free_requests(dev_priv, stream->engine);
 	}
 
-	dev_priv->perf.oa.ops.oa_disable(dev_priv);
+	if (stream->sample_flags & SAMPLE_OA_REPORT)
+		dev_priv->perf.oa.ops.oa_disable(dev_priv);
 }
 
 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
@@ -2147,16 +2321,16 @@  static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
 }
 
 static const struct i915_perf_stream_ops i915_oa_stream_ops = {
-	.destroy = i915_oa_stream_destroy,
-	.enable = i915_oa_stream_enable,
-	.disable = i915_oa_stream_disable,
-	.wait_unlocked = i915_oa_wait_unlocked,
-	.poll_wait = i915_oa_poll_wait,
-	.read = i915_oa_read,
-	.command_stream_hook = i915_perf_command_stream_hook_oa,
+	.destroy = i915_ring_stream_destroy,
+	.enable = i915_ring_stream_enable,
+	.disable = i915_ring_stream_disable,
+	.wait_unlocked = i915_ring_stream_wait_unlocked,
+	.poll_wait = i915_ring_stream_poll_wait,
+	.read = i915_ring_stream_read,
+	.command_stream_hook = i915_ring_stream_cs_hook,
 };
 
-static int i915_oa_stream_init(struct i915_perf_stream *stream,
+static int i915_ring_stream_init(struct i915_perf_stream *stream,
 			       struct drm_i915_perf_open_param *param,
 			       struct perf_open_properties *props)
 {
@@ -2165,15 +2339,16 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 						      SAMPLE_OA_SOURCE_INFO);
 	bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
 						      SAMPLE_TAG);
-	bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
+	bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
+							SAMPLE_TS);
 	int ret;
 
 	/* To avoid the complexity of having to accurately filter
 	 * counter reports and marshal to the appropriate client
 	 * we currently only allow exclusive access
 	 */
-	if (dev_priv->perf.oa.exclusive_stream) {
-		DRM_ERROR("OA unit already in use\n");
+	if (dev_priv->perf.exclusive_stream) {
+		DRM_ERROR("Stream already in use\n");
 		return -EBUSY;
 	}
 
@@ -2224,6 +2399,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			return -EINVAL;
 		}
 		stream->engine= RCS;
+		stream->using_oa = true;
 
 		format_size =
 			dev_priv->perf.oa.oa_formats[props->oa_format].size;
@@ -2316,8 +2492,22 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			require_cs_mode = true;
 	}
 
+	if (props->sample_flags & SAMPLE_TS) {
+		stream->sample_flags |= SAMPLE_TS;
+		stream->sample_size += I915_PERF_TS_SAMPLE_SIZE;
+
+		/*
+		 * NB: it's meaningful to request SAMPLE_TS with just CS
+		 * mode or periodic OA mode sampling but we don't allow
+		 * SAMPLE_TS without either mode
+		 */
+		if (!require_oa_unit)
+			require_cs_mode = true;
+	}
+
 	if (require_cs_mode && !props->cs_mode) {
-		DRM_ERROR("PID or TAG sampling require a ring to be specified");
+		DRM_ERROR(
+			"PID, TAG or TS sampling require a ring to be specified");
 		ret = -EINVAL;
 		goto cs_error;
 	}
@@ -2332,11 +2522,11 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 		/*
 		 * The only time we should allow enabling CS mode if it's not
-		 * strictly required, is if SAMPLE_CTX_ID has been requested
-		 * as it's usable with periodic OA or CS sampling.
+		 * strictly required, is if SAMPLE_CTX_ID or SAMPLE_TS has been
+		 * requested, as they're usable with periodic OA or CS sampling.
 		 */
 		if (!require_cs_mode &&
-		    !(props->sample_flags & SAMPLE_CTX_ID)) {
+		    !(props->sample_flags & (SAMPLE_CTX_ID|SAMPLE_TS))) {
 			DRM_ERROR(
 				"Ring given without requesting any CS specific property");
 			ret = -EINVAL;
@@ -2344,6 +2534,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 
 		stream->cs_mode = true;
+		stream->engine = props->engine;
 
 		if (props->sample_flags & SAMPLE_PID) {
 			stream->sample_flags |= SAMPLE_PID;
@@ -2355,14 +2546,14 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			stream->sample_size += 4;
 		}
 
-		ret = alloc_command_stream_buf(dev_priv);
+		ret = alloc_command_stream_buf(dev_priv, stream->engine);
 		if (ret)
 			goto cs_error;
 	}
 
 	stream->ops = &i915_oa_stream_ops;
 
-	dev_priv->perf.oa.exclusive_stream = stream;
+	dev_priv->perf.exclusive_stream = stream;
 
 	return 0;
 
@@ -2398,8 +2589,8 @@  i915_oa_legacy_context_pin_notify_locked(struct drm_i915_private *dev_priv,
 	if (dev_priv->perf.oa.ops.update_hw_ctx_id_locked == NULL)
 		return;
 
-	if (dev_priv->perf.oa.exclusive_stream &&
-	    dev_priv->perf.oa.exclusive_stream->ctx == ctx) {
+	if (dev_priv->perf.exclusive_stream &&
+	    dev_priv->perf.exclusive_stream->ctx == ctx) {
 		struct i915_vma *vma = ctx->engine[RCS].state;
 		u32 ctx_id = i915_ggtt_offset(vma);
 
@@ -2468,8 +2659,8 @@  void i915_oa_legacy_ctx_switch_notify(struct drm_i915_gem_request *req)
 	if (dev_priv->perf.oa.ops.legacy_ctx_switch_unlocked == NULL)
 		return;
 
-	if (dev_priv->perf.oa.exclusive_stream &&
-	    dev_priv->perf.oa.exclusive_stream->state !=
+	if (dev_priv->perf.exclusive_stream &&
+	    dev_priv->perf.exclusive_stream->state !=
 				I915_PERF_STREAM_DISABLED) {
 
 		/* XXX: We don't take a lock here and this may run
@@ -2626,21 +2817,19 @@  static ssize_t i915_perf_read(struct file *file,
 	}
 
 	if (ret >= 0) {
-		/* Maybe make ->pollin per-stream state if we support multiple
-		 * concurrent streams in the future. */
-		atomic_set(&dev_priv->perf.oa.pollin, false);
+		atomic_set(&dev_priv->perf.pollin[stream->engine], false);
 	}
 
 	return ret;
 }
 
-static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
+static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
 {
 	struct i915_perf_stream *stream;
 
 	struct drm_i915_private *dev_priv =
 		container_of(hrtimer, typeof(*dev_priv),
-			     perf.oa.poll_check_timer);
+			     perf.poll_check_timer);
 
 	/* No need to protect the streams list here, since the hrtimer is
 	 * disabled before the stream is removed from list, and currently a
@@ -2649,8 +2838,9 @@  static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
 	 */
 	list_for_each_entry(stream, &dev_priv->perf.streams, link) {
 		if (stream_have_data__unlocked(stream)) {
-			atomic_set(&dev_priv->perf.oa.pollin, true);
-			wake_up(&dev_priv->perf.oa.poll_wq);
+			atomic_set(&dev_priv->perf.pollin[stream->engine],
+					true);
+			wake_up(&dev_priv->perf.poll_wq[stream->engine]);
 		}
 	}
 
@@ -2674,7 +2864,7 @@  static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
 	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
 	 * samples to read.
 	 */
-	if (atomic_read(&dev_priv->perf.oa.pollin))
+	if (atomic_read(&dev_priv->perf.pollin[stream->engine]))
 		events |= POLLIN;
 
 	return events;
@@ -2862,7 +3052,7 @@  int i915_perf_open_ioctl_locked(struct drm_device *dev,
 	stream->dev_priv = dev_priv;
 	stream->ctx = specific_ctx;
 
-	ret = i915_oa_stream_init(stream, param, props);
+	ret = i915_ring_stream_init(stream, param, props);
 	if (ret)
 		goto err_alloc;
 
@@ -3007,21 +3197,12 @@  static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_ENGINE: {
 				unsigned int user_ring_id =
 					value & I915_EXEC_RING_MASK;
-				enum intel_engine_id engine;
 
 				if (user_ring_id > I915_USER_RINGS)
 					return -EINVAL;
 
-				/* XXX: Currently only RCS is supported.
-				 * Remove this check when support for other
-				 * engines is added
-				 */
-				engine = user_ring_map[user_ring_id];
-				if (engine != RCS)
-					return -EINVAL;
-
 				props->cs_mode = true;
-				props->engine = engine;
+				props->engine = user_ring_map[user_ring_id];
 			}
 			break;
 		case DRM_I915_PERF_PROP_SAMPLE_CTX_ID:
@@ -3033,6 +3214,9 @@  static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_SAMPLE_TAG:
 			props->sample_flags |= SAMPLE_TAG;
 			break;
+		case DRM_I915_PERF_PROP_SAMPLE_TS:
+			props->sample_flags |= SAMPLE_TS;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			BUG();
 		}
@@ -3180,22 +3364,27 @@  static struct ctl_table dev_root[] = {
 
 void i915_perf_init(struct drm_i915_private *dev_priv)
 {
+	int i;
+
 	if (!(IS_HASWELL(dev_priv) ||
 	      IS_BROADWELL(dev_priv) || IS_CHERRYVIEW(dev_priv) ||
 	      IS_SKYLAKE(dev_priv)))
 		return;
 
-	hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
+	hrtimer_init(&dev_priv->perf.poll_check_timer,
 		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
-	init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
+	dev_priv->perf.poll_check_timer.function = poll_check_timer_cb;
+
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		INIT_LIST_HEAD(&dev_priv->perf.node_list[i]);
+		spin_lock_init(&dev_priv->perf.node_list_lock[i]);
+		init_waitqueue_head(&dev_priv->perf.poll_wq[i]);
+	}
 
 	INIT_LIST_HEAD(&dev_priv->perf.streams);
-	INIT_LIST_HEAD(&dev_priv->perf.node_list);
 	mutex_init(&dev_priv->perf.lock);
 	mutex_init(&dev_priv->perf.streams_lock);
 	spin_lock_init(&dev_priv->perf.hook_lock);
-	spin_lock_init(&dev_priv->perf.node_list_lock);
 
 	if (IS_HASWELL(dev_priv)) {
 		dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ef21d17..89a7078 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -413,6 +413,7 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
 #define   MI_INVALIDATE_TLB		(1<<18)
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_FLUSH_DW_OP_STAMP		(3<<14)
 #define   MI_FLUSH_DW_OP_MASK		(3<<14)
 #define   MI_FLUSH_DW_NOTIFY		(1<<8)
 #define   MI_INVALIDATE_BSD		(1<<7)
@@ -496,6 +497,7 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_TIMESTAMP_WRITE			(3<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
 #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 15921c7..0dcc325 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1324,6 +1324,12 @@  enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_TAG,
 
+	/**
+	 * Setting this property to 1 requests the inclusion of a timestamp
+	 * in the perf sample data.
+	 */
+	DRM_I915_PERF_PROP_SAMPLE_TS,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1392,6 +1398,7 @@  enum drm_i915_perf_record_type {
 	 *     { u32 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
 	 *     { u32 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
 	 *     { u32 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
+	 *     { u64 timestamp; } && DRM_I915_PERF_PROP_SAMPLE_TS
 	 *     { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
 	 * };
 	 */
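
For reference, a rough sketch (again, not part of the patch) of walking
a buffer returned by read() on the stream fd with the layout documented
above, assuming only DRM_I915_PERF_PROP_SAMPLE_TS was requested so that
the u64 timestamp immediately follows the record header:

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>
  #include <drm/i915_drm.h>

  static void dump_timestamps(const uint8_t *buf, size_t len)
  {
  	const uint8_t *p = buf, *end = buf + len;

  	while (p + sizeof(struct drm_i915_perf_record_header) <= end) {
  		const struct drm_i915_perf_record_header *hdr =
  			(const void *)p;

  		if (!hdr->size)	/* guard against a malformed record */
  			break;
  		if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE) {
  			uint64_t ts;

  			/* memcpy avoids an unaligned u64 load */
  			memcpy(&ts, p + sizeof(*hdr), sizeof(ts));
  			printf("ts: %llu\n", (unsigned long long)ts);
  		}
  		/* hdr->size covers the header plus the sample payload */
  		p += hdr->size;
  	}
  }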