From patchwork Sat Feb 13 20:33:10 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 79235 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o1DKXIjL031096 for ; Sat, 13 Feb 2010 20:33:55 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id AEB409E9AF; Sat, 13 Feb 2010 12:33:16 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from azsmga101.ch.intel.com (mga07.intel.com [143.182.124.22]) by gabe.freedesktop.org (Postfix) with ESMTP id CDA169E980 for ; Sat, 13 Feb 2010 12:33:14 -0800 (PST) Received: from azsmga001.ch.intel.com ([10.2.17.19]) by azsmga101.ch.intel.com with ESMTP; 13 Feb 2010 12:33:13 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.49,468,1262592000"; d="scan'208";a="243850998" Received: from unknown (HELO localhost.localdomain) ([10.255.17.232]) by azsmga001.ch.intel.com with ESMTP; 13 Feb 2010 12:33:12 -0800 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sat, 13 Feb 2010 20:33:10 +0000 Message-Id: <1266093190-18155-1-git-send-email-chris@chris-wilson.co.uk> X-Mailer: git-send-email 1.6.6.2 Subject: [Intel-gfx] [PATCH] drm/i915: Record batch buffer following GPU error X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.9 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces@lists.freedesktop.org Errors-To: intel-gfx-bounces@lists.freedesktop.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Sat, 13 Feb 2010 20:33:55 +0000 (UTC) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 55340de..9854c76 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -350,6 +350,36 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data) return 0; } +static const char *pin_flag(int pinned) +{ + if (pinned > 0) + return " P"; + else if (pinned < 0) + return " p"; + else + return ""; +} + +static const char *tiling_flag(int tiling) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return ""; + case I915_TILING_X: return " X"; + case I915_TILING_Y: return " Y"; + } +} + +static const char *dirty_flag(int dirty) +{ + return dirty ? " dirty" : ""; +} + +static const char *purgeable_flag(int purgeable) +{ + return purgeable ? " purgeable" : ""; +} + static int i915_error_state(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *) m->private; @@ -357,6 +387,7 @@ static int i915_error_state(struct seq_file *m, void *unused) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_error_state *error; unsigned long flags; + int i; spin_lock_irqsave(&dev_priv->error_lock, flags); if (!dev_priv->first_error) { @@ -368,6 +399,7 @@ static int i915_error_state(struct seq_file *m, void *unused) seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); + seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device); seq_printf(m, "EIR: 0x%08x\n", error->eir); seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er); seq_printf(m, " INSTPM: 0x%08x\n", error->instpm); @@ -379,6 +411,57 @@ static int i915_error_state(struct seq_file *m, void *unused) seq_printf(m, " INSTPS: 0x%08x\n", error->instps); seq_printf(m, " INSTDONE1: 0x%08x\n", error->instdone1); } + seq_printf(m, "seqno: 0x%08x\n", error->seqno); + + if (error->active_bo_count) { + int i; + + seq_printf(m, "Buffers [%d]:\n", error->active_bo_count); + + for (i = 0; i < error->active_bo_count; i++) { + seq_printf(m, " %08x %8zd %08x %08x %08x%s%s%s%s", + error->active_bo[i].gtt_offset, + error->active_bo[i].size, + error->active_bo[i].read_domains, + error->active_bo[i].write_domain, + error->active_bo[i].seqno, + pin_flag(error->active_bo[i].pinned), + tiling_flag(error->active_bo[i].tiling), + dirty_flag(error->active_bo[i].dirty), + purgeable_flag(error->active_bo[i].purgeable)); + + if (error->active_bo[i].name) + seq_printf(m, " (name: %d)", error->active_bo[i].name); + if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE) + seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg); + + seq_printf(m, "\n"); + } + } + + for (i = 0; i < ARRAY_SIZE(error->batchbuffer); i++) { + if (error->batchbuffer[i] && + i915_gem_object_get_pages(error->batchbuffer[i], 0) == 0) { + struct drm_gem_object *obj = error->batchbuffer[i]; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + seq_printf(m, "--- gtt_offset = 0x%08x\n", obj_priv->gtt_offset); + i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE); + + i915_gem_object_put_pages(obj); + } + } + + if (error->ringbuffer && + i915_gem_object_get_pages(error->ringbuffer, 0) == 0) { + struct drm_gem_object *obj = error->ringbuffer; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + seq_printf(m, "--- ringbuffer = 0x%08x\n", obj_priv->gtt_offset); + i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE); + + i915_gem_object_put_pages(obj); + } out: spin_unlock_irqrestore(&dev_priv->error_lock, flags); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 3afe361..6228dad 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1642,6 +1642,8 @@ int i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + i915_destroy_error_state(dev); + destroy_workqueue(dev_priv->wq); del_timer_sync(&dev_priv->hangcheck_timer); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2e493ec..c8a1143 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -138,6 +138,9 @@ struct sdvo_device_mapping { }; struct drm_i915_error_state { + struct work_struct work; + struct drm_device *dev; + u32 eir; u32 pgtbl_er; u32 pipeastat; @@ -150,7 +153,24 @@ struct drm_i915_error_state { u32 instps; u32 instdone1; u32 seqno; + u64 bbaddr; struct timeval time; + struct drm_gem_object *ringbuffer; + struct drm_gem_object *batchbuffer[2]; + struct drm_i915_error_buffer { + size_t size; + u32 name; + u32 seqno; + u32 gtt_offset; + u32 read_domains; + u32 write_domain; + u32 fence_reg; + s32 pinned:2; + u32 tiling:2; + u32 dirty:1; + u32 purgeable:1; + } *active_bo; + u32 active_bo_count; }; struct drm_i915_display_funcs { @@ -768,6 +788,7 @@ extern int i965_reset(struct drm_device *dev, u8 flags); /* i915_irq.c */ void i915_hangcheck_elapsed(unsigned long data); +void i915_destroy_error_state(struct drm_device *dev); extern int i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int i915_irq_wait(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8b35f5e..3f7e85a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -432,6 +432,121 @@ static void i915_error_work_func(struct work_struct *work) } } +static struct drm_gem_object * +clone_obj(struct drm_device *dev, + struct drm_gem_object *src) +{ + struct drm_gem_object *dst; + struct drm_i915_gem_object *src_priv, *dst_priv; + int page, page_count; + + dst = drm_gem_object_alloc(dev, src->size); + if (dst == NULL) + return NULL; + + if (i915_gem_object_get_pages(src, 0)) + goto error_unref; + + if (i915_gem_object_get_pages(dst, 0)) + goto error_src; + + src_priv = src->driver_private; + dst_priv = dst->driver_private; + + page_count = src->size / PAGE_SIZE; + for (page = 0; page < page_count; page++) { + void *s = kmap_atomic(src_priv->pages[page], KM_USER0); + void *d = kmap_atomic(dst_priv->pages[page], KM_USER1); + memcpy(d, s, PAGE_SIZE); + kunmap_atomic(d, KM_USER1); + kunmap_atomic(s, KM_USER0); + } + + i915_gem_object_put_pages(dst); + i915_gem_object_put_pages(src); + + /* We lie here, but it makes later analysis easier. */ + dst_priv->gtt_offset = src_priv->gtt_offset; + + return dst; + +error_src: + i915_gem_object_put_pages(src); +error_unref: + drm_gem_object_unreference(dst); + return NULL; +} + +static void i915_error_state_free(struct drm_device *dev, + struct drm_i915_error_state *error) +{ + if (error->batchbuffer[0] || error->batchbuffer[1] || error->ringbuffer) { + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(error->ringbuffer); + drm_gem_object_unreference(error->batchbuffer[0]); + drm_gem_object_unreference(error->batchbuffer[1]); + mutex_unlock(&dev->struct_mutex); + } + + kfree(error->active_bo); + kfree(error); +} + +static u32 +i915_get_bbaddr(struct drm_device *dev, u32 *ring) +{ + u32 cmd; + + if (IS_I830(dev) || IS_845G(dev)) + cmd = MI_BATCH_BUFFER; + else if (IS_I965G(dev)) + cmd = (MI_BATCH_BUFFER_START | (2 << 6) | + MI_BATCH_NON_SECURE_I965); + else + cmd = (MI_BATCH_BUFFER_START | (2 << 6)); + + return ring[0] == cmd ? ring[1] : 0; +} + +static u32 +i915_ringbuffer_last_batch(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 head, bbaddr; + u32 *ring; + + /* Locate the current position in the ringbuffer and walk back + * to find the most recently dispatched batch buffer. + */ + bbaddr = 0; + head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + ring = (u32 *)(dev_priv->ring.virtual_start + head); + + while (--ring > (u32 *)dev_priv->ring.virtual_start) { + bbaddr = i915_get_bbaddr(dev, ring); + if (bbaddr) + break; + } + + if (bbaddr == 0) { + ring = (u32 *)(dev_priv->ring.virtual_start + dev_priv->ring.Size); + while (--ring > (u32 *)dev_priv->ring.virtual_start) { + bbaddr = i915_get_bbaddr(dev, ring); + if (bbaddr) + break; + } + } + + return bbaddr; +} + +static void i915_error_state_free_work_fn(struct work_struct *work) +{ + struct drm_i915_error_state *error = + container_of(work, struct drm_i915_error_state, work); + i915_error_state_free(error->dev, error); +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -444,19 +559,26 @@ static void i915_error_work_func(struct work_struct *work) static void i915_capture_error_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv; struct drm_i915_error_state *error; unsigned long flags; + u32 bbaddr; + int count; spin_lock_irqsave(&dev_priv->error_lock, flags); - if (dev_priv->first_error) - goto out; + error = dev_priv->first_error; + spin_unlock_irqrestore(&dev_priv->error_lock, flags); + if (error) + return; error = kmalloc(sizeof(*error), GFP_ATOMIC); if (!error) { - DRM_DEBUG_DRIVER("out ot memory, not capturing error state\n"); - goto out; + DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); + return; } + error->dev = dev; + error->seqno = i915_get_gem_seqno(dev); error->eir = I915_READ(EIR); error->pgtbl_er = I915_READ(PGTBL_ER); error->pipeastat = I915_READ(PIPEASTAT); @@ -467,6 +589,7 @@ static void i915_capture_error_state(struct drm_device *dev) error->ipehr = I915_READ(IPEHR); error->instdone = I915_READ(INSTDONE); error->acthd = I915_READ(ACTHD); + error->bbaddr = 0; } else { error->ipeir = I915_READ(IPEIR_I965); error->ipehr = I915_READ(IPEHR_I965); @@ -474,14 +597,111 @@ static void i915_capture_error_state(struct drm_device *dev) error->instps = I915_READ(INSTPS); error->instdone1 = I915_READ(INSTDONE1); error->acthd = I915_READ(ACTHD_I965); + error->bbaddr = I915_READ64(BB_ADDR); } - do_gettimeofday(&error->time); + bbaddr = i915_ringbuffer_last_batch(dev); + + /* Grab the current batchbuffer, most likely to have crashed. */ + error->batchbuffer[0] = NULL; + error->batchbuffer[1] = NULL; + count = 0; + spin_lock_irqsave(&dev_priv->mm.active_list_lock, flags); + list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) { + struct drm_gem_object *obj = obj_priv->obj; + + if (error->batchbuffer[0] == NULL && + bbaddr >= obj_priv->gtt_offset && + bbaddr < obj_priv->gtt_offset + obj->size) + error->batchbuffer[0] = obj; + + if (error->batchbuffer[1] == NULL && + error->acthd >= obj_priv->gtt_offset && + error->acthd < obj_priv->gtt_offset + obj->size && + error->batchbuffer[0] != obj) + error->batchbuffer[1] = obj; + + count++; + } + spin_unlock_irqrestore(&dev_priv->mm.active_list_lock, flags); + + /* We need to copy these to an anonymous buffer as the simplest + * method to avoid being overwritten by userpace. + */ + if (error->batchbuffer[0]) + error->batchbuffer[0] = clone_obj(dev, error->batchbuffer[0]); + if (error->batchbuffer[1]) + error->batchbuffer[1] = clone_obj(dev, error->batchbuffer[1]); + + /* Record the ringbuffer */ + error->ringbuffer = clone_obj(dev, dev_priv->ring.ring_obj); + + /* Record buffers on the active list. */ + error->active_bo = NULL; + error->active_bo_count = 0; + + if (count) + error->active_bo = kmalloc(sizeof(*error->active_bo)*count, GFP_ATOMIC); + + if (error->active_bo) { + int i; + + i = 0; + spin_lock_irqsave(&dev_priv->mm.active_list_lock, flags); + list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) { + struct drm_gem_object *obj = obj_priv->obj; + + error->active_bo[i].size = obj->size; + error->active_bo[i].name = obj->name; + error->active_bo[i].seqno = obj_priv->last_rendering_seqno; + error->active_bo[i].gtt_offset = obj_priv->gtt_offset; + error->active_bo[i].read_domains = obj->read_domains; + error->active_bo[i].write_domain = obj->write_domain; + error->active_bo[i].fence_reg = obj_priv->fence_reg; + error->active_bo[i].pinned = 0; + if (obj_priv->pin_count > 0) + error->active_bo[i].pinned = 1; + if (obj_priv->user_pin_count > 0) + error->active_bo[i].pinned = -1; + error->active_bo[i].tiling = obj_priv->tiling_mode; + error->active_bo[i].dirty = obj_priv->dirty; + error->active_bo[i].purgeable = obj_priv->madv != I915_MADV_WILLNEED; + + if (++i == count) + break; + } + spin_unlock_irqrestore(&dev_priv->mm.active_list_lock, flags); + error->active_bo_count = i; + } - dev_priv->first_error = error; + do_gettimeofday(&error->time); -out: + spin_lock_irqsave(&dev_priv->error_lock, flags); + if (dev_priv->first_error == NULL) { + dev_priv->first_error = error; + error = NULL; + } spin_unlock_irqrestore(&dev_priv->error_lock, flags); + + if (error) { + /* as we are currently in IRQ context we need to free from a workqueue */ + INIT_WORK(&error->work, i915_error_state_free_work_fn); + queue_work(dev_priv->wq, &error->work); + } +} + +void i915_destroy_error_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_error_state *error; + + spin_lock(&dev_priv->error_lock); + error = dev_priv->first_error; + dev_priv->first_error = NULL; + spin_unlock(&dev_priv->error_lock); + + if (error) + i915_error_state_free(dev, error); } /** diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3ecb00b..2ad81bd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -325,6 +325,7 @@ #define CM0_COLOR_EVICT_DISABLE (1<<3) #define CM0_DEPTH_WRITE_DISABLE (1<<1) #define CM0_RC_OP_FLUSH_DISABLE (1<<0) +#define BB_ADDR 0x02140 /* 8 bytes */ #define GFX_FLSH_CNTL 0x02170 /* 915+ only */