diff mbox

[1/3] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V4

Message ID 1268145912-26276-2-git-send-email-jglisse@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jerome Glisse March 9, 2010, 2:45 p.m. UTC
None
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index bd2e7aa..8988df7 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -490,6 +490,12 @@  int evergreen_mc_init(struct radeon_device *rdev)
 	return 0;
 }
 
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
+{
+	/* FIXME: implement for evergreen */
+	return false;
+}
+
 int evergreen_gpu_reset(struct radeon_device *rdev)
 {
 	/* FIXME: implement for evergreen */
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 91eb762..e4487f3 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1772,6 +1772,92 @@  int r100_rb2d_reset(struct radeon_device *rdev)
 	return -1;
 }
 
+void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
+{
+	lockup->last_cp_rptr = cp->rptr;
+	lockup->last_jiffies = jiffies;
+}
+
+/**
+ * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
+ * @rdev:	radeon device structure
+ * @lockup:	r100_gpu_lockup structure holding CP lockup tracking informations
+ * @cp:		radeon_cp structure holding CP information
+ *
+ * We don't need to initialize the lockup tracking information as we will either
+ * have CP rptr to a different value of jiffies wrap around which will force
+ * initialization of the lockup tracking informations.
+ *
+ * A possible false positivie is if we get call after while and last_cp_rptr ==
+ * the current CP rptr, even if it's unlikely it might happen. To avoid this
+ * if the elapsed time since last call is bigger than 2 second than we return
+ * false and update the tracking information. Due to this the caller must call
+ * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be reported
+ * the fencing code should be cautious about that.
+ *
+ * Caller should write to the ring to force CP to do something so we don't get
+ * false positive when CP is just gived nothing to do.
+ *
+ **/
+bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
+{
+	unsigned long cjiffies, elapsed;
+
+	cjiffies = jiffies;
+	if (!time_after(cjiffies, lockup->last_jiffies)) {
+		/* likely a wrap around */
+		lockup->last_cp_rptr = cp->rptr;
+		lockup->last_jiffies = jiffies;
+		return false;
+	}
+	if (cp->rptr != lockup->last_cp_rptr) {
+		/* CP is still working no lockup */
+		lockup->last_cp_rptr = cp->rptr;
+		lockup->last_jiffies = jiffies;
+		return false;
+	}
+	elapsed = jiffies_to_msecs(cjiffies - lockup->last_jiffies);
+	if (elapsed >= 3000) {
+		/* very likely the improbable case where current
+		 * rptr is equal to last recorded, a while ago, rptr
+		 * this is more likely a false positive update tracking
+		 * information which should force us to be recall at
+		 * latter point
+		 */
+		lockup->last_cp_rptr = cp->rptr;
+		lockup->last_jiffies = jiffies;
+		return false;
+	}
+	if (elapsed >= 1000) {
+		dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed);
+		return true;
+	}
+	/* give a chance to the GPU ... */
+	return false;
+}
+
+bool r100_gpu_is_lockup(struct radeon_device *rdev)
+{
+	u32 rbbm_status;
+	int r;
+
+	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
+	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
+		r100_gpu_lockup_update(&rdev->config.r100.lockup, &rdev->cp);
+		return false;
+	}
+	/* force CP activities */
+	r = radeon_ring_lock(rdev, 2);
+	if (!r) {
+		/* PACKET2 NOP */
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_unlock_commit(rdev);
+	}
+	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp);
+}
+
 int r100_gpu_reset(struct radeon_device *rdev)
 {
 	uint32_t status;
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 4cef90c..23346f6 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -26,8 +26,9 @@ 
  *          Jerome Glisse
  */
 #include <linux/seq_file.h>
-#include "drmP.h"
-#include "drm.h"
+#include <drm/drmP.h>
+#include <drm/drm.h>
+#include <drm/drm_crtc_helper.h>
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "radeon_drm.h"
@@ -424,12 +425,35 @@  int r300_ga_reset(struct radeon_device *rdev)
 	return -1;
 }
 
+bool r300_gpu_is_lockup(struct radeon_device *rdev)
+{
+	u32 rbbm_status;
+	int r;
+
+	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
+	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
+		r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp);
+		return false;
+	}
+	/* force CP activities */
+	r = radeon_ring_lock(rdev, 2);
+	if (!r) {
+		/* PACKET2 NOP */
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_unlock_commit(rdev);
+	}
+	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp);
+}
+
 int r300_gpu_reset(struct radeon_device *rdev)
 {
 	uint32_t status;
 
 	/* reset order likely matter */
 	status = RREG32(RADEON_RBBM_STATUS);
+	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
 	/* reset HDP */
 	r100_hdp_reset(rdev);
 	/* reset rb2d */
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c522901..4520685 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -788,7 +788,7 @@  int r600_gpu_soft_reset(struct radeon_device *rdev)
 		dev_info(rdev->dev, "  R_008020_GRBM_SOFT_RESET=0x%08X\n", tmp);
 		WREG32(R_008020_GRBM_SOFT_RESET, tmp);
 		(void)RREG32(R_008020_GRBM_SOFT_RESET);
-		udelay(50);
+		mdelay(1);
 		WREG32(R_008020_GRBM_SOFT_RESET, 0);
 		(void)RREG32(R_008020_GRBM_SOFT_RESET);
 	}
@@ -828,16 +828,16 @@  int r600_gpu_soft_reset(struct radeon_device *rdev)
 	dev_info(rdev->dev, "  R_000E60_SRBM_SOFT_RESET=0x%08X\n", srbm_reset);
 	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
-	udelay(50);
+	mdelay(1);
 	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
 	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
-	udelay(50);
+	mdelay(1);
 	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
 	/* Wait a little for things to settle down */
-	udelay(50);
+	mdelay(1);
 	dev_info(rdev->dev, "  R_008010_GRBM_STATUS=0x%08X\n",
 		RREG32(R_008010_GRBM_STATUS));
 	dev_info(rdev->dev, "  R_008014_GRBM_STATUS2=0x%08X\n",
@@ -852,6 +852,32 @@  int r600_gpu_soft_reset(struct radeon_device *rdev)
 	return 0;
 }
 
+bool r600_gpu_is_lockup(struct radeon_device *rdev)
+{
+	u32 srbm_status;
+	u32 grbm_status;
+	u32 grbm_status2;
+	int r;
+
+	srbm_status = RREG32(R_000E50_SRBM_STATUS);
+	grbm_status = RREG32(R_008010_GRBM_STATUS);
+	grbm_status2 = RREG32(R_008014_GRBM_STATUS2);
+	if (!G_008010_GUI_ACTIVE(grbm_status)) {
+		r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp);
+		return false;
+	}
+	/* force CP activities */
+	r = radeon_ring_lock(rdev, 2);
+	if (!r) {
+		/* PACKET2 NOP */
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_unlock_commit(rdev);
+	}
+	rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
+	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp);
+}
+
 int r600_gpu_reset(struct radeon_device *rdev)
 {
 	return r600_gpu_soft_reset(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 829e26e..e97a118 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -97,6 +97,7 @@  extern int radeon_audio;
  * symbol;
  */
 #define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */
+#define RADEON_FENCE_JIFFIES_TIMEOUT	(HZ / 2)
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE		16
 #define RADEON_DEBUGFS_MAX_NUM_FILES	32
@@ -179,7 +180,8 @@  struct radeon_fence_driver {
 	uint32_t			scratch_reg;
 	atomic_t			seq;
 	uint32_t			last_seq;
-	unsigned long			count_timeout;
+	unsigned long			last_jiffies;
+	unsigned long			last_timeout;
 	wait_queue_head_t		queue;
 	rwlock_t			lock;
 	struct list_head		created;
@@ -194,7 +196,6 @@  struct radeon_fence {
 	struct list_head		list;
 	/* protected by radeon_fence.lock */
 	uint32_t			seq;
-	unsigned long			timeout;
 	bool				emited;
 	bool				signaled;
 };
@@ -742,6 +743,7 @@  struct radeon_asic {
 	int (*resume)(struct radeon_device *rdev);
 	int (*suspend)(struct radeon_device *rdev);
 	void (*vga_set_state)(struct radeon_device *rdev, bool state);
+	bool (*gpu_is_lockup)(struct radeon_device *rdev);
 	int (*gpu_reset)(struct radeon_device *rdev);
 	void (*gart_tlb_flush)(struct radeon_device *rdev);
 	int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr);
@@ -800,59 +802,68 @@  struct radeon_asic {
 /*
  * Asic structures
  */
+struct r100_gpu_lockup {
+	unsigned long	last_jiffies;
+	u32		last_cp_rptr;
+};
+
 struct r100_asic {
-	const unsigned	*reg_safe_bm;
-	unsigned	reg_safe_bm_size;
-	u32		hdp_cntl;
+	const unsigned		*reg_safe_bm;
+	unsigned		reg_safe_bm_size;
+	u32			hdp_cntl;
+	struct r100_gpu_lockup	lockup;
 };
 
 struct r300_asic {
-	const unsigned	*reg_safe_bm;
-	unsigned	reg_safe_bm_size;
-	u32		resync_scratch;
-	u32		hdp_cntl;
+	const unsigned		*reg_safe_bm;
+	unsigned		reg_safe_bm_size;
+	u32			resync_scratch;
+	u32			hdp_cntl;
+	struct r100_gpu_lockup	lockup;
 };
 
 struct r600_asic {
-	unsigned max_pipes;
-	unsigned max_tile_pipes;
-	unsigned max_simds;
-	unsigned max_backends;
-	unsigned max_gprs;
-	unsigned max_threads;
-	unsigned max_stack_entries;
-	unsigned max_hw_contexts;
-	unsigned max_gs_threads;
-	unsigned sx_max_export_size;
-	unsigned sx_max_export_pos_size;
-	unsigned sx_max_export_smx_size;
-	unsigned sq_num_cf_insts;
-	unsigned tiling_nbanks;
-	unsigned tiling_npipes;
-	unsigned tiling_group_size;
+	unsigned		max_pipes;
+	unsigned		max_tile_pipes;
+	unsigned		max_simds;
+	unsigned		max_backends;
+	unsigned		max_gprs;
+	unsigned		max_threads;
+	unsigned		max_stack_entries;
+	unsigned		max_hw_contexts;
+	unsigned		max_gs_threads;
+	unsigned		sx_max_export_size;
+	unsigned		sx_max_export_pos_size;
+	unsigned		sx_max_export_smx_size;
+	unsigned		sq_num_cf_insts;
+	unsigned		tiling_nbanks;
+	unsigned		tiling_npipes;
+	unsigned		tiling_group_size;
+	struct r100_gpu_lockup	lockup;
 };
 
 struct rv770_asic {
-	unsigned max_pipes;
-	unsigned max_tile_pipes;
-	unsigned max_simds;
-	unsigned max_backends;
-	unsigned max_gprs;
-	unsigned max_threads;
-	unsigned max_stack_entries;
-	unsigned max_hw_contexts;
-	unsigned max_gs_threads;
-	unsigned sx_max_export_size;
-	unsigned sx_max_export_pos_size;
-	unsigned sx_max_export_smx_size;
-	unsigned sq_num_cf_insts;
-	unsigned sx_num_of_sets;
-	unsigned sc_prim_fifo_size;
-	unsigned sc_hiz_tile_fifo_size;
-	unsigned sc_earlyz_tile_fifo_fize;
-	unsigned tiling_nbanks;
-	unsigned tiling_npipes;
-	unsigned tiling_group_size;
+	unsigned		max_pipes;
+	unsigned		max_tile_pipes;
+	unsigned		max_simds;
+	unsigned		max_backends;
+	unsigned		max_gprs;
+	unsigned		max_threads;
+	unsigned		max_stack_entries;
+	unsigned		max_hw_contexts;
+	unsigned		max_gs_threads;
+	unsigned		sx_max_export_size;
+	unsigned		sx_max_export_pos_size;
+	unsigned		sx_max_export_smx_size;
+	unsigned		sq_num_cf_insts;
+	unsigned		sx_num_of_sets;
+	unsigned		sc_prim_fifo_size;
+	unsigned		sc_hiz_tile_fifo_size;
+	unsigned		sc_earlyz_tile_fifo_fize;
+	unsigned		tiling_nbanks;
+	unsigned		tiling_npipes;
+	unsigned		tiling_group_size;
+	struct r100_gpu_lockup	lockup;
 };
 
 union radeon_asic_config {
@@ -1135,6 +1146,7 @@  static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(p) rdev->asic->cs_parse((p))
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
+#define radeon_gpu_is_lockup(rdev) (rdev)->asic->gpu_is_lockup((rdev))
 #define radeon_gpu_reset(rdev) (rdev)->asic->gpu_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev))
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart_set_page((rdev), (i), (p))
@@ -1233,6 +1245,8 @@  extern int r100_cs_packet_parse(struct radeon_cs_parser *p,
 				unsigned idx);
 extern void r100_enable_bm(struct radeon_device *rdev);
 extern void r100_set_common_regs(struct radeon_device *rdev);
+extern void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp);
+extern bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp);
 
 /* rv200,rv250,rv280 */
 extern void r200_set_safe_registers(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index d3a157b..cb1afb2 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -52,6 +52,7 @@  extern int r100_resume(struct radeon_device *rdev);
 uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
 void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void r100_vga_set_state(struct radeon_device *rdev, bool state);
+bool r100_gpu_is_lockup(struct radeon_device *rdev);
 int r100_gpu_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
@@ -89,6 +90,7 @@  static struct radeon_asic r100_asic = {
 	.suspend = &r100_suspend,
 	.resume = &r100_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r100_gpu_is_lockup,
 	.gpu_reset = &r100_gpu_reset,
 	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
 	.gart_set_page = &r100_pci_gart_set_page,
@@ -135,6 +137,7 @@  static struct radeon_asic r200_asic = {
 	.suspend = &r100_suspend,
 	.resume = &r100_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r100_gpu_is_lockup,
 	.gpu_reset = &r100_gpu_reset,
 	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
 	.gart_set_page = &r100_pci_gart_set_page,
@@ -174,6 +177,7 @@  extern int r300_init(struct radeon_device *rdev);
 extern void r300_fini(struct radeon_device *rdev);
 extern int r300_suspend(struct radeon_device *rdev);
 extern int r300_resume(struct radeon_device *rdev);
+extern bool r300_gpu_is_lockup(struct radeon_device *rdev);
 extern int r300_gpu_reset(struct radeon_device *rdev);
 extern void r300_ring_start(struct radeon_device *rdev);
 extern void r300_fence_ring_emit(struct radeon_device *rdev,
@@ -192,6 +196,7 @@  static struct radeon_asic r300_asic = {
 	.suspend = &r300_suspend,
 	.resume = &r300_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &r300_gpu_reset,
 	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
 	.gart_set_page = &r100_pci_gart_set_page,
@@ -231,6 +236,7 @@  static struct radeon_asic r300_asic_pcie = {
 	.suspend = &r300_suspend,
 	.resume = &r300_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &r300_gpu_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
@@ -275,6 +281,7 @@  static struct radeon_asic r420_asic = {
 	.suspend = &r420_suspend,
 	.resume = &r420_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &r300_gpu_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
@@ -325,6 +332,7 @@  static struct radeon_asic rs400_asic = {
 	.suspend = &rs400_suspend,
 	.resume = &rs400_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &r300_gpu_reset,
 	.gart_tlb_flush = &rs400_gart_tlb_flush,
 	.gart_set_page = &rs400_gart_set_page,
@@ -385,6 +393,7 @@  static struct radeon_asic rs600_asic = {
 	.suspend = &rs600_suspend,
 	.resume = &rs600_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &r300_gpu_reset,
 	.gart_tlb_flush = &rs600_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
@@ -434,6 +443,7 @@  static struct radeon_asic rs690_asic = {
 	.suspend = &rs690_suspend,
 	.resume = &rs690_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &r300_gpu_reset,
 	.gart_tlb_flush = &rs400_gart_tlb_flush,
 	.gart_set_page = &rs400_gart_set_page,
@@ -487,6 +497,7 @@  static struct radeon_asic rv515_asic = {
 	.suspend = &rv515_suspend,
 	.resume = &rv515_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &rv515_gpu_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
@@ -531,6 +542,7 @@  static struct radeon_asic r520_asic = {
 	.suspend = &rv515_suspend,
 	.resume = &r520_resume,
 	.vga_set_state = &r100_vga_set_state,
+	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.gpu_reset = &rv515_gpu_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
@@ -587,6 +599,7 @@  int r600_copy_dma(struct radeon_device *rdev,
 		  struct radeon_fence *fence);
 int r600_irq_process(struct radeon_device *rdev);
 int r600_irq_set(struct radeon_device *rdev);
+bool r600_gpu_is_lockup(struct radeon_device *rdev);
 int r600_gpu_reset(struct radeon_device *rdev);
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
@@ -611,6 +624,7 @@  static struct radeon_asic r600_asic = {
 	.resume = &r600_resume,
 	.cp_commit = &r600_cp_commit,
 	.vga_set_state = &r600_vga_set_state,
+	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.gpu_reset = &r600_gpu_reset,
 	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
@@ -648,7 +662,6 @@  int rv770_init(struct radeon_device *rdev);
 void rv770_fini(struct radeon_device *rdev);
 int rv770_suspend(struct radeon_device *rdev);
 int rv770_resume(struct radeon_device *rdev);
-int rv770_gpu_reset(struct radeon_device *rdev);
 
 static struct radeon_asic rv770_asic = {
 	.init = &rv770_init,
@@ -656,7 +669,8 @@  static struct radeon_asic rv770_asic = {
 	.suspend = &rv770_suspend,
 	.resume = &rv770_resume,
 	.cp_commit = &r600_cp_commit,
-	.gpu_reset = &rv770_gpu_reset,
+	.gpu_is_lockup = &r600_gpu_is_lockup,
+	.gpu_reset = &r600_gpu_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
@@ -694,6 +708,7 @@  int evergreen_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
 int evergreen_suspend(struct radeon_device *rdev);
 int evergreen_resume(struct radeon_device *rdev);
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
 int evergreen_gpu_reset(struct radeon_device *rdev);
 void evergreen_bandwidth_update(struct radeon_device *rdev);
 void evergreen_hpd_init(struct radeon_device *rdev);
@@ -708,6 +723,7 @@  static struct radeon_asic evergreen_asic = {
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
 	.cp_commit = NULL,
+	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.gpu_reset = &evergreen_gpu_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 8495d4e..3931542 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -57,7 +57,6 @@  int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 		radeon_fence_ring_emit(rdev, fence);
 
 	fence->emited = true;
-	fence->timeout = jiffies + ((2000 * HZ) / 1000);
 	list_del(&fence->list);
 	list_add_tail(&fence->list, &rdev->fence_drv.emited);
 	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
@@ -70,15 +69,34 @@  static bool radeon_fence_poll_locked(struct radeon_device *rdev)
 	struct list_head *i, *n;
 	uint32_t seq;
 	bool wake = false;
+	unsigned long cjiffies;
 
-	if (rdev == NULL) {
-		return true;
-	}
-	if (rdev->shutdown) {
-		return true;
-	}
 	seq = RREG32(rdev->fence_drv.scratch_reg);
-	rdev->fence_drv.last_seq = seq;
+	if (seq != rdev->fence_drv.last_seq) {
+		rdev->fence_drv.last_seq = seq;
+		rdev->fence_drv.last_jiffies = jiffies;
+		rdev->fence_drv.last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
+	} else {
+		cjiffies = jiffies;
+		if (time_after(cjiffies, rdev->fence_drv.last_jiffies)) {
+			cjiffies -= rdev->fence_drv.last_jiffies;
+			if (time_after(rdev->fence_drv.last_timeout, cjiffies)) {
+				/* update the timeout */
+				rdev->fence_drv.last_timeout -= cjiffies;
+			} else {
+				/* the 500ms timeout is elapsed we should test
+				 * for GPU lockup
+				 */
+				rdev->fence_drv.last_timeout = 1;
+			}
+		} else {
+			/* wrap around update last jiffies, we will just wait
+			 * a little longer
+			 */
+			rdev->fence_drv.last_jiffies = cjiffies;
+		}
+		return false;
+	}
 	n = NULL;
 	list_for_each(i, &rdev->fence_drv.emited) {
 		fence = list_entry(i, struct radeon_fence, list);
@@ -170,9 +188,8 @@  bool radeon_fence_signaled(struct radeon_fence *fence)
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	struct radeon_device *rdev;
-	unsigned long cur_jiffies;
-	unsigned long timeout;
-	bool expired = false;
+	unsigned long irq_flags, timeout;
+	u32 seq;
 	int r;
 
 	if (fence == NULL) {
@@ -183,14 +200,10 @@  int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	if (radeon_fence_signaled(fence)) {
 		return 0;
 	}
-
+	timeout = rdev->fence_drv.last_timeout;
 retry:
-	cur_jiffies = jiffies;
-	timeout = HZ / 100;
-	if (time_after(fence->timeout, cur_jiffies)) {
-		timeout = fence->timeout - cur_jiffies;
-	}
-
+	/* save current sequence used to check for GPU lockup */
+	seq = rdev->fence_drv.last_seq;
 	if (intr) {
 		radeon_irq_kms_sw_irq_get(rdev);
 		r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
@@ -205,38 +218,34 @@  retry:
 		radeon_irq_kms_sw_irq_put(rdev);
 	}
 	if (unlikely(!radeon_fence_signaled(fence))) {
-		if (unlikely(r == 0)) {
-			expired = true;
+		/* we were interrupted for some reason and fence isn't
+		 * isn't signaled yet, resume wait
+		 */
+		if (r) {
+			timeout = r;
+			goto retry;
 		}
-		if (unlikely(expired)) {
-			timeout = 1;
-			if (time_after(cur_jiffies, fence->timeout)) {
-				timeout = cur_jiffies - fence->timeout;
-			}
-			timeout = jiffies_to_msecs(timeout);
-			if (timeout > 500) {
-				DRM_ERROR("fence(%p:0x%08X) %lums timeout "
-					  "going to reset GPU\n",
-					  fence, fence->seq, timeout);
-				radeon_gpu_reset(rdev);
-				WREG32(rdev->fence_drv.scratch_reg, fence->seq);
-			}
+		/* don't protect read access to rdev->fence_drv.last_seq
+		 * if we experiencing a lockup the value doesn't change
+		 */
+		if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) {
+			/* good news we believe it's a lockup */
+			dev_warn(rdev->dev, "GPU lockup (last fence id 0x%08X)\n", seq);
+			r = radeon_gpu_reset(rdev);
+			if (r)
+				return r;
+			/* FIXME: what should we do ? marking everyone
+			 * as signaled for now
+			 */
+			WREG32(rdev->fence_drv.scratch_reg, fence->seq);
 		}
+		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
+		write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+		rdev->fence_drv.last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
+		rdev->fence_drv.last_jiffies = jiffies;
+		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
 		goto retry;
 	}
-	if (unlikely(expired)) {
-		rdev->fence_drv.count_timeout++;
-		cur_jiffies = jiffies;
-		timeout = 1;
-		if (time_after(cur_jiffies, fence->timeout)) {
-			timeout = cur_jiffies - fence->timeout;
-		}
-		timeout = jiffies_to_msecs(timeout);
-		DRM_ERROR("fence(%p:0x%08X) %lums timeout\n",
-			  fence, fence->seq, timeout);
-		DRM_ERROR("last signaled fence(0x%08X)\n",
-			  rdev->fence_drv.last_seq);
-	}
 	return 0;
 }
 
@@ -332,7 +341,6 @@  int radeon_fence_driver_init(struct radeon_device *rdev)
 	INIT_LIST_HEAD(&rdev->fence_drv.created);
 	INIT_LIST_HEAD(&rdev->fence_drv.emited);
 	INIT_LIST_HEAD(&rdev->fence_drv.signaled);
-	rdev->fence_drv.count_timeout = 0;
 	init_waitqueue_head(&rdev->fence_drv.queue);
 	rdev->fence_drv.initialized = true;
 	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 37887de..16c793b 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -917,12 +917,6 @@  int rv770_mc_init(struct radeon_device *rdev)
 	return 0;
 }
 
-int rv770_gpu_reset(struct radeon_device *rdev)
-{
-	/* FIXME: implement any rv770 specific bits */
-	return r600_gpu_reset(rdev);
-}
-
 static int rv770_startup(struct radeon_device *rdev)
 {
 	int r;