
[2/3] drm/radeon: add command submission IDs

Message ID 1411054213-11173-2-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived

Commit Message

Christian König Sept. 18, 2014, 3:30 p.m. UTC
From: Christian König <christian.koenig@amd.com>

This patch adds a new 64-bit ID that is returned as the result of each command submission.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/Makefile     |   2 +-
 drivers/gpu/drm/radeon/radeon.h     |  13 +-
 drivers/gpu/drm/radeon/radeon_cs.c  |  13 ++
 drivers/gpu/drm/radeon/radeon_kms.c |  41 +++----
 drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++
 include/uapi/drm/radeon_drm.h       |   1 +
 6 files changed, 277 insertions(+), 22 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c
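
On the uAPI side the change is small: the RADEON_CHUNK_ID_FLAGS chunk grows
to five dwords and the kernel writes the generated ID back into dwords 3
and 4. A minimal sketch of the userspace side (illustrative only: the chunk
setup and the actual CS ioctl call are elided, and the names below are not
from the patch):

#include <stdint.h>
#include <string.h>

/* Extended RADEON_CHUNK_ID_FLAGS chunk as described by the patch:
 * dword 0: flags, dword 1: ring, dword 2: priority,
 * dwords 3-4: the 64-bit fence ID written back by the kernel
 * (only when the chunk's length_dw is greater than 4). */
uint32_t flags_chunk[5];

/* Read the ID the kernel stored at dword 3 once the CS ioctl returns.
 * An ID of 0 means the kernel could not track the submission and has
 * already blocked on the fence instead. */
static uint64_t cs_fence_id(const uint32_t *chunk)
{
	uint64_t id;

	memcpy(&id, &chunk[3], sizeof(id));
	return id;
}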

Comments

Jerome Glisse Sept. 19, 2014, 12:42 a.m. UTC | #1
On Thu, Sep 18, 2014 at 05:30:12PM +0200, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
> 
> This patch adds a new 64-bit ID that is returned as the result of each command submission.

NAK for design reasons.

First and foremost, we DO NOT WANT TO ALLOCATE ANYTHING for a submission id.
I know that hooking up to the fence makes life easy, but this is not how it
should be done.

What you want to expose is the per-ring 32-bit seq value: report the value
and the ring id to user space. To handle the wrap around, you add a unique
32-bit wrap counter which is incremented every 1 << 30 seq values (or some
other big number, but it must be smaller than 1 << 31). Then you use
arithmetic to determine whether a cs seq number is done or not: the wrap
counter diff should not be bigger than 2 or 3, and whether the current seq
is after the cs seq is again decided with an arithmetic diff, like the
jiffies code.

With all of this hidden in the kernel, all user space has to know is:
a 32-bit seq value per ring
a 32-bit unique ring id
a 32-bit wrap counter per ring

With such a scheme you never allocate anything or worry about any fence.
Way simpler and less code.

Cheers,
Jérôme
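
As a sketch of one way to read this suggestion (this is not Jérôme's code
or the driver's; the wrap step and all names here are assumptions), the
jiffies-style arithmetic would look roughly like:

#include <stdbool.h>
#include <stdint.h>

#define WRAP_STEP (1u << 30)	/* "some big number", must be < 1 << 31 */

struct ring_seq_state {
	uint32_t last_seq;	/* last seq value signaled on this ring */
	uint32_t wrap;		/* bumped every WRAP_STEP completed seqs */
};

/* jiffies-style signed diff: true if a is at or before b */
static bool seq_at_or_before(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) >= 0;
}

static bool cs_is_done(const struct ring_seq_state *ring,
		       uint32_t cs_seq, uint32_t cs_wrap)
{
	int32_t wrap_diff = (int32_t)(ring->wrap - cs_wrap);

	if (wrap_diff < 0)
		return false;	/* ring state is stale, the CS is newer */
	if (wrap_diff >= 2)
		return true;	/* several wrap periods old, long signaled */

	return seq_at_or_before(cs_seq, ring->last_seq);
}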


> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/radeon/Makefile     |   2 +-
>  drivers/gpu/drm/radeon/radeon.h     |  13 +-
>  drivers/gpu/drm/radeon/radeon_cs.c  |  13 ++
>  drivers/gpu/drm/radeon/radeon_kms.c |  41 +++----
>  drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++
>  include/uapi/drm/radeon_drm.h       |   1 +
>  6 files changed, 277 insertions(+), 22 deletions(-)
>  create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c
> 
> diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
> index 8cc9f68..21ee928 100644
> --- a/drivers/gpu/drm/radeon/Makefile
> +++ b/drivers/gpu/drm/radeon/Makefile
> @@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
>  	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
>  	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
>  	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
> -	radeon_sync.o
> +	radeon_sync.o radeon_seq.o
>  
>  # add async DMA block
>  radeon-y += \
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index b3b4e96..dbfd346 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -423,6 +423,15 @@ static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
>  }
>  
>  /*
> + * Userspace command submission identifier generation
> + */
> +struct radeon_seq;
> +
> +uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence);
> +struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id);
> +void radeon_seq_destroy(struct radeon_seq **seq);
> +
> +/*
>   * Tiling registers
>   */
>  struct radeon_surface_reg {
> @@ -946,7 +955,9 @@ struct radeon_vm_manager {
>   * file private structure
>   */
>  struct radeon_fpriv {
> -	struct radeon_vm		vm;
> +	struct radeon_vm	vm;
> +	struct mutex		seq_lock;
> +	struct radeon_seq	*seq;
>  };
>  
>  /*
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index b8f20a5..0c0f0b3 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -413,6 +413,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
>  	unsigned i;
>  
>  	if (!error) {
> +		if (parser->chunk_flags &&
> +		    parser->chunk_flags->length_dw > 4) {
> +			struct radeon_fpriv *fpriv = parser->filp->driver_priv;
> +			uint32_t __user *to = parser->chunk_flags->user_ptr;
> +			uint64_t id;
> +
> +			mutex_lock(&fpriv->seq_lock);
> +			id = radeon_seq_push(&fpriv->seq, parser->ib.fence);
> +			mutex_unlock(&fpriv->seq_lock);
> +
> +			copy_to_user(&to[3], &id, sizeof(uint64_t));
> +		}
> +
>  		/* Sort the buffer list from the smallest to largest buffer,
>  		 * which affects the order of buffers in the LRU list.
>  		 * This assures that the smallest buffers are added first
> diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
> index 85ee6f7..38880af 100644
> --- a/drivers/gpu/drm/radeon/radeon_kms.c
> +++ b/drivers/gpu/drm/radeon/radeon_kms.c
> @@ -578,39 +578,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
>   */
>  int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>  {
> +	struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
>  	struct radeon_device *rdev = dev->dev_private;
>  	int r;
>  
> -	file_priv->driver_priv = NULL;
> +	if (unlikely(!fpriv))
> +		return -ENOMEM;
> +
> +	file_priv->driver_priv = fpriv;
>  
>  	r = pm_runtime_get_sync(dev->dev);
>  	if (r < 0)
> -		return r;
> +		goto error;
>  
>  	/* new gpu have virtual address space support */
>  	if (rdev->family >= CHIP_CAYMAN) {
> -		struct radeon_fpriv *fpriv;
>  		struct radeon_vm *vm;
>  		int r;
>  
> -		fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
> -		if (unlikely(!fpriv)) {
> -			return -ENOMEM;
> -		}
> -
>  		vm = &fpriv->vm;
>  		r = radeon_vm_init(rdev, vm);
> -		if (r) {
> -			kfree(fpriv);
> -			return r;
> -		}
> +		if (r)
> +			goto error;
>  
>  		if (rdev->accel_working) {
>  			r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>  			if (r) {
>  				radeon_vm_fini(rdev, vm);
> -				kfree(fpriv);
> -				return r;
> +				goto error;
>  			}
>  
>  			/* map the ib pool buffer read only into
> @@ -623,16 +618,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>  						  RADEON_VM_PAGE_SNOOPED);
>  			if (r) {
>  				radeon_vm_fini(rdev, vm);
> -				kfree(fpriv);
> -				return r;
> +				goto error;
>  			}
>  		}
> -		file_priv->driver_priv = fpriv;
>  	}
>  
> +	mutex_init(&fpriv->seq_lock);
> +
>  	pm_runtime_mark_last_busy(dev->dev);
>  	pm_runtime_put_autosuspend(dev->dev);
>  	return 0;
> +
> +error:
> +	kfree(fpriv);
> +	return r;
>  }
>  
>  /**
> @@ -646,11 +645,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>  void radeon_driver_postclose_kms(struct drm_device *dev,
>  				 struct drm_file *file_priv)
>  {
> +	struct radeon_fpriv *fpriv = file_priv->driver_priv;
>  	struct radeon_device *rdev = dev->dev_private;
>  
> +	radeon_seq_destroy(&fpriv->seq);
> +
>  	/* new gpu have virtual address space support */
>  	if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
> -		struct radeon_fpriv *fpriv = file_priv->driver_priv;
>  		struct radeon_vm *vm = &fpriv->vm;
>  		int r;
>  
> @@ -664,9 +665,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
>  		}
>  
>  		radeon_vm_fini(rdev, vm);
> -		kfree(fpriv);
> -		file_priv->driver_priv = NULL;
>  	}
> +	kfree(fpriv);
> +	file_priv->driver_priv = NULL;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/radeon/radeon_seq.c b/drivers/gpu/drm/radeon/radeon_seq.c
> new file mode 100644
> index 0000000..d8857f1
> --- /dev/null
> +++ b/drivers/gpu/drm/radeon/radeon_seq.c
> @@ -0,0 +1,229 @@
> +/*
> + * Copyright 2014 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + */
> +/*
> + * Authors:
> + *    Christian König <christian.koenig@amd.com>
> + */
> +
> +#include <drm/drmP.h>
> +#include "radeon.h"
> +
> +/*
> + * ID sequences
> + * This code generates a 64bit identifier for a command submission.
> + * It works by adding the fence of the command submission to an automatically
> + * resizing ring buffer.
> + */
> +
> +struct radeon_seq {
> +	uint64_t		start;
> +	uint64_t		end;
> +	uint64_t		mask;
> +	struct radeon_seq	*replacement;
> +};
> +
> +/**
> + * radeon_seq_create - create a new sequence object
> + *
> + * @start: start value for this sequence
> + * @size: size of the ring buffer, must be power of two
> + *
> + * Allocate and initialize a new ring buffer and header.
> + * Returns NULL if allocation fails, new object otherwise.
> + */
> +static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size)
> +{
> +	unsigned bytes = sizeof(struct radeon_seq) +
> +		size * sizeof(struct radeon_fence *);
> +
> +	struct radeon_seq *seq;
> +
> +	seq = kmalloc(bytes, GFP_KERNEL);
> +	if (!seq)
> +		return NULL;
> +
> +	seq->start = start;
> +	seq->end = start;
> +	seq->mask = size - 1;
> +	seq->replacement = NULL;
> +
> +	return seq;
> +}
> +
> +/**
> + * radeon_seq_ring - get pointer to ring buffer
> + *
> + * @seq: sequence object
> + *
> + * Calculate the address of the ring buffer.
> + */
> +static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq)
> +{
> +	return (struct radeon_fence **)&seq[1];
> +}
> +
> +/**
> + * radeon_seq_try_free - try to free fences from the ring buffer
> + *
> + * @seq: sequence object
> + *
> + * Try to free fences from the start of the ring buffer.
> + */
> +static void radeon_seq_try_free(struct radeon_seq *seq)
> +{
> +	struct radeon_fence **ring = radeon_seq_ring(seq);
> +
> +	while (seq->start != seq->end) {
> +		unsigned idx = seq->start & seq->mask;
> +		struct radeon_fence *fence = ring[idx];
> +
> +		if (!radeon_fence_signaled(fence))
> +			break;
> +
> +		radeon_fence_unref(&fence);
> +		++seq->start;
> +	}
> +}
> +
> +/**
> + * radeon_seq_add - add new fence to the end of the ring buffer
> + *
> + * @seq: sequence object
> + * @f: the fence object
> + *
> + * Add the fence and return the generated ID.
> + */
> +static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f)
> +{
> +	struct radeon_fence **ring = radeon_seq_ring(seq);
> +
> +	ring[seq->end & seq->mask] = radeon_fence_ref(f);
> +	return seq->end++;
> +}
> +
> +/**
> + * radeon_seq_push - check for room and add the fence
> + *
> + * @seq: sequence object
> + * @fence: the fence object
> + *
> + * Check for room on the ring buffer, if there isn't enough
> + * reallocate the sequence object and add the fence.
> + * Returns the generated ID.
> + */
> +uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence)
> +{
> +	unsigned size_for_new_seq = 4;
> +	uint64_t start_for_new_seq = 1;
> +
> +	if (*seq) {
> +		/* try to release old replacements */
> +		while ((*seq)->replacement) {
> +			radeon_seq_try_free(*seq);
> +			if ((*seq)->start == (*seq)->end) {
> +				struct radeon_seq *repl = (*seq)->replacement;
> +
> +				kfree(*seq);
> +				*seq = repl;
> +			} else {
> +				/* move on to the current container */
> +				seq = &(*seq)->replacement;
> +			}
> +		}
> +
> +		/* check if we have enough room for one more fence */
> +		radeon_seq_try_free(*seq);
> +		if (((*seq)->end - (*seq)->start) <= (*seq)->mask)
> +			return radeon_seq_add(*seq, fence);
> +
> +		/* not enough room, let's allocate a replacement */
> +		size_for_new_seq = ((*seq)->mask + 1) * 2;
> +		start_for_new_seq = (*seq)->end + 1;
> +		seq = &(*seq)->replacement;
> +	}
> +
> +	*seq = radeon_seq_create(start_for_new_seq, size_for_new_seq);
> +	if (!*seq) {
> +		/* not enough memory for a new sequence object, but failing
> +		   here isn't a good idea either because the commands are already
> +		   submitted to the hardware. So just block on the fence. */
> +		int r = radeon_fence_wait(fence, false);
> +		if (r)
> +			DRM_ERROR("Error waiting for fence (%d)\n", r);
> +		return 0;
> +	}
> +	return radeon_seq_add(*seq, fence);
> +}
> +
> +/**
> + * radeon_seq_query - look up a fence by its ID
> + *
> + * @seq: sequence object
> + * @id: the generated ID
> + *
> + * Look up the associated fence by its ID.
> + * Returns fence object or NULL if it couldn't be found.
> + */
> +struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id)
> +{
> +	struct radeon_fence **ring;
> +
> +	while (seq && id > seq->end)
> +		seq = seq->replacement;
> +
> +	if (!seq || id < seq->start)
> +		return NULL;
> +
> +	ring = radeon_seq_ring(seq);
> +	return ring[id & seq->mask];
> +}
> +
> +/**
> + * radeon_seq_destroy - destroy the sequence object
> + *
> + * @seq_ptr: pointer to sequence object
> + *
> + * Destroy the sequence objects and release all fence references taken.
> + */
> +void radeon_seq_destroy(struct radeon_seq **seq_ptr)
> +{
> +	struct radeon_seq *seq = *seq_ptr;
> +	while (seq) {
> +		struct radeon_seq *repl = seq->replacement;
> +		unsigned start = seq->start & seq->mask;
> +		unsigned end = seq->end & seq->mask;
> +		struct radeon_fence **ring;
> +		unsigned i;
> +
> +		ring = radeon_seq_ring(seq);
> +		for (i = start; i < end; ++i)
> +			radeon_fence_unref(&ring[i]);
> +
> +		kfree(seq);
> +		seq = repl;
> +	}
> +	*seq_ptr = NULL;
> +}
> diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
> index 50d0fb4..6b2b2e7 100644
> --- a/include/uapi/drm/radeon_drm.h
> +++ b/include/uapi/drm/radeon_drm.h
> @@ -959,6 +959,7 @@ struct drm_radeon_gem_va {
>  #define RADEON_CS_RING_VCE          4
>  /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
>  /* 0 = normal, + = higher priority, - = lower priority */
> +/* The fourth and fifth dwords are a 64bit fence ID generated for this CS */
>  
>  struct drm_radeon_cs_chunk {
>  	uint32_t		chunk_id;
> -- 
> 1.9.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
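
Stripped of the diff markers, the scheme in radeon_seq.c above condenses to
the following (a simplified restatement, with the locking, fence
refcounting and the out-of-memory fallback omitted): IDs come from a
monotonically increasing 64-bit counter, only not-yet-signaled fences are
kept in a power-of-two ring, and a double-sized replacement ring is chained
in when the current one fills up.

#include <stddef.h>
#include <stdint.h>

struct radeon_fence;

struct radeon_seq {
	uint64_t		start;		/* oldest ID still tracked */
	uint64_t		end;		/* one past the newest ID */
	uint64_t		mask;		/* ring size - 1, a power of two */
	struct radeon_seq	*replacement;	/* larger successor, or NULL */
};

/* the fence pointers live directly behind the header */
static struct radeon_fence **ring_of(struct radeon_seq *seq)
{
	return (struct radeon_fence **)&seq[1];
}

/* a lookup walks the replacement chain to the ring covering the ID,
 * then indexes it with the mask */
static struct radeon_fence *lookup(struct radeon_seq *seq, uint64_t id)
{
	while (seq && id > seq->end)		/* ID issued after this ring */
		seq = seq->replacement;
	if (!seq || id < seq->start)
		return NULL;			/* unknown or already retired */
	return ring_of(seq)[id & seq->mask];
}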

Patch

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 8cc9f68..21ee928 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -81,7 +81,7 @@  radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
 	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
-	radeon_sync.o
+	radeon_sync.o radeon_seq.o
 
 # add async DMA block
 radeon-y += \
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b3b4e96..dbfd346 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -423,6 +423,15 @@  static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
 }
 
 /*
+ * Userspace command submission identifier generation
+ */
+struct radeon_seq;
+
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence);
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id);
+void radeon_seq_destroy(struct radeon_seq **seq);
+
+/*
  * Tiling registers
  */
 struct radeon_surface_reg {
@@ -946,7 +955,9 @@  struct radeon_vm_manager {
  * file private structure
  */
 struct radeon_fpriv {
-	struct radeon_vm		vm;
+	struct radeon_vm	vm;
+	struct mutex		seq_lock;
+	struct radeon_seq	*seq;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index b8f20a5..0c0f0b3 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -413,6 +413,19 @@  static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	unsigned i;
 
 	if (!error) {
+		if (parser->chunk_flags &&
+		    parser->chunk_flags->length_dw > 4) {
+			struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+			uint32_t __user *to = parser->chunk_flags->user_ptr;
+			uint64_t id;
+
+			mutex_lock(&fpriv->seq_lock);
+			id = radeon_seq_push(&fpriv->seq, parser->ib.fence);
+			mutex_unlock(&fpriv->seq_lock);
+
+			copy_to_user(&to[3], &id, sizeof(uint64_t));
+		}
+
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
 		 * This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 85ee6f7..38880af 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -578,39 +578,34 @@  void radeon_driver_lastclose_kms(struct drm_device *dev)
  */
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
+	struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
 	struct radeon_device *rdev = dev->dev_private;
 	int r;
 
-	file_priv->driver_priv = NULL;
+	if (unlikely(!fpriv))
+		return -ENOMEM;
+
+	file_priv->driver_priv = fpriv;
 
 	r = pm_runtime_get_sync(dev->dev);
 	if (r < 0)
-		return r;
+		goto error;
 
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN) {
-		struct radeon_fpriv *fpriv;
 		struct radeon_vm *vm;
 		int r;
 
-		fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
-		if (unlikely(!fpriv)) {
-			return -ENOMEM;
-		}
-
 		vm = &fpriv->vm;
 		r = radeon_vm_init(rdev, vm);
-		if (r) {
-			kfree(fpriv);
-			return r;
-		}
+		if (r)
+			goto error;
 
 		if (rdev->accel_working) {
 			r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
-				kfree(fpriv);
-				return r;
+				goto error;
 			}
 
 			/* map the ib pool buffer read only into
@@ -623,16 +618,20 @@  int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 						  RADEON_VM_PAGE_SNOOPED);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
-				kfree(fpriv);
-				return r;
+				goto error;
 			}
 		}
-		file_priv->driver_priv = fpriv;
 	}
 
+	mutex_init(&fpriv->seq_lock);
+
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 	return 0;
+
+error:
+	kfree(fpriv);
+	return r;
 }
 
 /**
@@ -646,11 +645,13 @@  int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 void radeon_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv)
 {
+	struct radeon_fpriv *fpriv = file_priv->driver_priv;
 	struct radeon_device *rdev = dev->dev_private;
 
+	radeon_seq_destroy(&fpriv->seq);
+
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
-		struct radeon_fpriv *fpriv = file_priv->driver_priv;
 		struct radeon_vm *vm = &fpriv->vm;
 		int r;
 
@@ -664,9 +665,9 @@  void radeon_driver_postclose_kms(struct drm_device *dev,
 		}
 
 		radeon_vm_fini(rdev, vm);
-		kfree(fpriv);
-		file_priv->driver_priv = NULL;
 	}
+	kfree(fpriv);
+	file_priv->driver_priv = NULL;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_seq.c b/drivers/gpu/drm/radeon/radeon_seq.c
new file mode 100644
index 0000000..d8857f1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_seq.c
@@ -0,0 +1,229 @@ 
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+
+/*
+ * ID sequences
+ * This code generates a 64bit identifier for a command submission.
+ * It works by adding the fence of the command submission to an automatically
+ * resizing ring buffer.
+ */
+
+struct radeon_seq {
+	uint64_t		start;
+	uint64_t		end;
+	uint64_t		mask;
+	struct radeon_seq	*replacement;
+};
+
+/**
+ * radeon_seq_create - create a new sequence object
+ *
+ * @start: start value for this sequence
+ * @size: size of the ring buffer, must be power of two
+ *
+ * Allocate and initialize a new ring buffer and header.
+ * Returns NULL if allocation fails, new object otherwise.
+ */
+static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size)
+{
+	unsigned bytes = sizeof(struct radeon_seq) +
+		size * sizeof(struct radeon_fence *);
+
+	struct radeon_seq *seq;
+
+	seq = kmalloc(bytes, GFP_KERNEL);
+	if (!seq)
+		return NULL;
+
+	seq->start = start;
+	seq->end = start;
+	seq->mask = size - 1;
+	seq->replacement = NULL;
+
+	return seq;
+}
+
+/**
+ * radeon_seq_ring - get pointer to ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Calculate the address of the ring buffer.
+ */
+static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq)
+{
+	return (struct radeon_fence **)&seq[1];
+}
+
+/**
+ * radeon_seq_try_free - try to free fences from the ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Try to free fences from the start of the ring buffer.
+ */
+static void radeon_seq_try_free(struct radeon_seq *seq)
+{
+	struct radeon_fence **ring = radeon_seq_ring(seq);
+
+	while (seq->start != seq->end) {
+		unsigned idx = seq->start & seq->mask;
+		struct radeon_fence *fence = ring[idx];
+
+		if (!radeon_fence_signaled(fence))
+			break;
+
+		radeon_fence_unref(&fence);
+		++seq->start;
+	}
+}
+
+/**
+ * radeon_seq_add - add new fence to the end of the ring buffer
+ *
+ * @seq: sequence object
+ * @f: the fence object
+ *
+ * Add the fence and return the generated ID.
+ */
+static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f)
+{
+	struct radeon_fence **ring = radeon_seq_ring(seq);
+
+	ring[seq->end & seq->mask] = radeon_fence_ref(f);
+	return seq->end++;
+}
+
+/**
+ * radeon_seq_push - check for room and add the fence
+ *
+ * @seq: sequence object
+ * @fence: the fence object
+ *
+ * Check for room on the ring buffer, if there isn't enough
+ * reallocate the sequence object and add the fence.
+ * Returns the generated ID.
+ */
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence)
+{
+	unsigned size_for_new_seq = 4;
+	uint64_t start_for_new_seq = 1;
+
+	if (*seq) {
+		/* try to release old replacements */
+		while ((*seq)->replacement) {
+			radeon_seq_try_free(*seq);
+			if ((*seq)->start == (*seq)->end) {
+				struct radeon_seq *repl = (*seq)->replacement;
+
+				kfree(*seq);
+				*seq = repl;
+			} else {
+				/* move on to the current container */
+				seq = &(*seq)->replacement;
+			}
+		}
+
+		/* check if we have enough room for one more fence */
+		radeon_seq_try_free(*seq);
+		if (((*seq)->end - (*seq)->start) <= (*seq)->mask)
+			return radeon_seq_add(*seq, fence);
+
+		/* not enough room, let's allocate a replacement */
+		size_for_new_seq = ((*seq)->mask + 1) * 2;
+		start_for_new_seq = (*seq)->end + 1;
+		seq = &(*seq)->replacement;
+	}
+
+	*seq = radeon_seq_create(start_for_new_seq, size_for_new_seq);
+	if (!*seq) {
+		/* not enough memory for a new sequence object, but failing
+		   here isn't a good idea either because the commands are already
+		   submitted to the hardware. So just block on the fence. */
+		int r = radeon_fence_wait(fence, false);
+		if (r)
+			DRM_ERROR("Error waiting for fence (%d)\n", r);
+		return 0;
+	}
+	return radeon_seq_add(*seq, fence);
+}
+
+/**
+ * radeon_seq_query - look up a fence by its ID
+ *
+ * @seq: sequence object
+ * @id: the generated ID
+ *
+ * Look up the associated fence by its ID.
+ * Returns fence object or NULL if it couldn't be found.
+ */
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id)
+{
+	struct radeon_fence **ring;
+
+	while (seq && id > seq->end)
+		seq = seq->replacement;
+
+	if (!seq || id < seq->start)
+		return NULL;
+
+	ring = radeon_seq_ring(seq);
+	return ring[id & seq->mask];
+}
+
+/**
+ * radeon_seq_destroy - destroy the sequence object
+ *
+ * @seq_ptr: pointer to sequence object
+ *
+ * Destroy the sequence objects and release all fence references taken.
+ */
+void radeon_seq_destroy(struct radeon_seq **seq_ptr)
+{
+	struct radeon_seq *seq = *seq_ptr;
+	while (seq) {
+		struct radeon_seq *repl = seq->replacement;
+		unsigned start = seq->start & seq->mask;
+		unsigned end = seq->end & seq->mask;
+		struct radeon_fence **ring;
+		unsigned i;
+
+		ring = radeon_seq_ring(seq);
+		for (i = start; i < end; ++i)
+			radeon_fence_unref(&ring[i]);
+
+		kfree(seq);
+		seq = repl;
+	}
+	*seq_ptr = NULL;
+}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 50d0fb4..6b2b2e7 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -959,6 +959,7 @@  struct drm_radeon_gem_va {
 #define RADEON_CS_RING_VCE          4
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth and fifth dwords are a 64bit fence ID generated for this CS */
 
 struct drm_radeon_cs_chunk {
 	uint32_t		chunk_id;