@@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
- radeon_sync.o
+ radeon_sync.o radeon_seq.o
# add async DMA block
radeon-y += \
@@ -433,6 +433,15 @@ static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
}
/*
+ * Userspace command submission identifier generation
+ */
+struct radeon_seq;
+
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence);
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id);
+void radeon_seq_destroy(struct radeon_seq **seq);
+
+/*
* Tiling registers
*/
struct radeon_surface_reg {
@@ -975,7 +984,9 @@ struct radeon_vm_manager {
* file private structure
*/
struct radeon_fpriv {
- struct radeon_vm vm;
+ struct radeon_vm vm;
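+	/* protects seq, the command submission ID ring buffer chain */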
+ struct mutex seq_lock;
+ struct radeon_seq *seq;
};
/*
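With the above, each file private gets its own ID sequence protected by
seq_lock. For illustration only, a kernel-side consumer of the three new
functions could look roughly like this (radeon_seq_wait_id is a
hypothetical helper, not part of this patch; only the radeon_seq_* and
radeon_fence_* calls are real):

    static int radeon_seq_wait_id(struct radeon_fpriv *fpriv, uint64_t id)
    {
    	struct radeon_fence *fence;
    	int r;

    	/* take our own reference under the lock; try_free may drop
    	 * the ring's reference at any time once the fence signals */
    	mutex_lock(&fpriv->seq_lock);
    	fence = radeon_seq_query(fpriv->seq, id);
    	if (fence)
    		fence = radeon_fence_ref(fence);
    	mutex_unlock(&fpriv->seq_lock);

    	/* an unknown or already freed ID means the work has signaled */
    	if (!fence)
    		return 0;

    	r = radeon_fence_wait(fence, false);
    	radeon_fence_unref(&fence);
    	return r;
    }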
@@ -398,6 +398,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
unsigned i;
if (!error) {
+ if (parser->chunk_flags &&
+ parser->chunk_flags->length_dw > 4) {
+ struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+ uint32_t __user *to = parser->chunk_flags->user_ptr;
+ uint64_t id;
+
+ mutex_lock(&fpriv->seq_lock);
+ id = radeon_seq_push(&fpriv->seq, parser->ib.fence);
+ mutex_unlock(&fpriv->seq_lock);
+
+			if (copy_to_user(&to[3], &id, sizeof(uint64_t)))
+				DRM_ERROR("Failed to copy CS ID to userspace\n");
+ }
+
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
* This assures that the smallest buffers are added first
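The write-back above lands in dwords 3 and 4 of the user's flags chunk. A
userspace caller could read the generated ID back along these lines (a
sketch against the radeon uapi structures; buffer setup, submission and
error handling are omitted, and the variable names are illustrative):

    uint32_t flags[5] = { 0 };   /* dw 0-2: flags/ring/priority, dw 3-4: ID */
    struct drm_radeon_cs_chunk chunk = {
    	.chunk_id   = RADEON_CHUNK_ID_FLAGS,
    	.length_dw  = 5,
    	.chunk_data = (uint64_t)(uintptr_t)flags,
    };
    uint64_t cs_id;

    /* ... include the chunk in the DRM_IOCTL_RADEON_CS submission ... */

    memcpy(&cs_id, &flags[3], sizeof(cs_id));   /* dwords 3 and 4 */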
@@ -585,39 +585,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
*/
int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
{
+ struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
struct radeon_device *rdev = dev->dev_private;
int r;
- file_priv->driver_priv = NULL;
+ if (unlikely(!fpriv))
+ return -ENOMEM;
+
+ file_priv->driver_priv = fpriv;
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
- return r;
+ goto error;
/* new gpu have virtual address space support */
if (rdev->family >= CHIP_CAYMAN) {
- struct radeon_fpriv *fpriv;
struct radeon_vm *vm;
int r;
- fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
- if (unlikely(!fpriv)) {
- return -ENOMEM;
- }
-
vm = &fpriv->vm;
r = radeon_vm_init(rdev, vm);
- if (r) {
- kfree(fpriv);
- return r;
- }
+ if (r)
+ goto error;
if (rdev->accel_working) {
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
if (r) {
radeon_vm_fini(rdev, vm);
- kfree(fpriv);
- return r;
+ goto error;
}
/* map the ib pool buffer read only into
@@ -630,16 +625,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
RADEON_VM_PAGE_SNOOPED);
if (r) {
radeon_vm_fini(rdev, vm);
- kfree(fpriv);
- return r;
+ goto error;
}
}
- file_priv->driver_priv = fpriv;
}
+ mutex_init(&fpriv->seq_lock);
+
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
+
+error:
+	kfree(fpriv);
+	file_priv->driver_priv = NULL;
+	return r;
}
/**
@@ -653,11 +652,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
void radeon_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv)
{
+ struct radeon_fpriv *fpriv = file_priv->driver_priv;
struct radeon_device *rdev = dev->dev_private;
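+	/* release the fence references held for CS ID lookups */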
+ radeon_seq_destroy(&fpriv->seq);
+
/* new gpu have virtual address space support */
if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
- struct radeon_fpriv *fpriv = file_priv->driver_priv;
struct radeon_vm *vm = &fpriv->vm;
int r;
@@ -671,9 +672,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
}
radeon_vm_fini(rdev, vm);
- kfree(fpriv);
- file_priv->driver_priv = NULL;
}
+ kfree(fpriv);
+ file_priv->driver_priv = NULL;
}
/**
new file mode 100644
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+
+/*
+ * ID sequences
+ * This code generates a 64-bit identifier for a command submission.
+ * It works by adding the fence of the command submission to an
+ * automatically resizing ring buffer.
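+ * When the buffer runs full a larger replacement is allocated and
+ * chained to the old one; the old buffer is freed again as soon as
+ * all of its fences have signaled.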
+ */
+
+struct radeon_seq {
+ uint64_t start;
+ uint64_t end;
+ uint64_t mask;
+ struct radeon_seq *replacement;
+};
+
+/**
+ * radeon_seq_create - create a new sequence object
+ *
+ * @start: start value for this sequence
+ * @size: size of the ring buffer, must be a power of two
+ *
+ * Allocate and initialize a new ring buffer and header.
+ * Returns NULL if allocation fails, new object otherwise.
+ */
+static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size)
+{
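+	/* header and ring buffer share a single allocation; the fence
+	 * array starts directly behind the struct, see radeon_seq_ring()
+	 */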
+ unsigned bytes = sizeof(struct radeon_seq) +
+ size * sizeof(struct radeon_fence *);
+
+ struct radeon_seq *seq;
+
+ seq = kmalloc(bytes, GFP_KERNEL);
+ if (!seq)
+ return NULL;
+
+ seq->start = start;
+ seq->end = start;
+ seq->mask = size - 1;
+ seq->replacement = NULL;
+
+ return seq;
+}
+
+/**
+ * radeon_seq_ring - get pointer to ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Calculate the address of the ring buffer.
+ */
+static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq)
+{
+ return (struct radeon_fence **)&seq[1];
+}
+
+/**
+ * radeon_seq_try_free - try to free fences from the ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Try to free fences from the start of the ring buffer.
+ */
+static void radeon_seq_try_free(struct radeon_seq *seq)
+{
+ struct radeon_fence **ring = radeon_seq_ring(seq);
+
+ while (seq->start != seq->end) {
+ unsigned idx = seq->start & seq->mask;
+ struct radeon_fence *fence = ring[idx];
+
+ if (!radeon_fence_signaled(fence))
+ break;
+
+ radeon_fence_unref(&fence);
+ ++seq->start;
+ }
+}
+
+/**
+ * radeon_seq_add - add new fence to the end of the ring buffer
+ *
+ * @seq: sequence object
+ * @f: the fence object
+ *
+ * Add the fence and return the generated ID.
+ */
+static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f)
+{
+ struct radeon_fence **ring = radeon_seq_ring(seq);
+
+ ring[seq->end & seq->mask] = radeon_fence_ref(f);
+ return seq->end++;
+}
+
+/**
+ * radeon_seq_push - check for room and add the fence
+ *
+ * @seq: sequence object
+ * @fence: the fence object
+ *
+ * Check for room on the ring buffer; if there isn't enough,
+ * chain a larger replacement object and add the fence to that.
+ * Returns the generated ID, or 0 if the replacement allocation failed.
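+ *
+ * As an example of the growth behavior: the first push allocates a
+ * 4-entry ring starting at ID 1. Pushes one through four fill it, and
+ * a fifth push with no fence signaled yet chains an 8-entry
+ * replacement and hands out ID 6 (ID 5 is skipped; see the note at
+ * the replacement allocation below).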
+ */
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence)
+{
+ unsigned size_for_new_seq = 4;
+ uint64_t start_for_new_seq = 1;
+
+ if (*seq) {
+ /* try to release old replacements */
+ while ((*seq)->replacement) {
+ radeon_seq_try_free(*seq);
+ if ((*seq)->start == (*seq)->end) {
+ struct radeon_seq *repl = (*seq)->replacement;
+
+ kfree(*seq);
+ *seq = repl;
+ } else {
+ /* move on to the current container */
+ seq = &(*seq)->replacement;
+ }
+ }
+
+ /* check if we have enough room for one more fence */
+ radeon_seq_try_free(*seq);
+ if (((*seq)->end - (*seq)->start) <= (*seq)->mask)
+ return radeon_seq_add(*seq, fence);
+
+ /* not enough room, let's allocate a replacement */
+ size_for_new_seq = ((*seq)->mask + 1) * 2;
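+		/* the replacement starts one past the old end, so the
+		 * ID (*seq)->end itself is never handed out
+		 */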
+ start_for_new_seq = (*seq)->end + 1;
+ seq = &(*seq)->replacement;
+ }
+
+ *seq = radeon_seq_create(start_for_new_seq, size_for_new_seq);
+ if (!*seq) {
+		/* not enough memory for a new sequence object, but failing
+		   here isn't a good idea either because the commands are
+		   already submitted to the hardware, so just block on the
+		   fence instead */
+ int r = radeon_fence_wait(fence, false);
+ if (r)
+ DRM_ERROR("Error waiting for fence (%d)\n", r);
+ return 0;
+ }
+ return radeon_seq_add(*seq, fence);
+}
+
+/**
+ * radeon_seq_query - look up a fence by its ID
+ *
+ * @seq: sequence object
+ * @id: the generated ID
+ *
+ * Look up the fence associated with the given ID.
+ * Returns the fence object, or NULL if it couldn't be found.
+ */
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id)
+{
+ struct radeon_fence **ring;
+
+	while (seq && id >= seq->end)
+ seq = seq->replacement;
+
+ if (!seq || id < seq->start)
+ return NULL;
+
+ ring = radeon_seq_ring(seq);
+ return ring[id & seq->mask];
+}
+
+/**
+ * radeon_seq_destroy - destroy the sequence object
+ *
+ * @seq_ptr: pointer to sequence object
+ *
+ * Destroy the whole chain of sequence objects and release all fence
+ * references taken.
+ */
+void radeon_seq_destroy(struct radeon_seq **seq_ptr)
+{
+	struct radeon_seq *seq = *seq_ptr;
+
+	while (seq) {
+		struct radeon_seq *repl = seq->replacement;
+		struct radeon_fence **ring = radeon_seq_ring(seq);
+		uint64_t i;
+
+		/* masking start and end before the walk would skip entries
+		 * when the ring has wrapped around or is completely full,
+		 * so iterate over the unmasked range and mask each index
+		 */
+		for (i = seq->start; i != seq->end; ++i)
+			radeon_fence_unref(&ring[i & seq->mask]);
+
+		kfree(seq);
+		seq = repl;
+	}
+	*seq_ptr = NULL;
+}
@@ -959,6 +959,7 @@ struct drm_radeon_gem_va {
#define RADEON_CS_RING_VCE 4
/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
/* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth and fifth dwords are a 64-bit fence ID generated for this CS */
struct drm_radeon_cs_chunk {
uint32_t chunk_id;