@@ -1566,10 +1566,11 @@ void i915_gem_free_all_phys_object(struct drm_device *dev);
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
+i915_gem_get_gtt_physical_size(struct drm_device *dev, uint32_t size,
+ int tiling_mode, int stride);
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
- int tiling_mode, bool fenced);
+ int tiling_mode, int stride, bool fenced);
size_t
i915_gem_get_tile_row_size(struct drm_device *dev, int tiling_mode, int stride);
@@ -1453,16 +1453,39 @@ i915_gem_get_tile_row_size(struct drm_device *dev, int tiling_mode, int stride)
return stride / i915_gem_get_tile_width(dev, tiling_mode) * PAGE_SIZE;
}
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+static uint32_t
+i915_gem_get_gtt_linear_size(struct drm_device *dev, uint32_t size,
+ int tiling_mode, int stride)
{
uint32_t gtt_size;
- if (INTEL_INFO(dev)->gen >= 4 ||
- tiling_mode == I915_TILING_NONE)
+ if (tiling_mode == I915_TILING_NONE)
return size;
- /* Previous chips need a power-of-two fence region when tiling */
+ /*
+ * Applications can set tiling for objects with sizes not aligned to
+ * the object's tile row size. This is a valid use case for example if
+ * the application wants to reuse the same object with different tile
+ * settings. In this case we have to limit the linear range
+ * (programmed to the fence register) to the last full tile row
+ * offset, otherwise users of this object would have access to the
+ * object following it in the GTT space by reading/writing to the
+ * unaligned part. For the unaligned part we don't have backing storage
+ * even for a single linear pixel line, so tiling wouldn't work
+ * properly anyway.
+ */
+ size = rounddown(size, i915_gem_get_tile_row_size(dev, tiling_mode,
+ stride));
+ if (INTEL_INFO(dev)->gen >= 4)
+ return size;
+
+ /*
+ * Previous chips need a power-of-two fence region when tiling. This
+ * might set an unaligned object's linear size between its last full
+ * tile row offset and the next tile row offset, which is a problem as
+ * pointed out above. For these objects we'll overallocate GTT space
+ * up to the next tile row offset, see i915_gem_get_gtt_physical_size().
+ */
if (INTEL_INFO(dev)->gen == 3)
gtt_size = 1024*1024;
else
@@ -1474,6 +1497,28 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
return gtt_size;
}
+uint32_t
+i915_gem_get_gtt_physical_size(struct drm_device *dev, uint32_t size,
+ int tiling_mode, int stride)
+{
+ uint32_t linear_size;
+
+ if (INTEL_INFO(dev)->gen >= 4 || tiling_mode == I915_TILING_NONE)
+ return size;
+
+ linear_size = i915_gem_get_gtt_linear_size(dev, size, tiling_mode,
+ stride);
+ /*
+ * Overallocate in case linear_size is greater than the object's last
+ * full tile row offset and smaller than the tile row offset after
+ * that.
+ */
+ size = roundup(size,
+ i915_gem_get_tile_row_size(dev, tiling_mode, stride));
+
+ return max(size, linear_size);
+}
+
/**
* i915_gem_get_gtt_alignment - return required GTT alignment for an object
* @obj: object to check
@@ -1483,7 +1528,7 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
*/
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
- int tiling_mode, bool fenced)
+ int tiling_mode, int stride, bool fenced)
{
/*
* Minimum alignment is 4k (GTT page size), but might be greater
@@ -1497,7 +1542,7 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
* Previous chips need to be aligned to the size of the smallest
* fence register that can contain the object.
*/
- return i915_gem_get_gtt_size(dev, size, tiling_mode);
+ return i915_gem_get_gtt_linear_size(dev, size, tiling_mode, stride);
}
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -2542,8 +2587,11 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
}
if (obj) {
- u32 size = obj->gtt_space->size;
+ u32 size;
+ size = i915_gem_get_gtt_linear_size(dev, obj->base.size,
+ obj->tiling_mode,
+ obj->stride);
val = (uint64_t)((obj->gtt_offset + size - 4096) &
0xfffff000) << 32;
val |= obj->gtt_offset & 0xfffff000;
@@ -2566,10 +2614,13 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
u32 val;
if (obj) {
- u32 size = obj->gtt_space->size;
+ u32 size;
int pitch_val;
int tile_width;
+ size = i915_gem_get_gtt_linear_size(dev, obj->base.size,
+ obj->tiling_mode,
+ obj->stride);
WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
(size & -size) != size ||
(obj->gtt_offset & (size - 1)),
@@ -2610,9 +2661,12 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
uint32_t val;
if (obj) {
- u32 size = obj->gtt_space->size;
+ u32 size;
uint32_t pitch_val;
+ size = i915_gem_get_gtt_linear_size(dev, obj->base.size,
+ obj->tiling_mode,
+ obj->stride);
WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
(size & -size) != size ||
(obj->gtt_offset & (size - 1)),
@@ -2903,16 +2957,17 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
return -EINVAL;
}
- fence_size = i915_gem_get_gtt_size(dev,
- obj->base.size,
- obj->tiling_mode);
+ fence_size = i915_gem_get_gtt_physical_size(dev, obj->base.size,
+ obj->tiling_mode, obj->stride);
fence_alignment = i915_gem_get_gtt_alignment(dev,
obj->base.size,
- obj->tiling_mode, true);
+ obj->tiling_mode,
+ obj->stride, true);
unfenced_alignment =
i915_gem_get_gtt_alignment(dev,
obj->base.size,
- obj->tiling_mode, false);
+ obj->tiling_mode,
+ obj->stride, false);
if (alignment == 0)
alignment = map_and_fenceable ? fence_alignment :
@@ -253,7 +253,8 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
/* Is the current GTT allocation valid for the change in tiling? */
static bool
-i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
+i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode,
+ int stride)
{
u32 size;
u32 align;
@@ -261,6 +262,11 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
if (tiling_mode == I915_TILING_NONE)
return true;
+ size = i915_gem_get_gtt_physical_size(obj->base.dev, obj->base.size,
+ tiling_mode, stride);
+ if (obj->gtt_space->size != size)
+ return false;
+
if (INTEL_INFO(obj->base.dev)->gen >= 4)
return true;
@@ -272,12 +278,8 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
return false;
}
- size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode);
- if (obj->gtt_space->size != size)
- return false;
-
align = i915_gem_get_gtt_alignment(obj->base.dev, obj->base.size,
- tiling_mode, true);
+ tiling_mode, stride, true);
if (obj->gtt_offset & (align - 1))
return false;
@@ -361,13 +363,15 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
obj->map_and_fenceable =
obj->gtt_space == NULL ||
(obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
- i915_gem_object_fence_ok(obj, args->tiling_mode));
+ i915_gem_object_fence_ok(obj, args->tiling_mode,
+ args->stride));
/* Rebind if we need a change of alignment */
if (!obj->map_and_fenceable) {
u32 unfenced_alignment =
i915_gem_get_gtt_alignment(dev, obj->base.size,
args->tiling_mode,
+ args->stride,
false);
if (obj->gtt_offset & (unfenced_alignment - 1))
ret = i915_gem_object_unbind(obj);
The GTT space needed for tiled objects might be bigger than the linear size programmed into the correpsonding fence register. For example for the following buffer on a Gen5+ HW: - allocation size: 4 * 4096 bytes - tiling mode: X tiled - stride: 1536 This needs 6 * 4096 bytes of GTT space to cover all the pixels in the buffer, but at the moment we allocate only 4 * 4096. This means that any buffer following this tiled buffer in the GTT space will be corrupted if pixels belonging to the last two tiles are written. Fix this by limiting the linear size to the object's last full tile row offset (3 * 4096 bytes in the above example). Beyond this offset we don't have enough backing storage to hold even a single linear pixel line, so tiling wouldn't work properly anyway. For old HW with power-of-two fence regions we might have to also overallocate GTT space in case the POT offset happens to be not aligned to the tile row size. The page frames for the overallocated area will be backed by the single GTT scratch page used already for padding objects with a POT fence region. Note that this is more of a security/robustness problem and not fixing any reported issue that I know of. This is because applications will normally access only the part of the buffer that is tile row size aligned. In v2: - Instead of always overallocating for unaligned objects, limit the linear size - and overallocate only when necessary for POT fence sizes. Discussed this with Chris Wilson. - Force buffer-rebind if the GTT size changes after a tiling change. (Chris Wilson) Signed-off-by: Imre Deak <imre.deak@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem.c | 85 ++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_tiling.c | 18 ++++--- 3 files changed, 84 insertions(+), 24 deletions(-)