@@ -833,6 +833,22 @@ void gem_context_set_param(int fd, struct local_i915_gem_context_param *p)
do_ioctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_SETPARAM, p);
}
+/**
+ * __gem_context_require_param:
+ * @fd: open i915 drm file descriptor
+ * @param: context parameter to probe
+ *
+ * Probes whether the kernel recognises @param by issuing a CONTEXT_GETPARAM
+ * ioctl against the default context (context id 0), without asserting.
+ *
+ * Returns: 0 on success, negative errno on failure.
+ */
+int __gem_context_require_param(int fd, uint64_t param)
+{
+	struct local_i915_gem_context_param p;
+	int ret;
+
+	p.context = 0; /* default context */
+	p.param = param;
+	p.value = 0;
+	p.size = 0;
+
+	/* drmIoctl() retries on EINTR; non-zero means the kernel rejected
+	 * the param and errno holds the reason. */
+	ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
+	if (ret)
+		return -errno;
+	return 0;
+}
+
/**
* gem_context_require_param:
* @fd: open i915 drm file descriptor
@@ -843,14 +859,7 @@ void gem_context_set_param(int fd, struct local_i915_gem_context_param *p)
*/
void gem_context_require_param(int fd, uint64_t param)
{
- struct local_i915_gem_context_param p;
-
- p.context = 0;
- p.param = param;
- p.value = 0;
- p.size = 0;
-
- igt_require(drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0);
+ igt_require(__gem_context_require_param(fd, param) == 0);
}
void gem_context_require_ban_period(int fd)
@@ -105,9 +105,11 @@ struct local_i915_gem_context_param {
#define LOCAL_CONTEXT_PARAM_BAN_PERIOD 0x1
#define LOCAL_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define LOCAL_CONTEXT_PARAM_GTT_SIZE 0x3
+#define LOCAL_CONTEXT_PARAM_TRTT 0x4
uint64_t value;
};
void gem_context_require_ban_period(int fd);
+int __gem_context_require_param(int fd, uint64_t param);
void gem_context_require_param(int fd, uint64_t param);
void gem_context_get_param(int fd, struct local_i915_gem_context_param *p);
void gem_context_set_param(int fd, struct local_i915_gem_context_param *p);
@@ -64,6 +64,7 @@ TESTS_progs_M = \
gem_streaming_writes \
gem_tiled_blits \
gem_tiled_partial_pwrite_pread \
+ gem_trtt \
gem_userptr_blits \
gem_write_read_ring_switch \
kms_addfb_basic \
new file mode 100644
@@ -0,0 +1,446 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Akash Goel <akash.goel@intel.com>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <malloc.h>
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_chipset.h"
+#include "intel_io.h"
+#include "i915_drm.h"
+#include <assert.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include "igt_kms.h"
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define BO_SIZE 4096
+#define EXEC_OBJECT_PINNED (1<<4)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+
+#define NO_PPGTT 0
+#define ALIASING_PPGTT 1
+#define FULL_32_BIT_PPGTT 2
+#define FULL_48_BIT_PPGTT 3
+/* uses_full_ppgtt
+ * Queries the level of PPGTT support provided by the kernel.
+ * @fd DRM fd
+ * @min minimum level required:
+ *	0 - No PPGTT
+ *	1 - Aliasing PPGTT
+ *	2 - Full PPGTT (32b)
+ *	3 - Full PPGTT (48b)
+ * RETURNS true when the reported level is at least @min
+*/
+static bool uses_full_ppgtt(int fd, int min)
+{
+	struct drm_i915_getparam get;
+	int level = 0;
+
+	memset(&get, 0, sizeof(get));
+	get.param = 18; /* I915_PARAM_HAS_ALIASING_PPGTT */
+	get.value = &level;
+
+	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &get) != 0)
+		return false;
+
+	/* don't leak errno from a probing ioctl */
+	errno = 0;
+	return level >= min;
+}
+
+/* has_softpin_support
+ * Checks whether the kernel supports softpinning of GEM objects.
+ * @fd DRM fd
+*/
+static bool has_softpin_support(int fd)
+{
+	struct drm_i915_getparam get;
+	int supported = 0;
+
+	memset(&get, 0, sizeof(get));
+	get.param = 37; /* I915_PARAM_HAS_EXEC_SOFTPIN */
+	get.value = &supported;
+
+	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &get) != 0)
+		return false;
+
+	/* don't leak errno from a probing ioctl */
+	errno = 0;
+	return supported == 1;
+}
+
+/* has_trtt_support
+ * Finds if trtt hw is present, by probing whether the kernel accepts the
+ * TRTT context parameter.
+ * @fd DRM fd
+*/
+static bool has_trtt_support(int fd)
+{
+	int ret = __gem_context_require_param(fd, LOCAL_CONTEXT_PARAM_TRTT);
+
+	/* clear any errno left behind by the probing ioctl */
+	errno = 0;
+	return (ret == 0);
+}
+
+/* mmap_bo
+ * helper for creating a CPU mmapping of the buffer
+ * @fd - drm fd
+ * @handle - handle of the buffer to mmap
+ * @size: size of the buffer
+ * NOTE(review): the mapping is read-only and is never unmapped here —
+ * the caller is responsible for munmap()ing the returned pointer.
+*/
+static void* mmap_bo(int fd, uint32_t handle, uint64_t size)
+{
+	uint32_t *ptr = gem_mmap__cpu(fd, handle, 0, size, PROT_READ);
+	/* move the object to the CPU read domain before reading through
+	 * the mapping */
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+	return ptr;
+}
+
+/* emit_store_dword
+ * populate batch buffer with MI_STORE_DWORD_IMM command
+ * @fd: drm file descriptor (currently unused, kept for symmetry)
+ * @cmd_buf: batch buffer
+ * @dw_offset: write offset in batch buffer
+ * @vaddr: destination Virtual address (low 2 bits are masked off, so it
+ *	must be dword aligned)
+ * @data: u32 data to be stored at destination
+ * Returns the new dword write offset into the batch buffer.
+*/
+static int emit_store_dword(int fd, uint32_t *cmd_buf, uint32_t dw_offset,
+			    uint64_t vaddr, uint32_t data)
+{
+	cmd_buf[dw_offset++] = MI_STORE_DWORD_IMM;
+	cmd_buf[dw_offset++] = vaddr & 0xFFFFFFFC;
+	cmd_buf[dw_offset++] = (vaddr >> 32) & 0xFFFF; /* bits 32:47 */
+	cmd_buf[dw_offset++] = data;
+
+	return dw_offset;
+}
+
+/* emit_store_qword
+ * populate batch buffer with the qword form of the MI_STORE_DWORD_IMM
+ * command (two immediate data dwords)
+ * @fd: drm file descriptor (currently unused, kept for symmetry)
+ * @cmd_buf: batch buffer
+ * @dw_offset: write offset in batch buffer
+ * @vaddr: destination Virtual address
+ * @data: u64 data to be stored at destination
+ * Returns the new dword write offset into the batch buffer.
+*/
+static int emit_store_qword(int fd, uint32_t *cmd_buf, uint32_t dw_offset,
+			    uint64_t vaddr, uint64_t data)
+{
+	/* | 0x3: extra command length for the second immediate dword */
+	cmd_buf[dw_offset++] = MI_STORE_DWORD_IMM | 0x3;
+	cmd_buf[dw_offset++] = vaddr & 0xFFFFFFFC;
+	cmd_buf[dw_offset++] = (vaddr >> 32) & 0xFFFF; /* bits 32:47 */
+	cmd_buf[dw_offset++] = data;
+	cmd_buf[dw_offset++] = data >> 32;
+
+	return dw_offset;
+}
+
+/* emit_bb_end
+ * populate batch buffer with MI_BATCH_BUFFER_END command
+ * @fd: drm file descriptor (currently unused, kept for symmetry)
+ * @cmd_buf: batch buffer
+ * @dw_offset: write offset in batch buffer
+ * Returns the new dword write offset into the batch buffer.
+*/
+static int emit_bb_end(int fd, uint32_t *cmd_buf, uint32_t dw_offset)
+{
+	cmd_buf[dw_offset++] = MI_BATCH_BUFFER_END;
+	cmd_buf[dw_offset++] = 0; /* padding dword */
+
+	return dw_offset;
+}
+
+/* setup_execbuffer
+ * helper for filling in an execbuffer2 request
+ * @execbuf - pointer to execbuffer
+ * @exec_object - pointer to exec object2 struct array
+ * @ctx_id - id of the context in which to execute
+ * @ring - ring to be used
+ * @buffer_count - how many buffers to submit
+ * @batch_length - length of batch buffer in bytes
+*/
+static void setup_execbuffer(struct drm_i915_gem_execbuffer2 *execbuf,
+	struct drm_i915_gem_exec_object2 *exec_object,
+	uint32_t ctx_id, int ring, int buffer_count, int batch_length)
+{
+	memset(execbuf, 0, sizeof(*execbuf));
+
+	execbuf->buffers_ptr = (unsigned long)exec_object;
+	execbuf->buffer_count = buffer_count;
+	execbuf->batch_len = batch_length;
+	execbuf->flags = ring;
+	i915_execbuffer2_set_context_id(*execbuf, ctx_id);
+}
+
+#define TABLE_SIZE 0x1000
+#define TILE_SIZE 0x10000
+
+#define TRTT_SEGMENT_SIZE (1ULL << 44)
+#define PPGTT_SIZE (1ULL << 48)
+
+#define NULL_TILE_PATTERN 0xFFFFFFFF
+#define INVALID_TILE_PATTERN 0xFFFFFFFE
+
+/* Userspace copy of the TRTT context parameter payload — layout must
+ * match the kernel's definition (verify against the TRTT uapi patches). */
+struct local_i915_gem_context_trtt_param {
+	uint64_t segment_base_addr;	/* offset of TR-TT segment in PPGTT */
+	uint64_t l3_table_address;	/* GFX address of the L3 table */
+	uint32_t invd_tile_val;		/* pattern marking invalid tiles */
+	uint32_t null_tile_val;		/* pattern marking null tiles */
+};
+
+/* setup_trtt
+ * Helper function to request KMD to enable TRTT
+ * @fd - drm fd
+ * @ctx_id - id of the context for which TRTT is to be enabled
+ * @l3_table_address - GFX address of the L3 table
+ * @segment_base_addr - offset of the TRTT segment in PPGTT space
+ */
+static void
+setup_trtt(int fd, uint32_t ctx_id, uint64_t l3_table_address,
+		uint64_t segment_base_addr)
+{
+	struct local_i915_gem_context_param ctx_param;
+	struct local_i915_gem_context_trtt_param trtt_param;
+
+	memset(&ctx_param, 0, sizeof(ctx_param));
+	/* zero the whole payload so no uninitialized padding bytes are
+	 * handed to the kernel */
+	memset(&trtt_param, 0, sizeof(trtt_param));
+
+	trtt_param.null_tile_val = NULL_TILE_PATTERN;
+	trtt_param.invd_tile_val = INVALID_TILE_PATTERN;
+	trtt_param.l3_table_address = l3_table_address;
+	trtt_param.segment_base_addr = segment_base_addr;
+
+	ctx_param.context = ctx_id;
+	ctx_param.size = sizeof(trtt_param);
+	ctx_param.param = LOCAL_CONTEXT_PARAM_TRTT;
+	/* go via uintptr_t: a direct cast from a pointer to uint64_t
+	 * triggers pointer-to-int-cast warnings on 32 bit builds */
+	ctx_param.value = (uint64_t)(uintptr_t)&trtt_param;
+
+	gem_context_set_param(fd, &ctx_param);
+}
+
+/* bo_alloc_setup
+ * allocate bo and populate exec object
+ * @fd - drm fd
+ * @exec_object2 - pointer to exec object
+ * @bo_size - buffer size
+ * @flags - exec flags
+ * @bo_offset - pointer to the current PPGTT offset; when non-NULL the
+ *	object is placed at that offset and the offset is advanced by
+ *	@bo_size for the next allocation
+ */
+static void bo_alloc_setup(int fd, struct drm_i915_gem_exec_object2 *exec_object2,
+			   uint64_t bo_size, uint64_t flags, uint64_t *bo_offset)
+{
+	memset(exec_object2, 0, sizeof(*exec_object2));
+	exec_object2->handle = gem_create(fd, bo_size);
+	exec_object2->flags = flags;
+
+	if (bo_offset)
+	{
+		exec_object2->offset = *bo_offset;
+		*bo_offset += bo_size;
+	}
+}
+
+/* basic test
+ * This test will create a context, allocate a L3 table page, 2 pages apiece
+ * for L2/L1 tables and couple of data buffers of 64KB in size, matching the
+ * Tile size. The 2 data buffers will be mapped to the 2 ends of TRTT virtual
+ * space. Series of MI_STORE_DWORD_IMM commands will be added in the batch
+ * buffer to first update the TR-TT table entries and then to update the data
+ * buffers using their TR-TT VA, exercising the table programming done
+ * previously.
+ * Invoke CONTEXT_SETPARAM ioctl to request KMD to enable TRTT.
+ * Invoke execbuffer to submit the batch buffer.
+ * Verify value of first DWORD in the 2 data buffer matches the data asked
+ * to be written by the GPU.
+ */
+static void submit_trtt_context(int fd, uint64_t segment_base_addr)
+{
+	enum {
+		L3_TBL,
+		L2_TBL1,
+		L2_TBL2,
+		L1_TBL1,
+		L1_TBL2,
+		DATA1,
+		DATA2,
+		BATCH,
+		NUM_BUFFERS,
+	};
+
+	int ring, len = 0;
+	uint32_t *ptr;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec_object2[NUM_BUFFERS];
+	/* BO_SIZE bytes worth of dwords; the batch object itself is only
+	 * BO_SIZE bytes, so sizing the array as BO_SIZE dwords (4x that)
+	 * just wasted stack */
+	uint32_t batch_buffer[BO_SIZE / sizeof(uint32_t)];
+	uint32_t ctx_id, data32;
+	uint64_t address, data64, cur_ppgtt_off, exec_flags;
+	uint64_t first_tile_addr, last_tile_addr;
+
+	first_tile_addr = segment_base_addr;
+	last_tile_addr = first_tile_addr + TRTT_SEGMENT_SIZE - TILE_SIZE;
+
+	if (segment_base_addr == 0) {
+		/* Use the default context for first iteration */
+		ctx_id = 0;
+		/* place the regular BOs above the TR-TT segment, which
+		 * starts at offset 0 here */
+		cur_ppgtt_off = TRTT_SEGMENT_SIZE;
+		exec_flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	} else {
+		ctx_id = gem_context_create(fd);
+		cur_ppgtt_off = 0;
+		exec_flags = 0;
+	}
+
+	/* first allocate Batch buffer BO */
+	bo_alloc_setup(fd, &exec_object2[BATCH], BO_SIZE, exec_flags, NULL);
+
+	/* table BOs and data buffer BOs are written by GPU and are soft pinned */
+	exec_flags |= (EXEC_OBJECT_WRITE | EXEC_OBJECT_PINNED);
+
+	/* Allocate a L3 table BO */
+	bo_alloc_setup(fd, &exec_object2[L3_TBL], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
+
+	/* Allocate two L2 table BOs */
+	bo_alloc_setup(fd, &exec_object2[L2_TBL1], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
+	bo_alloc_setup(fd, &exec_object2[L2_TBL2], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
+
+	/* Allocate two L1 table BOs */
+	bo_alloc_setup(fd, &exec_object2[L1_TBL1], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
+	bo_alloc_setup(fd, &exec_object2[L1_TBL2], TABLE_SIZE, exec_flags, &cur_ppgtt_off);
+
+	/* Align the PPGTT offsets for the 2 data buffers to next 64 KB boundary */
+	cur_ppgtt_off = ALIGN(cur_ppgtt_off, TILE_SIZE);
+
+	/* Allocate two Data buffer BOs */
+	bo_alloc_setup(fd, &exec_object2[DATA1], TILE_SIZE, exec_flags, &cur_ppgtt_off);
+	bo_alloc_setup(fd, &exec_object2[DATA2], TILE_SIZE, exec_flags, &cur_ppgtt_off);
+
+	/* Add commands to update the two L3 table entries to point them to the L2 tables*/
+	address = exec_object2[L3_TBL].offset;
+	data64 = exec_object2[L2_TBL1].offset;
+	len = emit_store_qword(fd, batch_buffer, len, address, data64);
+
+	address = exec_object2[L3_TBL].offset + 511*sizeof(uint64_t);
+	data64 = exec_object2[L2_TBL2].offset;
+	len = emit_store_qword(fd, batch_buffer, len, address, data64);
+
+	/* Add commands to update an entry of 2 L2 tables to point them to the L1 tables*/
+	address = exec_object2[L2_TBL1].offset;
+	data64 = exec_object2[L1_TBL1].offset;
+	len = emit_store_qword(fd, batch_buffer, len, address, data64);
+
+	address = exec_object2[L2_TBL2].offset + 511*sizeof(uint64_t);
+	data64 = exec_object2[L1_TBL2].offset;
+	len = emit_store_qword(fd, batch_buffer, len, address, data64);
+
+	/* Add commands to update an entry of 2 L1 tables to point them to the data buffers*/
+	address = exec_object2[L1_TBL1].offset;
+	data32 = exec_object2[DATA1].offset >> 16;
+	len = emit_store_dword(fd, batch_buffer, len, address, data32);
+
+	address = exec_object2[L1_TBL2].offset + 1023*sizeof(uint32_t);
+	data32 = exec_object2[DATA2].offset >> 16;
+	len = emit_store_dword(fd, batch_buffer, len, address, data32);
+
+	/* Add commands to update the 2 data buffers, using their TRTT VA */
+	data32 = 0x12345678;
+	len = emit_store_dword(fd, batch_buffer, len, first_tile_addr, data32);
+	len = emit_store_dword(fd, batch_buffer, len, last_tile_addr, data32);
+
+	len = emit_bb_end(fd, batch_buffer, len);
+	gem_write(fd, exec_object2[BATCH].handle, 0, batch_buffer, len*4);
+
+	/* Request KMD to setup the TR-TT */
+	setup_trtt(fd, ctx_id, exec_object2[L3_TBL].offset, first_tile_addr);
+
+	ring = I915_EXEC_RENDER;
+	setup_execbuffer(&execbuf, exec_object2, ctx_id, ring, NUM_BUFFERS, len*4);
+
+	/* submit command buffer */
+	gem_execbuf(fd, &execbuf);
+
+	/* read the 2 data buffers to check for the value written by the GPU;
+	 * use adjacent string literals (not backslash continuations, which
+	 * embedded the source indentation into the message) and unmap each
+	 * mapping once checked, so repeated calls don't leak mappings */
+	ptr = mmap_bo(fd, exec_object2[DATA1].handle, TILE_SIZE);
+	igt_fail_on_f(ptr[0] != data32,
+		      "\nCPU read does not match GPU write, "
+		      "expected: 0x%x, got: 0x%x\n",
+		      data32, ptr[0]);
+	munmap(ptr, TILE_SIZE);
+
+	ptr = mmap_bo(fd, exec_object2[DATA2].handle, TILE_SIZE);
+	igt_fail_on_f(ptr[0] != data32,
+		      "\nCPU read does not match GPU write, "
+		      "expected: 0x%x, got: 0x%x\n",
+		      data32, ptr[0]);
+	munmap(ptr, TILE_SIZE);
+
+	gem_close(fd, exec_object2[L3_TBL].handle);
+	gem_close(fd, exec_object2[L2_TBL1].handle);
+	gem_close(fd, exec_object2[L2_TBL2].handle);
+	gem_close(fd, exec_object2[L1_TBL1].handle);
+	gem_close(fd, exec_object2[L1_TBL2].handle);
+	gem_close(fd, exec_object2[DATA1].handle);
+	gem_close(fd, exec_object2[DATA2].handle);
+	gem_close(fd, exec_object2[BATCH].handle);
+
+	if (ctx_id)
+		gem_context_destroy(fd, ctx_id);
+}
+
+/* gem_basic_trtt_use
+ * Runs the basic TR-TT submission once for every possible TR-TT segment
+ * base inside the 48 bit PPGTT space (PPGTT_SIZE / TRTT_SEGMENT_SIZE = 16
+ * iterations), skipping if the required kernel/hw features are missing.
+ */
+static void gem_basic_trtt_use(void)
+{
+	int fd;
+	uint64_t segment_base_addr;
+
+	fd = drm_open_driver(DRIVER_INTEL);
+	/* needs 48 bit PPGTT, softpin and TR-TT support */
+	igt_require(uses_full_ppgtt(fd, FULL_48_BIT_PPGTT));
+	igt_require(has_softpin_support(fd));
+	igt_require(has_trtt_support(fd));
+
+	for (segment_base_addr = 0;
+	     segment_base_addr < PPGTT_SIZE;
+	     segment_base_addr += TRTT_SEGMENT_SIZE)
+	{
+		submit_trtt_context(fd, segment_base_addr);
+	}
+
+	close(fd);
+}
+
+igt_main
+{
+
+	/* test needs 48 bit PPGTT & Soft Pin support */
+	igt_subtest("basic") {
+		gem_basic_trtt_use();
+	}
+}
+