@@ -210,6 +210,9 @@ union drm_amdgpu_bo_list {
#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
/* indicate some job from this context once cause gpu hang */
#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)
/* Context priority level */
#define AMDGPU_CTX_PRIORITY_UNSET -2048
@@ -272,13 +275,14 @@ union drm_amdgpu_vm {
/* sched ioctl */
#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2
struct drm_amdgpu_sched_in {
/* AMDGPU_SCHED_OP_* */
__u32 op;
__u32 fd;
__s32 priority;
- __u32 flags;
+ __u32 ctx_id;
};
union drm_amdgpu_sched {
@@ -523,6 +527,9 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09
struct drm_amdgpu_cs_chunk {
__u32 chunk_id;
@@ -565,6 +572,11 @@ union drm_amdgpu_cs {
* caches (L2/vL1/sL1/I$). */
#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
+ * This will reset wave ID counters for the IB.
+ */
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+
struct drm_amdgpu_cs_chunk_ib {
__u32 _pad;
/** AMDGPU_IB_FLAG_* */
@@ -598,6 +610,12 @@ struct drm_amdgpu_cs_chunk_sem {
__u32 handle;
};
+struct drm_amdgpu_cs_chunk_syncobj {
+ __u32 handle;
+ __u32 flags;
+ __u64 point;
+};
+
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2
@@ -673,6 +691,7 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
/* Subquery id: Query DMCU firmware version */
#define AMDGPU_INFO_FW_DMCU 0x12
+ #define AMDGPU_INFO_FW_TA 0x13
/* number of bytes moved for TTM migration */
#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f
/* the used VRAM size */
@@ -726,6 +745,37 @@ struct drm_amdgpu_cs_chunk_data {
/* Number of VRAM page faults on CPU access. */
#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E
#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F
+/* query ras mask of enabled features*/
+#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20
+
+/* RAS MASK: UMC (VRAM) */
+#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0)
+/* RAS MASK: SDMA */
+#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1)
+/* RAS MASK: GFX */
+#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2)
+/* RAS MASK: MMHUB */
+#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3)
+/* RAS MASK: ATHUB */
+#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4)
+/* RAS MASK: PCIE */
+#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5)
+/* RAS MASK: HDP */
+#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6)
+/* RAS MASK: XGMI */
+#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7)
+/* RAS MASK: DF */
+#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8)
+/* RAS MASK: SMN */
+#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9)
+/* RAS MASK: SEM */
+#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10)
+/* RAS MASK: MP0 */
+#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11)
+/* RAS MASK: MP1 */
+#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12)
+/* RAS MASK: FUSE */
+#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13)
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
@@ -729,8 +729,18 @@ struct drm_syncobj_handle {
__u32 pad;
};
+struct drm_syncobj_transfer {
+ __u32 src_handle;
+ __u32 dst_handle;
+ __u64 src_point;
+ __u64 dst_point;
+ __u32 flags;
+ __u32 pad;
+};
+
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
struct drm_syncobj_wait {
__u64 handles;
/* absolute timeout */
@@ -741,12 +751,33 @@ struct drm_syncobj_wait {
__u32 pad;
};
+struct drm_syncobj_timeline_wait {
+ __u64 handles;
+ /* wait on specific timeline point for every handles*/
+ __u64 points;
+ /* absolute timeout */
+ __s64 timeout_nsec;
+ __u32 count_handles;
+ __u32 flags;
+ __u32 first_signaled; /* only valid when not waiting all */
+ __u32 pad;
+};
+
+
struct drm_syncobj_array {
__u64 handles;
__u32 count_handles;
__u32 pad;
};
+struct drm_syncobj_timeline_array {
+ __u64 handles;
+ __u64 points;
+ __u32 count_handles;
+ __u32 pad;
+};
+
+
/* Query current scanout sequence number */
struct drm_crtc_get_sequence {
__u32 crtc_id; /* requested crtc_id */
@@ -903,6 +934,11 @@ extern "C" {
#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease)
#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
+#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+
/**
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
@@ -33,7 +33,6 @@
extern "C" {
#endif
-#define DRM_DISPLAY_INFO_LEN 32
#define DRM_CONNECTOR_NAME_LEN 32
#define DRM_DISPLAY_MODE_LEN 32
#define DRM_PROP_NAME_LEN 32
@@ -622,7 +621,8 @@ struct drm_color_ctm {
struct drm_color_lut {
/*
- * Data is U0.16 fixed point format.
+ * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
+ * 0xffff == 1.0.
*/
__u16 red;
__u16 green;
@@ -136,6 +136,8 @@ enum drm_i915_gem_engine_class {
struct i915_engine_class_instance {
__u16 engine_class; /* see enum drm_i915_gem_engine_class */
__u16 engine_instance;
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
};
/**
@@ -355,6 +357,8 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_PERF_ADD_CONFIG 0x37
#define DRM_I915_PERF_REMOVE_CONFIG 0x38
#define DRM_I915_QUERY 0x39
+#define DRM_I915_GEM_VM_CREATE 0x3a
+#define DRM_I915_GEM_VM_DESTROY 0x3b
/* Must be kept compact -- no holes */
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -415,6 +419,8 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -598,6 +604,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_MMAP_GTT_COHERENT 52
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1120,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_ARRAY (1<<19)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT (1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1464,8 +1485,9 @@ struct drm_i915_gem_context_create_ext {
__u32 ctx_id; /* output: id of new context*/
__u32 flags;
#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1)
#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
- (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+ (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
__u64 extensions;
};
@@ -1507,6 +1529,41 @@ struct drm_i915_gem_context_param {
* On creation, all new contexts are marked as recoverable.
*/
#define I915_CONTEXT_PARAM_RECOVERABLE 0x8
+
+ /*
+ * The id of the associated virtual memory address space (ppGTT) of
+ * this context. Can be retrieved and passed to another context
+ * (on the same fd) for both to use the same ppGTT and so share
+ * address layouts, and avoid reloading the page tables on context
+ * switches between themselves.
+ *
+ * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+ */
+#define I915_CONTEXT_PARAM_VM 0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ * engine_class: I915_ENGINE_CLASS_INVALID,
+ * engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
+ */
+#define I915_CONTEXT_PARAM_ENGINES 0xa
/* Must be kept compact -- no holes and well documented */
__u64 value;
@@ -1540,9 +1597,10 @@ struct drm_i915_gem_context_param_sseu {
struct i915_engine_class_instance engine;
/*
- * Unused for now. Must be cleared to zero.
+ * Unknown flags must be cleared to zero.
*/
__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
/*
* Mask of slices to enable for the context. Valid values are a subset
@@ -1570,12 +1628,115 @@ struct drm_i915_gem_context_param_sseu {
__u32 rsvd;
};
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+ struct i915_user_extension base;
+
+ __u16 engine_index;
+ __u16 num_siblings;
+ __u32 flags; /* all undefined flags must be zero */
+
+ __u64 mbz64; /* reserved for future use; must be zero */
+
+ struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+ struct i915_user_extension base; \
+ __u16 engine_index; \
+ __u16 num_siblings; \
+ __u32 flags; \
+ __u64 mbz64; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/*
+ * i915_context_engines_bond:
+ *
+ * Constructed bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs, for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with a I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+ struct i915_user_extension base;
+
+ struct i915_engine_class_instance master;
+
+ __u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+ __u16 num_bonds;
+
+ __u64 flags; /* all undefined flags must be zero */
+ __u64 mbz64[4]; /* reserved for future use; must be zero */
+
+ struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
+ struct i915_user_extension base; \
+ struct i915_engine_class_instance master; \
+ __u16 virtual_index; \
+ __u16 num_bonds; \
+ __u64 flags; \
+ __u64 mbz64[4]; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+struct i915_context_param_engines {
+ __u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
+#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
+ struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+ __u64 extensions; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
struct drm_i915_gem_context_create_ext_setparam {
#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
struct i915_user_extension base;
struct drm_i915_gem_context_param param;
};
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+ struct i915_user_extension base;
+ __u32 clone_id;
+ __u32 flags;
+#define I915_CONTEXT_CLONE_ENGINES (1u << 0)
+#define I915_CONTEXT_CLONE_FLAGS (1u << 1)
+#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2)
+#define I915_CONTEXT_CLONE_SSEU (1u << 3)
+#define I915_CONTEXT_CLONE_TIMELINE (1u << 4)
+#define I915_CONTEXT_CLONE_VM (1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+ __u64 rsvd;
+};
+
struct drm_i915_gem_context_destroy {
__u32 ctx_id;
__u32 pad;
@@ -1821,6 +1982,7 @@ struct drm_i915_perf_oa_config {
struct drm_i915_query_item {
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+#define DRM_I915_QUERY_ENGINE_INFO 2
/* Must be kept compact -- no holes and well documented */
/*
@@ -1919,6 +2081,47 @@ struct drm_i915_query_topology_info {
__u8 data[];
};
+/**
+ * struct drm_i915_engine_info
+ *
+ * Describes one engine and it's capabilities as known to the driver.
+ */
+struct drm_i915_engine_info {
+ /** Engine class and instance. */
+ struct i915_engine_class_instance engine;
+
+ /** Reserved field. */
+ __u32 rsvd0;
+
+ /** Engine flags. */
+ __u64 flags;
+
+ /** Capabilities of this engine. */
+ __u64 capabilities;
+#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
+#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
+
+ /** Reserved fields. */
+ __u64 rsvd1[4];
+};
+
+/**
+ * struct drm_i915_query_engine_info
+ *
+ * Engine info query enumerates all engines known to the driver by filling in
+ * an array of struct drm_i915_engine_info structures.
+ */
+struct drm_i915_query_engine_info {
+ /** Number of struct drm_i915_engine_info structs following. */
+ __u32 num_engines;
+
+ /** MBZ */
+ __u32 rsvd[3];
+
+ /** Marker for drm_i915_engine_info structures. */
+ struct drm_i915_engine_info engines[];
+};
+
#if defined(__cplusplus)
}
#endif
new file mode 100644
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DRM_H__
+#define __LIMA_DRM_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+enum drm_lima_param_gpu_id {
+ DRM_LIMA_PARAM_GPU_ID_UNKNOWN,
+ DRM_LIMA_PARAM_GPU_ID_MALI400,
+ DRM_LIMA_PARAM_GPU_ID_MALI450,
+};
+
+enum drm_lima_param {
+ DRM_LIMA_PARAM_GPU_ID,
+ DRM_LIMA_PARAM_NUM_PP,
+ DRM_LIMA_PARAM_GP_VERSION,
+ DRM_LIMA_PARAM_PP_VERSION,
+};
+
+/**
+ * get various information of the GPU
+ */
+struct drm_lima_get_param {
+ __u32 param; /* in, value in enum drm_lima_param */
+ __u32 pad; /* pad, must be zero */
+ __u64 value; /* out, parameter value */
+};
+
+/**
+ * create a buffer for used by GPU
+ */
+struct drm_lima_gem_create {
+ __u32 size; /* in, buffer size */
+ __u32 flags; /* in, currently no flags, must be zero */
+ __u32 handle; /* out, GEM buffer handle */
+ __u32 pad; /* pad, must be zero */
+};
+
+/**
+ * get information of a buffer
+ */
+struct drm_lima_gem_info {
+ __u32 handle; /* in, GEM buffer handle */
+ __u32 va; /* out, virtual address mapped into GPU MMU */
+ __u64 offset; /* out, used to mmap this buffer to CPU */
+};
+
+#define LIMA_SUBMIT_BO_READ 0x01
+#define LIMA_SUBMIT_BO_WRITE 0x02
+
+/* buffer information used by one task */
+struct drm_lima_gem_submit_bo {
+ __u32 handle; /* in, GEM buffer handle */
+ __u32 flags; /* in, buffer read/write by GPU */
+};
+
+#define LIMA_GP_FRAME_REG_NUM 6
+
+/* frame used to setup GP for each task */
+struct drm_lima_gp_frame {
+ __u32 frame[LIMA_GP_FRAME_REG_NUM];
+};
+
+#define LIMA_PP_FRAME_REG_NUM 23
+#define LIMA_PP_WB_REG_NUM 12
+
+/* frame used to setup mali400 GPU PP for each task */
+struct drm_lima_m400_pp_frame {
+ __u32 frame[LIMA_PP_FRAME_REG_NUM];
+ __u32 num_pp;
+ __u32 wb[3 * LIMA_PP_WB_REG_NUM];
+ __u32 plbu_array_address[4];
+ __u32 fragment_stack_address[4];
+};
+
+/* frame used to setup mali450 GPU PP for each task */
+struct drm_lima_m450_pp_frame {
+ __u32 frame[LIMA_PP_FRAME_REG_NUM];
+ __u32 num_pp;
+ __u32 wb[3 * LIMA_PP_WB_REG_NUM];
+ __u32 use_dlbu;
+ __u32 _pad;
+ union {
+ __u32 plbu_array_address[8];
+ __u32 dlbu_regs[4];
+ };
+ __u32 fragment_stack_address[8];
+};
+
+#define LIMA_PIPE_GP 0x00
+#define LIMA_PIPE_PP 0x01
+
+#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
+
+/**
+ * submit a task to GPU
+ *
+ * User can always merge multi sync_file and drm_syncobj
+ * into one drm_syncobj as in_sync[0], but we reserve
+ * in_sync[1] for another task's out_sync to avoid the
+ * export/import/merge pass when explicit sync.
+ */
+struct drm_lima_gem_submit {
+ __u32 ctx; /* in, context handle task is submitted to */
+ __u32 pipe; /* in, which pipe to use, GP/PP */
+ __u32 nr_bos; /* in, array length of bos field */
+ __u32 frame_size; /* in, size of frame field */
+ __u64 bos; /* in, array of drm_lima_gem_submit_bo */
+ __u64 frame; /* in, GP/PP frame */
+ __u32 flags; /* in, submit flags */
+ __u32 out_sync; /* in, drm_syncobj handle used to wait task finish after submission */
+ __u32 in_sync[2]; /* in, drm_syncobj handle used to wait before start this task */
+};
+
+#define LIMA_GEM_WAIT_READ 0x01
+#define LIMA_GEM_WAIT_WRITE 0x02
+
+/**
+ * wait pending GPU task finish of a buffer
+ */
+struct drm_lima_gem_wait {
+ __u32 handle; /* in, GEM buffer handle */
+ __u32 op; /* in, CPU want to read/write this buffer */
+ __s64 timeout_ns; /* in, wait timeout in absulute time */
+};
+
+/**
+ * create a context
+ */
+struct drm_lima_ctx_create {
+ __u32 id; /* out, context handle */
+ __u32 _pad; /* pad, must be zero */
+};
+
+/**
+ * free a context
+ */
+struct drm_lima_ctx_free {
+ __u32 id; /* in, context handle */
+ __u32 _pad; /* pad, must be zero */
+};
+
+#define DRM_LIMA_GET_PARAM 0x00
+#define DRM_LIMA_GEM_CREATE 0x01
+#define DRM_LIMA_GEM_INFO 0x02
+#define DRM_LIMA_GEM_SUBMIT 0x03
+#define DRM_LIMA_GEM_WAIT 0x04
+#define DRM_LIMA_CTX_CREATE 0x05
+#define DRM_LIMA_CTX_FREE 0x06
+
+#define DRM_IOCTL_LIMA_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GET_PARAM, struct drm_lima_get_param)
+#define DRM_IOCTL_LIMA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, struct drm_lima_gem_create)
+#define DRM_IOCTL_LIMA_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info)
+#define DRM_IOCTL_LIMA_GEM_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, struct drm_lima_gem_submit)
+#define DRM_IOCTL_LIMA_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait)
+#define DRM_IOCTL_LIMA_CTX_CREATE DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_CTX_CREATE, struct drm_lima_ctx_create)
+#define DRM_IOCTL_LIMA_CTX_FREE DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_CTX_FREE, struct drm_lima_ctx_free)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __LIMA_DRM_H__ */
@@ -74,6 +74,8 @@ struct drm_msm_timespec {
#define MSM_PARAM_TIMESTAMP 0x05
#define MSM_PARAM_GMEM_BASE 0x06
#define MSM_PARAM_NR_RINGS 0x07
+#define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */
+#define MSM_PARAM_FAULTS 0x09
struct drm_msm_param {
__u32 pipe; /* in, MSM_PIPE_x */
@@ -286,6 +288,16 @@ struct drm_msm_submitqueue {
__u32 id; /* out, identifier */
};
+#define MSM_SUBMITQUEUE_PARAM_FAULTS 0
+
+struct drm_msm_submitqueue_query {
+ __u64 data;
+ __u32 id;
+ __u32 param;
+ __u32 len;
+ __u32 pad;
+};
+
#define DRM_MSM_GET_PARAM 0x00
/* placeholder:
#define DRM_MSM_SET_PARAM 0x01
@@ -302,6 +314,7 @@ struct drm_msm_submitqueue {
*/
#define DRM_MSM_SUBMITQUEUE_NEW 0x0A
#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
+#define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
@@ -313,6 +326,7 @@ struct drm_msm_submitqueue {
#define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise)
#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
+#define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
#if defined(__cplusplus)
}
@@ -133,12 +133,63 @@ struct drm_nouveau_gem_cpu_fini {
#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x05 /* deprecated */
#define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */
#define DRM_NOUVEAU_NVIF 0x07
+#define DRM_NOUVEAU_SVM_INIT 0x08
+#define DRM_NOUVEAU_SVM_BIND 0x09
#define DRM_NOUVEAU_GEM_NEW 0x40
#define DRM_NOUVEAU_GEM_PUSHBUF 0x41
#define DRM_NOUVEAU_GEM_CPU_PREP 0x42
#define DRM_NOUVEAU_GEM_CPU_FINI 0x43
#define DRM_NOUVEAU_GEM_INFO 0x44
+struct drm_nouveau_svm_init {
+ __u64 unmanaged_addr;
+ __u64 unmanaged_size;
+};
+
+struct drm_nouveau_svm_bind {
+ __u64 header;
+ __u64 va_start;
+ __u64 va_end;
+ __u64 npages;
+ __u64 stride;
+ __u64 result;
+ __u64 reserved0;
+ __u64 reserved1;
+};
+
+#define NOUVEAU_SVM_BIND_COMMAND_SHIFT 0
+#define NOUVEAU_SVM_BIND_COMMAND_BITS 8
+#define NOUVEAU_SVM_BIND_COMMAND_MASK ((1 << 8) - 1)
+#define NOUVEAU_SVM_BIND_PRIORITY_SHIFT 8
+#define NOUVEAU_SVM_BIND_PRIORITY_BITS 8
+#define NOUVEAU_SVM_BIND_PRIORITY_MASK ((1 << 8) - 1)
+#define NOUVEAU_SVM_BIND_TARGET_SHIFT 16
+#define NOUVEAU_SVM_BIND_TARGET_BITS 32
+#define NOUVEAU_SVM_BIND_TARGET_MASK 0xffffffff
+
+/*
+ * Below is use to validate ioctl argument, userspace can also use it to make
+ * sure that no bit are set beyond known fields for a given kernel version.
+ */
+#define NOUVEAU_SVM_BIND_VALID_BITS 48
+#define NOUVEAU_SVM_BIND_VALID_MASK ((1ULL << NOUVEAU_SVM_BIND_VALID_BITS) - 1)
+
+
+/*
+ * NOUVEAU_BIND_COMMAND__MIGRATE: synchronous migrate to target memory.
+ * result: number of page successfuly migrate to the target memory.
+ */
+#define NOUVEAU_SVM_BIND_COMMAND__MIGRATE 0
+
+/*
+ * NOUVEAU_SVM_BIND_HEADER_TARGET__GPU_VRAM: target the GPU VRAM memory.
+ */
+#define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM (1UL << 31)
+
+
+#define DRM_IOCTL_NOUVEAU_SVM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init)
+#define DRM_IOCTL_NOUVEAU_SVM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, struct drm_nouveau_svm_bind)
+
#define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new)
#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf)
#define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep)
@@ -37,6 +37,7 @@ extern "C" {
#define DRM_V3D_GET_PARAM 0x04
#define DRM_V3D_GET_BO_OFFSET 0x05
#define DRM_V3D_SUBMIT_TFU 0x06
+#define DRM_V3D_SUBMIT_CSD 0x07
#define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -45,6 +46,7 @@ extern "C" {
#define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
#define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
+#define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
/**
* struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -190,6 +192,7 @@ enum drm_v3d_param {
DRM_V3D_PARAM_V3D_CORE0_IDENT1,
DRM_V3D_PARAM_V3D_CORE0_IDENT2,
DRM_V3D_PARAM_SUPPORTS_TFU,
+ DRM_V3D_PARAM_SUPPORTS_CSD,
};
struct drm_v3d_get_param {
@@ -230,6 +233,31 @@ struct drm_v3d_submit_tfu {
__u32 out_sync;
};
+/* Submits a compute shader for dispatch. This job will block on any
+ * previous compute shaders submitted on this fd, and any other
+ * synchronization must be performed with in_sync/out_sync.
+ */
+struct drm_v3d_submit_csd {
+ __u32 cfg[7];
+ __u32 coef[4];
+
+ /* Pointer to a u32 array of the BOs that are referenced by the job.
+ */
+ __u64 bo_handles;
+
+ /* Number of BO handles passed in (size is that times 4). */
+ __u32 bo_handle_count;
+
+ /* sync object to block on before running the CSD job. Each
+ * CSD job will execute in the order submitted to its FD.
+ * Synchronization against rendering/TFU jobs or CSD from
+ * other fds requires using sync objects.
+ */
+ __u32 in_sync;
+ /* Sync object to signal when the CSD job is done. */
+ __u32 out_sync;
+};
+
#if defined(__cplusplus)
}
#endif