@@ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see:
Documentation/process/license-rules.rst
for more details.
+
+All contributions to the Linux Kernel are subject to this COPYING file.
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
new file mode 100644
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOMMU user API definitions
+ */
+
+#ifndef _IOMMU_H
+#define _IOMMU_H
+
+#include <linux/types.h>
+
+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+ IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
+ IOMMU_FAULT_PAGE_REQ, /* page request fault */
+};
+
+enum iommu_fault_reason {
+ IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+ /* Could not access the PASID table (fetch caused external abort) */
+ IOMMU_FAULT_REASON_PASID_FETCH,
+
+ /* PASID entry is invalid or has configuration errors */
+ IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+ /*
+ * PASID is out of range (e.g. exceeds the maximum PASID
+ * supported by the IOMMU) or disabled.
+ */
+ IOMMU_FAULT_REASON_PASID_INVALID,
+
+ /*
+ * An external abort occurred fetching (or updating) a translation
+ * table descriptor
+ */
+ IOMMU_FAULT_REASON_WALK_EABT,
+
+ /*
+ * Could not access the page table entry (Bad address),
+ * actual translation fault
+ */
+ IOMMU_FAULT_REASON_PTE_FETCH,
+
+ /* Protection flag check failed */
+ IOMMU_FAULT_REASON_PERMISSION,
+
+ /* access flag check failed */
+ IOMMU_FAULT_REASON_ACCESS,
+
+ /* Output address of a translation stage caused Address Size fault */
+ IOMMU_FAULT_REASON_OOR_ADDRESS,
+};
+
+/**
+ * struct iommu_fault_unrecoverable - Unrecoverable fault data
+ * @reason: reason of the fault, from &enum iommu_fault_reason
+ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
+ * @pasid: Process Address Space ID
+ * @perm: requested permission access using by the incoming transaction
+ * (IOMMU_FAULT_PERM_* values)
+ * @addr: offending page address
+ * @fetch_addr: address that caused a fetch abort, if any
+ */
+struct iommu_fault_unrecoverable {
+ __u32 reason;
+#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
+#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
+ __u32 flags;
+ __u32 pasid;
+ __u32 perm;
+ __u64 addr;
+ __u64 fetch_addr;
+};
+
+/**
+ * struct iommu_fault_page_request - Page Request data
+ * @flags: encodes whether the corresponding fields are valid and whether this
+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values)
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_fault_page_request {
+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 perm;
+ __u64 addr;
+ __u64 private_data[2];
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ * @type: fault type from &enum iommu_fault_type
+ * @padding: reserved for future use (should be zero)
+ * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
+ * @padding2: sets the fault size to allow for future extensions
+ */
+struct iommu_fault {
+ __u32 type;
+ __u32 padding;
+ union {
+ struct iommu_fault_unrecoverable event;
+ struct iommu_fault_page_request prm;
+ __u8 padding2[56];
+ };
+};
+
+/**
+ * enum iommu_page_response_code - Return status of fault handlers
+ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is "Success" in PCI PRI.
+ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ * this device if possible. This is "Response Failure" in PCI PRI.
+ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is "Invalid Request" in PCI PRI.
+ */
+enum iommu_page_response_code {
+ IOMMU_PAGE_RESP_SUCCESS = 0,
+ IOMMU_PAGE_RESP_INVALID,
+ IOMMU_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_page_response - Generic page response information
+ * @version: API version of this structure
+ * @flags: encodes whether the corresponding fields are valid
+ * (IOMMU_FAULT_PAGE_RESPONSE_* values)
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @code: response code from &enum iommu_page_response_code
+ */
+struct iommu_page_response {
+#define IOMMU_PAGE_RESP_VERSION_1 1
+ __u32 version;
+#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 code;
+};
+
+/* defines the granularity of the invalidation */
+enum iommu_inv_granularity {
+ IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */
+ IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */
+ IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */
+ IOMMU_INV_GRANU_NR, /* number of invalidation granularities */
+};
+
+/**
+ * struct iommu_inv_addr_info - Address Selective Invalidation Structure
+ *
+ * @flags: indicates the granularity of the address-selective invalidation
+ * - If the PASID bit is set, the @pasid field is populated and the invalidation
+ * relates to cache entries tagged with this PASID and matching the address
+ * range.
+ * - If ARCHID bit is set, @archid is populated and the invalidation relates
+ * to cache entries tagged with this architecture specific ID and matching
+ * the address range.
+ * - Both PASID and ARCHID can be set as they may tag different caches.
+ * - If neither PASID or ARCHID is set, global addr invalidation applies.
+ * - The LEAF flag indicates whether only the leaf PTE caching needs to be
+ * invalidated and other paging structure caches can be preserved.
+ * @pasid: process address space ID
+ * @archid: architecture-specific ID
+ * @addr: first stage/level input address
+ * @granule_size: page/block size of the mapping in bytes
+ * @nb_granules: number of contiguous granules to be invalidated
+ */
+struct iommu_inv_addr_info {
+#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0)
+#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1)
+#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2)
+ __u32 flags;
+ __u32 archid;
+ __u64 pasid;
+ __u64 addr;
+ __u64 granule_size;
+ __u64 nb_granules;
+};
+
+/**
+ * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure
+ *
+ * @flags: indicates the granularity of the PASID-selective invalidation
+ * - If the PASID bit is set, the @pasid field is populated and the invalidation
+ * relates to cache entries tagged with this PASID and matching the address
+ * range.
+ * - If the ARCHID bit is set, the @archid is populated and the invalidation
+ * relates to cache entries tagged with this architecture specific ID and
+ * matching the address range.
+ * - Both PASID and ARCHID can be set as they may tag different caches.
+ * - At least one of PASID or ARCHID must be set.
+ * @pasid: process address space ID
+ * @archid: architecture-specific ID
+ */
+struct iommu_inv_pasid_info {
+#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0)
+#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1)
+ __u32 flags;
+ __u32 archid;
+ __u64 pasid;
+};
+
+/**
+ * struct iommu_cache_invalidate_info - First level/stage invalidation
+ * information
+ * @version: API version of this structure
+ * @cache: bitfield that allows to select which caches to invalidate
+ * @granularity: defines the lowest granularity used for the invalidation:
+ * domain > PASID > addr
+ * @padding: reserved for future use (should be zero)
+ * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID
+ * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR
+ *
+ * Not all the combinations of cache/granularity are valid:
+ *
+ * +--------------+---------------+---------------+---------------+
+ * | type / | DEV_IOTLB | IOTLB | PASID |
+ * | granularity | | | cache |
+ * +==============+===============+===============+===============+
+ * | DOMAIN | N/A | Y | Y |
+ * +--------------+---------------+---------------+---------------+
+ * | PASID | Y | Y | Y |
+ * +--------------+---------------+---------------+---------------+
+ * | ADDR | Y | Y | N/A |
+ * +--------------+---------------+---------------+---------------+
+ *
+ * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than
+ * @version and @cache.
+ *
+ * If multiple cache types are invalidated simultaneously, they all
+ * must support the used granularity.
+ */
+struct iommu_cache_invalidate_info {
+#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1
+ __u32 version;
+/* IOMMU paging structure cache */
+#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */
+#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */
+#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */
+#define IOMMU_CACHE_INV_TYPE_NR (3)
+ __u8 cache;
+ __u8 granularity;
+ __u8 padding[2];
+ union {
+ struct iommu_inv_pasid_info pasid_info;
+ struct iommu_inv_addr_info addr_info;
+ };
+};
+
+/**
+ * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest
+ * SVA binding.
+ *
+ * @flags: VT-d PASID table entry attributes
+ * @pat: Page attribute table data to compute effective memory type
+ * @emt: Extended memory type
+ *
+ * Only guest vIOMMU selectable and effective options are passed down to
+ * the host IOMMU.
+ */
+struct iommu_gpasid_bind_data_vtd {
+#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */
+#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */
+#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */
+#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */
+#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */
+#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */
+ __u64 flags;
+ __u32 pat;
+ __u32 emt;
+};
+
+/**
+ * struct iommu_gpasid_bind_data - Information about device and guest PASID binding
+ * @version: Version of this data structure
+ * @format: PASID table entry format
+ * @flags: Additional information on guest bind request
+ * @gpgd: Guest page directory base of the guest mm to bind
+ * @hpasid: Process address space ID used for the guest mm in host IOMMU
+ * @gpasid: Process address space ID used for the guest mm in guest IOMMU
+ * @addr_width: Guest virtual address width
+ * @padding: Reserved for future use (should be zero)
+ * @vtd: Intel VT-d specific data
+ *
+ * Guest to host PASID mapping can be an identity or non-identity, where guest
+ * has its own PASID space. For non-identify mapping, guest to host PASID lookup
+ * is needed when VM programs guest PASID into an assigned device. VMM may
+ * trap such PASID programming then request host IOMMU driver to convert guest
+ * PASID to host PASID based on this bind data.
+ */
+struct iommu_gpasid_bind_data {
+#define IOMMU_GPASID_BIND_VERSION_1 1
+ __u32 version;
+#define IOMMU_PASID_FORMAT_INTEL_VTD 1
+ __u32 format;
+#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */
+ __u64 flags;
+ __u64 gpgd;
+ __u64 hpasid;
+ __u64 gpasid;
+ __u32 addr_width;
+ __u8 padding[12];
+ /* Vendor specific data */
+ union {
+ struct iommu_gpasid_bind_data_vtd vtd;
+ };
+};
+
+/**
+ * struct iommu_pasid_smmuv3 - ARM SMMUv3 Stream Table Entry stage 1 related
+ * information
+ * @version: API version of this structure
+ * @s1fmt: STE s1fmt (format of the CD table: single CD, linear table
+ * or 2-level table)
+ * @s1dss: STE s1dss (specifies the behavior when @pasid_bits != 0
+ * and no PASID is passed along with the incoming transaction)
+ * @padding: reserved for future use (should be zero)
+ *
+ * The PASID table is referred to as the Context Descriptor (CD) table on ARM
+ * SMMUv3. Please refer to the ARM SMMU 3.x spec (ARM IHI 0070A) for full
+ * details.
+ */
+struct iommu_pasid_smmuv3 {
+#define PASID_TABLE_SMMUV3_CFG_VERSION_1 1
+ __u32 version;
+ __u8 s1fmt;
+ __u8 s1dss;
+ __u8 padding[2];
+};
+
+/**
+ * struct iommu_pasid_table_config - PASID table data used to bind guest PASID
+ * table to the host IOMMU
+ * @version: API version to prepare for future extensions
+ * @format: format of the PASID table
+ * @base_ptr: guest physical address of the PASID table
+ * @pasid_bits: number of PASID bits used in the PASID table
+ * @config: indicates whether the guest translation stage must
+ * be translated, bypassed or aborted.
+ * @padding: reserved for future use (should be zero)
+ * @smmuv3: table information when @format is %IOMMU_PASID_FORMAT_SMMUV3
+ */
+struct iommu_pasid_table_config {
+#define PASID_TABLE_CFG_VERSION_1 1
+ __u32 version;
+#define IOMMU_PASID_FORMAT_SMMUV3 1
+ __u32 format;
+ __u64 base_ptr;
+ __u8 pasid_bits;
+#define IOMMU_PASID_CONFIG_TRANSLATE 1
+#define IOMMU_PASID_CONFIG_BYPASS 2
+#define IOMMU_PASID_CONFIG_ABORT 3
+ __u8 config;
+ __u8 padding[6];
+ union {
+ struct iommu_pasid_smmuv3 smmuv3;
+ };
+};
+
+#endif /* _IOMMU_H */
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#include <linux/iommu.h>
#define VFIO_API_VERSION 0
@@ -329,6 +330,9 @@ struct vfio_region_info_cap_type {
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
+#define VFIO_REGION_TYPE_NESTED (2)
+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT (1)
+
/**
* struct vfio_region_gfx_edid - EDID region layout.
*
@@ -455,11 +459,30 @@ struct vfio_irq_info {
#define VFIO_IRQ_INFO_MASKABLE (1 << 1)
#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2)
#define VFIO_IRQ_INFO_NORESIZE (1 << 3)
+#define VFIO_IRQ_INFO_FLAG_CAPS (1 << 4) /* Info supports caps */
__u32 index; /* IRQ index */
__u32 count; /* Number of IRQs within this index */
+ __u32 cap_offset; /* Offset within info struct of first cap */
};
#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9)
+/*
+ * The irq type capability allows IRQs unique to a specific device or
+ * class of devices to be exposed.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IRQ_INFO_CAP_TYPE 3
+
+struct vfio_irq_info_cap_type {
+ struct vfio_info_cap_header header;
+ __u32 type; /* global per bus driver */
+ __u32 subtype; /* type specific */
+};
+
+#define VFIO_IRQ_TYPE_NESTED (1)
+#define VFIO_IRQ_SUBTYPE_DMA_FAULT (1)
+
/**
* VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
*
@@ -561,7 +584,8 @@ enum {
VFIO_PCI_MSIX_IRQ_INDEX,
VFIO_PCI_ERR_IRQ_INDEX,
VFIO_PCI_REQ_IRQ_INDEX,
- VFIO_PCI_NUM_IRQS
+ VFIO_PCI_NUM_IRQS = 5 /* Fixed user ABI, IRQ indexes >=5 use */
+ /* device specific cap to define content */
};
/*
@@ -707,6 +731,38 @@ struct vfio_device_ioeventfd {
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
+
+/*
+ * Capability exposed by the DMA fault region
+ * @version: ABI version
+ */
+#define VFIO_REGION_INFO_CAP_DMA_FAULT 6
+
+struct vfio_region_info_cap_fault {
+ struct vfio_info_cap_header header;
+ __u32 version;
+};
+
+/*
+ * DMA Fault Region Layout
+ * @tail: index relative to the start of the ring buffer at which the
+ * consumer finds the next item in the buffer
+ * @entry_size: fault ring buffer entry size in bytes
+ * @nb_entries: max capacity of the fault ring buffer
+ * @offset: ring buffer offset relative to the start of the region
+ * @head: index relative to the start of the ring buffer at which the
+ * producer (kernel) inserts items into the buffers
+ */
+struct vfio_region_dma_fault {
+ /* Write-Only */
+ __u32 tail;
+ /* Read-Only */
+ __u32 entry_size;
+ __u32 nb_entries;
+ __u32 offset;
+ __u32 head;
+};
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
@@ -794,6 +850,57 @@ struct vfio_iommu_type1_dma_unmap {
#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16)
+/**
+ * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,
+ * struct vfio_iommu_type1_set_pasid_table)
+ *
+ * The SET operation passes a PASID table to the host while the
+ * UNSET operation detaches the one currently programmed. Setting
+ * a table while another is already programmed replaces the old table.
+ */
+struct vfio_iommu_type1_set_pasid_table {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
+#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
+ struct iommu_pasid_table_config config; /* used on SET */
+};
+
+#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
+
+/**
+ * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE + 23,
+ * struct vfio_iommu_type1_cache_invalidate)
+ *
+ * Propagate guest IOMMU cache invalidation to the host.
+ */
+struct vfio_iommu_type1_cache_invalidate {
+ __u32 argsz;
+ __u32 flags;
+ struct iommu_cache_invalidate_info info;
+};
+#define VFIO_IOMMU_CACHE_INVALIDATE _IO(VFIO_TYPE, VFIO_BASE + 23)
+
+/**
+ * VFIO_IOMMU_SET_MSI_BINDING - _IOWR(VFIO_TYPE, VFIO_BASE + 24,
+ * struct vfio_iommu_type1_set_msi_binding)
+ *
+ * Pass a stage 1 MSI doorbell mapping to the host so that this
+ * latter can build a nested stage2 mapping. Or conversely tear
+ * down a previously bound stage 1 MSI binding.
+ */
+struct vfio_iommu_type1_set_msi_binding {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IOMMU_BIND_MSI (1 << 0)
+#define VFIO_IOMMU_UNBIND_MSI (1 << 1)
+ __u64 iova; /* MSI guest IOVA */
+ /* Fields below are used on BIND */
+ __u64 gpa; /* MSI guest physical address */
+ __u64 size; /* size of stage1 mapping (bytes) */
+};
+#define VFIO_IOMMU_SET_MSI_BINDING _IO(VFIO_TYPE, VFIO_BASE + 24)
+
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
/*
This is an update against https://github.com/eauger/linux/tree/will-arm-smmu-updates-2stage-v10 Signed-off-by: Eric Auger <eric.auger@redhat.com> --- linux-headers/COPYING | 2 + linux-headers/asm-x86/kvm.h | 1 + linux-headers/linux/iommu.h | 375 ++++++++++++++++++++++++++++++++++++ linux-headers/linux/vfio.h | 109 ++++++++++- 4 files changed, 486 insertions(+), 1 deletion(-) create mode 100644 linux-headers/linux/iommu.h