diff mbox series

[16/16] iommupt: Add the Intel VT-D second stage page table format

Message ID 16-v1-01fa10580981+1d-iommu_pt_jgg@nvidia.com (mailing list archive)
State New
Headers show
Series Consolidate iommu page table implementations | expand

Commit Message

Jason Gunthorpe Aug. 15, 2024, 3:11 p.m. UTC
The VT-D second stage format is almost the same as the x86 PAE format,
except that the bit encodings in the PTE are different and a few new PTE
features, like force coherency, are present.

Among all the formats it is unique in not having a designated present bit.

Cc: Tina Zhang <tina.zhang@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/generic_pt/Kconfig           |   6 +
 drivers/iommu/generic_pt/fmt/Makefile      |   2 +
 drivers/iommu/generic_pt/fmt/defs_vtdss.h  |  21 ++
 drivers/iommu/generic_pt/fmt/iommu_vtdss.c |   8 +
 drivers/iommu/generic_pt/fmt/vtdss.h       | 276 +++++++++++++++++++++
 include/linux/generic_pt/common.h          |   4 +
 include/linux/generic_pt/iommu.h           |  12 +
 7 files changed, 329 insertions(+)
 create mode 100644 drivers/iommu/generic_pt/fmt/defs_vtdss.h
 create mode 100644 drivers/iommu/generic_pt/fmt/iommu_vtdss.c
 create mode 100644 drivers/iommu/generic_pt/fmt/vtdss.h

Comments

Zhang, Tina Aug. 19, 2024, 2:51 a.m. UTC | #1
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Thursday, August 15, 2024 11:12 PM
> Cc: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>; Lu Baolu
> <baolu.lu@linux.intel.com>; David Hildenbrand <david@redhat.com>;
> Christoph Hellwig <hch@lst.de>; iommu@lists.linux.dev; Joao Martins
> <joao.m.martins@oracle.com>; Tian, Kevin <kevin.tian@intel.com>;
> kvm@vger.kernel.org; linux-mm@kvack.org; Pasha Tatashin
> <pasha.tatashin@soleen.com>; Peter Xu <peterx@redhat.com>; Ryan
> Roberts <ryan.roberts@arm.com>; Sean Christopherson
> <seanjc@google.com>; Zhang, Tina <tina.zhang@intel.com>
> Subject: [PATCH 16/16] iommupt: Add the Intel VT-D second stage page table
> format
> 
> The VT-D second stage format is almost the same as the x86 PAE format,
> except the bit encodings in the PTE are different and a few new PTE features,
> like force coherency are present.
> 
> Among all the formats it is unique in not having a designated present bit.
> 
> Cc: Tina Zhang <tina.zhang@intel.com>
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Lu Baolu <baolu.lu@linux.intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/iommu/generic_pt/Kconfig           |   6 +
>  drivers/iommu/generic_pt/fmt/Makefile      |   2 +
>  drivers/iommu/generic_pt/fmt/defs_vtdss.h  |  21 ++
>  drivers/iommu/generic_pt/fmt/iommu_vtdss.c |   8 +
>  drivers/iommu/generic_pt/fmt/vtdss.h       | 276 +++++++++++++++++++++
>  include/linux/generic_pt/common.h          |   4 +
>  include/linux/generic_pt/iommu.h           |  12 +
>  7 files changed, 329 insertions(+)
>  create mode 100644 drivers/iommu/generic_pt/fmt/defs_vtdss.h
>  create mode 100644 drivers/iommu/generic_pt/fmt/iommu_vtdss.c
>  create mode 100644 drivers/iommu/generic_pt/fmt/vtdss.h
> 
> diff --git a/drivers/iommu/generic_pt/Kconfig
> b/drivers/iommu/generic_pt/Kconfig
> index 2d08b58e953e4d..c17e09e2d03025 100644
> --- a/drivers/iommu/generic_pt/Kconfig
> +++ b/drivers/iommu/generic_pt/Kconfig
> @@ -90,6 +90,11 @@ config IOMMU_PT_DART
> 
>  	  If unsure, say N here.
> 
> +config IOMMU_PT_VTDSS
> +       tristate "IOMMU page table for Intel VT-D IOMMU Second Stage"
> +	depends on !GENERIC_ATOMIC64 # for cmpxchg64
> +	default n
> +
>  config IOMMU_PT_X86PAE
>         tristate "IOMMU page table for x86 PAE"
>  	depends on !GENERIC_ATOMIC64 # for cmpxchg64 @@ -105,6
> +110,7 @@ config IOMMUT_PT_KUNIT_TEST
>  	depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K
>  	depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K
>  	depends on IOMMU_PT_DART || !IOMMU_PT_DART
> +	depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
>  	depends on IOMMU_PT_X86PAE || !IOMMU_PT_X86PAE
>  	default KUNIT_ALL_TESTS
>  endif
> diff --git a/drivers/iommu/generic_pt/fmt/Makefile
> b/drivers/iommu/generic_pt/fmt/Makefile
> index 1e10be24758fef..5a77c64d432534 100644
> --- a/drivers/iommu/generic_pt/fmt/Makefile
> +++ b/drivers/iommu/generic_pt/fmt/Makefile
> @@ -10,6 +10,8 @@ iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K)
> += armv8_64k
> 
>  iommu_pt_fmt-$(CONFIG_IOMMU_PT_DART) += dart
> 
> +iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
> +
>  iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86PAE) += x86pae
> 
>  IOMMU_PT_KUNIT_TEST :=
> diff --git a/drivers/iommu/generic_pt/fmt/defs_vtdss.h
> b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
> new file mode 100644
> index 00000000000000..4a239bcaae2a90
> --- /dev/null
> +++ b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> + *
> + */
> +#ifndef __GENERIC_PT_FMT_DEFS_VTDSS_H
> +#define __GENERIC_PT_FMT_DEFS_VTDSS_H
> +
> +#include <linux/generic_pt/common.h>
> +#include <linux/types.h>
> +
> +typedef u64 pt_vaddr_t;
> +typedef u64 pt_oaddr_t;
> +
> +struct vtdss_pt_write_attrs {
> +	u64 descriptor_bits;
> +	gfp_t gfp;
> +};
> +#define pt_write_attrs vtdss_pt_write_attrs
> +
> +#endif
> diff --git a/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
> b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
> new file mode 100644
> index 00000000000000..12e7829815047b
> --- /dev/null
> +++ b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
> @@ -0,0 +1,8 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES  */ #define
> +PT_FMT vtdss #define PT_SUPPORTED_FEATURES 0
> +
> +#include "iommu_template.h"
> diff --git a/drivers/iommu/generic_pt/fmt/vtdss.h
> b/drivers/iommu/generic_pt/fmt/vtdss.h
> new file mode 100644
> index 00000000000000..233731365ac62d
> --- /dev/null
> +++ b/drivers/iommu/generic_pt/fmt/vtdss.h
> @@ -0,0 +1,276 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> + *
> + * Intel VT-D Second Stage 5/4 level page table
> + *
> + * This is described in
> + *   Section "3.7 Second-Stage Translation"
> + *   Section "9.8 Second-Stage Paging Entries"
> + *
> + * Of the "Intel Virtualization Technology for Directed I/O
> +Architecture
> + * Specification".
> + *
> + * The named levels in the spec map to the pts->level as:
> + *   Table/SS-PTE - 0
> + *   Directory/SS-PDE - 1
> + *   Directory Ptr/SS-PDPTE - 2
> + *   PML4/SS-PML4E - 3
> + *   PML5/SS-PML5E - 4
> + * FIXME:
> + *  force_snooping
> + *  1g optional
> + *  forbid read-only
> + *  Use of direct clflush instead of DMA API  */ #ifndef
> +__GENERIC_PT_FMT_VTDSS_H #define __GENERIC_PT_FMT_VTDSS_H
> +
> +#include "defs_vtdss.h"
> +#include "../pt_defs.h"
> +
> +#include <linux/bitfield.h>
> +#include <linux/container_of.h>
> +#include <linux/log2.h>
> +
> +enum {
> +	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
> +	PT_MAX_VA_ADDRESS_LG2 = 57,
> +	PT_ENTRY_WORD_SIZE = sizeof(u64),
> +	PT_MAX_TOP_LEVEL = 4,
> +	PT_GRANUAL_LG2SZ = 12,
> +	PT_TABLEMEM_LG2SZ = 12,
> +};
> +
> +/* Shared descriptor bits */
> +enum {
> +	VTDSS_FMT_R = BIT(0),
> +	VTDSS_FMT_W = BIT(1),
> +	VTDSS_FMT_X = BIT(2),

VT-d Spec doesn't have this BIT(2) defined.

> +/*
> + * Requires Tina's series:
> + *
> +https://patch.msgid.link/r/20231106071226.9656-3-tina.zhang@intel.com
> + * See my github for an integrated version  */ #if
> +defined(GENERIC_PT_KUNIT) &&
> +IS_ENABLED(CONFIG_CONFIG_IOMMU_IO_PGTABLE_VTD)
> +#include <linux/io-pgtable.h>
> +
> +static struct io_pgtable_ops *
> +vtdss_pt_iommu_alloc_io_pgtable(struct pt_iommu_vtdss_cfg *cfg,
> +				struct device *iommu_dev,
> +				struct io_pgtable_cfg **unused_pgtbl_cfg) {
> +	struct io_pgtable_cfg pgtbl_cfg = {};
> +
> +	pgtbl_cfg.ias = 48;
> +	pgtbl_cfg.oas = 52;

Since alloc_io_pgtable_ops() is used for the PT allocation, pgtbl_cfg.ias and pgtbl_cfg.oas can either be set to the theoretical maximum address sizes or simply be left unassigned here. Otherwise it may be confusing, because the proper values have to be derived from the VT-d capability registers.

The VT-d driver will assign valid values to those fields anyway when alloc_io_pgtable_ops() is being invoked.

Regards,
-Tina
Jason Gunthorpe Aug. 19, 2024, 3:53 p.m. UTC | #2
On Mon, Aug 19, 2024 at 02:51:11AM +0000, Zhang, Tina wrote:

> > +/* Shared descriptor bits */
> > +enum {
> > +	VTDSS_FMT_R = BIT(0),
> > +	VTDSS_FMT_W = BIT(1),
> > +	VTDSS_FMT_X = BIT(2),
> 
> VT-d Spec doesn't have this BIT(2) defined.

It does:

 Figure 9-8. Format for Second-Stage Paging Entries

 Bit 2 = X^1

 1. X field is ignored by hardware if Execute Request Support (ERS) is
 reported as Clear in the Extended Capability Register or if SSEE=0 in
 the scalable-mode PASID-table entry referencing the second-stage
 paging entries.

> > +static struct io_pgtable_ops *
> > +vtdss_pt_iommu_alloc_io_pgtable(struct pt_iommu_vtdss_cfg *cfg,
> > +				struct device *iommu_dev,
> > +				struct io_pgtable_cfg **unused_pgtbl_cfg) {
> > +	struct io_pgtable_cfg pgtbl_cfg = {};
> > +
> > +	pgtbl_cfg.ias = 48;
> > +	pgtbl_cfg.oas = 52;
> 
> Since the alloca_io_pgtable_ops() is used for PT allocation, the
> pgtbl_cfg.ias and pgtbl_cfg.oas can be provided with the theoretical
> max address sizes or simply leave them unassigned here.

It doesn't work if they are unassigned. The map op returns EFAULT.

Thanks,
Jason
Yi Liu Aug. 20, 2024, 8:22 a.m. UTC | #3
On 2024/8/19 23:53, Jason Gunthorpe wrote:
> On Mon, Aug 19, 2024 at 02:51:11AM +0000, Zhang, Tina wrote:
> 
>>> +/* Shared descriptor bits */
>>> +enum {
>>> +	VTDSS_FMT_R = BIT(0),
>>> +	VTDSS_FMT_W = BIT(1),
>>> +	VTDSS_FMT_X = BIT(2),
>>
>> VT-d Spec doesn't have this BIT(2) defined.
> 
> It does:
> 
>   Figure 9-8. Format for Second-Stage Paging Entries
> 
>   Bit 2 = X^1
> 
>   1. X field is ignored by hardware if Execute Request Support (ERS) is
>   reported as Clear in the Extended Capability Register or if SSEE=0 in
>   the scalable-mode PASID-table entry referencing the second-stage
>   paging entries.

It was deprecated. :( Refer to the latest spec (after 4.1). The ERS
bit is going to be deprecated as well.

11.4.3 Extended Capability Register

"This field is planned for deprecation. Implementations must
report this field as Clear to indicate that the remapping unit does
not support requests-with-PASID that have a value of 1 in the
Execute-Requested (ER) field."
diff mbox series

Patch

diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig
index 2d08b58e953e4d..c17e09e2d03025 100644
--- a/drivers/iommu/generic_pt/Kconfig
+++ b/drivers/iommu/generic_pt/Kconfig
@@ -90,6 +90,11 @@  config IOMMU_PT_DART
 
 	  If unsure, say N here.
 
+config IOMMU_PT_VTDSS
+       tristate "IOMMU page table for Intel VT-D IOMMU Second Stage"
+	depends on !GENERIC_ATOMIC64 # for cmpxchg64
+	default n
+
 config IOMMU_PT_X86PAE
        tristate "IOMMU page table for x86 PAE"
 	depends on !GENERIC_ATOMIC64 # for cmpxchg64
@@ -105,6 +110,7 @@  config IOMMUT_PT_KUNIT_TEST
 	depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K
 	depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K
 	depends on IOMMU_PT_DART || !IOMMU_PT_DART
+	depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
 	depends on IOMMU_PT_X86PAE || !IOMMU_PT_X86PAE
 	default KUNIT_ALL_TESTS
 endif
diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile
index 1e10be24758fef..5a77c64d432534 100644
--- a/drivers/iommu/generic_pt/fmt/Makefile
+++ b/drivers/iommu/generic_pt/fmt/Makefile
@@ -10,6 +10,8 @@  iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K) += armv8_64k
 
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_DART) += dart
 
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
+
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86PAE) += x86pae
 
 IOMMU_PT_KUNIT_TEST :=
diff --git a/drivers/iommu/generic_pt/fmt/defs_vtdss.h b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
new file mode 100644
index 00000000000000..4a239bcaae2a90
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
@@ -0,0 +1,21 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_VTDSS_H
+#define __GENERIC_PT_FMT_DEFS_VTDSS_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+typedef u64 pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct vtdss_pt_write_attrs {
+	u64 descriptor_bits;
+	gfp_t gfp;
+};
+#define pt_write_attrs vtdss_pt_write_attrs
+
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/iommu_vtdss.c b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
new file mode 100644
index 00000000000000..12e7829815047b
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
@@ -0,0 +1,8 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT vtdss
+#define PT_SUPPORTED_FEATURES 0
+
+#include "iommu_template.h"
diff --git a/drivers/iommu/generic_pt/fmt/vtdss.h b/drivers/iommu/generic_pt/fmt/vtdss.h
new file mode 100644
index 00000000000000..233731365ac62d
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/vtdss.h
@@ -0,0 +1,276 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ * Intel VT-D Second Stage 5/4 level page table
+ *
+ * This is described in
+ *   Section "3.7 Second-Stage Translation"
+ *   Section "9.8 Second-Stage Paging Entries"
+ *
+ * Of the "Intel Virtualization Technology for Directed I/O Architecture
+ * Specification".
+ *
+ * The named levels in the spec map to the pts->level as:
+ *   Table/SS-PTE - 0
+ *   Directory/SS-PDE - 1
+ *   Directory Ptr/SS-PDPTE - 2
+ *   PML4/SS-PML4E - 3
+ *   PML5/SS-PML5E - 4
+ * FIXME:
+ *  force_snooping
+ *  1g optional
+ *  forbid read-only
+ *  Use of direct clflush instead of DMA API
+ */
+#ifndef __GENERIC_PT_FMT_VTDSS_H
+#define __GENERIC_PT_FMT_VTDSS_H
+
+#include "defs_vtdss.h"
+#include "../pt_defs.h"
+
+#include <linux/bitfield.h>
+#include <linux/container_of.h>
+#include <linux/log2.h>
+
+enum {
+	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
+	PT_MAX_VA_ADDRESS_LG2 = 57,
+	PT_ENTRY_WORD_SIZE = sizeof(u64),
+	PT_MAX_TOP_LEVEL = 4,
+	PT_GRANUAL_LG2SZ = 12,
+	PT_TABLEMEM_LG2SZ = 12,
+};
+
+/* Shared descriptor bits */
+enum {
+	VTDSS_FMT_R = BIT(0),
+	VTDSS_FMT_W = BIT(1),
+	VTDSS_FMT_X = BIT(2),
+	VTDSS_FMT_A = BIT(8),
+	VTDSS_FMT_D = BIT(9),
+	VTDSS_FMT_SNP = BIT(11),
+	VTDSS_FMT_OA = GENMASK_ULL(51, 12),
+};
+
+/* PDPTE/PDE */
+enum {
+	VTDSS_FMT_PS = BIT(7),
+};
+
+#define common_to_vtdss_pt(common_ptr) \
+	container_of_const(common_ptr, struct pt_vtdss, common)
+#define to_vtdss_pt(pts) common_to_vtdss_pt((pts)->range->common)
+
+static inline pt_oaddr_t vtdss_pt_table_pa(const struct pt_state *pts)
+{
+	return log2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry), PT_TABLEMEM_LG2SZ);
+}
+#define pt_table_pa vtdss_pt_table_pa
+
+static inline pt_oaddr_t vtdss_pt_entry_oa(const struct pt_state *pts)
+{
+	return log2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry), PT_GRANUAL_LG2SZ);
+}
+#define pt_entry_oa vtdss_pt_entry_oa
+
+static inline bool vtdss_pt_can_have_leaf(const struct pt_state *pts)
+{
+	return pts->level <= 2;
+}
+#define pt_can_have_leaf vtdss_pt_can_have_leaf
+
+static inline unsigned int vtdss_pt_table_item_lg2sz(const struct pt_state *pts)
+{
+	return PT_GRANUAL_LG2SZ +
+	       (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * pts->level;
+}
+#define pt_table_item_lg2sz vtdss_pt_table_item_lg2sz
+
+static inline unsigned int vtdss_pt_num_items_lg2(const struct pt_state *pts)
+{
+	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 vtdss_pt_num_items_lg2
+
+static inline enum pt_entry_type vtdss_pt_load_entry_raw(struct pt_state *pts)
+{
+	const u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	pts->entry = entry = READ_ONCE(tablep[pts->index]);
+	if (!entry)
+		return PT_ENTRY_EMPTY;
+	if (pts->level == 0 ||
+	    (vtdss_pt_can_have_leaf(pts) && (pts->entry & VTDSS_FMT_PS)))
+		return PT_ENTRY_OA;
+	return PT_ENTRY_TABLE;
+}
+#define pt_load_entry_raw vtdss_pt_load_entry_raw
+
+static inline void
+vtdss_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+			    unsigned int oasz_lg2,
+			    const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	entry = FIELD_PREP(VTDSS_FMT_OA, log2_div(oa, PT_GRANUAL_LG2SZ)) |
+		attrs->descriptor_bits;
+	if (pts->level != 0)
+		entry |= VTDSS_FMT_PS;
+
+	WRITE_ONCE(tablep[pts->index], entry);
+	pts->entry = entry;
+}
+#define pt_install_leaf_entry vtdss_pt_install_leaf_entry
+
+static inline bool vtdss_pt_install_table(struct pt_state *pts,
+					  pt_oaddr_t table_pa,
+					  const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	/*
+	 * FIXME according to the SDM D is ignored by HW on table pointers?
+	 * io_pgtable_v2 sets it
+	 */
+	entry = VTDSS_FMT_R | VTDSS_FMT_W |
+		FIELD_PREP(VTDSS_FMT_OA, log2_div(table_pa, PT_GRANUAL_LG2SZ));
+	return pt_table_install64(&tablep[pts->index], entry, pts->entry);
+}
+#define pt_install_table vtdss_pt_install_table
+
+static inline void vtdss_pt_attr_from_entry(const struct pt_state *pts,
+					    struct pt_write_attrs *attrs)
+{
+	attrs->descriptor_bits = pts->entry & (VTDSS_FMT_R | VTDSS_FMT_W |
+					       VTDSS_FMT_X | VTDSS_FMT_SNP);
+}
+#define pt_attr_from_entry vtdss_pt_attr_from_entry
+
+static inline void vtdss_pt_clear_entry(struct pt_state *pts,
+					unsigned int num_contig_lg2)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+
+	WRITE_ONCE(tablep[pts->index], 0);
+}
+#define pt_clear_entry vtdss_pt_clear_entry
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_vtdss
+
+/* The pt_common struct is embedded in the per-format struct (vtdss_pt) */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+	return &container_of(iommu_table, struct pt_iommu_table, iommu)
+			->vtdss_pt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+	return &container_of(common, struct pt_iommu_table, vtdss_pt.common)
+			->iommu;
+}
+
+static inline int vtdss_pt_iommu_set_prot(struct pt_common *common,
+					  struct pt_write_attrs *attrs,
+					  unsigned int iommu_prot)
+{
+	u64 pte = 0;
+
+	/*
+	 * VTDSS does not have a present bit, so we tell if any entry is present
+	 * by checking for R or W.
+	 */
+	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+		return -EINVAL;
+
+	/*
+	 * FIXME: The VTD driver has a bug setting DMA_FL_PTE_PRESENT on the SS
+	 * table, which forces R on always.
+	 */
+	pte |= VTDSS_FMT_R;
+
+	if (iommu_prot & IOMMU_READ)
+		pte |= VTDSS_FMT_R;
+	if (iommu_prot & IOMMU_WRITE)
+		pte |= VTDSS_FMT_W;
+/* FIXME	if (dmar_domain->set_pte_snp)
+		pte |= VTDSS_FMT_SNP; */
+
+	attrs->descriptor_bits = pte;
+	return 0;
+}
+#define pt_iommu_set_prot vtdss_pt_iommu_set_prot
+
+static inline int vtdss_pt_iommu_fmt_init(struct pt_iommu_vtdss *iommu_table,
+					  struct pt_iommu_vtdss_cfg *cfg)
+{
+	struct pt_vtdss *table = &iommu_table->vtdss_pt;
+
+	/* FIXME configurable */
+	pt_top_set_level(&table->common, 3);
+	return 0;
+}
+#define pt_iommu_fmt_init vtdss_pt_iommu_fmt_init
+
+#if defined(GENERIC_PT_KUNIT)
+static void vtdss_pt_kunit_setup_cfg(struct pt_iommu_vtdss_cfg *cfg)
+{
+}
+#define pt_kunit_setup_cfg vtdss_pt_kunit_setup_cfg
+#endif
+
+/*
+ * KUnit-only glue for the io-pgtable VT-d format. Requires Tina's series:
+ *  https://patch.msgid.link/r/20231106071226.9656-3-tina.zhang@intel.com
+ * See my github for an integrated version
+ */
+#if defined(GENERIC_PT_KUNIT) && IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_VTD)
+#include <linux/io-pgtable.h>
+
+static struct io_pgtable_ops *
+vtdss_pt_iommu_alloc_io_pgtable(struct pt_iommu_vtdss_cfg *cfg,
+				struct device *iommu_dev,
+				struct io_pgtable_cfg **unused_pgtbl_cfg)
+{
+	struct io_pgtable_cfg pgtbl_cfg = {};
+
+	pgtbl_cfg.ias = 48; /* ias/oas must be set or the map op returns EFAULT */
+	pgtbl_cfg.oas = 52;
+	pgtbl_cfg.vtd_cfg.cap_reg = 4 << 8;
+	pgtbl_cfg.vtd_cfg.ecap_reg = BIT_ULL(26) | BIT_ULL(60) | BIT_ULL(48) | BIT_ULL(56);
+	pgtbl_cfg.pgsize_bitmap = SZ_4K;
+	pgtbl_cfg.coherent_walk = true;
+	return alloc_io_pgtable_ops(INTEL_IOMMU, &pgtbl_cfg, NULL);
+}
+#define pt_iommu_alloc_io_pgtable vtdss_pt_iommu_alloc_io_pgtable
+
+static void vtdss_pt_iommu_setup_ref_table(struct pt_iommu_vtdss *iommu_table,
+					   struct io_pgtable_ops *pgtbl_ops)
+{
+	struct io_pgtable_cfg *pgtbl_cfg =
+		&io_pgtable_ops_to_pgtable(pgtbl_ops)->cfg;
+	struct pt_common *common = &iommu_table->vtdss_pt.common;
+
+	pt_top_set(common, __va(pgtbl_cfg->vtd_cfg.pgd), 3); /* top level 3: PML4 */
+}
+#define pt_iommu_setup_ref_table vtdss_pt_iommu_setup_ref_table
+
+static u64 vtdss_pt_kunit_cmp_mask_entry(struct pt_state *pts)
+{
+	if (pts->type == PT_ENTRY_TABLE) /* ignore the table address bits */
+		return pts->entry & (~(u64)(VTDSS_FMT_OA));
+	return pts->entry;
+}
+#define pt_kunit_cmp_mask_entry vtdss_pt_kunit_cmp_mask_entry
+#endif
+
+#endif
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 558302fe1e0324..a3469132db7dda 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -145,6 +145,10 @@  enum {
 	PT_FEAT_DART_V2 = PT_FEAT_FMT_START,
 };
 
+struct pt_vtdss {
+	struct pt_common common;
+};
+
 struct pt_x86pae {
 	struct pt_common common;
 };
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 351a69fe62dd1d..b9ecab07b0223d 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -317,6 +317,18 @@  struct pt_iommu_dart_cfg {
 int pt_iommu_dart_init(struct pt_iommu_dart *table,
 		       struct pt_iommu_dart_cfg *cfg, gfp_t gfp);
 
+struct pt_iommu_vtdss {
+	struct pt_iommu iommu;
+	struct pt_vtdss vtdss_pt;
+};
+
+struct pt_iommu_vtdss_cfg {
+	struct device *iommu_device;
+	unsigned int features;
+};
+int pt_iommu_vtdss_init(struct pt_iommu_vtdss *table,
+			struct pt_iommu_vtdss_cfg *cfg, gfp_t gfp);
+
 struct pt_iommu_x86pae {
 	struct pt_iommu iommu;
 	struct pt_x86pae x86pae_pt;