@@ -29,10 +29,49 @@ config IOMMU_PT
Generic library for building IOMMU page tables
if IOMMU_PT
+config IOMMU_PT_ARMV8_4K
+ tristate "IOMMU page table for 64 bit ARMv8 4k page size"
+ depends on !GENERIC_ATOMIC64 # for cmpxchg64
+ default n
+ help
+ Enable support for the ARMv8 VMSAv8-64 and the VMSAv8-32 long
+ descriptor pagetable format. This format supports both stage-1 and
+ stage-2, as well as address spaces up to 48-bits in size. 4K
+ granule size version.
+
+ If unsure, say N here.
+
+config IOMMU_PT_ARMV8_16K
+ tristate "IOMMU page table for 64 bit ARMv8 16k page size"
+ depends on !GENERIC_ATOMIC64 # for cmpxchg64
+ default n
+ help
+ Enable support for the ARMv8 VMSAv8-64 and the VMSAv8-32 long
+ descriptor pagetable format. This format supports both stage-1 and
+ stage-2, as well as address spaces up to 48-bits in size. 4K
+ granule size version.
+
+ If unsure, say N here.
+
+config IOMMU_PT_ARMV8_64K
+ tristate "IOMMU page table for 64 bit ARMv8 64k page size"
+ depends on !GENERIC_ATOMIC64 # for cmpxchg64
+ default n
+ help
+ Enable support for the ARMv8 VMSAv8-64 and the VMSAv8-32 long
+ descriptor pagetable format. This format supports both stage-1 and
+ stage-2, as well as address spaces up to 48-bits in size. 4K
+ granule size version.
+
+ If unsure, say N here.
+
config IOMMUT_PT_KUNIT_TEST
tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
select IOMMU_IO_PGTABLE
depends on KUNIT
+ depends on IOMMU_PT_ARMV8_4K || !IOMMU_PT_ARMV8_4K
+ depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K
+ depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K
default KUNIT_ALL_TESTS
endif
endif
@@ -1,5 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_4K) += armv8_4k
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_16K) += armv8_16k
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K) += armv8_64k
+
IOMMU_PT_KUNIT_TEST :=
define create_format
obj-$(2) += iommu_$(1).o
new file mode 100644
@@ -0,0 +1,621 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ * The page table format described by the ARMv8 VMSAv8-64 chapter in the
+ * Architecture Reference Manual. With the right cfg this will also implement
+ * the VMSAv8-32 Long Descriptor format.
+ *
+ * This was called io-pgtable-arm.c and ARM_xx_LPAE_Sx.
+ *
+ * NOTE! The level numbering is consistent with the Generic Page Table API, but
+ * is backwards from what the ARM documents use. What ARM calls level 3 this
+ * calls level 0.
+ *
+ * Present in io-pgtable-arm.c but not here:
+ * ARM_MALI_LPAE
+ * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA
+ */
+#ifndef __GENERIC_PT_FMT_ARMV8_H
+#define __GENERIC_PT_FMT_ARMV8_H
+
+#include "defs_armv8.h"
+#include "../pt_defs.h"
+
+#include <asm/page.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/container_of.h>
+#include <linux/errno.h>
+#include <linux/limits.h>
+#include <linux/sizes.h>
+
+#if ARMV8_GRANUAL_SIZE == 4096
+enum {
+ PT_MAX_TOP_LEVEL = 3,
+ PT_GRANUAL_LG2SZ = 12,
+};
+#elif ARMV8_GRANUAL_SIZE == 16384
+enum {
+ PT_MAX_TOP_LEVEL = 3,
+ PT_GRANUAL_LG2SZ = 14,
+};
+#elif ARMV8_GRANUAL_SIZE == 65536
+enum {
+ PT_MAX_TOP_LEVEL = 2,
+ PT_GRANUAL_LG2SZ = 16,
+};
+#else
+#error "Invalid ARMV8_GRANUAL_SIZE"
+#endif
+
+enum {
+ PT_MAX_OUTPUT_ADDRESS_LG2 = 48,
+ /*
+ * Currently only support up to 48 bits of usable address, the 64k 52
+ * bit mode is not supported.
+ */
+ PT_MAX_VA_ADDRESS_LG2 = 48,
+ PT_TABLEMEM_LG2SZ = PT_GRANUAL_LG2SZ,
+ PT_ENTRY_WORD_SIZE = sizeof(u64),
+};
+
+/* Common PTE bits */
+enum {
+ ARMV8PT_FMT_VALID = BIT(0),
+ ARMV8PT_FMT_PAGE = BIT(1),
+ ARMV8PT_FMT_TABLE = BIT(1),
+ ARMV8PT_FMT_NS = BIT(5),
+ ARMV8PT_FMT_SH = GENMASK(9, 8),
+ ARMV8PT_FMT_AF = BIT(10),
+
+ ARMV8PT_FMT_OA52 = GENMASK_ULL(15, 12),
+ ARMV8PT_FMT_OA48 = GENMASK_ULL(47, PT_GRANUAL_LG2SZ),
+
+ ARMV8PT_FMT_DBM = BIT_ULL(51),
+ ARMV8PT_FMT_CONTIG = BIT_ULL(52),
+ ARMV8PT_FMT_UXN = BIT_ULL(53),
+ ARMV8PT_FMT_PXN = BIT_ULL(54),
+ ARMV8PT_FMT_NSTABLE = BIT_ULL(63),
+};
+
+/* S1 PTE bits */
+enum {
+ ARMV8PT_FMT_ATTRINDX = GENMASK(4, 2),
+ ARMV8PT_FMT_AP = GENMASK(7, 6),
+ ARMV8PT_FMT_nG = BIT(11),
+};
+
+enum {
+ ARMV8PT_MAIR_ATTR_IDX_CACHE = 1,
+ ARMV8PT_MAIR_ATTR_IDX_DEV = 2,
+
+ ARMV8PT_SH_IS = 3,
+ ARMV8PT_SH_OS = 2,
+
+ ARMV8PT_AP_UNPRIV = 1,
+ ARMV8PT_AP_RDONLY = 2,
+};
+
+/* S2 PTE bits */
+enum {
+ ARMV8PT_FMT_S2MEMATTR = GENMASK(5, 2),
+ ARMV8PT_FMT_S2AP = GENMASK(7, 6),
+};
+
+enum {
+ /*
+ * For !S2FWB these code to:
+ * 1111 = Normal outer write back cachable / Inner Write Back Cachable
+ * Permit S1 to override
+ * 0101 = Normal Non-cachable / Inner Non-cachable
+ * 0001 = Device / Device-nGnRE
+ * For S2FWB these code to:
+ * 0110 Force Normal Write Back
+ * 0101 Normal* is forced Normal-NC, Device unchanged
+ * 0001 Force Device-nGnRE
+ */
+ ARMV8PT_MEMATTR_FWB_WB = 6,
+ ARMV8PT_MEMATTR_OIWB = 0xf,
+ ARMV8PT_MEMATTR_NC = 5,
+ ARMV8PT_MEMATTR_DEV = 1,
+
+ ARMV8PT_S2AP_READ = 1,
+ ARMV8PT_S2AP_WRITE = 2,
+};
+
+#define common_to_armv8pt(common_ptr) \
+ container_of_const(common_ptr, struct pt_armv8, common)
+#define to_armv8pt(pts) common_to_armv8pt((pts)->range->common)
+
+static inline pt_oaddr_t armv8pt_oa(const struct pt_state *pts)
+{
+ u64 entry = pts->entry;
+ pt_oaddr_t oa;
+
+ oa = log2_mul(FIELD_GET(ARMV8PT_FMT_OA48, entry), PT_GRANUAL_LG2SZ);
+
+ /* LPA support on 64K page size */
+ if (PT_GRANUAL_SIZE == SZ_64K)
+ oa |= ((pt_oaddr_t)FIELD_GET(ARMV8PT_FMT_OA52, entry)) << 52;
+ return oa;
+}
+
+static inline pt_oaddr_t armv8pt_table_pa(const struct pt_state *pts)
+{
+ return armv8pt_oa(pts);
+}
+#define pt_table_pa armv8pt_table_pa
+
+/*
+ * Return a block or page entry pointing at a physical address Returns the
+ * address adjusted for the item in a contiguous case.
+ */
+static inline pt_oaddr_t armv8pt_item_oa(const struct pt_state *pts)
+{
+ return armv8pt_oa(pts);
+}
+#define pt_item_oa armv8pt_item_oa
+
+static inline bool armv8pt_can_have_leaf(const struct pt_state *pts)
+{
+ /*
+ * See D5-18 Translation granule sizes, with block and page sizes, and
+ * output address ranges
+ */
+ if ((PT_GRANUAL_SIZE == SZ_4K && pts->level > 2) ||
+ (PT_GRANUAL_SIZE == SZ_16K && pts->level > 1) ||
+ (PT_GRANUAL_SIZE == SZ_64K && pts_feature(pts, PT_FEAT_ARMV8_LPA) && pts->level > 2) ||
+ (PT_GRANUAL_SIZE == SZ_64K && !pts_feature(pts, PT_FEAT_ARMV8_LPA) && pts->level > 1))
+ return false;
+ return true;
+}
+#define pt_can_have_leaf armv8pt_can_have_leaf
+
+static inline unsigned int armv8pt_table_item_lg2sz(const struct pt_state *pts)
+{
+ return PT_GRANUAL_LG2SZ +
+ (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * pts->level;
+}
+#define pt_table_item_lg2sz armv8pt_table_item_lg2sz
+
+/* Number contigous entries that ARMV8PT_FMT_CONTIG will join at this level */
+static inline unsigned short
+armv8pt_contig_count_lg2(const struct pt_state *pts)
+{
+ if (PT_GRANUAL_SIZE == SZ_4K)
+ return ilog2(16); /* 64KB, 2MB */
+ else if (PT_GRANUAL_SIZE == SZ_16K && pts->level == 1)
+ return ilog2(32); /* 1GB */
+ else if (PT_GRANUAL_SIZE == SZ_16K && pts->level == 0)
+ return ilog2(128); /* 2M */
+ else if (PT_GRANUAL_SIZE == SZ_64K)
+ return ilog2(32); /* 2M, 16G */
+ return ilog2(1);
+}
+#define pt_contig_count_lg2 armv8pt_contig_count_lg2
+
+static inline unsigned int
+armv8pt_entry_num_contig_lg2(const struct pt_state *pts)
+{
+ if (pts->entry & ARMV8PT_FMT_CONTIG)
+ return armv8pt_contig_count_lg2(pts);
+ return ilog2(1);
+}
+#define pt_entry_num_contig_lg2 armv8pt_entry_num_contig_lg2
+
+static inline pt_vaddr_t armv8pt_full_va_prefix(const struct pt_common *common)
+{
+ if (pt_feature(common, PT_FEAT_ARMV8_TTBR1))
+ return PT_VADDR_MAX;
+ return 0;
+}
+#define pt_full_va_prefix armv8pt_full_va_prefix
+
+static inline unsigned int armv8pt_num_items_lg2(const struct pt_state *pts)
+{
+ /* FIXME S2 concatenated tables */
+ return PT_GRANUAL_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 armv8pt_num_items_lg2
+
+static inline enum pt_entry_type armv8pt_load_entry_raw(struct pt_state *pts)
+{
+ const u64 *tablep = pt_cur_table(pts, u64);
+ u64 entry;
+
+ pts->entry = entry = READ_ONCE(tablep[pts->index]);
+ if (!(entry & ARMV8PT_FMT_VALID))
+ return PT_ENTRY_EMPTY;
+ if (pts->level != 0 && (entry & ARMV8PT_FMT_TABLE))
+ return PT_ENTRY_TABLE;
+
+ /*
+ * Suppress returning VALID for levels that cannot have a page to remove
+ * code.
+ */
+ if (!armv8pt_can_have_leaf(pts))
+ return PT_ENTRY_EMPTY;
+
+ /* Must be a block or page, don't check the page bit on level 0 */
+ return PT_ENTRY_OA;
+}
+#define pt_load_entry_raw armv8pt_load_entry_raw
+
+static inline void
+armv8pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+ unsigned int oasz_lg2,
+ const struct pt_write_attrs *attrs)
+{
+ unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
+ u64 *tablep = pt_cur_table(pts, u64);
+ u64 entry;
+
+ PT_WARN_ON(log2_mod(oa, oasz_lg2));
+
+ entry = ARMV8PT_FMT_VALID |
+ FIELD_PREP(ARMV8PT_FMT_OA48, log2_div(oa, PT_GRANUAL_LG2SZ)) |
+ FIELD_PREP(ARMV8PT_FMT_OA52, oa >> 48) | attrs->descriptor_bits;
+
+ /*
+ * On the last level the leaf is called a page and has the page/table bit set,
+ * on other levels it is called a block and has it clear.
+ */
+ if (pts->level == 0)
+ entry |= ARMV8PT_FMT_PAGE;
+
+ if (oasz_lg2 != isz_lg2) {
+ u64 *end;
+
+ PT_WARN_ON(oasz_lg2 != isz_lg2 + armv8pt_contig_count_lg2(pts));
+ PT_WARN_ON(log2_mod(pts->index, armv8pt_contig_count_lg2(pts)));
+
+ entry |= ARMV8PT_FMT_CONTIG;
+ tablep += pts->index;
+ end = tablep + log2_to_int(armv8pt_contig_count_lg2(pts));
+ for (; tablep != end; tablep++) {
+ WRITE_ONCE(*tablep, entry);
+ entry += FIELD_PREP(
+ ARMV8PT_FMT_OA48,
+ log2_to_int(isz_lg2 - PT_GRANUAL_LG2SZ));
+ }
+ } else {
+ WRITE_ONCE(tablep[pts->index], entry);
+ }
+ pts->entry = entry;
+}
+#define pt_install_leaf_entry armv8pt_install_leaf_entry
+
+static inline bool armv8pt_install_table(struct pt_state *pts,
+ pt_oaddr_t table_pa,
+ const struct pt_write_attrs *attrs)
+{
+ u64 *tablep = pt_cur_table(pts, u64);
+ u64 entry;
+
+ entry = ARMV8PT_FMT_VALID | ARMV8PT_FMT_TABLE |
+ FIELD_PREP(ARMV8PT_FMT_OA48,
+ log2_div(table_pa, PT_GRANUAL_LG2SZ)) |
+ FIELD_PREP(ARMV8PT_FMT_OA52, table_pa >> 48);
+
+ if (pts_feature(pts, PT_FEAT_ARMV8_NS))
+ entry |= ARMV8PT_FMT_NSTABLE;
+
+ return pt_table_install64(&tablep[pts->index], entry, pts->entry);
+}
+#define pt_install_table armv8pt_install_table
+
+static inline void armv8pt_attr_from_entry(const struct pt_state *pts,
+ struct pt_write_attrs *attrs)
+{
+ attrs->descriptor_bits =
+ pts->entry &
+ (ARMV8PT_FMT_SH | ARMV8PT_FMT_AF | ARMV8PT_FMT_UXN |
+ ARMV8PT_FMT_PXN | ARMV8PT_FMT_ATTRINDX | ARMV8PT_FMT_AP |
+ ARMV8PT_FMT_nG | ARMV8PT_FMT_S2MEMATTR | ARMV8PT_FMT_S2AP);
+}
+#define pt_attr_from_entry armv8pt_attr_from_entry
+
+static inline void armv8pt_clear_entry(struct pt_state *pts,
+ unsigned int num_contig_lg2)
+{
+ u64 *tablep = pt_cur_table(pts, u64);
+ u64 *end;
+
+ PT_WARN_ON(log2_mod(pts->index, num_contig_lg2));
+
+ tablep += pts->index;
+ end = tablep + log2_to_int(num_contig_lg2);
+ for (; tablep != end; tablep++)
+ WRITE_ONCE(*tablep, 0);
+}
+#define pt_clear_entry armv8pt_clear_entry
+
+/*
+ * Call fn over all the items in an entry. If the entry is contiguous this
+ * iterates over the entire contiguous entry, including items preceding
+ * pts->va. always_inline avoids an indirect function call.
+ */
+static __always_inline bool armv8pt_reduce_contig(const struct pt_state *pts,
+ bool (*fn)(u64 *tablep,
+ u64 entry))
+{
+ u64 *tablep = pt_cur_table(pts, u64);
+
+ if (pts->entry & ARMV8PT_FMT_CONTIG) {
+ unsigned int num_contig_lg2 = armv8pt_contig_count_lg2(pts);
+ u64 *end;
+
+ tablep += log2_set_mod(pts->index, 0, num_contig_lg2);
+ end = tablep + log2_to_int(num_contig_lg2);
+ for (; tablep != end; tablep++)
+ if (fn(tablep, READ_ONCE(*tablep)))
+ return true;
+ return false;
+ }
+ return fn(tablep + pts->index, pts->entry);
+}
+
+static inline bool armv8pt_check_is_dirty_s1(u64 *tablep, u64 entry)
+{
+ return (entry & (ARMV8PT_FMT_DBM |
+ FIELD_PREP(ARMV8PT_FMT_AP, ARMV8PT_AP_RDONLY))) ==
+ ARMV8PT_FMT_DBM;
+}
+
+static bool armv6pt_clear_dirty_s1(u64 *tablep, u64 entry)
+{
+ WRITE_ONCE(*tablep,
+ entry | FIELD_PREP(ARMV8PT_FMT_AP, ARMV8PT_AP_RDONLY));
+ return false;
+}
+
+static inline bool armv8pt_check_is_dirty_s2(u64 *tablep, u64 entry)
+{
+ const u64 DIRTY = ARMV8PT_FMT_DBM |
+ FIELD_PREP(ARMV8PT_FMT_S2AP, ARMV8PT_S2AP_WRITE);
+
+ return (entry & DIRTY) == DIRTY;
+}
+
+static bool armv6pt_clear_dirty_s2(u64 *tablep, u64 entry)
+{
+ WRITE_ONCE(*tablep, entry & ~(u64)FIELD_PREP(ARMV8PT_FMT_S2AP,
+ ARMV8PT_S2AP_WRITE));
+ return false;
+}
+
+static inline bool armv8pt_entry_write_is_dirty(const struct pt_state *pts)
+{
+ if (!pts_feature(pts, PT_FEAT_ARMV8_S2))
+ return armv8pt_reduce_contig(pts, armv8pt_check_is_dirty_s1);
+ else
+ return armv8pt_reduce_contig(pts, armv8pt_check_is_dirty_s2);
+}
+#define pt_entry_write_is_dirty armv8pt_entry_write_is_dirty
+
+static inline void armv8pt_entry_set_write_clean(struct pt_state *pts)
+{
+ if (!pts_feature(pts, PT_FEAT_ARMV8_S2))
+ armv8pt_reduce_contig(pts, armv6pt_clear_dirty_s1);
+ else
+ armv8pt_reduce_contig(pts, armv6pt_clear_dirty_s2);
+}
+#define pt_entry_set_write_clean armv8pt_entry_set_write_clean
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_armv8
+
+/* The common struct is in the per-format common struct */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+ return &container_of(iommu_table, struct pt_iommu_table, iommu)
+ ->armpt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+ return &container_of(common, struct pt_iommu_table, armpt.common)->iommu;
+}
+
+static inline int armv8pt_iommu_set_prot(struct pt_common *common,
+ struct pt_write_attrs *attrs,
+ unsigned int iommu_prot)
+{
+ bool is_s1 = !pt_feature(common, PT_FEAT_ARMV8_S2);
+ u64 pte = 0;
+
+ if (is_s1) {
+ u64 ap = 0;
+
+ if (!(iommu_prot & IOMMU_WRITE) && (iommu_prot & IOMMU_READ))
+ ap |= ARMV8PT_AP_RDONLY;
+ if (!(iommu_prot & IOMMU_PRIV))
+ ap |= ARMV8PT_AP_UNPRIV;
+ pte = ARMV8PT_FMT_nG | FIELD_PREP(ARMV8PT_FMT_AP, ap);
+
+ if (iommu_prot & IOMMU_MMIO)
+ pte |= FIELD_PREP(ARMV8PT_FMT_ATTRINDX,
+ ARMV8PT_MAIR_ATTR_IDX_DEV);
+ else if (iommu_prot & IOMMU_CACHE)
+ pte |= FIELD_PREP(ARMV8PT_FMT_ATTRINDX,
+ ARMV8PT_MAIR_ATTR_IDX_CACHE);
+ } else {
+ u64 s2ap = 0;
+
+ if (iommu_prot & IOMMU_READ)
+ s2ap |= ARMV8PT_S2AP_READ;
+ if (iommu_prot & IOMMU_WRITE)
+ s2ap |= ARMV8PT_S2AP_WRITE;
+ pte = FIELD_PREP(ARMV8PT_FMT_S2AP, s2ap);
+
+ if (iommu_prot & IOMMU_MMIO)
+ pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR,
+ ARMV8PT_MEMATTR_DEV);
+ else if ((iommu_prot & IOMMU_CACHE) &&
+ pt_feature(common, PT_FEAT_ARMV8_S2FWB))
+ pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR,
+ ARMV8PT_MEMATTR_FWB_WB);
+ else if (iommu_prot & IOMMU_CACHE)
+ pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR,
+ ARMV8PT_MEMATTR_OIWB);
+ else
+ pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR,
+ ARMV8PT_MEMATTR_NC);
+ }
+
+ /*
+ * For DBM the writable entry starts out dirty to avoid the HW doing
+ * memory accesses to dirty it. We can just leave the DBM bit
+ * permanently set with no cost.
+ */
+ if (pt_feature(common, PT_FEAT_ARMV8_DBM) && (iommu_prot & IOMMU_WRITE))
+ pte |= ARMV8PT_FMT_DBM;
+
+ if (iommu_prot & IOMMU_CACHE)
+ pte |= FIELD_PREP(ARMV8PT_FMT_SH, ARMV8PT_SH_IS);
+ else
+ pte |= FIELD_PREP(ARMV8PT_FMT_SH, ARMV8PT_SH_OS);
+
+ /* FIXME for mali:
+ pte |= ARM_LPAE_PTE_SH_OS;
+ */
+
+ if (iommu_prot & IOMMU_NOEXEC)
+ pte |= ARMV8PT_FMT_UXN | ARMV8PT_FMT_PXN;
+
+ if (pt_feature(common, PT_FEAT_ARMV8_NS))
+ pte |= ARMV8PT_FMT_NS;
+
+ // FIXME not on mali:
+ pte |= ARMV8PT_FMT_AF;
+
+ attrs->descriptor_bits = pte;
+ return 0;
+}
+#define pt_iommu_set_prot armv8pt_iommu_set_prot
+
+static inline int armv8pt_iommu_fmt_init(struct pt_iommu_armv8 *iommu_table,
+ struct pt_iommu_armv8_cfg *cfg)
+{
+ struct pt_armv8 *armv8pt = &iommu_table->armpt;
+ unsigned int levels;
+
+ /* Atomicity of dirty bits conflicts with an incoherent cache */
+ if ((cfg->features & PT_FEAT_ARMV8_DBM) &&
+ (cfg->features & PT_FEAT_DMA_INCOHERENT))
+ return -EOPNOTSUPP;
+
+ /* FIXME are these inputs supposed to be an exact request, or a HW capability? */
+
+ if (cfg->ias_lg2 <= PT_GRANUAL_LG2SZ)
+ return -EINVAL;
+
+ if ((PT_GRANUAL_SIZE == SZ_64K && cfg->oas_lg2 > 52) ||
+ (PT_GRANUAL_SIZE != SZ_64K && cfg->oas_lg2 > 48))
+ return -EINVAL;
+
+ /*if (cfg->ias > 48)
+ table->feat_lva = true; */
+
+ cfg->ias_lg2 = min(cfg->ias_lg2, PT_MAX_VA_ADDRESS_LG2);
+
+ levels = DIV_ROUND_UP(cfg->ias_lg2 - PT_GRANUAL_LG2SZ,
+ PT_GRANUAL_LG2SZ - ilog2(sizeof(u64)));
+ if (levels > PT_MAX_TOP_LEVEL + 1)
+ return -EINVAL;
+
+ /*
+ * Table D5-6 PA size implications for the VTCR_EL2.{T0SZ, SL0}
+ * Single level is not supported without FEAT_TTST, which we are not
+ * implementing.
+ */
+ if (pt_feature(&armv8pt->common, PT_FEAT_ARMV8_S2) &&
+ PT_GRANUAL_SIZE == SZ_4K && levels == 1)
+ return -EINVAL;
+
+ /* FIXME - test me S2 concatenated translation tables
+ if (levels > 1 && cfg->is_s2 &&
+ cfg->ias_lg2 - (ARMV8PT_LVL0_ITEM_LG2SZ * (levels - 1)) <= 4)
+ levels--;
+ */
+ pt_top_set_level(&armv8pt->common, levels - 1);
+ armv8pt->common.max_vasz_lg2 = cfg->ias_lg2;
+ armv8pt->common.max_oasz_lg2 = cfg->oas_lg2;
+ return 0;
+}
+#define pt_iommu_fmt_init armv8pt_iommu_fmt_init
+
+#if defined(GENERIC_PT_KUNIT)
+static inline void armv8pt_kunit_setup_cfg(struct pt_iommu_armv8_cfg *cfg)
+{
+ cfg->ias_lg2 = 48;
+ cfg->oas_lg2 = 48;
+
+ cfg->features &= ~(BIT(PT_FEAT_ARMV8_TTBR1) | BIT(PT_FEAT_ARMV8_S2) |
+ BIT(PT_FEAT_ARMV8_DBM) | BIT(PT_FEAT_ARMV8_S2FWB) |
+ BIT(PT_FEAT_ARMV8_NS));
+}
+#define pt_kunit_setup_cfg armv8pt_kunit_setup_cfg
+#endif
+
+#if defined(GENERIC_PT_KUNIT) && IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)
+#include <linux/io-pgtable.h>
+
+static struct io_pgtable_ops *
+armv8pt_iommu_alloc_io_pgtable(struct pt_iommu_armv8_cfg *cfg,
+ struct device *iommu_dev,
+ struct io_pgtable_cfg **unused_pgtbl_cfg)
+{
+ struct io_pgtable_cfg pgtbl_cfg = {};
+ enum io_pgtable_fmt fmt;
+
+ pgtbl_cfg.ias = cfg->ias_lg2;
+ pgtbl_cfg.oas = cfg->oas_lg2;
+ if (PT_GRANUAL_SIZE == SZ_64K)
+ pgtbl_cfg.pgsize_bitmap |= SZ_64K | SZ_512M;
+ if (PT_GRANUAL_SIZE == SZ_16K)
+ pgtbl_cfg.pgsize_bitmap |= SZ_16K | SZ_32M;
+ if (PT_GRANUAL_SIZE == SZ_4K)
+ pgtbl_cfg.pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
+ pgtbl_cfg.coherent_walk = true;
+
+ if (cfg->features & BIT(PT_FEAT_ARMV8_S2))
+ fmt = ARM_64_LPAE_S2;
+ else
+ fmt = ARM_64_LPAE_S1;
+
+ return alloc_io_pgtable_ops(fmt, &pgtbl_cfg, NULL);
+}
+#define pt_iommu_alloc_io_pgtable armv8pt_iommu_alloc_io_pgtable
+
+static void armv8pt_iommu_setup_ref_table(struct pt_iommu_armv8 *iommu_table,
+ struct io_pgtable_ops *pgtbl_ops)
+{
+ struct io_pgtable_cfg *pgtbl_cfg =
+ &io_pgtable_ops_to_pgtable(pgtbl_ops)->cfg;
+ struct pt_common *common = &iommu_table->armpt.common;
+
+ /* FIXME should determine the level from the pgtbl_cfg */
+ if (pt_feature(common, PT_FEAT_ARMV8_S2))
+ pt_top_set(common, __va(pgtbl_cfg->arm_lpae_s2_cfg.vttbr),
+ pt_top_get_level(common));
+ else
+ pt_top_set(common, __va(pgtbl_cfg->arm_lpae_s1_cfg.ttbr),
+ pt_top_get_level(common));
+}
+#define pt_iommu_setup_ref_table armv8pt_iommu_setup_ref_table
+
+static u64 armv8pt_kunit_cmp_mask_entry(struct pt_state *pts)
+{
+ if (pts->type == PT_ENTRY_TABLE)
+ return pts->entry & (~(u64)(ARMV8PT_FMT_OA48));
+ return pts->entry & (~(u64)ARMV8PT_FMT_CONTIG);
+}
+#define pt_kunit_cmp_mask_entry armv8pt_kunit_cmp_mask_entry
+#endif
+
+#endif
new file mode 100644
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ * VMSAv8-64 translation table in AArch64 mode
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_ARMV8_H
+#define __GENERIC_PT_FMT_DEFS_ARMV8_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+/* Header self-compile default defines */
+#ifndef ARMV8_GRANUAL_SIZE
+#define ARMV8_GRANUAL_SIZE 4096
+#endif
+
+typedef u64 pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct armv8pt_write_attrs {
+ u64 descriptor_bits;
+ gfp_t gfp;
+};
+#define pt_write_attrs armv8pt_write_attrs
+
+#endif
new file mode 100644
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT armv8
+#define PT_FMT_VARIANT 16k
+#define PT_SUPPORTED_FEATURES \
+ (BIT(PT_FEAT_DMA_INCOHERENT) | BIT(PT_FEAT_ARMV8_LPA) | \
+ BIT(PT_FEAT_ARMV8_S2) | BIT(PT_FEAT_ARMV8_DBM) | \
+ BIT(PT_FEAT_ARMV8_S2FWB))
+#define ARMV8_GRANUAL_SIZE 16384
+
+#include "iommu_template.h"
new file mode 100644
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT armv8
+#define PT_FMT_VARIANT 4k
+#define PT_SUPPORTED_FEATURES \
+ (BIT(PT_FEAT_DMA_INCOHERENT) | BIT(PT_FEAT_ARMV8_LPA) | \
+ BIT(PT_FEAT_ARMV8_S2) | BIT(PT_FEAT_ARMV8_DBM) | \
+ BIT(PT_FEAT_ARMV8_S2FWB))
+#define ARMV8_GRANUAL_SIZE 4096
+
+#include "iommu_template.h"
new file mode 100644
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT armv8
+#define PT_FMT_VARIANT 64k
+#define PT_SUPPORTED_FEATURES \
+ (BIT(PT_FEAT_DMA_INCOHERENT) | BIT(PT_FEAT_ARMV8_LPA) | \
+ BIT(PT_FEAT_ARMV8_S2) | BIT(PT_FEAT_ARMV8_DBM) | \
+ BIT(PT_FEAT_ARMV8_S2FWB))
+#define ARMV8_GRANUAL_SIZE 65536
+
+#include "iommu_template.h"
@@ -100,4 +100,26 @@ enum {
PT_FEAT_FMT_START,
};
+struct pt_armv8 {
+ struct pt_common common;
+};
+
+enum {
+ /* Use the upper address space instead of lower */
+ PT_FEAT_ARMV8_TTBR1 = PT_FEAT_FMT_START,
+ /*
+ * Large Physical Address extension allows larger page sizes on 64k.
+ * Larger physical addresess are always supported
+ */
+ PT_FEAT_ARMV8_LPA,
+ /* Use the Stage 2 format instead of Stage 1 */
+ PT_FEAT_ARMV8_S2,
+ /* Use Dirty Bit Modifier, necessary for IOMMU dirty tracking */
+ PT_FEAT_ARMV8_DBM,
+ /* For S2 uses the Force Write Back coding of the S2MEMATTR */
+ PT_FEAT_ARMV8_S2FWB,
+ /* Set the NS and NSTable bits in all entries */
+ PT_FEAT_ARMV8_NS,
+};
+
#endif
@@ -204,4 +204,77 @@ static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
iommu_table->ops->deinit(iommu_table);
}
+struct pt_iommu_armv8 {
+ struct pt_iommu iommu;
+ struct pt_armv8 armpt;
+};
+
+struct pt_iommu_armv8_cfg {
+ struct device *iommu_device;
+ unsigned int features;
+ /* Input Address Size lg2 */
+ u8 ias_lg2;
+ /* Output Address Size lg2 */
+ u8 oas_lg2;
+};
+
+int pt_iommu_armv8_4k_init(struct pt_iommu_armv8 *table,
+ struct pt_iommu_armv8_cfg *cfg, gfp_t gfp);
+int pt_iommu_armv8_16k_init(struct pt_iommu_armv8 *table,
+ struct pt_iommu_armv8_cfg *cfg, gfp_t gfp);
+int pt_iommu_armv8_64k_init(struct pt_iommu_armv8 *table,
+ struct pt_iommu_armv8_cfg *cfg, gfp_t gfp);
+
+static size_t __pt_iommu_armv8_granuals_to_lg2(size_t granual_sizes)
+{
+ size_t supported_granuals = 0;
+
+ if (IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_4K))
+ supported_granuals |= BIT(12);
+ if (IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_16K))
+ supported_granuals |= BIT(14);
+ if (IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_64K))
+ supported_granuals |= BIT(16);
+
+ granual_sizes &= supported_granuals;
+ if (!granual_sizes)
+ return 0;
+
+ /* Prefer the CPU page size if possible */
+ if (granual_sizes & PAGE_SIZE)
+ return PAGE_SHIFT;
+
+ /*
+ * Otherwise prefer the largest page size smaller than the CPU page
+ * size
+ */
+ if (granual_sizes % PAGE_SIZE)
+ return ilog2(rounddown_pow_of_two(granual_sizes % PAGE_SIZE));
+
+ /* Otherwise use the smallest page size available */
+ return __ffs(granual_sizes);
+}
+
+static inline int pt_iommu_armv8_init(struct pt_iommu_armv8 *table,
+ struct pt_iommu_armv8_cfg *cfg,
+ size_t granual_sizes, gfp_t gfp)
+{
+ switch (__pt_iommu_armv8_granuals_to_lg2(granual_sizes)) {
+ case 12:
+ if (!IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_4K))
+ return -EOPNOTSUPP;
+ return pt_iommu_armv8_4k_init(table, cfg, gfp);
+ case 14:
+ if (!IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_16K))
+ return -EOPNOTSUPP;
+ return pt_iommu_armv8_16k_init(table, cfg, gfp);
+ case 16:
+ if (!IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_64K))
+ return -EOPNOTSUPP;
+ return pt_iommu_armv8_64k_init(table, cfg, gfp);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
#endif
The features, format and naming is taking from the ARMv8 VMSAv8-64 chapter. ARMv8 uses almost all the features of the common implementation: - Contigous pages - Leaf pages at many levels - Variable top level - Variable size top level, including super-sized (concatenated tables) - Dirty tracking - low or high starting VA Compared to the io-pgtable version this also implements the contiguous page hint, and supports dirty readback from the S2. The common algorithms use a bit in the folio to keep track of the cache invalidation race, while the io-pgtable version uses a SW bit in the table PTE. In part as an demonstration, to be evaluated with performace data, ARMv8 is multi-compiled for each of the 4k/16k/64k granule size. This gives 3x the .text usage with an unmeasured performance improvement. It shows how Generic PT can be used to optimize code gen. FIXME: Not every detail around the variable VA width is fully completed and tested yet. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/iommu/generic_pt/Kconfig | 39 ++ drivers/iommu/generic_pt/fmt/Makefile | 4 + drivers/iommu/generic_pt/fmt/armv8.h | 621 ++++++++++++++++++ drivers/iommu/generic_pt/fmt/defs_armv8.h | 28 + .../iommu/generic_pt/fmt/iommu_armv8_16k.c | 13 + drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c | 13 + .../iommu/generic_pt/fmt/iommu_armv8_64k.c | 13 + include/linux/generic_pt/common.h | 22 + include/linux/generic_pt/iommu.h | 73 ++ 9 files changed, 826 insertions(+) create mode 100644 drivers/iommu/generic_pt/fmt/armv8.h create mode 100644 drivers/iommu/generic_pt/fmt/defs_armv8.h create mode 100644 drivers/iommu/generic_pt/fmt/iommu_armv8_16k.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_armv8_64k.c