diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
--- a/drivers/iommu/riscv/Makefile
+++ b/drivers/iommu/riscv/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
+obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-ir.o iommu-platform.o
obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
diff --git a/drivers/iommu/riscv/iommu-ir.c b/drivers/iommu/riscv/iommu-ir.c
new file mode 100644
--- /dev/null
+++ b/drivers/iommu/riscv/iommu-ir.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IOMMU Interrupt Remapping
+ *
+ * Copyright © 2024 Ventana Micro Systems Inc.
+ */
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+
+#include "../iommu-pages.h"
+#include "iommu.h"
+
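+/*
+ * The MSI page table is indexed by the page number of the MSI target
+ * address.  When the interrupt files are arranged in groups
+ * (group_index_bits != 0), the group number sits at group_index_shift
+ * in the address, above the per-group index bits, so it is extracted
+ * separately and concatenated above the remaining mask bits to keep
+ * the table densely packed.
+ */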
+static size_t riscv_iommu_ir_get_msipte_idx(struct riscv_iommu_domain *domain,
+ phys_addr_t msi_pa)
+{
+ phys_addr_t addr = msi_pa >> 12;
+ size_t idx;
+
+ if (domain->group_index_bits) {
+ phys_addr_t group_mask = BIT(domain->group_index_bits) - 1;
+ phys_addr_t group_shift = domain->group_index_shift - 12;
+ phys_addr_t group = (addr >> group_shift) & group_mask;
+ phys_addr_t mask = domain->msiptp.msi_addr_mask & ~(group_mask << group_shift);
+
+ idx = addr & mask;
+ idx |= group << fls64(mask);
+ } else {
+ idx = addr & domain->msiptp.msi_addr_mask;
+ }
+
+ return idx;
+}
+
+static struct riscv_iommu_msipte *riscv_iommu_ir_get_msipte(struct riscv_iommu_domain *domain,
+ phys_addr_t msi_pa)
+{
+ size_t idx;
+
+ if (((msi_pa >> 12) & ~domain->msiptp.msi_addr_mask) != domain->msiptp.msi_addr_pattern)
+ return NULL;
+
+ idx = riscv_iommu_ir_get_msipte_idx(domain, msi_pa);
+ return &domain->msi_root[idx];
+}
+
+static size_t riscv_iommu_ir_nr_msiptes(struct riscv_iommu_domain *domain)
+{
+ phys_addr_t base = domain->msiptp.msi_addr_pattern << 12;
+ phys_addr_t max_addr = base | (domain->msiptp.msi_addr_mask << 12);
+ size_t max_idx = riscv_iommu_ir_get_msipte_idx(domain, max_addr);
+
+ return max_idx + 1;
+}
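+/*
+ * Worked example with made-up values: group_index_bits = 1,
+ * group_index_shift = 16 and msi_addr_mask = 0x17 put the group bit
+ * at PPN bit 4 and the per-group index in PPN bits [2:0].  An MSI
+ * write whose PPN low bits are 0x17 then yields group = 1 and
+ * idx = 0x7 | (1 << fls64(0x7)) = 0xf, and riscv_iommu_ir_nr_msiptes()
+ * returns 16.
+ */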
+
+static struct irq_chip riscv_iommu_irq_chip = {
+ .name = "IOMMU-IR",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+};
+
+static int riscv_iommu_irq_domain_alloc_irqs(struct irq_domain *irqdomain,
+ unsigned int irq_base, unsigned int nr_irqs,
+ void *arg)
+{
+ struct irq_data *data;
+ int i, ret;
+
+ ret = irq_domain_alloc_irqs_parent(irqdomain, irq_base, nr_irqs, arg);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nr_irqs; i++) {
+ data = irq_domain_get_irq_data(irqdomain, irq_base + i);
+ data->chip = &riscv_iommu_irq_chip;
+ }
+
+ return 0;
+}
+
+static const struct irq_domain_ops riscv_iommu_irq_domain_ops = {
+ .alloc = riscv_iommu_irq_domain_alloc_irqs,
+ .free = irq_domain_free_irqs_parent,
+};
+
+static const struct msi_parent_ops riscv_iommu_msi_parent_ops = {
+ .prefix = "IR-",
+ .supported_flags = MSI_GENERIC_FLAGS_MASK |
+ MSI_FLAG_PCI_MSIX,
+ .required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
+ MSI_FLAG_USE_DEF_CHIP_OPS,
+ .init_dev_msi_info = msi_parent_init_dev_msi_info,
+};
+
+int riscv_iommu_irq_domain_create(struct riscv_iommu_domain *domain,
+ struct device *dev)
+{
+ struct riscv_iommu_device *iommu = dev_to_iommu(dev);
+ struct fwnode_handle *fn;
+ char *fwname;
+
+ if (domain->irqdomain) {
+ dev_set_msi_domain(dev, domain->irqdomain);
+ return 0;
+ }
+
+ if (!(iommu->caps & RISCV_IOMMU_CAPABILITIES_MSI_FLAT)) {
+ dev_warn(iommu->dev, "Cannot enable interrupt remapping\n");
+ return 0;
+ }
+
+ spin_lock_init(&domain->msi_lock);
+ /*
+ * TODO: The hypervisor should be in control of this size. For now
+ * we just allocate enough space for 512 VCPUs.
+ */
+ domain->msi_order = 1;
+ domain->msi_root = iommu_alloc_pages_node(domain->numa_node,
+ GFP_KERNEL_ACCOUNT, domain->msi_order);
+ if (!domain->msi_root)
+ return -ENOMEM;
+
+ fwname = kasprintf(GFP_KERNEL, "IOMMU-IR-%s", dev_name(dev));
+ if (!fwname) {
+ iommu_free_pages(domain->msi_root, domain->msi_order);
+ return -ENOMEM;
+ }
+
+ fn = irq_domain_alloc_named_fwnode(fwname);
+ kfree(fwname);
+ if (!fn) {
+ dev_err(iommu->dev, "Couldn't allocate fwnode\n");
+ iommu_free_pages(domain->msi_root, domain->msi_order);
+ return -ENOMEM;
+ }
+
+ domain->irqdomain = irq_domain_create_hierarchy(dev_get_msi_domain(dev),
+ 0, 0, fn,
+ &riscv_iommu_irq_domain_ops,
+ domain);
+ if (!domain->irqdomain) {
+ dev_err(iommu->dev, "Failed to create IOMMU irq domain\n");
+ iommu_free_pages(domain->msi_root, domain->msi_order);
+ irq_domain_free_fwnode(fn);
+ return -ENOMEM;
+ }
+
+ domain->irqdomain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
+ domain->irqdomain->msi_parent_ops = &riscv_iommu_msi_parent_ops;
+ irq_domain_update_bus_token(domain->irqdomain, DOMAIN_BUS_MSI_REMAP);
+ dev_set_msi_domain(dev, domain->irqdomain);
+
+ return 0;
+}
+
+void riscv_iommu_ir_get_resv_regions(struct device *dev, struct list_head *head)
+{
+ struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
+ struct riscv_iommu_domain *domain = info->domain;
+ struct iommu_resv_region *reg;
+ phys_addr_t base, addr;
+ size_t nr_pages, i;
+
+ if (!domain || !domain->msiptp.msiptp)
+ return;
+
+ base = domain->msiptp.msi_addr_pattern << 12;
+
+ if (domain->group_index_bits) {
+ phys_addr_t group_mask = BIT(domain->group_index_bits) - 1;
+ phys_addr_t group_shift = domain->group_index_shift - 12;
+ phys_addr_t mask = domain->msiptp.msi_addr_mask & ~(group_mask << group_shift);
+
+ nr_pages = mask + 1;
+ } else {
+ nr_pages = domain->msiptp.msi_addr_mask + 1;
+ }
+
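+	/*
+	 * Reserve one MSI window per interrupt file group so that the
+	 * IOVA allocator never hands these ranges out for DMA.
+	 */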
+ for (i = 0; i < BIT(domain->group_index_bits); i++) {
+ addr = base | (i << domain->group_index_shift);
+ reg = iommu_alloc_resv_region(addr, nr_pages * 4096,
+ 0, IOMMU_RESV_MSI, GFP_KERNEL);
+ if (reg)
+			list_add_tail(&reg->list, head);
+ }
+}
+
+void riscv_iommu_irq_domain_remove(struct riscv_iommu_domain *domain)
+{
+ struct fwnode_handle *fn;
+
+ if (!domain->irqdomain)
+ return;
+
+ iommu_free_pages(domain->msi_root, domain->msi_order);
+
+ fn = domain->irqdomain->fwnode;
+ irq_domain_remove(domain->irqdomain);
+ irq_domain_free_fwnode(fn);
+}
+
+void riscv_iommu_irq_domain_unlink(struct riscv_iommu_domain *domain,
+ struct device *dev)
+{
+ if (!domain || !domain->irqdomain)
+ return;
+
+ dev_set_msi_domain(dev, domain->irqdomain->parent);
+}
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -943,7 +943,8 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
rcu_read_unlock();
}
-#define RISCV_IOMMU_FSC_BARE 0
+#define RISCV_IOMMU_FSC_BARE 0
+#define RISCV_IOMMU_IOHGATP_BARE 0
/*
* Update IODIR for the device.
@@ -1245,6 +1246,8 @@ static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
WARN_ON(!list_empty(&domain->bonds));
+ riscv_iommu_irq_domain_remove(domain);
+
if (domain->pscid > 0)
ida_free(&riscv_iommu_pscids, domain->pscid);
if (domain->gscid > 0)
@@ -1276,10 +1279,30 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
struct riscv_iommu_device *iommu = dev_to_iommu(dev);
struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
struct riscv_iommu_dc dc = {0};
+ int ret;
if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
return -ENODEV;
+ if (riscv_iommu_bond_link(domain, dev))
+ return -ENOMEM;
+
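+	/*
+	 * Unmanaged domains (e.g. VFIO device assignment) get a GSCID
+	 * for second-stage translation along with an IRQ remapping
+	 * domain.
+	 */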
+ if (iommu_domain->type == IOMMU_DOMAIN_UNMANAGED) {
+ domain->gscid = ida_alloc_range(&riscv_iommu_gscids, 1,
+ RISCV_IOMMU_MAX_GSCID, GFP_KERNEL);
+ if (domain->gscid < 0) {
+ riscv_iommu_bond_unlink(domain, dev);
+ return -ENOMEM;
+ }
+
+ ret = riscv_iommu_irq_domain_create(domain, dev);
+ if (ret) {
+ riscv_iommu_bond_unlink(domain, dev);
+ ida_free(&riscv_iommu_gscids, domain->gscid);
+ return ret;
+ }
+ }
+
if (domain->gscid) {
dc.iohgatp = FIELD_PREP(RISCV_IOMMU_DC_IOHGATP_MODE, domain->pgd_mode) |
FIELD_PREP(RISCV_IOMMU_DC_IOHGATP_GSCID, domain->gscid) |
@@ -1292,10 +1315,9 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
dc.ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
RISCV_IOMMU_PC_TA_V;
- if (riscv_iommu_bond_link(domain, dev))
- return -ENOMEM;
-
riscv_iommu_iodir_update(iommu, dev, &dc);
+
+ riscv_iommu_irq_domain_unlink(info->domain, dev);
riscv_iommu_bond_unlink(info->domain, dev);
info->domain = domain;
@@ -1389,9 +1411,12 @@ static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain,
struct riscv_iommu_dc dc = {0};
dc.fsc = RISCV_IOMMU_FSC_BARE;
+ dc.iohgatp = RISCV_IOMMU_IOHGATP_BARE;
/* Make device context invalid, translation requests will fault w/ #258 */
riscv_iommu_iodir_update(iommu, dev, &dc);
+
+ riscv_iommu_irq_domain_unlink(info->domain, dev);
riscv_iommu_bond_unlink(info->domain, dev);
info->domain = NULL;
@@ -1413,15 +1438,24 @@ static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain,
struct riscv_iommu_dc dc = {0};
dc.fsc = RISCV_IOMMU_FSC_BARE;
+ dc.iohgatp = RISCV_IOMMU_IOHGATP_BARE;
dc.ta = RISCV_IOMMU_PC_TA_V;
riscv_iommu_iodir_update(iommu, dev, &dc);
+
+ riscv_iommu_irq_domain_unlink(info->domain, dev);
riscv_iommu_bond_unlink(info->domain, dev);
info->domain = NULL;
return 0;
}
+static void riscv_iommu_get_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ riscv_iommu_ir_get_resv_regions(dev, head);
+}
+
static struct iommu_domain riscv_iommu_identity_domain = {
.type = IOMMU_DOMAIN_IDENTITY,
.ops = &(const struct iommu_domain_ops) {
@@ -1516,6 +1550,7 @@ static const struct iommu_ops riscv_iommu_ops = {
.blocked_domain = &riscv_iommu_blocking_domain,
.release_domain = &riscv_iommu_blocking_domain,
.domain_alloc_paging = riscv_iommu_alloc_paging_domain,
+ .get_resv_regions = riscv_iommu_get_resv_regions,
.device_group = riscv_iommu_device_group,
.probe_device = riscv_iommu_probe_device,
.release_device = riscv_iommu_release_device,
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -23,6 +23,12 @@
#define RISCV_IOMMU_DDTP_TIMEOUT 10000000
#define RISCV_IOMMU_IOTINVAL_TIMEOUT 90000000
+struct riscv_iommu_msiptp_state {
+ u64 msiptp;
+ u64 msi_addr_mask;
+ u64 msi_addr_pattern;
+};
+
/* This struct contains protection domain specific IOMMU driver data. */
struct riscv_iommu_domain {
struct iommu_domain domain;
@@ -34,6 +40,13 @@ struct riscv_iommu_domain {
int numa_node;
unsigned int pgd_mode;
unsigned long *pgd_root;
+ u32 group_index_bits;
+ u32 group_index_shift;
+ int msi_order;
+ struct riscv_iommu_msipte *msi_root;
+ spinlock_t msi_lock;
+ struct riscv_iommu_msiptp_state msiptp;
+ struct irq_domain *irqdomain;
};
/* Private IOMMU data for managed devices, dev_iommu_priv_* */
@@ -119,6 +132,14 @@ void riscv_iommu_cmd_send(struct riscv_iommu_device *iommu,
void riscv_iommu_cmd_sync(struct riscv_iommu_device *iommu,
unsigned int timeout_us);
+int riscv_iommu_irq_domain_create(struct riscv_iommu_domain *domain,
+ struct device *dev);
+void riscv_iommu_irq_domain_remove(struct riscv_iommu_domain *domain);
+void riscv_iommu_irq_domain_unlink(struct riscv_iommu_domain *domain,
+ struct device *dev);
+void riscv_iommu_ir_get_resv_regions(struct device *dev,
+ struct list_head *head);
+
#define riscv_iommu_readl(iommu, addr) \
readl_relaxed((iommu)->reg + (addr))
This is just a skeleton. Until irq_set_vcpu_affinity() is implemented
the IRQ domain doesn't serve any purpose.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
 drivers/iommu/riscv/Makefile   |   2 +-
 drivers/iommu/riscv/iommu-ir.c | 209 +++++++++++++++++++++++++++++++++
 drivers/iommu/riscv/iommu.c    |  43 ++++++-
 drivers/iommu/riscv/iommu.h    |  21 ++++
 4 files changed, 270 insertions(+), 5 deletions(-)
 create mode 100644 drivers/iommu/riscv/iommu-ir.c
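As a purely illustrative sketch of where the missing piece might land,
not part of this patch: the irq_chip could grow an .irq_set_vcpu_affinity
callback that looks up the MSI PTE for the vcpu's interrupt file and
rewrites it under msi_lock. The vcpu_info layout and the
riscv_iommu_ir_msipte_update() helper below are placeholders invented
for illustration; only riscv_iommu_ir_get_msipte() and msi_lock come
from this patch.

/* Hypothetical follow-up, for illustration only. */
struct riscv_iommu_vcpu_info {
	phys_addr_t msi_addr;	/* guest's MSI target page (placeholder) */
	phys_addr_t host_addr;	/* backing host interrupt file (placeholder) */
};

static int riscv_iommu_irq_set_vcpu_affinity(struct irq_data *data, void *info)
{
	struct riscv_iommu_domain *domain = data->domain->host_data;
	struct riscv_iommu_vcpu_info *vcpu_info = info;
	struct riscv_iommu_msipte *pte;
	unsigned long flags;

	spin_lock_irqsave(&domain->msi_lock, flags);
	pte = riscv_iommu_ir_get_msipte(domain, vcpu_info->msi_addr);
	if (!pte) {
		spin_unlock_irqrestore(&domain->msi_lock, flags);
		return -EINVAL;
	}
	/* Placeholder: point the pte at host_addr and flush stale translations. */
	riscv_iommu_ir_msipte_update(domain, pte, vcpu_info->host_addr);
	spin_unlock_irqrestore(&domain->msi_lock, flags);

	return 0;
}

riscv_iommu_irq_chip would then also set
.irq_set_vcpu_affinity = riscv_iommu_irq_set_vcpu_affinity alongside the
existing mask/unmask parent forwarding.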