@@ -32,6 +32,7 @@
#include "iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "perf.h"
#include "trace.h"
#include "perfmon.h"
@@ -1187,7 +1188,7 @@ static void free_iommu(struct intel_iommu *iommu)
}
if (iommu->qi) {
- free_page((unsigned long)iommu->qi->desc);
+ iommu_free_page(iommu->qi->desc);
kfree(iommu->qi->desc_status);
kfree(iommu->qi);
}
@@ -1755,7 +1756,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
- struct page *desc_page;
+ void *desc;
+ int order;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
@@ -1776,19 +1778,19 @@ int dmar_enable_qi(struct intel_iommu *iommu)
* Need two pages to accommodate 256 descriptors of 256 bits each
* if the remapping hardware supports scalable mode translation.
*/
- desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
- !!ecap_smts(iommu->ecap));
- if (!desc_page) {
+ order = ecap_smts(iommu->ecap) ? 1 : 0;
+ desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
+ if (!desc) {
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
}
- qi->desc = page_address(desc_page);
+ qi->desc = desc;
qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
- free_page((unsigned long) qi->desc);
+ iommu_free_page(qi->desc);
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
@@ -27,6 +27,7 @@
#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "pasid.h"
#include "cap_audit.h"
#include "perfmon.h"
@@ -298,22 +299,6 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-void *alloc_pgtable_page(int node, gfp_t gfp)
-{
- struct page *page;
- void *vaddr = NULL;
-
- page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
- if (page)
- vaddr = page_address(page);
- return vaddr;
-}
-
-void free_pgtable_page(void *vaddr)
-{
- free_page((unsigned long)vaddr);
-}
-
static int domain_type_is_si(struct dmar_domain *domain)
{
return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
@@ -545,7 +530,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC);
if (!context)
return NULL;
@@ -719,17 +704,17 @@ static void free_context_table(struct intel_iommu *iommu)
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
- free_pgtable_page(context);
+ iommu_free_page(context);
if (!sm_supported(iommu))
continue;
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
- free_pgtable_page(context);
+ iommu_free_page(context);
}
- free_pgtable_page(iommu->root_entry);
+ iommu_free_page(iommu->root_entry);
iommu->root_entry = NULL;
}
@@ -867,7 +852,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval;
- tmp_page = alloc_pgtable_page(domain->nid, gfp);
+ tmp_page = iommu_alloc_page_node(domain->nid, gfp);
if (!tmp_page)
return NULL;
@@ -879,7 +864,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
- free_pgtable_page(tmp_page);
+ iommu_free_page(tmp_page);
else
domain_flush_cache(domain, pte, sizeof(*pte));
}
@@ -992,7 +977,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
last_pfn < level_pfn + level_size(level) - 1)) {
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
- free_pgtable_page(level_pte);
+ iommu_free_page(level_pte);
}
next:
pfn += level_size(level);
@@ -1016,7 +1001,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- free_pgtable_page(domain->pgd);
+ iommu_free_page(domain->pgd);
domain->pgd = NULL;
}
}
@@ -1118,7 +1103,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -1841,7 +1826,7 @@ static void domain_exit(struct dmar_domain *domain)
LIST_HEAD(freelist);
domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
- put_pages_list(&freelist);
+ iommu_put_pages_list(&freelist);
}
if (WARN_ON(!list_empty(&domain->devices)))
@@ -2497,7 +2482,7 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
+ new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL);
if (!new_ce)
goto out_unmap;
@@ -3426,7 +3411,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
start_vpfn, mhp->nr_pages,
list_empty(&freelist), 0);
rcu_read_unlock();
- put_pages_list(&freelist);
+ iommu_put_pages_list(&freelist);
}
break;
}
@@ -3833,7 +3818,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
+ domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -3987,7 +3972,7 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
pte = dmar_domain->pgd;
if (dma_pte_present(pte)) {
dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
- free_pgtable_page(pte);
+ iommu_free_page(pte);
}
dmar_domain->agaw--;
}
@@ -4141,7 +4126,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
if (dmar_domain->nested_parent)
parent_domain_flush(dmar_domain, start_pfn, nrpages,
list_empty(&gather->freelist));
- put_pages_list(&gather->freelist);
+ iommu_put_pages_list(&gather->freelist);
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -1085,8 +1085,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain);
int dmar_ir_support(void);
-void *alloc_pgtable_page(int node, gfp_t gfp);
-void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
const struct iommu_user_data *user_data);
@@ -22,6 +22,7 @@
#include "iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "cap_audit.h"
enum irq_mode {
@@ -527,7 +528,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
struct ir_table *ir_table;
struct fwnode_handle *fn;
unsigned long *bitmap;
- struct page *pages;
+ void *ir_table_base;
if (iommu->ir_table)
return 0;
@@ -536,9 +537,9 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
if (!ir_table)
return -ENOMEM;
- pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
- INTR_REMAP_PAGE_ORDER);
- if (!pages) {
+ ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL,
+ INTR_REMAP_PAGE_ORDER);
+ if (!ir_table_base) {
pr_err("IR%d: failed to allocate pages of order %d\n",
iommu->seq_id, INTR_REMAP_PAGE_ORDER);
goto out_free_table;
@@ -573,7 +574,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
else
iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
- ir_table->base = page_address(pages);
+ ir_table->base = ir_table_base;
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
@@ -622,7 +623,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
- __free_pages(pages, INTR_REMAP_PAGE_ORDER);
+ iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER);
out_free_table:
kfree(ir_table);
@@ -643,8 +644,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
irq_domain_free_fwnode(fn);
iommu->ir_domain = NULL;
}
- free_pages((unsigned long)iommu->ir_table->base,
- INTR_REMAP_PAGE_ORDER);
+ iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER);
bitmap_free(iommu->ir_table->bitmap);
kfree(iommu->ir_table);
iommu->ir_table = NULL;
@@ -20,6 +20,7 @@
#include "iommu.h"
#include "pasid.h"
+#include "../iommu-pages.h"
/*
* Intel IOMMU system wide PASID name space:
@@ -38,7 +39,7 @@ int intel_pasid_alloc_table(struct device *dev)
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
- struct page *pages;
+ struct pasid_dir_entry *dir;
u32 max_pasid = 0;
int order, size;
@@ -59,14 +60,13 @@ int intel_pasid_alloc_table(struct device *dev)
size = max_pasid >> (PASID_PDE_SHIFT - 3);
order = size ? get_order(size) : 0;
- pages = alloc_pages_node(info->iommu->node,
- GFP_KERNEL | __GFP_ZERO, order);
- if (!pages) {
+ dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
+ if (!dir) {
kfree(pasid_table);
return -ENOMEM;
}
- pasid_table->table = page_address(pages);
+ pasid_table->table = dir;
pasid_table->order = order;
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
@@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev)
max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
for (i = 0; i < max_pde; i++) {
table = get_pasid_table_from_pde(&dir[i]);
- free_pgtable_page(table);
+ iommu_free_page(table);
}
- free_pages((unsigned long)pasid_table->table, pasid_table->order);
+ iommu_free_pages(pasid_table->table, pasid_table->order);
kfree(pasid_table);
}
@@ -146,7 +146,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
- entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
+ entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
if (!entries)
return NULL;
@@ -158,7 +158,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
*/
if (cmpxchg64(&dir[dir_index].val, 0ULL,
(u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
- free_pgtable_page(entries);
+ iommu_free_page(entries);
goto retry;
}
if (!ecap_coherent(info->iommu->ecap)) {
@@ -22,6 +22,7 @@
#include "iommu.h"
#include "pasid.h"
#include "perf.h"
+#include "../iommu-pages.h"
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
@@ -63,16 +64,14 @@ svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
struct iopf_queue *iopfq;
- struct page *pages;
int irq, ret;
- pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
- if (!pages) {
+ iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
+ if (!iommu->prq) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
return -ENOMEM;
}
- iommu->prq = page_address(pages);
irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
if (irq <= 0) {
@@ -117,7 +116,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
@@ -140,7 +139,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->iopf_queue = NULL;
}
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return 0;
new file mode 100644
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef __IOMMU_PAGES_H
+#define __IOMMU_PAGES_H
+
+#include <linux/vmstat.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+/*
+ * All page allocations that should be reported to as "iommu-pagetables" to
+ * userspace must use one of the functions below. This includes allocations of
+ * page-tables and other per-iommu_domain configuration structures.
+ *
+ * This is necessary for the proper accounting as IOMMU state can be rather
+ * large, i.e. multiple gigabytes in size.
+ */
+
+/**
+ * __iommu_alloc_pages - allocate a zeroed page of a given order.
+ * @gfp: buddy allocator flags
+ * @order: page order
+ *
+ * returns the head struct page of the allocated page.
+ */
+static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order)
+{
+ struct page *page;
+
+ page = alloc_pages(gfp | __GFP_ZERO, order);
+ if (unlikely(!page))
+ return NULL;
+
+ return page;
+}
+
+/**
+ * __iommu_free_pages - free page of a given order
+ * @page: head struct page of the page
+ * @order: page order
+ */
+static inline void __iommu_free_pages(struct page *page, int order)
+{
+ if (!page)
+ return;
+
+ __free_pages(page, order);
+}
+
+/**
+ * iommu_alloc_pages_node - allocate a zeroed page of a given order from
+ * specific NUMA node.
+ * @nid: memory NUMA node id
+ * @gfp: buddy allocator flags
+ * @order: page order
+ *
+ * returns the virtual address of the allocated page
+ */
+static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order)
+{
+ struct page *page = alloc_pages_node(nid, gfp | __GFP_ZERO, order);
+
+ if (unlikely(!page))
+ return NULL;
+
+ return page_address(page);
+}
+
+/**
+ * iommu_alloc_pages - allocate a zeroed page of a given order
+ * @gfp: buddy allocator flags
+ * @order: page order
+ *
+ * returns the virtual address of the allocated page
+ */
+static inline void *iommu_alloc_pages(gfp_t gfp, int order)
+{
+ struct page *page = __iommu_alloc_pages(gfp, order);
+
+ if (unlikely(!page))
+ return NULL;
+
+ return page_address(page);
+}
+
+/**
+ * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node.
+ * @nid: memory NUMA node id
+ * @gfp: buddy allocator flags
+ *
+ * returns the virtual address of the allocated page
+ */
+static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
+{
+ return iommu_alloc_pages_node(nid, gfp, 0);
+}
+
+/**
+ * iommu_alloc_page - allocate a zeroed page
+ * @gfp: buddy allocator flags
+ *
+ * returns the virtual address of the allocated page
+ */
+static inline void *iommu_alloc_page(gfp_t gfp)
+{
+ return iommu_alloc_pages(gfp, 0);
+}
+
+/**
+ * iommu_free_pages - free page of a given order
+ * @virt: virtual address of the page to be freed.
+ * @order: page order
+ */
+static inline void iommu_free_pages(void *virt, int order)
+{
+ if (!virt)
+ return;
+
+ __iommu_free_pages(virt_to_page(virt), order);
+}
+
+/**
+ * iommu_free_page - free page
+ * @virt: virtual address of the page to be freed.
+ */
+static inline void iommu_free_page(void *virt)
+{
+ iommu_free_pages(virt, 0);
+}
+
+/**
+ * iommu_put_pages_list - free a list of pages.
+ * @page: the head of the lru list to be freed.
+ *
+ * There are no locking requirement for these pages, as they are going to be
+ * put on a free list as soon as refcount reaches 0. Pages are put on this LRU
+ * list once they are removed from the IOMMU page tables. However, they can
+ * still be access through debugfs.
+ */
+static inline void iommu_put_pages_list(struct list_head *page)
+{
+ while (!list_empty(page)) {
+ struct page *p = list_entry(page->prev, struct page, lru);
+
+ list_del(&p->lru);
+ put_page(p);
+ }
+}
+
+#endif /* __IOMMU_PAGES_H */