diff mbox

[v7,04/14] x86: intel-iommu: add vt-d init test

Message ID 1480393550-12385-5-git-send-email-peterx@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Peter Xu Nov. 29, 2016, 4:25 a.m. UTC
Adding fundamental init test for Intel IOMMU. This includes basic
initialization of Intel IOMMU device, like DMAR (DMA Remapping),
IR (Interrupt Remapping), QI (Queue Invalidation), etc.

Further tests can use vtd_init() to initialize Intel IOMMU environment.

x86/unittests is updated to add this test.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 lib/x86/intel-iommu.c |  88 +++++++++++++++++++++++++++++++++++++
 lib/x86/intel-iommu.h | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/Makefile.x86_64   |   2 +
 x86/intel-iommu.c     |  27 ++++++++++++
 x86/unittests.cfg     |   7 +++
 5 files changed, 242 insertions(+)
 create mode 100644 lib/x86/intel-iommu.c
 create mode 100644 lib/x86/intel-iommu.h
 create mode 100644 x86/intel-iommu.c

Comments

Alexander Gordeev Dec. 1, 2016, 1:14 p.m. UTC | #1
On Tue, Nov 29, 2016 at 12:25:40PM +0800, Peter Xu wrote:
> Adding fundamental init test for Intel IOMMU. This includes basic
> initialization of Intel IOMMU device, like DMAR (DMA Remapping),
> IR (Interrupt Remapping), QI (Queue Invalidation), etc.
> 
> Further tests can use vtd_init() to initialize Intel IOMMU environment.
> 
> x86/unittests is updated to add this test.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  lib/x86/intel-iommu.c |  88 +++++++++++++++++++++++++++++++++++++
>  lib/x86/intel-iommu.h | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/Makefile.x86_64   |   2 +
>  x86/intel-iommu.c     |  27 ++++++++++++
>  x86/unittests.cfg     |   7 +++
>  5 files changed, 242 insertions(+)
>  create mode 100644 lib/x86/intel-iommu.c
>  create mode 100644 lib/x86/intel-iommu.h
>  create mode 100644 x86/intel-iommu.c
> 
> diff --git a/lib/x86/intel-iommu.c b/lib/x86/intel-iommu.c
> new file mode 100644
> index 0000000..9890f34
> --- /dev/null
> +++ b/lib/x86/intel-iommu.c
> @@ -0,0 +1,88 @@
> +/*
> + * Intel IOMMU APIs
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + */
> +
> +#include "intel-iommu.h"
> +
> +#define VTD_RTA_MASK  (PAGE_MASK)
> +#define VTD_IRTA_MASK (PAGE_MASK)
> +
> +static uint64_t vtd_root_table(void)
> +{
> +	/* No extend root table support yet */
> +	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
> +}
> +
> +static uint64_t vtd_ir_table(void)
> +{
> +	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
> +}
> +
> +static void vtd_gcmd_or(uint32_t cmd)
> +{
> +	uint32_t status;
> +
> +	/* We only allow set one bit for each time */
> +	assert(is_power_of_2(cmd));
> +
> +	status = vtd_readl(DMAR_GSTS_REG);
> +	vtd_writel(DMAR_GCMD_REG, status | cmd);
> +
> +	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
> +		/* One-shot bits are taking effect immediately */
> +		return;
> +	}
> +
> +	/* Make sure IOMMU handled our command request */
> +	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
> +		cpu_relax();

So I am hitting an endless loop here.
Probably, an IO delay + counter would help.

> +}
> +
> +static void vtd_dump_init_info(void)
> +{
> +	printf("VT-d version:   0x%x\n", vtd_readl(DMAR_VER_REG));
> +	printf("     cap:       0x%016lx\n", vtd_readq(DMAR_CAP_REG));
> +	printf("     ecap:      0x%016lx\n", vtd_readq(DMAR_ECAP_REG));
> +}
> +
> +static void vtd_setup_root_table(void)
> +{
> +	void *root = alloc_page();
> +
> +	memset(root, 0, PAGE_SIZE);

As it is a placeholder at this stage I would suggest less
meaningful fill pattern, i.e. all 1s or 0xdeadbeef may be.

> +	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
> +	vtd_gcmd_or(VTD_GCMD_ROOT);
> +	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
> +}
> +
> +static void vtd_setup_ir_table(void)
> +{
> +	void *root = alloc_page();
> +
> +	memset(root, 0, PAGE_SIZE);

Same here.

> +	/* 0xf stands for table size (2^(0xf+1) == 65536) */
> +	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
> +	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
> +	printf("IR table address: 0x%016lx\n", vtd_ir_table());
> +}
> +
> +void vtd_init(void)
> +{
> +	setup_vm();
> +	smp_init();

I would say these two are the generic env. setup, not VT-d
setup. Thus, would better outside of this scope, as far as
I am concerned.

> +
> +	vtd_dump_init_info();

Should we check an iommu is there indeed? My environment
returns all zeroes (which is wrong I guess) and attempts
to proceed.

> +	vtd_gcmd_or(VTD_GCMD_QI); /* Enable QI */
> +	vtd_setup_root_table();
> +	vtd_setup_ir_table();
> +	vtd_gcmd_or(VTD_GCMD_DMAR); /* Enable DMAR */
> +	vtd_gcmd_or(VTD_GCMD_IR);   /* Enable IR */
> +}
> diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h
> new file mode 100644
> index 0000000..fae9ae5
> --- /dev/null
> +++ b/lib/x86/intel-iommu.h
> @@ -0,0 +1,118 @@
> +/*
> + * Intel IOMMU header
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + *
> + * (From include/linux/intel-iommu.h)
> + */
> +
> +#ifndef __INTEL_IOMMU_H__
> +#define __INTEL_IOMMU_H__
> +
> +#include "libcflat.h"
> +#include "vm.h"
> +#include "isr.h"
> +#include "smp.h"
> +#include "desc.h"
> +#include "asm/io.h"
> +
> +#define Q35_HOST_BRIDGE_IOMMU_ADDR  0xfed90000ULL
> +
> +/*
> + * Intel IOMMU register specification
> + */
> +#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
> +#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
> +#define DMAR_CAP_REG_HI         0xc  /* High 32-bit of DMAR_CAP_REG */
> +#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
> +#define DMAR_ECAP_REG_HI        0X14
> +#define DMAR_GCMD_REG           0x18 /* Global command */
> +#define DMAR_GSTS_REG           0x1c /* Global status */
> +#define DMAR_RTADDR_REG         0x20 /* Root entry table */
> +#define DMAR_RTADDR_REG_HI      0X24
> +#define DMAR_CCMD_REG           0x28 /* Context command */
> +#define DMAR_CCMD_REG_HI        0x2c
> +#define DMAR_FSTS_REG           0x34 /* Fault status */
> +#define DMAR_FECTL_REG          0x38 /* Fault control */
> +#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data */
> +#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr */
> +#define DMAR_FEUADDR_REG        0x44 /* Upper address */
> +#define DMAR_AFLOG_REG          0x58 /* Advanced fault control */
> +#define DMAR_AFLOG_REG_HI       0X5c
> +#define DMAR_PMEN_REG           0x64 /* Enable protected memory region */
> +#define DMAR_PLMBASE_REG        0x68 /* PMRR low addr */
> +#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
> +#define DMAR_PHMBASE_REG        0x70 /* PMRR high base addr */
> +#define DMAR_PHMBASE_REG_HI     0X74
> +#define DMAR_PHMLIMIT_REG       0x78 /* PMRR high limit */
> +#define DMAR_PHMLIMIT_REG_HI    0x7c
> +#define DMAR_IQH_REG            0x80 /* Invalidation queue head */
> +#define DMAR_IQH_REG_HI         0X84
> +#define DMAR_IQT_REG            0x88 /* Invalidation queue tail */
> +#define DMAR_IQT_REG_HI         0X8c
> +#define DMAR_IQA_REG            0x90 /* Invalidation queue addr */
> +#define DMAR_IQA_REG_HI         0x94
> +#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
> +#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
> +#define DMAR_IRTA_REG_HI        0xbc
> +#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
> +#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
> +#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
> +#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
> +#define DMAR_PQH_REG            0xc0 /* Page request queue head */
> +#define DMAR_PQH_REG_HI         0xc4
> +#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
> +#define DMAR_PQT_REG_HI         0xcc
> +#define DMAR_PQA_REG            0xd0 /* Page request queue address */
> +#define DMAR_PQA_REG_HI         0xd4
> +#define DMAR_PRS_REG            0xdc /* Page request status */
> +#define DMAR_PECTL_REG          0xe0 /* Page request event control */
> +#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
> +#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
> +#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
> +#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
> +#define DMAR_MTRRCAP_REG_HI     0x104
> +#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
> +#define DMAR_MTRRDEF_REG_HI     0x10c
> +
> +#define VTD_GCMD_IR_TABLE       0x1000000
> +#define VTD_GCMD_IR             0x2000000
> +#define VTD_GCMD_QI             0x4000000
> +#define VTD_GCMD_WBF            0x8000000  /* Write Buffer Flush */
> +#define VTD_GCMD_SFL            0x20000000 /* Set Fault Log */
> +#define VTD_GCMD_ROOT           0x40000000
> +#define VTD_GCMD_DMAR           0x80000000
> +#define VTD_GCMD_ONE_SHOT_BITS  (VTD_GCMD_IR_TABLE | VTD_GCMD_WBF | \
> +				 VTD_GCMD_SFL | VTD_GCMD_ROOT)
> +
> +#define vtd_reg(reg) ((volatile void *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg))
> +
> +static inline void vtd_writel(unsigned int reg, uint32_t value)
> +{
> +	__raw_writel(value, vtd_reg(reg));
> +}
> +
> +static inline void vtd_writeq(unsigned int reg, uint64_t value)
> +{
> +	__raw_writeq(value, vtd_reg(reg));
> +}
> +
> +static inline uint32_t vtd_readl(unsigned int reg)
> +{
> +	return __raw_readl(vtd_reg(reg));
> +}
> +
> +static inline uint64_t vtd_readq(unsigned int reg)
> +{
> +	return __raw_readq(vtd_reg(reg));
> +}

The accessors above should use ioremap'ped pointer, not direct
access to Q35_HOST_BRIDGE_IOMMU_ADDR.

> +void vtd_init(void);
> +
> +#endif
> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> index f82492b..3e2821e 100644
> --- a/x86/Makefile.x86_64
> +++ b/x86/Makefile.x86_64
> @@ -4,6 +4,7 @@ ldarch = elf64-x86-64
>  CFLAGS += -mno-red-zone
>  
>  cflatobjs += lib/x86/setjmp64.o
> +cflatobjs += lib/x86/intel-iommu.o
>  
>  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>  	  $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
> @@ -14,6 +15,7 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>  tests += $(TEST_DIR)/svm.flat
>  tests += $(TEST_DIR)/vmx.flat
>  tests += $(TEST_DIR)/tscdeadline_latency.flat
> +tests += $(TEST_DIR)/intel-iommu.flat
>  
>  include $(TEST_DIR)/Makefile.common
>  
> diff --git a/x86/intel-iommu.c b/x86/intel-iommu.c
> new file mode 100644
> index 0000000..f247913
> --- /dev/null
> +++ b/x86/intel-iommu.c
> @@ -0,0 +1,27 @@
> +/*
> + * Intel IOMMU unit test.
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + */
> +
> +#include "intel-iommu.h"
> +
> +int main(int argc, char *argv[])
> +{
> +	vtd_init();
> +
> +	report("fault status check", vtd_readl(DMAR_FSTS_REG) == 0);
> +	report("QI enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_QI);
> +	report("DMAR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_ROOT);
> +	report("IR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR_TABLE);
> +	report("DMAR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_DMAR);
> +	report("IR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR);
> +
> +	return report_summary();
> +}
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index 23395c6..5413838 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -217,3 +217,10 @@ extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev
>  file = hyperv_clock.flat
>  smp = 2
>  extra_params = -cpu kvm64,hv_time
> +
> +[intel_iommu]
> +file = intel-iommu.flat
> +arch = x86_64
> +timeout = 30
> +smp = 4
> +extra_params = -M q35,kernel-irqchip=split -device intel-iommu,intremap=on,eim=off -device edu
> -- 
> 2.7.4
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Gordeev Dec. 1, 2016, 7:37 p.m. UTC | #2
On Thu, Dec 01, 2016 at 02:14:52PM +0100, Alexander Gordeev wrote:
> > +	vtd_dump_init_info();
> 
> Should we check an iommu is there indeed? My environment
> returns all zeroes (which is wrong I guess) and attempts
> to proceed.

I had an outdated QEMU, which was the reason for the missing
IOMMU and the endless loop I reported before. I guess it is
up to Andrew whether he wants this test to be fault-tolerant
in this regard.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Xu Dec. 2, 2016, 3:12 a.m. UTC | #3
On Thu, Dec 01, 2016 at 02:14:53PM +0100, Alexander Gordeev wrote:

[...]

> > +static void vtd_gcmd_or(uint32_t cmd)
> > +{
> > +	uint32_t status;
> > +
> > +	/* We only allow set one bit for each time */
> > +	assert(is_power_of_2(cmd));
> > +
> > +	status = vtd_readl(DMAR_GSTS_REG);
> > +	vtd_writel(DMAR_GCMD_REG, status | cmd);
> > +
> > +	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
> > +		/* One-shot bits are taking effect immediately */
> > +		return;
> > +	}
> > +
> > +	/* Make sure IOMMU handled our command request */
> > +	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
> > +		cpu_relax();
> 
> So I am hitting an endless loop here.
> Probably, an IO delay + counter would help.

Are you running with the following parameter appended?

  -device intel-iommu,intremap=on,eim=off

If so, IMHO either QEMU should quit with an error (for old QEMUs that
don't support the "eim" property, or that lack an Intel IOMMU
altogether), or the test should succeed if that's not the case.

> 
> > +}
> > +
> > +static void vtd_dump_init_info(void)
> > +{
> > +	printf("VT-d version:   0x%x\n", vtd_readl(DMAR_VER_REG));
> > +	printf("     cap:       0x%016lx\n", vtd_readq(DMAR_CAP_REG));
> > +	printf("     ecap:      0x%016lx\n", vtd_readq(DMAR_ECAP_REG));
> > +}
> > +
> > +static void vtd_setup_root_table(void)
> > +{
> > +	void *root = alloc_page();
> > +
> > +	memset(root, 0, PAGE_SIZE);
> 
> As it is a placeholder at this stage I would suggest less
> meaningful fill pattern, i.e. all 1s or 0xdeadbeef may be.

I was intended to have it all zero here. The data is root entries on
the page, and I need to make sure they are invalid root entries (e.g.,
"reserved" bits should be zeroed, and "present" bit should be
cleared).

> 
> > +	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
> > +	vtd_gcmd_or(VTD_GCMD_ROOT);
> > +	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
> > +}
> > +
> > +static void vtd_setup_ir_table(void)
> > +{
> > +	void *root = alloc_page();
> > +
> > +	memset(root, 0, PAGE_SIZE);
> 
> Same here.

Same here. I intended to use zeros to initialize all interrupt remapping
table entries.

> 
> > +	/* 0xf stands for table size (2^(0xf+1) == 65536) */
> > +	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
> > +	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
> > +	printf("IR table address: 0x%016lx\n", vtd_ir_table());
> > +}
> > +
> > +void vtd_init(void)
> > +{
> > +	setup_vm();
> > +	smp_init();
> 
> I would say these two are the generic env. setup, not VT-d
> setup. Thus, would better outside of this scope, as far as
> I am concerned.

Here I just want to make sure these things are initialized beforehand,
because the VT-d init code needs them.

I have tried posting patches to allow these init functions to be called
more than once, but Drew's opinion is that the caller should just
avoid calling them more than once. I think that's fair enough.

If the user calls these functions more than once, either:

- it'll have no side effect (e.g., smp_init())
- an assert() will fail, so the user will know he/she did something wrong
  (e.g., setup_vm())

So IMHO it does not hurt to have these two lines here.

> 
> > +
> > +	vtd_dump_init_info();
> 
> Should we check an iommu is there indeed? My environment
> returns all zeroes (which is wrong I guess) and attempts
> to proceed.

How about an assertion on the version? We just let it quit with error
if with a wrong version (in this case, version is all zeros).

[...]

> > +static inline uint64_t vtd_readq(unsigned int reg)
> > +{
> > +	return __raw_readq(vtd_reg(reg));
> > +}
> 
> The accessors above should use ioremap'ped pointer, not direct
> access to Q35_HOST_BRIDGE_IOMMU_ADDR.

Hmm... This issue applies to EDU device register accesses as well.
Will fix.

Here I think I can use phys_to_virt() directly since x86_64 has
already mapped the first 4G memory as 1:1. However for EDU register
access I'd better use ioremap() since it might be used outside x86_64
in the future.

Thanks,

-- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Gordeev Dec. 2, 2016, 7:50 a.m. UTC | #4
On Fri, Dec 02, 2016 at 11:12:34AM +0800, Peter Xu wrote:
> > > +static void vtd_setup_root_table(void)
> > > +{
> > > +	void *root = alloc_page();
> > > +
> > > +	memset(root, 0, PAGE_SIZE);
> > 
> > As it is a placeholder at this stage I would suggest less
> > meaningful fill pattern, i.e. all 1s or 0xdeadbeef may be.
> 
> I was intended to have it all zero here. The data is root entries on
> the page, and I need to make sure they are invalid root entries (e.g.,
> "reserved" bits should be zeroed, and "present" bit should be
> cleared).

I see. Thanks!

[...]

> > > +	vtd_dump_init_info();
> > 
> > Should we check an iommu is there indeed? My environment
> > returns all zeroes (which is wrong I guess) and attempts
> > to proceed.
> 
> How about an assertion on the version? We just let it quit with error
> if with a wrong version (in this case, version is all zeros).

Yep. I guess (!max && max >= min) should fit?

> [...]
> 
> > > +static inline uint64_t vtd_readq(unsigned int reg)
> > > +{
> > > +	return __raw_readq(vtd_reg(reg));
> > > +}
> > 
> > The accessors above should use ioremap'ped pointer, not direct
> > access to Q35_HOST_BRIDGE_IOMMU_ADDR.
> 
> Hmm... This issue applies to EDU device register accesses as well.
> Will fix.
> 
> Here I think I can use phys_to_virt() directly since x86_64 has
> already mapped the first 4G memory as 1:1. However for EDU register
> access I'd better use ioremap() since it might be used outside x86_64
> in the future.

Actually, I think for x86_64 you need to ioremap as well. Because (a)
accessing MMIO with no prior call to ioremap() is rather confusing
and (b) we never know what machine this code might run in, let's say
10 years ;)

> Thanks,
> 
> -- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Xu Dec. 5, 2016, 2:31 a.m. UTC | #5
On Fri, Dec 02, 2016 at 08:50:17AM +0100, Alexander Gordeev wrote:

[...]

> > > > +	vtd_dump_init_info();
> > > 
> > > Should we check an iommu is there indeed? My environment
> > > returns all zeroes (which is wrong I guess) and attempts
> > > to proceed.
> > 
> > How about an assertion on the version? We just let it quit with error
> > if with a wrong version (in this case, version is all zeros).
> 
> Yep. I guess (!max && max >= min) should fit?

From VT-d spec section 10.4.1, "MAX" is "Major Version number", and "MIN" is
"Minor Version number". So it looks like we could have max=1 and
min>1: the minor version increases while the major version stays the
same. To keep it simple, I'll use (max >= 1) directly.

> 
> > [...]
> > 
> > > > +static inline uint64_t vtd_readq(unsigned int reg)
> > > > +{
> > > > +	return __raw_readq(vtd_reg(reg));
> > > > +}
> > > 
> > > The accessors above should use ioremap'ped pointer, not direct
> > > access to Q35_HOST_BRIDGE_IOMMU_ADDR.
> > 
> > Hmm... This issue applies to EDU device register accesses as well.
> > Will fix.
> > 
> > Here I think I can use phys_to_virt() directly since x86_64 has
> > already mapped the first 4G memory as 1:1. However for EDU register
> > access I'd better use ioremap() since it might be used outside x86_64
> > in the future.
> 
> Actually, I think for x86_64 you need to ioremap as well. Because (a)
> accessing MMIO with no prior call to ioremap() is rather confusing
> and (b) we never know what machine this code might run in, let's say
> 10 years ;)

Ok. Let me fix. Thanks!

-- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/lib/x86/intel-iommu.c b/lib/x86/intel-iommu.c
new file mode 100644
index 0000000..9890f34
--- /dev/null
+++ b/lib/x86/intel-iommu.c
@@ -0,0 +1,88 @@ 
+/*
+ * Intel IOMMU APIs
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ */
+
+#include "intel-iommu.h"
+
+#define VTD_RTA_MASK  (PAGE_MASK)
+#define VTD_IRTA_MASK (PAGE_MASK)
+
+static uint64_t vtd_root_table(void)
+{
+	/* No extend root table support yet */
+	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
+}
+
+static uint64_t vtd_ir_table(void)
+{
+	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
+}
+
+static void vtd_gcmd_or(uint32_t cmd)
+{
+	uint32_t status;
+
+	/* We only allow set one bit for each time */
+	assert(is_power_of_2(cmd));
+
+	status = vtd_readl(DMAR_GSTS_REG);
+	vtd_writel(DMAR_GCMD_REG, status | cmd);
+
+	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
+		/* One-shot bits are taking effect immediately */
+		return;
+	}
+
+	/* Make sure IOMMU handled our command request */
+	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
+		cpu_relax();
+}
+
+static void vtd_dump_init_info(void)
+{
+	printf("VT-d version:   0x%x\n", vtd_readl(DMAR_VER_REG));
+	printf("     cap:       0x%016lx\n", vtd_readq(DMAR_CAP_REG));
+	printf("     ecap:      0x%016lx\n", vtd_readq(DMAR_ECAP_REG));
+}
+
+static void vtd_setup_root_table(void)
+{
+	void *root = alloc_page();
+
+	memset(root, 0, PAGE_SIZE);
+	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
+	vtd_gcmd_or(VTD_GCMD_ROOT);
+	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
+}
+
+static void vtd_setup_ir_table(void)
+{
+	void *root = alloc_page();
+
+	memset(root, 0, PAGE_SIZE);
+	/* 0xf stands for table size (2^(0xf+1) == 65536) */
+	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
+	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
+	printf("IR table address: 0x%016lx\n", vtd_ir_table());
+}
+
+void vtd_init(void)
+{
+	setup_vm();
+	smp_init();
+
+	vtd_dump_init_info();
+	vtd_gcmd_or(VTD_GCMD_QI); /* Enable QI */
+	vtd_setup_root_table();
+	vtd_setup_ir_table();
+	vtd_gcmd_or(VTD_GCMD_DMAR); /* Enable DMAR */
+	vtd_gcmd_or(VTD_GCMD_IR);   /* Enable IR */
+}
diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h
new file mode 100644
index 0000000..fae9ae5
--- /dev/null
+++ b/lib/x86/intel-iommu.h
@@ -0,0 +1,118 @@ 
+/*
+ * Intel IOMMU header
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ *
+ * (From include/linux/intel-iommu.h)
+ */
+
+#ifndef __INTEL_IOMMU_H__
+#define __INTEL_IOMMU_H__
+
+#include "libcflat.h"
+#include "vm.h"
+#include "isr.h"
+#include "smp.h"
+#include "desc.h"
+#include "asm/io.h"
+
+#define Q35_HOST_BRIDGE_IOMMU_ADDR  0xfed90000ULL
+
+/*
+ * Intel IOMMU register specification
+ */
+#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
+#define DMAR_CAP_REG_HI         0xc  /* High 32-bit of DMAR_CAP_REG */
+#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
+#define DMAR_ECAP_REG_HI        0X14
+#define DMAR_GCMD_REG           0x18 /* Global command */
+#define DMAR_GSTS_REG           0x1c /* Global status */
+#define DMAR_RTADDR_REG         0x20 /* Root entry table */
+#define DMAR_RTADDR_REG_HI      0X24
+#define DMAR_CCMD_REG           0x28 /* Context command */
+#define DMAR_CCMD_REG_HI        0x2c
+#define DMAR_FSTS_REG           0x34 /* Fault status */
+#define DMAR_FECTL_REG          0x38 /* Fault control */
+#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data */
+#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr */
+#define DMAR_FEUADDR_REG        0x44 /* Upper address */
+#define DMAR_AFLOG_REG          0x58 /* Advanced fault control */
+#define DMAR_AFLOG_REG_HI       0X5c
+#define DMAR_PMEN_REG           0x64 /* Enable protected memory region */
+#define DMAR_PLMBASE_REG        0x68 /* PMRR low addr */
+#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG        0x70 /* PMRR high base addr */
+#define DMAR_PHMBASE_REG_HI     0X74
+#define DMAR_PHMLIMIT_REG       0x78 /* PMRR high limit */
+#define DMAR_PHMLIMIT_REG_HI    0x7c
+#define DMAR_IQH_REG            0x80 /* Invalidation queue head */
+#define DMAR_IQH_REG_HI         0X84
+#define DMAR_IQT_REG            0x88 /* Invalidation queue tail */
+#define DMAR_IQT_REG_HI         0X8c
+#define DMAR_IQA_REG            0x90 /* Invalidation queue addr */
+#define DMAR_IQA_REG_HI         0x94
+#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
+#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
+#define DMAR_IRTA_REG_HI        0xbc
+#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
+#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
+#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
+#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
+#define DMAR_PQH_REG            0xc0 /* Page request queue head */
+#define DMAR_PQH_REG_HI         0xc4
+#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
+#define DMAR_PQT_REG_HI         0xcc
+#define DMAR_PQA_REG            0xd0 /* Page request queue address */
+#define DMAR_PQA_REG_HI         0xd4
+#define DMAR_PRS_REG            0xdc /* Page request status */
+#define DMAR_PECTL_REG          0xe0 /* Page request event control */
+#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
+#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
+#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
+#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
+#define DMAR_MTRRCAP_REG_HI     0x104
+#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
+#define DMAR_MTRRDEF_REG_HI     0x10c
+
+#define VTD_GCMD_IR_TABLE       0x1000000
+#define VTD_GCMD_IR             0x2000000
+#define VTD_GCMD_QI             0x4000000
+#define VTD_GCMD_WBF            0x8000000  /* Write Buffer Flush */
+#define VTD_GCMD_SFL            0x20000000 /* Set Fault Log */
+#define VTD_GCMD_ROOT           0x40000000
+#define VTD_GCMD_DMAR           0x80000000
+#define VTD_GCMD_ONE_SHOT_BITS  (VTD_GCMD_IR_TABLE | VTD_GCMD_WBF | \
+				 VTD_GCMD_SFL | VTD_GCMD_ROOT)
+
+#define vtd_reg(reg) ((volatile void *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg))
+
+static inline void vtd_writel(unsigned int reg, uint32_t value)
+{
+	__raw_writel(value, vtd_reg(reg));
+}
+
+static inline void vtd_writeq(unsigned int reg, uint64_t value)
+{
+	__raw_writeq(value, vtd_reg(reg));
+}
+
+static inline uint32_t vtd_readl(unsigned int reg)
+{
+	return __raw_readl(vtd_reg(reg));
+}
+
+static inline uint64_t vtd_readq(unsigned int reg)
+{
+	return __raw_readq(vtd_reg(reg));
+}
+
+void vtd_init(void);
+
+#endif
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index f82492b..3e2821e 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -4,6 +4,7 @@  ldarch = elf64-x86-64
 CFLAGS += -mno-red-zone
 
 cflatobjs += lib/x86/setjmp64.o
+cflatobjs += lib/x86/intel-iommu.o
 
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 	  $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
@@ -14,6 +15,7 @@  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 tests += $(TEST_DIR)/svm.flat
 tests += $(TEST_DIR)/vmx.flat
 tests += $(TEST_DIR)/tscdeadline_latency.flat
+tests += $(TEST_DIR)/intel-iommu.flat
 
 include $(TEST_DIR)/Makefile.common
 
diff --git a/x86/intel-iommu.c b/x86/intel-iommu.c
new file mode 100644
index 0000000..f247913
--- /dev/null
+++ b/x86/intel-iommu.c
@@ -0,0 +1,27 @@ 
+/*
+ * Intel IOMMU unit test.
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ */
+
+#include "intel-iommu.h"
+
+int main(int argc, char *argv[])
+{
+	vtd_init();
+
+	report("fault status check", vtd_readl(DMAR_FSTS_REG) == 0);
+	report("QI enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_QI);
+	report("DMAR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_ROOT);
+	report("IR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR_TABLE);
+	report("DMAR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_DMAR);
+	report("IR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR);
+
+	return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 23395c6..5413838 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -217,3 +217,10 @@  extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev
 file = hyperv_clock.flat
 smp = 2
 extra_params = -cpu kvm64,hv_time
+
+[intel_iommu]
+file = intel-iommu.flat
+arch = x86_64
+timeout = 30
+smp = 4
+extra_params = -M q35,kernel-irqchip=split -device intel-iommu,intremap=on,eim=off -device edu