diff mbox

[kvm-unit-tests,03/14] x86: intel-iommu: add vt-d init test

Message ID 1476448852-30062-4-git-send-email-peterx@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Peter Xu Oct. 14, 2016, 12:40 p.m. UTC
Adding fundamental init test for Intel IOMMU. This includes basic
initialization of Intel IOMMU device, like DMAR (DMA Remapping),
IR (Interrupt Remapping), QI (Queue Invalidation), etc.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 lib/x86/intel-iommu.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/x86/intel-iommu.h | 106 ++++++++++++++++++++++++++++++++++++++
 x86/Makefile.x86_64   |   2 +
 x86/intel-iommu.c     |  41 +++++++++++++++
 4 files changed, 287 insertions(+)
 create mode 100644 lib/x86/intel-iommu.c
 create mode 100644 lib/x86/intel-iommu.h
 create mode 100644 x86/intel-iommu.c

Comments

Andrew Jones Oct. 20, 2016, 9:30 a.m. UTC | #1
On Fri, Oct 14, 2016 at 08:40:41PM +0800, Peter Xu wrote:
> Adding fundamental init test for Intel IOMMU. This includes basic
> initialization of Intel IOMMU device, like DMAR (DMA Remapping),
> IR (Interrupt Remapping), QI (Queue Invalidation), etc.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  lib/x86/intel-iommu.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/x86/intel-iommu.h | 106 ++++++++++++++++++++++++++++++++++++++
>  x86/Makefile.x86_64   |   2 +
>  x86/intel-iommu.c     |  41 +++++++++++++++
>  4 files changed, 287 insertions(+)
>  create mode 100644 lib/x86/intel-iommu.c
>  create mode 100644 lib/x86/intel-iommu.h
>  create mode 100644 x86/intel-iommu.c
> 
> diff --git a/lib/x86/intel-iommu.c b/lib/x86/intel-iommu.c
> new file mode 100644
> index 0000000..9f55394
> --- /dev/null
> +++ b/lib/x86/intel-iommu.c
> @@ -0,0 +1,138 @@
> +/*
> + * Intel IOMMU APIs
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + */
> +
> +#include "intel-iommu.h"
> +
> +void vtd_reg_write_4(unsigned int reg, uint32_t value)
> +{
> +	*(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> +}
> +
> +void vtd_reg_write_8(unsigned int reg, uint64_t value)
> +{
> +	*(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> +}
> +
> +uint32_t vtd_reg_read_4(unsigned int reg)
> +{
> +	return *(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> +}
> +
> +uint64_t vtd_reg_read_8(unsigned int reg)
> +{
> +	return *(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> +}

The above could be static inlines in intel-iommu.h. How about
calling them vtd_readl, vtd_readq, vtd_writel, vtd_writeq? That
would make them more consistent with the readl, readq... in
lib/asm-generic/io.h

> +
> +uint32_t vtd_version(void)
> +{
> +	return vtd_reg_read_4(DMAR_VER_REG);
> +}
> +
> +uint64_t vtd_cap(void)
> +{
> +	return vtd_reg_read_8(DMAR_CAP_REG);
> +}
> +
> +uint64_t vtd_ecap(void)
> +{
> +	return vtd_reg_read_8(DMAR_ECAP_REG);
> +}
> +
> +uint32_t vtd_status(void)
> +{
> +	return vtd_reg_read_4(DMAR_GSTS_REG);
> +}
> +
> +uint32_t vtd_fault_status(void)
> +{
> +	return vtd_reg_read_4(DMAR_FSTS_REG);
> +}
> +
> +uint64_t vtd_root_table(void)
> +{
> +	/* No extend root table support yet */
> +	return vtd_reg_read_8(DMAR_RTADDR_REG);
> +}

I'd drop all the above and use vtd_readl/q(DMAR_CAP_REG) directly,
the wrappers don't add anything and take the unit test writer
further away from the register names that they should be more
familiar with.

> +
> +uint64_t vtd_ir_table(void)
> +{
> +	return vtd_reg_read_8(DMAR_IRTA_REG) & 0xfffffffffffff000;
> +}

This one has a mask, so makes sense to keep the wrapper. But the
mask looks like PAGE_MASK to me. Why not use that? Or even drop
this wrapper and just use PAGE_MASK when necessary directly...

> +
> +void vtd_gcmd_or(uint32_t cmd)
> +{
> +	uint32_t status = vtd_status();
> +	/*
> +	 * Logically, we need to read DMAR_GSTS_REG until IOMMU
> +	 * handles the write request. However for QEMU/KVM, this write
> +	 * is always sync, so when this returns, we should be sure

should always be in sync. Sounds like something to unit test :-)

> +	 * that the GCMD write is done.
> +	 */
> +	vtd_reg_write_4(DMAR_GCMD_REG, status | cmd);
> +}
> +
> +void vtd_dump_init_info(void)
> +{
> +	printf("VT-d version:   0x%x\n", vtd_version());
> +	printf("     cap:       0x%016lx\n", vtd_cap());
> +	printf("     ecap:      0x%016lx\n", vtd_ecap());
> +}
> +
> +void vtd_enable_qi(void)
> +{
> +	vtd_gcmd_or(VTD_GCMD_QI);
> +}

I'd drop this wrapper and use a comment where it's needed like

 vtd_gcmd_or(VTD_GCMD_QI); /* enable QI */

(By now you see a theme. Keep wrappers to a minimal in order
 to keep the unit test writer closer to the spec.)

> +
> +void vtd_setup_root_table(void)
> +{
> +	void *root = alloc_page();
> +
> +	memset(root, 0, PAGE_SIZE);
> +	vtd_reg_write_8(DMAR_RTADDR_REG, virt_to_phys(root));
> +	vtd_gcmd_or(VTD_GCMD_ROOT);
> +	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
> +}
> +
> +void vtd_setup_ir_table(void)
> +{
> +	void *root = alloc_page();
> +
> +	memset(root, 0, PAGE_SIZE);
> +	vtd_reg_write_8(DMAR_IRTA_REG, virt_to_phys(root));
> +	/* 0xf stands for table size (2^(0xf+1) == 65536) */
> +	vtd_gcmd_or(VTD_GCMD_IR_TABLE | 0xf);
> +	printf("IR table address: 0x%016lx\n", vtd_ir_table());
> +}
> +
> +void vtd_enable_dmar(void)
> +{
> +	vtd_gcmd_or(VTD_GCMD_DMAR);
> +}
> +
> +void vtd_enable_ir(void)
> +{
> +	vtd_gcmd_or(VTD_GCMD_IR);
> +}

More wrappers to drop.

> +
> +void vtd_init(void)
> +{
> +	setup_vm();
> +	smp_init();
> +	setup_idt();
> +
> +	vtd_dump_init_info();
> +	vtd_enable_qi();
> +	vtd_setup_root_table();
> +	vtd_setup_ir_table();
> +	vtd_enable_dmar();
> +	vtd_enable_ir();

I'd open code all the above here. The functions aren't really
gaining us anything that comments can't do for us.

> +}
> diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h
> new file mode 100644
> index 0000000..0f687d1
> --- /dev/null
> +++ b/lib/x86/intel-iommu.h
> @@ -0,0 +1,106 @@
> +/*
> + * Intel IOMMU header

Please point out the kernel source these defines come from,
include/linux/intel-iommu.h

> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + */
> +
> +#ifndef __INTEL_IOMMU_H__
> +#define __INTEL_IOMMU_H__
> +
> +#include "libcflat.h"
> +#include "vm.h"
> +#include "isr.h"
> +#include "smp.h"
> +#include "desc.h"
> +
> +#define Q35_HOST_BRIDGE_IOMMU_ADDR  0xfed90000ULL
> +
> +/*
> + * Intel IOMMU register specification
> + */
> +#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
> +#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
> +#define DMAR_CAP_REG_HI         0xc  /* High 32-bit of DMAR_CAP_REG */
> +#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
> +#define DMAR_ECAP_REG_HI        0X14
> +#define DMAR_GCMD_REG           0x18 /* Global command */
> +#define DMAR_GSTS_REG           0x1c /* Global status */
> +#define DMAR_RTADDR_REG         0x20 /* Root entry table */
> +#define DMAR_RTADDR_REG_HI      0X24
> +#define DMAR_CCMD_REG           0x28 /* Context command */
> +#define DMAR_CCMD_REG_HI        0x2c
> +#define DMAR_FSTS_REG           0x34 /* Fault status */
> +#define DMAR_FECTL_REG          0x38 /* Fault control */
> +#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data */
> +#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr */
> +#define DMAR_FEUADDR_REG        0x44 /* Upper address */
> +#define DMAR_AFLOG_REG          0x58 /* Advanced fault control */
> +#define DMAR_AFLOG_REG_HI       0X5c
> +#define DMAR_PMEN_REG           0x64 /* Enable protected memory region */
> +#define DMAR_PLMBASE_REG        0x68 /* PMRR low addr */
> +#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
> +#define DMAR_PHMBASE_REG        0x70 /* PMRR high base addr */
> +#define DMAR_PHMBASE_REG_HI     0X74
> +#define DMAR_PHMLIMIT_REG       0x78 /* PMRR high limit */
> +#define DMAR_PHMLIMIT_REG_HI    0x7c
> +#define DMAR_IQH_REG            0x80 /* Invalidation queue head */
> +#define DMAR_IQH_REG_HI         0X84
> +#define DMAR_IQT_REG            0x88 /* Invalidation queue tail */
> +#define DMAR_IQT_REG_HI         0X8c
> +#define DMAR_IQA_REG            0x90 /* Invalidation queue addr */
> +#define DMAR_IQA_REG_HI         0x94
> +#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
> +#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
> +#define DMAR_IRTA_REG_HI        0xbc
> +#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
> +#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
> +#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
> +#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
> +#define DMAR_PQH_REG            0xc0 /* Page request queue head */
> +#define DMAR_PQH_REG_HI         0xc4
> +#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
> +#define DMAR_PQT_REG_HI         0xcc
> +#define DMAR_PQA_REG            0xd0 /* Page request queue address */
> +#define DMAR_PQA_REG_HI         0xd4
> +#define DMAR_PRS_REG            0xdc /* Page request status */
> +#define DMAR_PECTL_REG          0xe0 /* Page request event control */
> +#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
> +#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
> +#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
> +#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
> +#define DMAR_MTRRCAP_REG_HI     0x104
> +#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
> +#define DMAR_MTRRDEF_REG_HI     0x10c
> +
> +#define VTD_GCMD_IR_TABLE       (0x1000000)
> +#define VTD_GCMD_IR             (0x2000000)
> +#define VTD_GCMD_QI             (0x4000000)
> +#define VTD_GCMD_ROOT           (0x40000000)
> +#define VTD_GCMD_DMAR           (0x80000000)
> +
> +void vtd_reg_write_4(unsigned int reg, uint32_t value);
> +void vtd_reg_write_8(unsigned int reg, uint64_t value);
> +uint32_t vtd_reg_read_4(unsigned int reg);
> +uint64_t vtd_reg_read_8(unsigned int reg);
> +uint32_t vtd_version(void);
> +uint64_t vtd_cap(void);
> +uint64_t vtd_ecap(void);
> +uint32_t vtd_status(void);
> +uint32_t vtd_fault_status(void);
> +uint64_t vtd_root_table(void);
> +uint64_t vtd_ir_table(void);
> +void vtd_dump_init_info(void);
> +void vtd_enable_qi(void);
> +void vtd_setup_root_table(void);
> +void vtd_setup_ir_table(void);
> +void vtd_enable_dmar(void);
> +void vtd_enable_ir(void);
> +void vtd_init(void);
> +
> +#endif
> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> index e166911..a8e9445 100644
> --- a/x86/Makefile.x86_64
> +++ b/x86/Makefile.x86_64
> @@ -4,6 +4,7 @@ ldarch = elf64-x86-64
>  CFLAGS += -mno-red-zone
>  
>  cflatobjs += lib/x86/setjmp64.o
> +cflatobjs += lib/x86/intel-iommu.o
>  
>  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>  	  $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
> @@ -14,6 +15,7 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>  tests += $(TEST_DIR)/svm.flat
>  tests += $(TEST_DIR)/vmx.flat
>  tests += $(TEST_DIR)/tscdeadline_latency.flat
> +tests += $(TEST_DIR)/intel-iommu.flat
>  
>  include $(TEST_DIR)/Makefile.common
>  
> diff --git a/x86/intel-iommu.c b/x86/intel-iommu.c
> new file mode 100644
> index 0000000..60d512a
> --- /dev/null
> +++ b/x86/intel-iommu.c
> @@ -0,0 +1,41 @@
> +/*
> + * Intel IOMMU unit test.
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + */
> +
> +#include "intel-iommu.h"
> +
> +int main(int argc, char *argv[])
> +{
> +	setup_vm();
> +	smp_init();
> +	setup_idt();
> +
> +	vtd_dump_init_info();
> +	report("init status check", vtd_status() == 0);
> +	report("fault status check", vtd_fault_status() == 0);
> +
> +	vtd_enable_qi();
> +	report("QI enablement", vtd_status() | VTD_GCMD_QI);
> +
> +	vtd_setup_root_table();
> +	report("DMAR table setup", vtd_status() | VTD_GCMD_ROOT);
> +
> +	vtd_setup_ir_table();
> +	report("IR table setup", vtd_status() | VTD_GCMD_IR_TABLE);

The above three report() calls will always pass since you're using OR.

> +
> +	vtd_enable_dmar();
> +	report("DMAR enablement", vtd_status() & VTD_GCMD_DMAR);
> +
> +	vtd_enable_ir();
> +	report("IR enablement", vtd_status() & VTD_GCMD_IR);

You've reproduced vtd_init() here. Why not just call it and then
do a sequence of report() calls where you check each register
has the expected value? I.e.

 vtd_init();
 status = vtd_readl(DMAR_GSTS_REG);
 report("QI enablement", status & VTD_GCMD_QI);
 report("DMAR table setup", status & VTD_GCMD_ROOT);
 ...

Would that not work with this device?

> +
> +	return report_summary();
> +}
> -- 
> 2.7.4
> 

Thanks,
drew
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Xu Oct. 21, 2016, 9:52 a.m. UTC | #2
On Thu, Oct 20, 2016 at 11:30:30AM +0200, Andrew Jones wrote:

[...]

> > +void vtd_reg_write_4(unsigned int reg, uint32_t value)
> > +{
> > +	*(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> > +}
> > +
> > +void vtd_reg_write_8(unsigned int reg, uint64_t value)
> > +{
> > +	*(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> > +}
> > +
> > +uint32_t vtd_reg_read_4(unsigned int reg)
> > +{
> > +	return *(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> > +}
> > +
> > +uint64_t vtd_reg_read_8(unsigned int reg)
> > +{
> > +	return *(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> > +}
> 
> The above could be static inlines in intel-iommu.h. How about
> calling them vtd_readl, vtd_readq, vtd_writel, vtd_writeq? That
> would make them more consistent with the readl, readq... in
> lib/asm-generic/io.h

Sure! Will fix.

> 
> > +
> > +uint32_t vtd_version(void)
> > +{
> > +	return vtd_reg_read_4(DMAR_VER_REG);
> > +}
> > +
> > +uint64_t vtd_cap(void)
> > +{
> > +	return vtd_reg_read_8(DMAR_CAP_REG);
> > +}
> > +
> > +uint64_t vtd_ecap(void)
> > +{
> > +	return vtd_reg_read_8(DMAR_ECAP_REG);
> > +}
> > +
> > +uint32_t vtd_status(void)
> > +{
> > +	return vtd_reg_read_4(DMAR_GSTS_REG);
> > +}
> > +
> > +uint32_t vtd_fault_status(void)
> > +{
> > +	return vtd_reg_read_4(DMAR_FSTS_REG);
> > +}
> > +
> > +uint64_t vtd_root_table(void)
> > +{
> > +	/* No extend root table support yet */
> > +	return vtd_reg_read_8(DMAR_RTADDR_REG);
> > +}
> 
> I'd drop all the above and use vtd_readl/q(DMAR_CAP_REG) directly,
> the wrappers don't add anything and take the unit test writer
> further away from the register names that they should be more
> familiar with.

Here I used wrapper since I don't want to remember all the register
names, and (especially) I don't want to remember the size of the
registers.

For example, when I want to read ECAP register, calling vtd_ecap()
will let me make sure the return value is 8 bytes (compiler will help
to make sure of it, and I can simply know it from seeing the return
type of the wrapper function, which is uint64_t), and I don't need to
bother whether I should use vtd_readl() or vtd_readq() for it. If I
don't have vtd_ecap() wrapper, I can't see the length from the
register name only, and I need to check the spec every time, or with
comment like:

  #define DMAR_ECAP_REG XXX /* 8 bytes */

In that case, I'd prefer with a wrapper for the registers (maybe I can
move the functions into header files as well, with inline static).

> 
> > +
> > +uint64_t vtd_ir_table(void)
> > +{
> > +	return vtd_reg_read_8(DMAR_IRTA_REG) & 0xfffffffffffff000;
> > +}
> 
> This one has a mask, so makes sense to keep the wrapper. But the
> mask looks like PAGE_MASK to me. Why not use that? Or even drop
> this wrapper and just use PAGE_MASK when necessary directly...

The last 12 bits have other means (I didn't check it here, it's
explained in chap 10.4.29 in vt-d spec in case anyone is interested),
so it's just coincident that it looks like a page mask. However, I
think I can use PAGE_MASK here.

> 
> > +
> > +void vtd_gcmd_or(uint32_t cmd)
> > +{
> > +	uint32_t status = vtd_status();
> > +	/*
> > +	 * Logically, we need to read DMAR_GSTS_REG until IOMMU
> > +	 * handles the write request. However for QEMU/KVM, this write
> > +	 * is always sync, so when this returns, we should be sure
> 
> should always be in sync. Sounds like something to unit test :-)

I am not sure whether I got the point... but real hardware should be
async, so I don't know whether the code can work on real hardware. For
QEMU, it is sync, so it's safe for kvm-unit-tests.

> 
> > +	 * that the GCMD write is done.
> > +	 */
> > +	vtd_reg_write_4(DMAR_GCMD_REG, status | cmd);
> > +}
> > +
> > +void vtd_dump_init_info(void)
> > +{
> > +	printf("VT-d version:   0x%x\n", vtd_version());
> > +	printf("     cap:       0x%016lx\n", vtd_cap());
> > +	printf("     ecap:      0x%016lx\n", vtd_ecap());
> > +}
> > +
> > +void vtd_enable_qi(void)
> > +{
> > +	vtd_gcmd_or(VTD_GCMD_QI);
> > +}
> 
> I'd drop this wrapper and use a comment where it's needed like
> 
>  vtd_gcmd_or(VTD_GCMD_QI); /* enable QI */
> 
> (By now you see a theme. Keep wrappers to a minimal in order
>  to keep the unit test writer closer to the spec.)

Yeah. Besides the case that I mentioned above (which I am still not
quite sure), I agree that I can use register names directly in most
cases. Will fix.

> 
> > +
> > +void vtd_setup_root_table(void)
> > +{
> > +	void *root = alloc_page();
> > +
> > +	memset(root, 0, PAGE_SIZE);
> > +	vtd_reg_write_8(DMAR_RTADDR_REG, virt_to_phys(root));
> > +	vtd_gcmd_or(VTD_GCMD_ROOT);
> > +	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
> > +}
> > +
> > +void vtd_setup_ir_table(void)
> > +{
> > +	void *root = alloc_page();
> > +
> > +	memset(root, 0, PAGE_SIZE);
> > +	vtd_reg_write_8(DMAR_IRTA_REG, virt_to_phys(root));
> > +	/* 0xf stands for table size (2^(0xf+1) == 65536) */
> > +	vtd_gcmd_or(VTD_GCMD_IR_TABLE | 0xf);
> > +	printf("IR table address: 0x%016lx\n", vtd_ir_table());
> > +}
> > +
> > +void vtd_enable_dmar(void)
> > +{
> > +	vtd_gcmd_or(VTD_GCMD_DMAR);
> > +}
> > +
> > +void vtd_enable_ir(void)
> > +{
> > +	vtd_gcmd_or(VTD_GCMD_IR);
> > +}
> 
> More wrappers to drop.

Ok.

> 
> > +
> > +void vtd_init(void)
> > +{
> > +	setup_vm();
> > +	smp_init();
> > +	setup_idt();
> > +
> > +	vtd_dump_init_info();
> > +	vtd_enable_qi();
> > +	vtd_setup_root_table();
> > +	vtd_setup_ir_table();
> > +	vtd_enable_dmar();
> > +	vtd_enable_ir();
> 
> I'd open code all the above here. The functions aren't really
> gaining us anything that comments can't do for us.

Will fix.

> 
> > +}
> > diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h
> > new file mode 100644
> > index 0000000..0f687d1
> > --- /dev/null
> > +++ b/lib/x86/intel-iommu.h
> > @@ -0,0 +1,106 @@
> > +/*
> > + * Intel IOMMU header
> 
> Please point out the kernel source these defines come from,
> include/linux/intel-iommu.h

Will add.

[...]

> > +int main(int argc, char *argv[])
> > +{
> > +	setup_vm();
> > +	smp_init();
> > +	setup_idt();
> > +
> > +	vtd_dump_init_info();
> > +	report("init status check", vtd_status() == 0);
> > +	report("fault status check", vtd_fault_status() == 0);
> > +
> > +	vtd_enable_qi();
> > +	report("QI enablement", vtd_status() | VTD_GCMD_QI);
> > +
> > +	vtd_setup_root_table();
> > +	report("DMAR table setup", vtd_status() | VTD_GCMD_ROOT);
> > +
> > +	vtd_setup_ir_table();
> > +	report("IR table setup", vtd_status() | VTD_GCMD_IR_TABLE);
> 
> The above three report() calls will always pass since you're using OR.

Oops... Thanks. :)

> 
> > +
> > +	vtd_enable_dmar();
> > +	report("DMAR enablement", vtd_status() & VTD_GCMD_DMAR);
> > +
> > +	vtd_enable_ir();
> > +	report("IR enablement", vtd_status() & VTD_GCMD_IR);
> 
> You've reproduced vtd_init() here. Why not just call it and then
> do a sequence of report() calls where you check each register
> has the expected value? I.e.
> 
>  vtd_init();
>  status = vtd_readl(DMAR_GSTS_REG);
>  report("QI enablement", status & VTD_GCMD_QI);
>  report("DMAR table setup", status & VTD_GCMD_ROOT);
>  ...
> 
> Would that not work with this device?

I think it should work. Will take your advice.

Thanks!

-- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Jones Oct. 21, 2016, 12:18 p.m. UTC | #3
On Fri, Oct 21, 2016 at 05:52:50PM +0800, Peter Xu wrote:
> On Thu, Oct 20, 2016 at 11:30:30AM +0200, Andrew Jones wrote:
> 
> [...]
> 
> > > +void vtd_reg_write_4(unsigned int reg, uint32_t value)
> > > +{
> > > +	*(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> > > +}
> > > +
> > > +void vtd_reg_write_8(unsigned int reg, uint64_t value)
> > > +{
> > > +	*(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> > > +}
> > > +
> > > +uint32_t vtd_reg_read_4(unsigned int reg)
> > > +{
> > > +	return *(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> > > +}
> > > +
> > > +uint64_t vtd_reg_read_8(unsigned int reg)
> > > +{
> > > +	return *(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> > > +}
> > 
> > The above could be static inlines in intel-iommu.h. How about
> > calling them vtd_readl, vtd_readq, vtd_writel, vtd_writeq? That
> > would make them more consistent with the readl, readq... in
> > lib/asm-generic/io.h
> 
> Sure! Will fix.
> 
> > 
> > > +
> > > +uint32_t vtd_version(void)
> > > +{
> > > +	return vtd_reg_read_4(DMAR_VER_REG);
> > > +}
> > > +
> > > +uint64_t vtd_cap(void)
> > > +{
> > > +	return vtd_reg_read_8(DMAR_CAP_REG);
> > > +}
> > > +
> > > +uint64_t vtd_ecap(void)
> > > +{
> > > +	return vtd_reg_read_8(DMAR_ECAP_REG);
> > > +}
> > > +
> > > +uint32_t vtd_status(void)
> > > +{
> > > +	return vtd_reg_read_4(DMAR_GSTS_REG);
> > > +}
> > > +
> > > +uint32_t vtd_fault_status(void)
> > > +{
> > > +	return vtd_reg_read_4(DMAR_FSTS_REG);
> > > +}
> > > +
> > > +uint64_t vtd_root_table(void)
> > > +{
> > > +	/* No extend root table support yet */
> > > +	return vtd_reg_read_8(DMAR_RTADDR_REG);
> > > +}
> > 
> > I'd drop all the above and use vtd_readl/q(DMAR_CAP_REG) directly,
> > the wrappers don't add anything and take the unit test writer
> > further away from the register names that they should be more
> > familiar with.
> 
> Here I used wrapper since I don't want to remember all the register
> names, and (especially) I don't want to remember the size of the
> registers.

You won't. You'll have the spec open reading them and then use them
in the code. Reviewers will also open the spec and then search for
them in the code. Nobody will memorize them, they'll just match them.

> 
> For example, when I want to read ECAP register, calling vtd_ecap()
> will let me make sure the return value is 8 bytes (compiler will help
> to make sure of it, and I can simply know it from seeing the return
> type of the wrapper function, which is uint64_t), and I don't need to
> bother whether I should use vtd_readl() or vtd_readq() for it. If I
> don't have vtd_ecap() wrapper, I can't see the length from the
> register name only, and I need to check the spec every time, or with
> comment like:
> 
>   #define DMAR_ECAP_REG XXX /* 8 bytes */

That comment sounds good to me :-)

> 
> In that case, I'd prefer with a wrapper for the registers (maybe I can
> move the functions into header files as well, with inline static).
> 
> > 
> > > +
> > > +uint64_t vtd_ir_table(void)
> > > +{
> > > +	return vtd_reg_read_8(DMAR_IRTA_REG) & 0xfffffffffffff000;
> > > +}
> > 
> > This one has a mask, so makes sense to keep the wrapper. But the
> > mask looks like PAGE_MASK to me. Why not use that? Or even drop
> > this wrapper and just use PAGE_MASK when necessary directly...
> 
> The last 12 bits have other means (I didn't check it here, it's
> explained in chap 10.4.29 in vt-d spec in case anyone is interested),
> so it's just coincident that it looks like a page mask. However, I
> think I can use PAGE_MASK here.

Yeah, I have no idea bout the mask, /me didn't open the spec. Use
whatever is correct, maybe defining a new mask if this mask is not
always == PAGE_MASK, or even if it is, e.g.

 #define MY_MASK PAGE_MASK

> 
> > 
> > > +
> > > +void vtd_gcmd_or(uint32_t cmd)
> > > +{
> > > +	uint32_t status = vtd_status();
> > > +	/*
> > > +	 * Logically, we need to read DMAR_GSTS_REG until IOMMU
> > > +	 * handles the write request. However for QEMU/KVM, this write
> > > +	 * is always sync, so when this returns, we should be sure
> > 
> > should always be in sync. Sounds like something to unit test :-)
> 
> I am not sure whether I got the point... but real hardware should be
> async, so I don't know whether the code can work on real hardware. For
> QEMU, it is sync, so it's safe for kvm-unit-tests.

I just mean if there are assumptions on how something should work,
then either it should be confirmed and asserted, or even tested
with an explicit try-report unit test.

> 
> > 
> > > +	 * that the GCMD write is done.
> > > +	 */
> > > +	vtd_reg_write_4(DMAR_GCMD_REG, status | cmd);
> > > +}
> > > +
> > > +void vtd_dump_init_info(void)
> > > +{
> > > +	printf("VT-d version:   0x%x\n", vtd_version());
> > > +	printf("     cap:       0x%016lx\n", vtd_cap());
> > > +	printf("     ecap:      0x%016lx\n", vtd_ecap());
> > > +}
> > > +
> > > +void vtd_enable_qi(void)
> > > +{
> > > +	vtd_gcmd_or(VTD_GCMD_QI);
> > > +}
> > 
> > I'd drop this wrapper and use a comment where it's needed like
> > 
> >  vtd_gcmd_or(VTD_GCMD_QI); /* enable QI */
> > 
> > (By now you see a theme. Keep wrappers to a minimal in order
> >  to keep the unit test writer closer to the spec.)
> 
> Yeah. Besides the case that I mentioned above (which I am still not
> quite sure), I agree that I can use register names directly in most
> cases. Will fix.
> 
> > 
> > > +
> > > +void vtd_setup_root_table(void)
> > > +{
> > > +	void *root = alloc_page();
> > > +
> > > +	memset(root, 0, PAGE_SIZE);
> > > +	vtd_reg_write_8(DMAR_RTADDR_REG, virt_to_phys(root));
> > > +	vtd_gcmd_or(VTD_GCMD_ROOT);
> > > +	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
> > > +}
> > > +
> > > +void vtd_setup_ir_table(void)
> > > +{
> > > +	void *root = alloc_page();
> > > +
> > > +	memset(root, 0, PAGE_SIZE);
> > > +	vtd_reg_write_8(DMAR_IRTA_REG, virt_to_phys(root));
> > > +	/* 0xf stands for table size (2^(0xf+1) == 65536) */
> > > +	vtd_gcmd_or(VTD_GCMD_IR_TABLE | 0xf);
> > > +	printf("IR table address: 0x%016lx\n", vtd_ir_table());
> > > +}
> > > +
> > > +void vtd_enable_dmar(void)
> > > +{
> > > +	vtd_gcmd_or(VTD_GCMD_DMAR);
> > > +}
> > > +
> > > +void vtd_enable_ir(void)
> > > +{
> > > +	vtd_gcmd_or(VTD_GCMD_IR);
> > > +}
> > 
> > More wrappers to drop.
> 
> Ok.
> 
> > 
> > > +
> > > +void vtd_init(void)
> > > +{
> > > +	setup_vm();
> > > +	smp_init();
> > > +	setup_idt();
> > > +
> > > +	vtd_dump_init_info();
> > > +	vtd_enable_qi();
> > > +	vtd_setup_root_table();
> > > +	vtd_setup_ir_table();
> > > +	vtd_enable_dmar();
> > > +	vtd_enable_ir();
> > 
> > I'd open code all the above here. The functions aren't really
> > gaining us anything that comments can't do for us.
> 
> Will fix.
> 
> > 
> > > +}
> > > diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h
> > > new file mode 100644
> > > index 0000000..0f687d1
> > > --- /dev/null
> > > +++ b/lib/x86/intel-iommu.h
> > > @@ -0,0 +1,106 @@
> > > +/*
> > > + * Intel IOMMU header
> > 
> > Please point out the kernel source these defines come from,
> > include/linux/intel-iommu.h
> 
> Will add.
> 
> [...]
> 
> > > +int main(int argc, char *argv[])
> > > +{
> > > +	setup_vm();
> > > +	smp_init();
> > > +	setup_idt();
> > > +
> > > +	vtd_dump_init_info();
> > > +	report("init status check", vtd_status() == 0);
> > > +	report("fault status check", vtd_fault_status() == 0);
> > > +
> > > +	vtd_enable_qi();
> > > +	report("QI enablement", vtd_status() | VTD_GCMD_QI);
> > > +
> > > +	vtd_setup_root_table();
> > > +	report("DMAR table setup", vtd_status() | VTD_GCMD_ROOT);
> > > +
> > > +	vtd_setup_ir_table();
> > > +	report("IR table setup", vtd_status() | VTD_GCMD_IR_TABLE);
> > 
> > The above three report() calls will always pass since you're using OR.
> 
> Oops... Thanks. :)
> 
> > 
> > > +
> > > +	vtd_enable_dmar();
> > > +	report("DMAR enablement", vtd_status() & VTD_GCMD_DMAR);
> > > +
> > > +	vtd_enable_ir();
> > > +	report("IR enablement", vtd_status() & VTD_GCMD_IR);
> > 
> > You've reproduced vtd_init() here. Why not just call it and then
> > do a sequence of report() calls where you check each register
> > has the expected value? I.e.
> > 
> >  vtd_init();
> >  status = vtd_readl(DMAR_GSTS_REG);
> >  report("QI enablement", status & VTD_GCMD_QI);
> >  report("DMAR table setup", status & VTD_GCMD_ROOT);
> >  ...
> > 
> > Would that not work with this device?
> 
> I think it should work. Will take your advice.
>

Thanks,
drew 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Xu Oct. 24, 2016, 6:36 a.m. UTC | #4
On Fri, Oct 21, 2016 at 02:18:04PM +0200, Andrew Jones wrote:
> On Fri, Oct 21, 2016 at 05:52:50PM +0800, Peter Xu wrote:
> > On Thu, Oct 20, 2016 at 11:30:30AM +0200, Andrew Jones wrote:
> > 
> > [...]
> > 
> > > > +void vtd_reg_write_4(unsigned int reg, uint32_t value)
> > > > +{
> > > > +	*(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> > > > +}
> > > > +
> > > > +void vtd_reg_write_8(unsigned int reg, uint64_t value)
> > > > +{
> > > > +	*(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
> > > > +}
> > > > +
> > > > +uint32_t vtd_reg_read_4(unsigned int reg)
> > > > +{
> > > > +	return *(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> > > > +}
> > > > +
> > > > +uint64_t vtd_reg_read_8(unsigned int reg)
> > > > +{
> > > > +	return *(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
> > > > +}
> > > 
> > > The above could be static inlines in intel-iommu.h. How about
> > > calling them vtd_readl, vtd_readq, vtd_writel, vtd_writeq? That
> > > would make them more consistent with the readl, readq... in
> > > lib/asm-generic/io.h
> > 
> > Sure! Will fix.
> > 
> > > 
> > > > +
> > > > +uint32_t vtd_version(void)
> > > > +{
> > > > +	return vtd_reg_read_4(DMAR_VER_REG);
> > > > +}
> > > > +
> > > > +uint64_t vtd_cap(void)
> > > > +{
> > > > +	return vtd_reg_read_8(DMAR_CAP_REG);
> > > > +}
> > > > +
> > > > +uint64_t vtd_ecap(void)
> > > > +{
> > > > +	return vtd_reg_read_8(DMAR_ECAP_REG);
> > > > +}
> > > > +
> > > > +uint32_t vtd_status(void)
> > > > +{
> > > > +	return vtd_reg_read_4(DMAR_GSTS_REG);
> > > > +}
> > > > +
> > > > +uint32_t vtd_fault_status(void)
> > > > +{
> > > > +	return vtd_reg_read_4(DMAR_FSTS_REG);
> > > > +}
> > > > +
> > > > +uint64_t vtd_root_table(void)
> > > > +{
> > > > +	/* No extend root table support yet */
> > > > +	return vtd_reg_read_8(DMAR_RTADDR_REG);
> > > > +}
> > > 
> > > I'd drop all the above and use vtd_readl/q(DMAR_CAP_REG) directly,
> > > the wrappers don't add anything and take the unit test writer
> > > further away from the register names that they should be more
> > > familiar with.
> > 
> > Here I used wrapper since I don't want to remember all the register
> > names, and (especially) I don't want to remember the size of the
> > registers.
> 
> You won't. You'll have the spec open reading them and then use them
> in the code. Reviewers will also open the spec and then search for
> them in the code. Nobody will memorize them, they'll just match them.

Ok.

> 
> > 
> > For example, when I want to read ECAP register, calling vtd_ecap()
> > will let me make sure the return value is 8 bytes (compiler will help
> > to make sure of it, and I can simply know it from seeing the return
> > type of the wrapper function, which is uint64_t), and I don't need to
> > bother whether I should use vtd_readl() or vtd_readq() for it. If I
> > don't have vtd_ecap() wrapper, I can't see the length from the
> > register name only, and I need to check the spec every time, or with
> > comment like:
> > 
> >   #define DMAR_ECAP_REG XXX /* 8 bytes */
> 
> That comment sounds good to me :-)

I see that these registers already have comments, I think it'll be
good to make these lines exactly the same as original header in
Linux/QEMU source, so I didn't do that. :)

However, I'll remove all the useless wrappers and use direct accesses
to registers when necessary. 

> 
> > 
> > In that case, I'd prefer with a wrapper for the registers (maybe I can
> > move the functions into header files as well, with inline static).
> > 
> > > 
> > > > +
> > > > +uint64_t vtd_ir_table(void)
> > > > +{
> > > > +	return vtd_reg_read_8(DMAR_IRTA_REG) & 0xfffffffffffff000;
> > > > +}
> > > 
> > > This one has a mask, so makes sense to keep the wrapper. But the
> > > mask looks like PAGE_MASK to me. Why not use that? Or even drop
> > > this wrapper and just use PAGE_MASK when necessary directly...
> > 
> > The last 12 bits have other means (I didn't check it here, it's
> > explained in chap 10.4.29 in vt-d spec in case anyone is interested),
> > so it's just coincident that it looks like a page mask. However, I
> > think I can use PAGE_MASK here.
> 
> Yeah, I have no idea bout the mask, /me didn't open the spec. Use
> whatever is correct, maybe defining a new mask if this mask is not
> always == PAGE_MASK, or even if it is, e.g.
> 
>  #define MY_MASK PAGE_MASK

Yeah this looks clean. Will use it.

> 
> > 
> > > 
> > > > +
> > > > +void vtd_gcmd_or(uint32_t cmd)
> > > > +{
> > > > +	uint32_t status = vtd_status();
> > > > +	/*
> > > > +	 * Logically, we need to read DMAR_GSTS_REG until IOMMU
> > > > +	 * handles the write request. However for QEMU/KVM, this write
> > > > +	 * is always sync, so when this returns, we should be sure
> > > 
> > > should always be in sync. Sounds like something to unit test :-)
> > 
> > I am not sure whether I got the point... but real hardware should be
> > async, so I don't know whether the code can work on real hardware. For
> > QEMU, it is sync, so it's safe for kvm-unit-tests.
> 
> I just mean if there are assumptions on how something should work,
> then either it should be confirmed and asserted, or even tested
> with an explicit try-report unit test.

I see. Maybe it'll be nicer I just re-write this function to not do
such an assumption. After all, it is not hard to add one more step to
check status.

Will fix. Thanks!

-- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/lib/x86/intel-iommu.c b/lib/x86/intel-iommu.c
new file mode 100644
index 0000000..9f55394
--- /dev/null
+++ b/lib/x86/intel-iommu.c
@@ -0,0 +1,138 @@ 
+/*
+ * Intel IOMMU APIs
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ */
+
+#include "intel-iommu.h"
+
+void vtd_reg_write_4(unsigned int reg, uint32_t value)
+{
+	*(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
+}
+
+void vtd_reg_write_8(unsigned int reg, uint64_t value)
+{
+	*(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg) = value;
+}
+
+uint32_t vtd_reg_read_4(unsigned int reg)
+{
+	return *(uint32_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
+}
+
+uint64_t vtd_reg_read_8(unsigned int reg)
+{
+	return *(uint64_t *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg);
+}
+
+uint32_t vtd_version(void)
+{
+	return vtd_reg_read_4(DMAR_VER_REG);
+}
+
+uint64_t vtd_cap(void)
+{
+	return vtd_reg_read_8(DMAR_CAP_REG);
+}
+
+uint64_t vtd_ecap(void)
+{
+	return vtd_reg_read_8(DMAR_ECAP_REG);
+}
+
+uint32_t vtd_status(void)
+{
+	return vtd_reg_read_4(DMAR_GSTS_REG);
+}
+
+uint32_t vtd_fault_status(void)
+{
+	return vtd_reg_read_4(DMAR_FSTS_REG);
+}
+
+uint64_t vtd_root_table(void)
+{
+	/* No extend root table support yet */
+	return vtd_reg_read_8(DMAR_RTADDR_REG);
+}
+
+uint64_t vtd_ir_table(void)
+{
+	return vtd_reg_read_8(DMAR_IRTA_REG) & 0xfffffffffffff000;
+}
+
+void vtd_gcmd_or(uint32_t cmd)
+{
+	uint32_t status = vtd_status();
+	/*
+	 * Logically, we need to read DMAR_GSTS_REG until IOMMU
+	 * handles the write request. However for QEMU/KVM, this write
+	 * is always sync, so when this returns, we should be sure
+	 * that the GCMD write is done.
+	 */
+	vtd_reg_write_4(DMAR_GCMD_REG, status | cmd);
+}
+
+void vtd_dump_init_info(void)
+{
+	printf("VT-d version:   0x%x\n", vtd_version());
+	printf("     cap:       0x%016lx\n", vtd_cap());
+	printf("     ecap:      0x%016lx\n", vtd_ecap());
+}
+
+void vtd_enable_qi(void)
+{
+	vtd_gcmd_or(VTD_GCMD_QI);
+}
+
+void vtd_setup_root_table(void)
+{
+	void *root = alloc_page();
+
+	memset(root, 0, PAGE_SIZE);
+	vtd_reg_write_8(DMAR_RTADDR_REG, virt_to_phys(root));
+	vtd_gcmd_or(VTD_GCMD_ROOT);
+	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
+}
+
+void vtd_setup_ir_table(void)
+{
+	void *root = alloc_page();
+
+	memset(root, 0, PAGE_SIZE);
+	vtd_reg_write_8(DMAR_IRTA_REG, virt_to_phys(root));
+	/* 0xf stands for table size (2^(0xf+1) == 65536) */
+	vtd_gcmd_or(VTD_GCMD_IR_TABLE | 0xf);
+	printf("IR table address: 0x%016lx\n", vtd_ir_table());
+}
+
+void vtd_enable_dmar(void)
+{
+	vtd_gcmd_or(VTD_GCMD_DMAR);
+}
+
+void vtd_enable_ir(void)
+{
+	vtd_gcmd_or(VTD_GCMD_IR);
+}
+
+void vtd_init(void)
+{
+	setup_vm();
+	smp_init();
+	setup_idt();
+
+	vtd_dump_init_info();
+	vtd_enable_qi();
+	vtd_setup_root_table();
+	vtd_setup_ir_table();
+	vtd_enable_dmar();
+	vtd_enable_ir();
+}
diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h
new file mode 100644
index 0000000..0f687d1
--- /dev/null
+++ b/lib/x86/intel-iommu.h
@@ -0,0 +1,106 @@ 
+/*
+ * Intel IOMMU header
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ */
+
+#ifndef __INTEL_IOMMU_H__
+#define __INTEL_IOMMU_H__
+
+#include "libcflat.h"
+#include "vm.h"
+#include "isr.h"
+#include "smp.h"
+#include "desc.h"
+
+#define Q35_HOST_BRIDGE_IOMMU_ADDR  0xfed90000ULL
+
+/*
+ * Intel IOMMU register specification
+ */
+#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
+#define DMAR_CAP_REG_HI         0xc  /* High 32-bit of DMAR_CAP_REG */
+#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
+#define DMAR_ECAP_REG_HI        0X14
+#define DMAR_GCMD_REG           0x18 /* Global command */
+#define DMAR_GSTS_REG           0x1c /* Global status */
+#define DMAR_RTADDR_REG         0x20 /* Root entry table */
+#define DMAR_RTADDR_REG_HI      0X24
+#define DMAR_CCMD_REG           0x28 /* Context command */
+#define DMAR_CCMD_REG_HI        0x2c
+#define DMAR_FSTS_REG           0x34 /* Fault status */
+#define DMAR_FECTL_REG          0x38 /* Fault control */
+#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data */
+#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr */
+#define DMAR_FEUADDR_REG        0x44 /* Upper address */
+#define DMAR_AFLOG_REG          0x58 /* Advanced fault control */
+#define DMAR_AFLOG_REG_HI       0X5c
+#define DMAR_PMEN_REG           0x64 /* Enable protected memory region */
+#define DMAR_PLMBASE_REG        0x68 /* PMRR low addr */
+#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG        0x70 /* PMRR high base addr */
+#define DMAR_PHMBASE_REG_HI     0X74
+#define DMAR_PHMLIMIT_REG       0x78 /* PMRR high limit */
+#define DMAR_PHMLIMIT_REG_HI    0x7c
+#define DMAR_IQH_REG            0x80 /* Invalidation queue head */
+#define DMAR_IQH_REG_HI         0X84
+#define DMAR_IQT_REG            0x88 /* Invalidation queue tail */
+#define DMAR_IQT_REG_HI         0X8c
+#define DMAR_IQA_REG            0x90 /* Invalidation queue addr */
+#define DMAR_IQA_REG_HI         0x94
+#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
+#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
+#define DMAR_IRTA_REG_HI        0xbc
+#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
+#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
+#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
+#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
+#define DMAR_PQH_REG            0xc0 /* Page request queue head */
+#define DMAR_PQH_REG_HI         0xc4
+#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
+#define DMAR_PQT_REG_HI         0xcc
+#define DMAR_PQA_REG            0xd0 /* Page request queue address */
+#define DMAR_PQA_REG_HI         0xd4
+#define DMAR_PRS_REG            0xdc /* Page request status */
+#define DMAR_PECTL_REG          0xe0 /* Page request event control */
+#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
+#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
+#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
+#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
+#define DMAR_MTRRCAP_REG_HI     0x104
+#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
+#define DMAR_MTRRDEF_REG_HI     0x10c
+
+#define VTD_GCMD_IR_TABLE       (0x1000000)
+#define VTD_GCMD_IR             (0x2000000)
+#define VTD_GCMD_QI             (0x4000000)
+#define VTD_GCMD_ROOT           (0x40000000)
+#define VTD_GCMD_DMAR           (0x80000000)
+
+void vtd_reg_write_4(unsigned int reg, uint32_t value);
+void vtd_reg_write_8(unsigned int reg, uint64_t value);
+uint32_t vtd_reg_read_4(unsigned int reg);
+uint64_t vtd_reg_read_8(unsigned int reg);
+uint32_t vtd_version(void);
+uint64_t vtd_cap(void);
+uint64_t vtd_ecap(void);
+uint32_t vtd_status(void);
+uint32_t vtd_fault_status(void);
+uint64_t vtd_root_table(void);
+uint64_t vtd_ir_table(void);
+void vtd_dump_init_info(void);
+void vtd_enable_qi(void);
+void vtd_setup_root_table(void);
+void vtd_setup_ir_table(void);
+void vtd_enable_dmar(void);
+void vtd_enable_ir(void);
+void vtd_init(void);
+
+#endif
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index e166911..a8e9445 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -4,6 +4,7 @@  ldarch = elf64-x86-64
 CFLAGS += -mno-red-zone
 
 cflatobjs += lib/x86/setjmp64.o
+cflatobjs += lib/x86/intel-iommu.o
 
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 	  $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
@@ -14,6 +15,7 @@  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 tests += $(TEST_DIR)/svm.flat
 tests += $(TEST_DIR)/vmx.flat
 tests += $(TEST_DIR)/tscdeadline_latency.flat
+tests += $(TEST_DIR)/intel-iommu.flat
 
 include $(TEST_DIR)/Makefile.common
 
diff --git a/x86/intel-iommu.c b/x86/intel-iommu.c
new file mode 100644
index 0000000..60d512a
--- /dev/null
+++ b/x86/intel-iommu.c
@@ -0,0 +1,41 @@ 
+/*
+ * Intel IOMMU unit test.
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ */
+
+#include "intel-iommu.h"
+
+int main(int argc, char *argv[])
+{
+	setup_vm();
+	smp_init();
+	setup_idt();
+
+	vtd_dump_init_info();
+	report("init status check", vtd_status() == 0);
+	report("fault status check", vtd_fault_status() == 0);
+
+	vtd_enable_qi();
+	report("QI enablement", vtd_status() | VTD_GCMD_QI);
+
+	vtd_setup_root_table();
+	report("DMAR table setup", vtd_status() | VTD_GCMD_ROOT);
+
+	vtd_setup_ir_table();
+	report("IR table setup", vtd_status() | VTD_GCMD_IR_TABLE);
+
+	vtd_enable_dmar();
+	report("DMAR enablement", vtd_status() & VTD_GCMD_DMAR);
+
+	vtd_enable_ir();
+	report("IR enablement", vtd_status() & VTD_GCMD_IR);
+
+	return report_summary();
+}