[2/2] AMD/IOMMU: Render IO_PAGE_FAULT errors in a more useful manner
diff mbox series

Message ID 20191126150112.12704-3-andrew.cooper3@citrix.com
State New
Headers show
Series
  • Fixes to AMD IOMMU logging
Related show

Commit Message

Andrew Cooper Nov. 26, 2019, 3:01 p.m. UTC
Print the PCI coordinates in its common format and use d%u notation for the
domain.  As well as printing flags, decode them.  IO_PAGE_FAULT is used for
interrupt remapping errors as well as DMA remapping errors.

Before:
  (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xbf695000, flags = 0x10
  (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xbf695040, flags = 0x10
  (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xfffffff0, flags = 0x30
  (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0x100000000, flags = 0x30
  (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0x100000040, flags = 0x30

After:
  (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000bf5fc000 flags 0x10 PR
  (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000bf5fc040 flags 0x10 PR
  (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000fffffff0 flags 0x30 RW PR
  (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 0000000100000000 flags 0x30 RW PR
  (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 0000000100000040 flags 0x30 RW PR

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Juergen Gross <jgross@suse.com>
---
 xen/drivers/passthrough/amd/iommu_init.c      | 35 +++++++++++++++------------
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  3 ---
 2 files changed, 20 insertions(+), 18 deletions(-)

Comments

Roger Pau Monne Nov. 27, 2019, 9:40 a.m. UTC | #1
On Tue, Nov 26, 2019 at 03:01:12PM +0000, Andrew Cooper wrote:
> Print the PCI coordinates in its common format and use d%u notation for the
> domain.  As well as printing flags, decode them.  IO_PAGE_FAULT is used for
> interrupt remapping errors as well as DMA remapping errors.
> 
> Before:
>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xbf695000, flags = 0x10
>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xbf695040, flags = 0x10
>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xfffffff0, flags = 0x30
>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0x100000000, flags = 0x30
>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0x100000040, flags = 0x30
> 
> After:
>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000bf5fc000 flags 0x10 PR
>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000bf5fc040 flags 0x10 PR
>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000fffffff0 flags 0x30 RW PR
>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 0000000100000000 flags 0x30 RW PR
>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 0000000100000040 flags 0x30 RW PR

Nit: I would place the domain id information at the beginning (since
that's more similar to gprintk format), and maybe drop the AMD-Vi
prefix, it's not very useful IMO:

(XEN) d0 IO_PAGE_FAULT 0000:00:14.1 addr 0000000100000040 flags 0x30 RW PR

But I'm not especially concerned.

> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

In its current form or with some of the suggestions, in any case it's
certainly an improvement.

> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Juergen Gross <jgross@suse.com>
> ---
>  xen/drivers/passthrough/amd/iommu_init.c      | 35 +++++++++++++++------------
>  xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  3 ---
>  2 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
> index 8aa8788797..cd4e6e16b8 100644
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -513,10 +513,7 @@ static hw_irq_controller iommu_x2apic_type = {
>  
>  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>  {
> -    u16 domain_id, device_id, flags;
> -    unsigned int bdf;
>      u32 code;
> -    u64 *addr;
>      int count = 0;
>      static const char *const event_str[] = {
>  #define EVENT_STR(name) [IOMMU_EVENT_##name - 1] = #name
> @@ -560,18 +557,26 @@ static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>  
>      if ( code == IOMMU_EVENT_IO_PAGE_FAULT )
>      {
> -        device_id = iommu_get_devid_from_event(entry[0]);
> -        domain_id = get_field_from_reg_u32(entry[1],
> -                                           IOMMU_EVENT_DOMAIN_ID_MASK,
> -                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
> -        flags = get_field_from_reg_u32(entry[1],
> -                                       IOMMU_EVENT_FLAGS_MASK,
> -                                       IOMMU_EVENT_FLAGS_SHIFT);
> -        addr= (u64*) (entry + 2);
> -        printk(XENLOG_ERR "AMD-Vi: "
> -               "%s: domain = %d, device id = %#x, "
> -               "fault address = %#"PRIx64", flags = %#x\n",
> -               code_str, domain_id, device_id, *addr, flags);
> +        unsigned int bdf;
> +        uint16_t device_id = MASK_EXTR(entry[0], IOMMU_CMD_DEVICE_ID_MASK);
> +        uint16_t domain_id = MASK_EXTR(entry[1], IOMMU_EVENT_DOMAIN_ID_MASK);
> +        uint16_t flags = MASK_EXTR(entry[1], IOMMU_EVENT_FLAGS_MASK);

I wouldn't mind using unsigned int for the variables above.

> +        uint64_t addr = *(uint64_t *)(entry + 2);
> +
> +        printk(XENLOG_ERR "AMD-Vi: %s: %04x:%02x:%02x.%u d%d addr %016"PRIx64
> +               " flags %#x%s%s%s%s%s%s%s%s%s%s\n",
> +               code_str, iommu->seg, PCI_BUS(device_id), PCI_SLOT(device_id),
> +               PCI_FUNC(device_id), domain_id, addr, flags,
> +               (flags & 0xe00) ? " ??" : "",
> +               (flags & 0x100) ? " TR" : "",
> +               (flags & 0x080) ? " RZ" : "",
> +               (flags & 0x040) ? " PE" : "",
> +               (flags & 0x020) ? " RW" : "",
> +               (flags & 0x010) ? " PR" : "",
> +               (flags & 0x008) ? " I" : "",
> +               (flags & 0x004) ? " US" : "",
> +               (flags & 0x002) ? " NX" : "",
> +               (flags & 0x001) ? " GN" : "");

I would rather have those added with properly defined names to
amd-iommu-defs.h.

Thanks, Roger.
Jan Beulich Nov. 27, 2019, 5:02 p.m. UTC | #2
On 26.11.2019 16:01, Andrew Cooper wrote:
> @@ -560,18 +557,26 @@ static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>  
>      if ( code == IOMMU_EVENT_IO_PAGE_FAULT )
>      {
> -        device_id = iommu_get_devid_from_event(entry[0]);
> -        domain_id = get_field_from_reg_u32(entry[1],
> -                                           IOMMU_EVENT_DOMAIN_ID_MASK,
> -                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
> -        flags = get_field_from_reg_u32(entry[1],
> -                                       IOMMU_EVENT_FLAGS_MASK,
> -                                       IOMMU_EVENT_FLAGS_SHIFT);
> -        addr= (u64*) (entry + 2);
> -        printk(XENLOG_ERR "AMD-Vi: "
> -               "%s: domain = %d, device id = %#x, "
> -               "fault address = %#"PRIx64", flags = %#x\n",
> -               code_str, domain_id, device_id, *addr, flags);
> +        unsigned int bdf;
> +        uint16_t device_id = MASK_EXTR(entry[0], IOMMU_CMD_DEVICE_ID_MASK);

s/CMD/EVENT/ and then
Acked-by: Jan Beulich <jbeulich@suse.com>

Jan
Andrew Cooper Nov. 27, 2019, 5:03 p.m. UTC | #3
On 27/11/2019 17:02, Jan Beulich wrote:
> On 26.11.2019 16:01, Andrew Cooper wrote:
>> @@ -560,18 +557,26 @@ static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>>  
>>      if ( code == IOMMU_EVENT_IO_PAGE_FAULT )
>>      {
>> -        device_id = iommu_get_devid_from_event(entry[0]);
>> -        domain_id = get_field_from_reg_u32(entry[1],
>> -                                           IOMMU_EVENT_DOMAIN_ID_MASK,
>> -                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
>> -        flags = get_field_from_reg_u32(entry[1],
>> -                                       IOMMU_EVENT_FLAGS_MASK,
>> -                                       IOMMU_EVENT_FLAGS_SHIFT);
>> -        addr= (u64*) (entry + 2);
>> -        printk(XENLOG_ERR "AMD-Vi: "
>> -               "%s: domain = %d, device id = %#x, "
>> -               "fault address = %#"PRIx64", flags = %#x\n",
>> -               code_str, domain_id, device_id, *addr, flags);
>> +        unsigned int bdf;
>> +        uint16_t device_id = MASK_EXTR(entry[0], IOMMU_CMD_DEVICE_ID_MASK);
> s/CMD/EVENT/ and then
> Acked-by: Jan Beulich <jbeulich@suse.com>

Oops yes.  That was a consequence of following

#define iommu_get_devid_from_event          iommu_get_devid_from_cmd

to get the mask to use.

These really need turning into structs, but that is a job for a
different day.

~Andrew
Andrew Cooper Nov. 27, 2019, 5:38 p.m. UTC | #4
On 27/11/2019 09:40, Roger Pau Monné wrote:
> On Tue, Nov 26, 2019 at 03:01:12PM +0000, Andrew Cooper wrote:
>> Print the PCI coordinates in its common format and use d%u notation for the
>> domain.  As well as printing flags, decode them.  IO_PAGE_FAULT is used for
>> interrupt remapping errors as well as DMA remapping errors.
>>
>> Before:
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xbf695000, flags = 0x10
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xbf695040, flags = 0x10
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0xfffffff0, flags = 0x30
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0x100000000, flags = 0x30
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0xa1, fault address = 0x100000040, flags = 0x30
>>
>> After:
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000bf5fc000 flags 0x10 PR
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000bf5fc040 flags 0x10 PR
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 00000000fffffff0 flags 0x30 RW PR
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 0000000100000000 flags 0x30 RW PR
>>   (XEN) AMD-Vi: IO_PAGE_FAULT: 0000:00:14.1 d0 addr 0000000100000040 flags 0x30 RW PR
> Nit: I would place the domain id information at the beginning (since
> that's more similar to gprintk format), and maybe drop the AMD-Vi
> prefix, it's not very useful IMO:
>
> (XEN) d0 IO_PAGE_FAULT 0000:00:14.1 addr 0000000100000040 flags 0x30 RW PR
>
> But I'm not especially concerned.

So I debated not using d%d format.  This is the DTE's "domain_id"
(a.k.a. Tag in the IO-TLB) field which by convention we set to the domid
of the owning device, but isn't necessarily the best option.

In particular, it might be wise to use domid + 1 and choke if we ever
find 0 in use.

>
>> +        uint64_t addr = *(uint64_t *)(entry + 2);
>> +
>> +        printk(XENLOG_ERR "AMD-Vi: %s: %04x:%02x:%02x.%u d%d addr %016"PRIx64
>> +               " flags %#x%s%s%s%s%s%s%s%s%s%s\n",
>> +               code_str, iommu->seg, PCI_BUS(device_id), PCI_SLOT(device_id),
>> +               PCI_FUNC(device_id), domain_id, addr, flags,
>> +               (flags & 0xe00) ? " ??" : "",
>> +               (flags & 0x100) ? " TR" : "",
>> +               (flags & 0x080) ? " RZ" : "",
>> +               (flags & 0x040) ? " PE" : "",
>> +               (flags & 0x020) ? " RW" : "",
>> +               (flags & 0x010) ? " PR" : "",
>> +               (flags & 0x008) ? " I" : "",
>> +               (flags & 0x004) ? " US" : "",
>> +               (flags & 0x002) ? " NX" : "",
>> +               (flags & 0x001) ? " GN" : "");
> I would rather have those added with properly defined names to
> amd-iommu-defs.h.

All of this is in desperate need of turning into real C structs, rather
than being opencoded in terms of u32[] and offsets/shifts/masks, but
such a change definitely isn't appropriate for backport.

~Andrew

Patch
diff mbox series

diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index 8aa8788797..cd4e6e16b8 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -513,10 +513,7 @@  static hw_irq_controller iommu_x2apic_type = {
 
 static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
 {
-    u16 domain_id, device_id, flags;
-    unsigned int bdf;
     u32 code;
-    u64 *addr;
     int count = 0;
     static const char *const event_str[] = {
 #define EVENT_STR(name) [IOMMU_EVENT_##name - 1] = #name
@@ -560,18 +557,26 @@  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
 
     if ( code == IOMMU_EVENT_IO_PAGE_FAULT )
     {
-        device_id = iommu_get_devid_from_event(entry[0]);
-        domain_id = get_field_from_reg_u32(entry[1],
-                                           IOMMU_EVENT_DOMAIN_ID_MASK,
-                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
-        flags = get_field_from_reg_u32(entry[1],
-                                       IOMMU_EVENT_FLAGS_MASK,
-                                       IOMMU_EVENT_FLAGS_SHIFT);
-        addr= (u64*) (entry + 2);
-        printk(XENLOG_ERR "AMD-Vi: "
-               "%s: domain = %d, device id = %#x, "
-               "fault address = %#"PRIx64", flags = %#x\n",
-               code_str, domain_id, device_id, *addr, flags);
+        unsigned int bdf;
+        uint16_t device_id = MASK_EXTR(entry[0], IOMMU_CMD_DEVICE_ID_MASK);
+        uint16_t domain_id = MASK_EXTR(entry[1], IOMMU_EVENT_DOMAIN_ID_MASK);
+        uint16_t flags = MASK_EXTR(entry[1], IOMMU_EVENT_FLAGS_MASK);
+        uint64_t addr = *(uint64_t *)(entry + 2);
+
+        printk(XENLOG_ERR "AMD-Vi: %s: %04x:%02x:%02x.%u d%d addr %016"PRIx64
+               " flags %#x%s%s%s%s%s%s%s%s%s%s\n",
+               code_str, iommu->seg, PCI_BUS(device_id), PCI_SLOT(device_id),
+               PCI_FUNC(device_id), domain_id, addr, flags,
+               (flags & 0xe00) ? " ??" : "",
+               (flags & 0x100) ? " TR" : "",
+               (flags & 0x080) ? " RZ" : "",
+               (flags & 0x040) ? " PE" : "",
+               (flags & 0x020) ? " RW" : "",
+               (flags & 0x010) ? " PR" : "",
+               (flags & 0x008) ? " I" : "",
+               (flags & 0x004) ? " US" : "",
+               (flags & 0x002) ? " NX" : "",
+               (flags & 0x001) ? " GN" : "");
 
         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
             if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 8ed9482791..53900cd60c 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -265,9 +265,6 @@  static inline uint32_t iommu_get_addr_hi_from_cmd(uint32_t cmd)
                                   IOMMU_CMD_ADDR_HIGH_SHIFT);
 }
 
-/* access address field from event log entry */
-#define iommu_get_devid_from_event          iommu_get_devid_from_cmd
-
 /* access iommu base addresses field from mmio regs */
 static inline void iommu_set_addr_lo_to_reg(uint32_t *reg, uint32_t addr)
 {