diff mbox series

[v2,10/18] AMD/IOMMU: walk trees upon page fault

Message ID 5d4a4cd8-ffb0-951a-c86d-98f659ab8d0b@suse.com (mailing list archive)
State New, archived
Headers show
Series IOMMU: superpage support when not sharing pagetables | expand

Commit Message

Jan Beulich Sept. 24, 2021, 9:51 a.m. UTC
This is to aid diagnosing issues and largely matches VT-d's behavior.
Since I'm adding permissions output here as well, take the opportunity
and also add their displaying to amd_dump_page_table_level().

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Roger Pau Monné Dec. 3, 2021, 9:03 a.m. UTC | #1
On Fri, Sep 24, 2021 at 11:51:15AM +0200, Jan Beulich wrote:
> This is to aid diagnosing issues and largely matches VT-d's behavior.
> Since I'm adding permissions output here as well, take the opportunity
> and also add their displaying to amd_dump_page_table_level().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/drivers/passthrough/amd/iommu.h
> +++ b/xen/drivers/passthrough/amd/iommu.h
> @@ -243,6 +243,8 @@ int __must_check amd_iommu_flush_iotlb_p
>                                               unsigned long page_count,
>                                               unsigned int flush_flags);
>  int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
> +                             dfn_t dfn);
>  
>  /* device table functions */
>  int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -573,6 +573,9 @@ static void parse_event_log_entry(struct
>                 (flags & 0x002) ? " NX" : "",
>                 (flags & 0x001) ? " GN" : "");
>  
> +        if ( iommu_verbose )
> +            amd_iommu_print_entries(iommu, device_id, daddr_to_dfn(addr));
> +
>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>                  pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
> --- a/xen/drivers/passthrough/amd/iommu_map.c
> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> @@ -363,6 +363,50 @@ int amd_iommu_unmap_page(struct domain *
>      return 0;
>  }
>  
> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
> +                             dfn_t dfn)
> +{
> +    mfn_t pt_mfn;
> +    unsigned int level;
> +    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;
> +
> +    if ( !dt[dev_id].tv )
> +    {
> +        printk("%pp: no root\n", &PCI_SBDF2(iommu->seg, dev_id));
> +        return;
> +    }
> +
> +    pt_mfn = _mfn(dt[dev_id].pt_root);
> +    level = dt[dev_id].paging_mode;
> +    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
> +           &PCI_SBDF2(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));
> +
> +    while ( level )
> +    {
> +        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
> +        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
> +        union amd_iommu_pte pte = pt[idx];

Don't you need to take a lock here (mapping_lock maybe?) in order to
prevent changes to the IOMMU page tables while walking them?

Thanks, Roger.
Jan Beulich Dec. 3, 2021, 9:49 a.m. UTC | #2
On 03.12.2021 10:03, Roger Pau Monné wrote:
> On Fri, Sep 24, 2021 at 11:51:15AM +0200, Jan Beulich wrote:
>> This is to aid diagnosing issues and largely matches VT-d's behavior.
>> Since I'm adding permissions output here as well, take the opportunity
>> and also add their displaying to amd_dump_page_table_level().
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/drivers/passthrough/amd/iommu.h
>> +++ b/xen/drivers/passthrough/amd/iommu.h
>> @@ -243,6 +243,8 @@ int __must_check amd_iommu_flush_iotlb_p
>>                                               unsigned long page_count,
>>                                               unsigned int flush_flags);
>>  int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
>> +                             dfn_t dfn);
>>  
>>  /* device table functions */
>>  int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -573,6 +573,9 @@ static void parse_event_log_entry(struct
>>                 (flags & 0x002) ? " NX" : "",
>>                 (flags & 0x001) ? " GN" : "");
>>  
>> +        if ( iommu_verbose )
>> +            amd_iommu_print_entries(iommu, device_id, daddr_to_dfn(addr));
>> +
>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>>                  pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>> --- a/xen/drivers/passthrough/amd/iommu_map.c
>> +++ b/xen/drivers/passthrough/amd/iommu_map.c
>> @@ -363,6 +363,50 @@ int amd_iommu_unmap_page(struct domain *
>>      return 0;
>>  }
>>  
>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
>> +                             dfn_t dfn)
>> +{
>> +    mfn_t pt_mfn;
>> +    unsigned int level;
>> +    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;
>> +
>> +    if ( !dt[dev_id].tv )
>> +    {
>> +        printk("%pp: no root\n", &PCI_SBDF2(iommu->seg, dev_id));
>> +        return;
>> +    }
>> +
>> +    pt_mfn = _mfn(dt[dev_id].pt_root);
>> +    level = dt[dev_id].paging_mode;
>> +    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
>> +           &PCI_SBDF2(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));
>> +
>> +    while ( level )
>> +    {
>> +        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
>> +        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
>> +        union amd_iommu_pte pte = pt[idx];
> 
> Don't you need to take a lock here (mapping_lock maybe?) in order to
> prevent changes to the IOMMU page tables while walking them?

Generally speaking - yes. But see the description saying "largely
matches VT-d's behavior". On VT-d both the IOMMU lock and the mapping
lock would need acquiring to be safe (the former could perhaps be
dropped early). Likewise here. If I wanted to do so here, I ought to
add a prereq patch adjusting the VT-d function. The main "excuse" not
to do so is/was probably the size of the series.

Jan
Jan Beulich Dec. 3, 2021, 9:55 a.m. UTC | #3
On 03.12.2021 10:49, Jan Beulich wrote:
> On 03.12.2021 10:03, Roger Pau Monné wrote:
>> On Fri, Sep 24, 2021 at 11:51:15AM +0200, Jan Beulich wrote:
>>> This is to aid diagnosing issues and largely matches VT-d's behavior.
>>> Since I'm adding permissions output here as well, take the opportunity
>>> and also add their displaying to amd_dump_page_table_level().
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/drivers/passthrough/amd/iommu.h
>>> +++ b/xen/drivers/passthrough/amd/iommu.h
>>> @@ -243,6 +243,8 @@ int __must_check amd_iommu_flush_iotlb_p
>>>                                               unsigned long page_count,
>>>                                               unsigned int flush_flags);
>>>  int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
>>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
>>> +                             dfn_t dfn);
>>>  
>>>  /* device table functions */
>>>  int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
>>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>>> @@ -573,6 +573,9 @@ static void parse_event_log_entry(struct
>>>                 (flags & 0x002) ? " NX" : "",
>>>                 (flags & 0x001) ? " GN" : "");
>>>  
>>> +        if ( iommu_verbose )
>>> +            amd_iommu_print_entries(iommu, device_id, daddr_to_dfn(addr));
>>> +
>>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>>>                  pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>>> --- a/xen/drivers/passthrough/amd/iommu_map.c
>>> +++ b/xen/drivers/passthrough/amd/iommu_map.c
>>> @@ -363,6 +363,50 @@ int amd_iommu_unmap_page(struct domain *
>>>      return 0;
>>>  }
>>>  
>>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
>>> +                             dfn_t dfn)
>>> +{
>>> +    mfn_t pt_mfn;
>>> +    unsigned int level;
>>> +    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;
>>> +
>>> +    if ( !dt[dev_id].tv )
>>> +    {
>>> +        printk("%pp: no root\n", &PCI_SBDF2(iommu->seg, dev_id));
>>> +        return;
>>> +    }
>>> +
>>> +    pt_mfn = _mfn(dt[dev_id].pt_root);
>>> +    level = dt[dev_id].paging_mode;
>>> +    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
>>> +           &PCI_SBDF2(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));
>>> +
>>> +    while ( level )
>>> +    {
>>> +        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
>>> +        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
>>> +        union amd_iommu_pte pte = pt[idx];
>>
>> Don't you need to take a lock here (mapping_lock maybe?) in order to
>> prevent changes to the IOMMU page tables while walking them?
> 
> Generally speaking - yes. But see the description saying "largely
> matches VT-d's behavior". On VT-d both the IOMMU lock and the mapping
> lock would need acquiring to be safe (the former could perhaps be
> dropped early). Likewise here. If I wanted to do so here, I ought to
> add a prereq patch adjusting the VT-d function. The main "excuse" not
> to do so is/was probably the size of the series.

Which in turn would call for {amd,vtd}_dump_page_tables() to gain proper
locking. Not sure where this would end; these further items are more and
more unrelated to the actual purpose of this series (whereas I needed the
patch here anyway for debugging purposes) ...

Jan
Jan Beulich Dec. 3, 2021, 9:59 a.m. UTC | #4
On 03.12.2021 10:03, Roger Pau Monné wrote:
> On Fri, Sep 24, 2021 at 11:51:15AM +0200, Jan Beulich wrote:
>> This is to aid diagnosing issues and largely matches VT-d's behavior.
>> Since I'm adding permissions output here as well, take the opportunity
>> and also add their displaying to amd_dump_page_table_level().
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/drivers/passthrough/amd/iommu.h
>> +++ b/xen/drivers/passthrough/amd/iommu.h
>> @@ -243,6 +243,8 @@ int __must_check amd_iommu_flush_iotlb_p
>>                                               unsigned long page_count,
>>                                               unsigned int flush_flags);
>>  int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
>> +                             dfn_t dfn);
>>  
>>  /* device table functions */
>>  int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -573,6 +573,9 @@ static void parse_event_log_entry(struct
>>                 (flags & 0x002) ? " NX" : "",
>>                 (flags & 0x001) ? " GN" : "");
>>  
>> +        if ( iommu_verbose )
>> +            amd_iommu_print_entries(iommu, device_id, daddr_to_dfn(addr));
>> +
>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>>                  pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>> --- a/xen/drivers/passthrough/amd/iommu_map.c
>> +++ b/xen/drivers/passthrough/amd/iommu_map.c
>> @@ -363,6 +363,50 @@ int amd_iommu_unmap_page(struct domain *
>>      return 0;
>>  }
>>  
>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
>> +                             dfn_t dfn)
>> +{
>> +    mfn_t pt_mfn;
>> +    unsigned int level;
>> +    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;
>> +
>> +    if ( !dt[dev_id].tv )
>> +    {
>> +        printk("%pp: no root\n", &PCI_SBDF2(iommu->seg, dev_id));
>> +        return;
>> +    }
>> +
>> +    pt_mfn = _mfn(dt[dev_id].pt_root);
>> +    level = dt[dev_id].paging_mode;
>> +    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
>> +           &PCI_SBDF2(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));
>> +
>> +    while ( level )
>> +    {
>> +        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
>> +        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
>> +        union amd_iommu_pte pte = pt[idx];
> 
> Don't you need to take a lock here (mapping_lock maybe?) in order to
> prevent changes to the IOMMU page tables while walking them?

Further to my earlier reply, taking the mapping lock here isn't
straightforward, as that would mean determining the correct domain.

Jan
Roger Pau Monné Dec. 10, 2021, 10:23 a.m. UTC | #5
On Fri, Dec 03, 2021 at 10:55:54AM +0100, Jan Beulich wrote:
> On 03.12.2021 10:49, Jan Beulich wrote:
> > On 03.12.2021 10:03, Roger Pau Monné wrote:
> >> On Fri, Sep 24, 2021 at 11:51:15AM +0200, Jan Beulich wrote:
> >>> This is to aid diagnosing issues and largely matches VT-d's behavior.
> >>> Since I'm adding permissions output here as well, take the opportunity
> >>> and also add their displaying to amd_dump_page_table_level().
> >>>
> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>
> >>> --- a/xen/drivers/passthrough/amd/iommu.h
> >>> +++ b/xen/drivers/passthrough/amd/iommu.h
> >>> @@ -243,6 +243,8 @@ int __must_check amd_iommu_flush_iotlb_p
> >>>                                               unsigned long page_count,
> >>>                                               unsigned int flush_flags);
> >>>  int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
> >>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
> >>> +                             dfn_t dfn);
> >>>  
> >>>  /* device table functions */
> >>>  int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
> >>> --- a/xen/drivers/passthrough/amd/iommu_init.c
> >>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> >>> @@ -573,6 +573,9 @@ static void parse_event_log_entry(struct
> >>>                 (flags & 0x002) ? " NX" : "",
> >>>                 (flags & 0x001) ? " GN" : "");
> >>>  
> >>> +        if ( iommu_verbose )
> >>> +            amd_iommu_print_entries(iommu, device_id, daddr_to_dfn(addr));
> >>> +
> >>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
> >>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
> >>>                  pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
> >>> --- a/xen/drivers/passthrough/amd/iommu_map.c
> >>> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> >>> @@ -363,6 +363,50 @@ int amd_iommu_unmap_page(struct domain *
> >>>      return 0;
> >>>  }
> >>>  
> >>> +void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
> >>> +                             dfn_t dfn)
> >>> +{
> >>> +    mfn_t pt_mfn;
> >>> +    unsigned int level;
> >>> +    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;
> >>> +
> >>> +    if ( !dt[dev_id].tv )
> >>> +    {
> >>> +        printk("%pp: no root\n", &PCI_SBDF2(iommu->seg, dev_id));
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    pt_mfn = _mfn(dt[dev_id].pt_root);
> >>> +    level = dt[dev_id].paging_mode;
> >>> +    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
> >>> +           &PCI_SBDF2(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));
> >>> +
> >>> +    while ( level )
> >>> +    {
> >>> +        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
> >>> +        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
> >>> +        union amd_iommu_pte pte = pt[idx];
> >>
> >> Don't you need to take a lock here (mapping_lock maybe?) in order to
> >> prevent changes to the IOMMU page tables while walking them?
> > 
> > Generally speaking - yes. But see the description saying "largely
> > matches VT-d's behavior". On VT-d both the IOMMU lock and the mapping
> > lock would need acquiring to be safe (the former could perhaps be
> > dropped early). Likewise here. If I wanted to do so here, I ought to
> > add a prereq patch adjusting the VT-d function. The main "excuse" not
> > to do so is/was probably the size of the series.
> 
> Which in turn would call for {amd,vtd}_dump_page_tables() to gain proper
> locking. Not sure where this would end; these further items are more and
> more unrelated to the actual purpose of this series (whereas I needed the
> patch here anyway for debugging purposes) ...

I think adding a comment regarding the lack of locking due to the
function only being used as a debug aide would help clarify this. I
don't think we support running with iommu debug or verbose modes.

Thanks, Roger.
diff mbox series

Patch

--- a/xen/drivers/passthrough/amd/iommu.h
+++ b/xen/drivers/passthrough/amd/iommu.h
@@ -243,6 +243,8 @@  int __must_check amd_iommu_flush_iotlb_p
                                              unsigned long page_count,
                                              unsigned int flush_flags);
 int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
+void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
+                             dfn_t dfn);
 
 /* device table functions */
 int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -573,6 +573,9 @@  static void parse_event_log_entry(struct
                (flags & 0x002) ? " NX" : "",
                (flags & 0x001) ? " GN" : "");
 
+        if ( iommu_verbose )
+            amd_iommu_print_entries(iommu, device_id, daddr_to_dfn(addr));
+
         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
             if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
                 pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -363,6 +363,50 @@  int amd_iommu_unmap_page(struct domain *
     return 0;
 }
 
+void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
+                             dfn_t dfn)
+{
+    mfn_t pt_mfn;
+    unsigned int level;
+    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;
+
+    if ( !dt[dev_id].tv )
+    {
+        printk("%pp: no root\n", &PCI_SBDF2(iommu->seg, dev_id));
+        return;
+    }
+
+    pt_mfn = _mfn(dt[dev_id].pt_root);
+    level = dt[dev_id].paging_mode;
+    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
+           &PCI_SBDF2(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));
+
+    while ( level )
+    {
+        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
+        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
+        union amd_iommu_pte pte = pt[idx];
+
+        unmap_domain_page(pt);
+
+        printk("  L%u[%03x] = %"PRIx64" %c%c\n", level, idx, pte.raw,
+               pte.pr ? pte.ir ? 'r' : '-' : 'n',
+               pte.pr ? pte.iw ? 'w' : '-' : 'p');
+
+        if ( !pte.pr )
+            break;
+
+        if ( pte.next_level >= level )
+        {
+            printk("  L%u[%03x]: next: %u\n", level, idx, pte.next_level);
+            break;
+        }
+
+        pt_mfn = _mfn(pte.mfn);
+        level = pte.next_level;
+    }
+}
+
 static unsigned long flush_count(unsigned long dfn, unsigned long page_count,
                                  unsigned int order)
 {
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -607,10 +607,11 @@  static void amd_dump_page_table_level(st
                 mfn_to_page(_mfn(pde->mfn)), pde->next_level,
                 address, indent + 1);
         else
-            printk("%*sdfn: %08lx  mfn: %08lx\n",
+            printk("%*sdfn: %08lx  mfn: %08lx  %c%c\n",
                    indent, "",
                    (unsigned long)PFN_DOWN(address),
-                   (unsigned long)PFN_DOWN(pfn_to_paddr(pde->mfn)));
+                   (unsigned long)PFN_DOWN(pfn_to_paddr(pde->mfn)),
+                   pde->ir ? 'r' : '-', pde->iw ? 'w' : '-');
     }
 
     unmap_domain_page(table_vaddr);