diff mbox series

[v1,01/17] intel_iommu: Use the latest fault reasons defined by spec

Message ID 20240718081636.879544-2-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series intel_iommu: Enable stage-1 translation for emulated device | expand

Commit Message

Duan, Zhenzhong July 18, 2024, 8:16 a.m. UTC
From: Yu Zhang <yu.c.zhang@linux.intel.com>

Spec revision 3.0 or above defines more detailed fault reasons for
scalable mode. So introduce them into emulation code, see spec
section 7.1.2 for details.

Note spec revision has no relation with VERSION register, Guest
kernel should not use that register to judge what features are
supported. Instead cap/ecap bits should be checked.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  9 ++++++++-
 hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
 2 files changed, 24 insertions(+), 10 deletions(-)

Comments

CLEMENT MATHIEU--DRIF July 23, 2024, 7:12 a.m. UTC | #1
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>


On 18/07/2024 10:16, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> From: Yu Zhang <yu.c.zhang@linux.intel.com>
>
> Spec revision 3.0 or above defines more detailed fault reasons for
> scalable mode. So introduce them into emulation code, see spec
> section 7.1.2 for details.
>
> Note spec revision has no relation with VERSION register, Guest
> kernel should not use that register to judge what features are
> supported. Instead cap/ecap bits should be checked.
>
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_internal.h |  9 ++++++++-
>   hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
>   2 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f8cf99bddf..c0ca7b372f 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
>                                     * request while disabled */
>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>
> -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
> +    /* PASID directory entry access failure */
> +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
> +    /* The Present(P) field of pasid directory entry is 0 */
> +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
> +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
> +    /* The Present(P) field of pasid table entry is 0 */
> +    VTD_FR_PASID_ENTRY_P = 0x59,
> +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
>
>       /* Output address in the interrupt address range for scalable mode */
>       VTD_FR_SM_INTERRUPT_ADDR = 0x87,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 37c21a0aec..e65f5b29a5 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
>       addr = pasid_dir_base + index * entry_size;
>       if (dma_memory_read(&address_space_memory, addr,
>                           pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
>       }
>
>       pdire->val = le64_to_cpu(pdire->val);
> @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>                                             dma_addr_t addr,
>                                             VTDPASIDEntry *pe)
>   {
> +    uint8_t pgtt;
>       uint32_t index;
>       dma_addr_t entry_size;
>       X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>       addr = addr + index * entry_size;
>       if (dma_memory_read(&address_space_memory, addr,
>                           pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
>       }
>       for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
>           pe->val[i] = le64_to_cpu(pe->val[i]);
> @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>
>       /* Do translation type check */
>       if (!vtd_pe_type_check(x86_iommu, pe)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
>       }
>
> -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +    pgtt = VTD_PE_GET_TYPE(pe);
> +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
> +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
>       }
>
>       return 0;
> @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>       }
>
>       if (!vtd_pdire_present(&pdire)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>       }
>
>       ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
> @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>       }
>
>       if (!vtd_pe_present(pe)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_ENTRY_P;
>       }
>
>       return 0;
> @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
>       }
>
>       if (!vtd_pdire_present(&pdire)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>       }
>
>       /*
> @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
>       [VTD_FR_ROOT_ENTRY_RSVD] = false,
>       [VTD_FR_PAGING_ENTRY_RSVD] = true,
>       [VTD_FR_CONTEXT_ENTRY_TT] = true,
> -    [VTD_FR_PASID_TABLE_INV] = false,
> +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
> +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
> +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
> +    [VTD_FR_PASID_ENTRY_P] = true,
> +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>       [VTD_FR_SM_INTERRUPT_ADDR] = true,
>       [VTD_FR_MAX] = false,
>   };
> --
> 2.34.1
>
Yi Liu July 29, 2024, 7:39 a.m. UTC | #2
On 2024/7/18 16:16, Zhenzhong Duan wrote:
> From: Yu Zhang <yu.c.zhang@linux.intel.com>
> 
> Spec revision 3.0 or above defines more detailed fault reasons for
> scalable mode. So introduce them into emulation code, see spec
> section 7.1.2 for details.
> 
> Note spec revision has no relation with VERSION register, Guest
> kernel should not use that register to judge what features are
> supported. Instead cap/ecap bits should be checked.
> 
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_internal.h |  9 ++++++++-
>   hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
>   2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f8cf99bddf..c0ca7b372f 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
>                                     * request while disabled */
>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>   
> -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
> +    /* PASID directory entry access failure */
> +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
> +    /* The Present(P) field of pasid directory entry is 0 */
> +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
> +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
> +    /* The Present(P) field of pasid table entry is 0 */
> +    VTD_FR_PASID_ENTRY_P = 0x59,
> +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
>   
>       /* Output address in the interrupt address range for scalable mode */
>       VTD_FR_SM_INTERRUPT_ADDR = 0x87,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 37c21a0aec..e65f5b29a5 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
>       addr = pasid_dir_base + index * entry_size;
>       if (dma_memory_read(&address_space_memory, addr,
>                           pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
>       }
>   
>       pdire->val = le64_to_cpu(pdire->val);
> @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>                                             dma_addr_t addr,
>                                             VTDPASIDEntry *pe)
>   {
> +    uint8_t pgtt;
>       uint32_t index;
>       dma_addr_t entry_size;
>       X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>       addr = addr + index * entry_size;
>       if (dma_memory_read(&address_space_memory, addr,
>                           pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
>       }
>       for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
>           pe->val[i] = le64_to_cpu(pe->val[i]);
> @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>   
>       /* Do translation type check */
>       if (!vtd_pe_type_check(x86_iommu, pe)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
>       }
>   
> -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +    pgtt = VTD_PE_GET_TYPE(pe);
> +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
> +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
>       }
>   
>       return 0;
> @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>       }
>   
>       if (!vtd_pdire_present(&pdire)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>       }
>   
>       ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
> @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>       }
>   
>       if (!vtd_pe_present(pe)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_ENTRY_P;
>       }
>   
>       return 0;
> @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
>       }
>   
>       if (!vtd_pdire_present(&pdire)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>       }
>   
>       /*
> @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
>       [VTD_FR_ROOT_ENTRY_RSVD] = false,
>       [VTD_FR_PAGING_ENTRY_RSVD] = true,
>       [VTD_FR_CONTEXT_ENTRY_TT] = true,
> -    [VTD_FR_PASID_TABLE_INV] = false,
> +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
> +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
> +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
> +    [VTD_FR_PASID_ENTRY_P] = true,
> +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>       [VTD_FR_SM_INTERRUPT_ADDR] = true,
>       [VTD_FR_MAX] = false,
>   };

@Jason, @Michael,

Do you know the rule of setting this table? I noticed it was introduced
since day-1[1]. I didn't see any history discussion on it on lore. So not
quite sure about the purpose of it. Per the usage of this table, it is used
as a filter when the iommu driver has set the FPD bit. If FPD is set, some
errors need not to trigger a trace which is mostly for debug purpose.

I noticed Peter had asked it as well[2]. But I don't think it was clarified
clearly. May we have a clarification for it here? BTW. I didn't see VT-d
spec has any definition on it. So it should just be a software trick. :)

[1] 
https://lore.kernel.org/qemu-devel/1408168544-28605-3-git-send-email-tamlokveer@gmail.com/
[2] https://lore.kernel.org/qemu-devel/20190301065219.GA22229@xz-x1/
Michael S. Tsirkin July 29, 2024, 8:42 a.m. UTC | #3
On Mon, Jul 29, 2024 at 03:39:03PM +0800, Yi Liu wrote:
> On 2024/7/18 16:16, Zhenzhong Duan wrote:
> > From: Yu Zhang <yu.c.zhang@linux.intel.com>
> > 
> > Spec revision 3.0 or above defines more detailed fault reasons for
> > scalable mode. So introduce them into emulation code, see spec
> > section 7.1.2 for details.
> > 
> > Note spec revision has no relation with VERSION register, Guest
> > kernel should not use that register to judge what features are
> > supported. Instead cap/ecap bits should be checked.
> > 
> > Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> > ---
> >   hw/i386/intel_iommu_internal.h |  9 ++++++++-
> >   hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
> >   2 files changed, 24 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> > index f8cf99bddf..c0ca7b372f 100644
> > --- a/hw/i386/intel_iommu_internal.h
> > +++ b/hw/i386/intel_iommu_internal.h
> > @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
> >                                     * request while disabled */
> >       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
> > -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
> > +    /* PASID directory entry access failure */
> > +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
> > +    /* The Present(P) field of pasid directory entry is 0 */
> > +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
> > +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
> > +    /* The Present(P) field of pasid table entry is 0 */
> > +    VTD_FR_PASID_ENTRY_P = 0x59,
> > +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
> >       /* Output address in the interrupt address range for scalable mode */
> >       VTD_FR_SM_INTERRUPT_ADDR = 0x87,
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 37c21a0aec..e65f5b29a5 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
> >       addr = pasid_dir_base + index * entry_size;
> >       if (dma_memory_read(&address_space_memory, addr,
> >                           pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
> >       }
> >       pdire->val = le64_to_cpu(pdire->val);
> > @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >                                             dma_addr_t addr,
> >                                             VTDPASIDEntry *pe)
> >   {
> > +    uint8_t pgtt;
> >       uint32_t index;
> >       dma_addr_t entry_size;
> >       X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> > @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >       addr = addr + index * entry_size;
> >       if (dma_memory_read(&address_space_memory, addr,
> >                           pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
> >       }
> >       for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
> >           pe->val[i] = le64_to_cpu(pe->val[i]);
> > @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >       /* Do translation type check */
> >       if (!vtd_pe_type_check(x86_iommu, pe)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
> >       }
> > -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +    pgtt = VTD_PE_GET_TYPE(pe);
> > +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
> > +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> > +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
> >       }
> >       return 0;
> > @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
> >       }
> >       if (!vtd_pdire_present(&pdire)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_DIR_ENTRY_P;
> >       }
> >       ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
> > @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
> >       }
> >       if (!vtd_pe_present(pe)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_ENTRY_P;
> >       }
> >       return 0;
> > @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
> >       }
> >       if (!vtd_pdire_present(&pdire)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_DIR_ENTRY_P;
> >       }
> >       /*
> > @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
> >       [VTD_FR_ROOT_ENTRY_RSVD] = false,
> >       [VTD_FR_PAGING_ENTRY_RSVD] = true,
> >       [VTD_FR_CONTEXT_ENTRY_TT] = true,
> > -    [VTD_FR_PASID_TABLE_INV] = false,
> > +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
> > +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
> > +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
> > +    [VTD_FR_PASID_ENTRY_P] = true,
> > +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
> >       [VTD_FR_SM_INTERRUPT_ADDR] = true,
> >       [VTD_FR_MAX] = false,
> >   };
> 
> @Jason, @Michael,
> 
> Do you know the rule of setting this table? I noticed it was introduced
> since day-1[1]. I didn't see any history discussion on it on lore. So not
> quite sure about the purpose of it. Per the usage of this table, it is used
> as a filter when the iommu driver has set the FPD bit. If FPD is set, some
> errors need not to trigger a trace which is mostly for debug purpose.
> 
> I noticed Peter had asked it as well[2]. But I don't think it was clarified
> clearly. May we have a clarification for it here? BTW. I didn't see VT-d
> spec has any definition on it. So it should just be a software trick. :)
> 
> [1] https://lore.kernel.org/qemu-devel/1408168544-28605-3-git-send-email-tamlokveer@gmail.com/
> [2] https://lore.kernel.org/qemu-devel/20190301065219.GA22229@xz-x1/
> 
> -- 
> Regards,
> Yi Liu

Are you asking for a definition of qualified fault conditions?


7.1.1 Non-Recoverable Address Translation Faults
Non-recoverable address translation faults can be detected by remapping hardware for many different
kinds of requests as shown by Table 26. A non-recoverable fault condition is considered “qualified” if
software can suppress reporting of the fault by setting one of the Fault Processing Disable (FPD) bits
available in one or more of the address translation structures (i.e., the context-entry, scalable-mode
context-entry, scalable-mode PASID-directory entry, scalable-mode PASID-table entry). For a request
that encounters a “qualified” non-recoverable fault condition, if the remapping hardware encountered
any translation structure entry with an FPD field value of 1, the remapping hardware must not report
the fault to software. For example, when processing a request that encounters an FPD field with a value
of 1 in the scalable-mode context-entry and encounters any “qualified” fault such as SCT.*, SPD.*, SPT.*,
SFS.*, SSS.*, or SGN.*, the remapping hardware will not report the fault to software. Memory requests
that result in non-recoverable address translation faults are blocked by hardware
Yi Liu July 29, 2024, 9:39 a.m. UTC | #4
On 2024/7/29 16:42, Michael S. Tsirkin wrote:
> On Mon, Jul 29, 2024 at 03:39:03PM +0800, Yi Liu wrote:
>> On 2024/7/18 16:16, Zhenzhong Duan wrote:
>>> From: Yu Zhang <yu.c.zhang@linux.intel.com>
>>>
>>> Spec revision 3.0 or above defines more detailed fault reasons for
>>> scalable mode. So introduce them into emulation code, see spec
>>> section 7.1.2 for details.
>>>
>>> Note spec revision has no relation with VERSION register, Guest
>>> kernel should not use that register to judge what features are
>>> supported. Instead cap/ecap bits should be checked.
>>>
>>> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    hw/i386/intel_iommu_internal.h |  9 ++++++++-
>>>    hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
>>>    2 files changed, 24 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>> index f8cf99bddf..c0ca7b372f 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
>>>                                      * request while disabled */
>>>        VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>> -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
>>> +    /* PASID directory entry access failure */
>>> +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>> +    /* The Present(P) field of pasid directory entry is 0 */
>>> +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
>>> +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
>>> +    /* The Present(P) field of pasid table entry is 0 */
>>> +    VTD_FR_PASID_ENTRY_P = 0x59,
>>> +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
>>>        /* Output address in the interrupt address range for scalable mode */
>>>        VTD_FR_SM_INTERRUPT_ADDR = 0x87,
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 37c21a0aec..e65f5b29a5 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
>>>        addr = pasid_dir_base + index * entry_size;
>>>        if (dma_memory_read(&address_space_memory, addr,
>>>                            pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
>>>        }
>>>        pdire->val = le64_to_cpu(pdire->val);
>>> @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>                                              dma_addr_t addr,
>>>                                              VTDPASIDEntry *pe)
>>>    {
>>> +    uint8_t pgtt;
>>>        uint32_t index;
>>>        dma_addr_t entry_size;
>>>        X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>>> @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>        addr = addr + index * entry_size;
>>>        if (dma_memory_read(&address_space_memory, addr,
>>>                            pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
>>>        }
>>>        for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
>>>            pe->val[i] = le64_to_cpu(pe->val[i]);
>>> @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>        /* Do translation type check */
>>>        if (!vtd_pe_type_check(x86_iommu, pe)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
>>>        }
>>> -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +    pgtt = VTD_PE_GET_TYPE(pe);
>>> +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
>>> +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
>>> +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
>>>        }
>>>        return 0;
>>> @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>>>        }
>>>        if (!vtd_pdire_present(&pdire)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>>>        }
>>>        ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
>>> @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>>>        }
>>>        if (!vtd_pe_present(pe)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_ENTRY_P;
>>>        }
>>>        return 0;
>>> @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
>>>        }
>>>        if (!vtd_pdire_present(&pdire)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>>>        }
>>>        /*
>>> @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
>>>        [VTD_FR_ROOT_ENTRY_RSVD] = false,
>>>        [VTD_FR_PAGING_ENTRY_RSVD] = true,
>>>        [VTD_FR_CONTEXT_ENTRY_TT] = true,
>>> -    [VTD_FR_PASID_TABLE_INV] = false,
>>> +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
>>> +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
>>> +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
>>> +    [VTD_FR_PASID_ENTRY_P] = true,
>>> +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>>>        [VTD_FR_SM_INTERRUPT_ADDR] = true,
>>>        [VTD_FR_MAX] = false,
>>>    };
>>
>> @Jason, @Michael,
>>
>> Do you know the rule of setting this table? I noticed it was introduced
>> since day-1[1]. I didn't see any history discussion on it on lore. So not
>> quite sure about the purpose of it. Per the usage of this table, it is used
>> as a filter when the iommu driver has set the FPD bit. If FPD is set, some
>> errors need not to trigger a trace which is mostly for debug purpose.
>>
>> I noticed Peter had asked it as well[2]. But I don't think it was clarified
>> clearly. May we have a clarification for it here? BTW. I didn't see VT-d
>> spec has any definition on it. So it should just be a software trick. :)
>>
>> [1] https://lore.kernel.org/qemu-devel/1408168544-28605-3-git-send-email-tamlokveer@gmail.com/
>> [2] https://lore.kernel.org/qemu-devel/20190301065219.GA22229@xz-x1/
>>
>> -- 
>> Regards,
>> Yi Liu
> 
> Are you asking for a definition of qualified fault conditions?
> 
> 
> 7.1.1 Non-Recoverable Address Translation Faults
> Non-recoverable address translation faults can be detected by remapping hardware for many different
> kinds of requests as shown by Table 26. A non-recoverable fault condition is considered “qualified” if
> software can suppress reporting of the fault by setting one of the Fault Processing Disable (FPD) bits
> available in one or more of the address translation structures (i.e., the context-entry, scalable-mode
> context-entry, scalable-mode PASID-directory entry, scalable-mode PASID-table entry). For a request
> that encounters a “qualified” non-recoverable fault condition, if the remapping hardware encountered
> any translation structure entry with an FPD field value of 1, the remapping hardware must not report
> the fault to software. For example, when processing a request that encounters an FPD field with a value
> of 1 in the scalable-mode context-entry and encounters any “qualified” fault such as SCT.*, SPD.*, SPT.*,
> SFS.*, SSS.*, or SGN.*, the remapping hardware will not report the fault to software. Memory requests
> that result in non-recoverable address translation faults are blocked by hardware
> 

I see. Table 26 has the "qualified" column. It's clear to me now. Thanks. :)
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99bddf..c0ca7b372f 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -311,7 +311,14 @@  typedef enum VTDFaultReason {
                                   * request while disabled */
     VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
 
-    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
+    /* PASID directory entry access failure */
+    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
+    /* The Present(P) field of pasid directory entry is 0 */
+    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
+    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
+    /* The Present(P) field of pasid table entry is 0 */
+    VTD_FR_PASID_ENTRY_P = 0x59,
+    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
 
     /* Output address in the interrupt address range for scalable mode */
     VTD_FR_SM_INTERRUPT_ADDR = 0x87,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 37c21a0aec..e65f5b29a5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -796,7 +796,7 @@  static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
     addr = pasid_dir_base + index * entry_size;
     if (dma_memory_read(&address_space_memory, addr,
                         pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_DIR_ACCESS_ERR;
     }
 
     pdire->val = le64_to_cpu(pdire->val);
@@ -814,6 +814,7 @@  static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
                                           dma_addr_t addr,
                                           VTDPASIDEntry *pe)
 {
+    uint8_t pgtt;
     uint32_t index;
     dma_addr_t entry_size;
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -823,7 +824,7 @@  static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
     addr = addr + index * entry_size;
     if (dma_memory_read(&address_space_memory, addr,
                         pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
     }
     for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
         pe->val[i] = le64_to_cpu(pe->val[i]);
@@ -831,11 +832,13 @@  static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
 
     /* Do translation type check */
     if (!vtd_pe_type_check(x86_iommu, pe)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_TABLE_ENTRY_INV;
     }
 
-    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
-        return -VTD_FR_PASID_TABLE_INV;
+    pgtt = VTD_PE_GET_TYPE(pe);
+    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
+        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
+            return -VTD_FR_PASID_TABLE_ENTRY_INV;
     }
 
     return 0;
@@ -876,7 +879,7 @@  static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
     }
 
     if (!vtd_pdire_present(&pdire)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_DIR_ENTRY_P;
     }
 
     ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
@@ -885,7 +888,7 @@  static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
     }
 
     if (!vtd_pe_present(pe)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_ENTRY_P;
     }
 
     return 0;
@@ -938,7 +941,7 @@  static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
     }
 
     if (!vtd_pdire_present(&pdire)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_DIR_ENTRY_P;
     }
 
     /*
@@ -1795,7 +1798,11 @@  static const bool vtd_qualified_faults[] = {
     [VTD_FR_ROOT_ENTRY_RSVD] = false,
     [VTD_FR_PAGING_ENTRY_RSVD] = true,
     [VTD_FR_CONTEXT_ENTRY_TT] = true,
-    [VTD_FR_PASID_TABLE_INV] = false,
+    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
+    [VTD_FR_PASID_DIR_ENTRY_P] = true,
+    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
+    [VTD_FR_PASID_ENTRY_P] = true,
+    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
     [VTD_FR_SM_INTERRUPT_ADDR] = true,
     [VTD_FR_MAX] = false,
 };