diff mbox series

[RFC,v3,14/21] iommu: introduce device fault data

Message ID 20190108102633.17482-15-eric.auger@redhat.com (mailing list archive)
State New, archived
Headers show
Series SMMUv3 Nested Stage Setup | expand

Commit Message

Eric Auger Jan. 8, 2019, 10:26 a.m. UTC
From: Jacob Pan <jacob.jun.pan@linux.intel.com>

Device faults detected by IOMMU can be reported outside IOMMU
subsystem for further processing. This patch intends to provide
a generic device fault data such that device drivers can be
communicated with IOMMU faults without model specific knowledge.

The proposed format is the result of discussion at:
https://lkml.org/lkml/2017/11/10/291
Part of the code is based on Jean-Philippe Brucker's patchset
(https://patchwork.kernel.org/patch/9989315/).

The assumption is that model specific IOMMU driver can filter and
handle most of the internal faults if the cause is within IOMMU driver
control. Therefore, the fault reasons can be reported are grouped
and generalized based common specifications such as PCI ATS.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
[moved part of the iommu_fault_event struct in the uapi, enriched
 the fault reasons to be able to map unrecoverable SMMUv3 errors]
---
 include/linux/iommu.h      | 55 ++++++++++++++++++++++++-
 include/uapi/linux/iommu.h | 83 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 2 deletions(-)

Comments

Jean-Philippe Brucker Jan. 11, 2019, 11:06 a.m. UTC | #1
On 10/01/2019 18:45, Jacob Pan wrote:
> On Tue,  8 Jan 2019 11:26:26 +0100
> Eric Auger <eric.auger@redhat.com> wrote:
> 
>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>
>> Device faults detected by IOMMU can be reported outside IOMMU
>> subsystem for further processing. This patch intends to provide
>> a generic device fault data such that device drivers can be
>> communicated with IOMMU faults without model specific knowledge.
>>
>> The proposed format is the result of discussion at:
>> https://lkml.org/lkml/2017/11/10/291
>> Part of the code is based on Jean-Philippe Brucker's patchset
>> (https://patchwork.kernel.org/patch/9989315/).
>>
>> The assumption is that model specific IOMMU driver can filter and
>> handle most of the internal faults if the cause is within IOMMU driver
>> control. Therefore, the fault reasons can be reported are grouped
>> and generalized based common specifications such as PCI ATS.
>>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> [moved part of the iommu_fault_event struct in the uapi, enriched
>>  the fault reasons to be able to map unrecoverable SMMUv3 errors]
>> ---
>>  include/linux/iommu.h      | 55 ++++++++++++++++++++++++-
>>  include/uapi/linux/iommu.h | 83
>> ++++++++++++++++++++++++++++++++++++++ 2 files changed, 136
>> insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 244c1a3d5989..1dedc2d247c2 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -49,13 +49,17 @@ struct bus_type;
>>  struct device;
>>  struct iommu_domain;
>>  struct notifier_block;
>> +struct iommu_fault_event;
>>  
>>  /* iommu fault flags */
>> -#define IOMMU_FAULT_READ	0x0
>> -#define IOMMU_FAULT_WRITE	0x1
>> +#define IOMMU_FAULT_READ		(1 << 0)
>> +#define IOMMU_FAULT_WRITE		(1 << 1)
>> +#define IOMMU_FAULT_EXEC		(1 << 2)
>> +#define IOMMU_FAULT_PRIV		(1 << 3)
>>  
>>  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
>>  			struct device *, unsigned long, int, void *);
>> +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *,
>> void *); 
>>  struct iommu_domain_geometry {
>>  	dma_addr_t aperture_start; /* First address that can be
>> mapped    */ @@ -255,6 +259,52 @@ struct iommu_device {
>>  	struct device *dev;
>>  };
>>  
>> +/**
>> + * struct iommu_fault_event - Generic per device fault data
>> + *
>> + * - PCI and non-PCI devices
>> + * - Recoverable faults (e.g. page request), information based on
>> PCI ATS
>> + * and PASID spec.
>> + * - Un-recoverable faults of device interest
>> + * - DMA remapping and IRQ remapping faults
>> + *
>> + * @fault: fault descriptor
>> + * @device_private: if present, uniquely identify device-specific
>> + *                  private data for an individual page request.
>> + * @iommu_private: used by the IOMMU driver for storing
>> fault-specific
>> + *                 data. Users should not modify this field before
>> + *                 sending the fault response.
>> + */
>> +struct iommu_fault_event {
>> +	struct iommu_fault fault;
>> +	u64 device_private;
> I think we want to move device_private to uapi since it gets injected
> into the guest, then returned by guest in case of page response. For
> VT-d we also need 128 bits of private data. VT-d spec. 7.7.1

Ah, I didn't notice the format changed in VT-d rev3. On that topic, how
do we manage future extensions to the iommu_fault struct? Should we add
~48 bytes of padding after device_private, along with some flags telling
which field is valid, or deal with it using a structure version like we
do for the invalidate and bind structs? In the first case, iommu_fault
wouldn't fit in a 64-byte cacheline anymore, but I'm not sure we care.

> For exception tracking (e.g. unanswered page request), I can add timer
> and list info later when I include PRQ. sounds ok?
>> +	u64 iommu_private;
[...]
>> +/**
>> + * struct iommu_fault - Generic fault data
>> + *
>> + * @type contains fault type
>> + * @reason fault reasons if relevant outside IOMMU driver.
>> + * IOMMU driver internal faults are not reported.
>> + * @addr: tells the offending page address
>> + * @fetch_addr: tells the address that caused an abort, if any
>> + * @pasid: contains process address space ID, used in shared virtual
>> memory
>> + * @page_req_group_id: page request group index
>> + * @last_req: last request in a page request group
>> + * @pasid_valid: indicates if the PRQ has a valid PASID
>> + * @prot: page access protection flag:
>> + *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
>> + */
>> +
>> +struct iommu_fault {
>> +	__u32	type;   /* enum iommu_fault_type */
>> +	__u32	reason; /* enum iommu_fault_reason */
>> +	__u64	addr;
>> +	__u64	fetch_addr;
>> +	__u32	pasid;
>> +	__u32	page_req_group_id;
>> +	__u32	last_req;
>> +	__u32	pasid_valid;
>> +	__u32	prot;
>> +	__u32	access;

What does @access contain? Can it be squashed into @prot?

Thanks,
Jean

> relocated to uapi, Yi can you confirm?
> 	__u64 device_private[2];
> 
>> +};
>>  #endif /* _UAPI_IOMMU_H */
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>
Jacob Pan Jan. 14, 2019, 10:32 p.m. UTC | #2
On Fri, 11 Jan 2019 11:06:29 +0000
Jean-Philippe Brucker <jean-philippe.brucker@arm.com> wrote:

> On 10/01/2019 18:45, Jacob Pan wrote:
> > On Tue,  8 Jan 2019 11:26:26 +0100
> > Eric Auger <eric.auger@redhat.com> wrote:
> >   
> >> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> >>
> >> Device faults detected by IOMMU can be reported outside IOMMU
> >> subsystem for further processing. This patch intends to provide
> >> a generic device fault data such that device drivers can be
> >> communicated with IOMMU faults without model specific knowledge.
> >>
> >> The proposed format is the result of discussion at:
> >> https://lkml.org/lkml/2017/11/10/291
> >> Part of the code is based on Jean-Philippe Brucker's patchset
> >> (https://patchwork.kernel.org/patch/9989315/).
> >>
> >> The assumption is that model specific IOMMU driver can filter and
> >> handle most of the internal faults if the cause is within IOMMU
> >> driver control. Therefore, the fault reasons can be reported are
> >> grouped and generalized based common specifications such as PCI
> >> ATS.
> >>
> >> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> >> Signed-off-by: Jean-Philippe Brucker
> >> <jean-philippe.brucker@arm.com> Signed-off-by: Liu, Yi L
> >> <yi.l.liu@linux.intel.com> Signed-off-by: Ashok Raj
> >> <ashok.raj@intel.com> Signed-off-by: Eric Auger
> >> <eric.auger@redhat.com> [moved part of the iommu_fault_event
> >> struct in the uapi, enriched the fault reasons to be able to map
> >> unrecoverable SMMUv3 errors] ---
> >>  include/linux/iommu.h      | 55 ++++++++++++++++++++++++-
> >>  include/uapi/linux/iommu.h | 83
> >> ++++++++++++++++++++++++++++++++++++++ 2 files changed, 136
> >> insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> >> index 244c1a3d5989..1dedc2d247c2 100644
> >> --- a/include/linux/iommu.h
> >> +++ b/include/linux/iommu.h
> >> @@ -49,13 +49,17 @@ struct bus_type;
> >>  struct device;
> >>  struct iommu_domain;
> >>  struct notifier_block;
> >> +struct iommu_fault_event;
> >>  
> >>  /* iommu fault flags */
> >> -#define IOMMU_FAULT_READ	0x0
> >> -#define IOMMU_FAULT_WRITE	0x1
> >> +#define IOMMU_FAULT_READ		(1 << 0)
> >> +#define IOMMU_FAULT_WRITE		(1 << 1)
> >> +#define IOMMU_FAULT_EXEC		(1 << 2)
> >> +#define IOMMU_FAULT_PRIV		(1 << 3)
> >>  
> >>  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
> >>  			struct device *, unsigned long, int, void
> >> *); +typedef int (*iommu_dev_fault_handler_t)(struct
> >> iommu_fault_event *, void *); 
> >>  struct iommu_domain_geometry {
> >>  	dma_addr_t aperture_start; /* First address that can be
> >> mapped    */ @@ -255,6 +259,52 @@ struct iommu_device {
> >>  	struct device *dev;
> >>  };
> >>  
> >> +/**
> >> + * struct iommu_fault_event - Generic per device fault data
> >> + *
> >> + * - PCI and non-PCI devices
> >> + * - Recoverable faults (e.g. page request), information based on
> >> PCI ATS
> >> + * and PASID spec.
> >> + * - Un-recoverable faults of device interest
> >> + * - DMA remapping and IRQ remapping faults
> >> + *
> >> + * @fault: fault descriptor
> >> + * @device_private: if present, uniquely identify device-specific
> >> + *                  private data for an individual page request.
> >> + * @iommu_private: used by the IOMMU driver for storing
> >> fault-specific
> >> + *                 data. Users should not modify this field before
> >> + *                 sending the fault response.
> >> + */
> >> +struct iommu_fault_event {
> >> +	struct iommu_fault fault;
> >> +	u64 device_private;  
> > I think we want to move device_private to uapi since it gets
> > injected into the guest, then returned by guest in case of page
> > response. For VT-d we also need 128 bits of private data. VT-d
> > spec. 7.7.1  
> 
> Ah, I didn't notice the format changed in VT-d rev3. On that topic,
> how do we manage future extensions to the iommu_fault struct? Should
> we add ~48 bytes of padding after device_private, along with some
> flags telling which field is valid, or deal with it using a structure
> version like we do for the invalidate and bind structs? In the first
> case, iommu_fault wouldn't fit in a 64-byte cacheline anymore, but
> I'm not sure we care.
> 
IMHO, I like version and padding. I don't see a need for flags once we
have version.

> > For exception tracking (e.g. unanswered page request), I can add
> > timer and list info later when I include PRQ. sounds ok?  
> >> +	u64 iommu_private;  
> [...]
> >> +/**
> >> + * struct iommu_fault - Generic fault data
> >> + *
> >> + * @type contains fault type
> >> + * @reason fault reasons if relevant outside IOMMU driver.
> >> + * IOMMU driver internal faults are not reported.
> >> + * @addr: tells the offending page address
> >> + * @fetch_addr: tells the address that caused an abort, if any
> >> + * @pasid: contains process address space ID, used in shared
> >> virtual memory
> >> + * @page_req_group_id: page request group index
> >> + * @last_req: last request in a page request group
> >> + * @pasid_valid: indicates if the PRQ has a valid PASID
> >> + * @prot: page access protection flag:
> >> + *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
> >> + */
> >> +
> >> +struct iommu_fault {
> >> +	__u32	type;   /* enum iommu_fault_type */
> >> +	__u32	reason; /* enum iommu_fault_reason */
> >> +	__u64	addr;
> >> +	__u64	fetch_addr;
> >> +	__u32	pasid;
> >> +	__u32	page_req_group_id;
> >> +	__u32	last_req;
> >> +	__u32	pasid_valid;
> >> +	__u32	prot;
> >> +	__u32	access;  
> 
> What does @access contain? Can it be squashed into @prot?
> 
I agreed.

how about this?
#define IOMMU_FAULT_VERSION_V1 0x1
struct iommu_fault {
	__u16 version;
	__u16 type;
	__u32 reason;
	__u64 addr;
	__u32 pasid;
	__u32 page_req_group_id;
	__u32 last_req : 1;
	__u32 pasid_valid : 1;
	__u32 prot;
	__u64 device_private[2];
	__u8 padding[48];
};


> Thanks,
> Jean
> 
> > relocated to uapi, Yi can you confirm?
> > 	__u64 device_private[2];
> >   
> >> +};
> >>  #endif /* _UAPI_IOMMU_H */  
> > 
> > _______________________________________________
> > iommu mailing list
> > iommu@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/iommu
> >   
> 

[Jacob Pan]
Eric Auger Jan. 15, 2019, 9:27 p.m. UTC | #3
Hi Jean,

On 1/11/19 12:06 PM, Jean-Philippe Brucker wrote:
> On 10/01/2019 18:45, Jacob Pan wrote:
>> On Tue,  8 Jan 2019 11:26:26 +0100
>> Eric Auger <eric.auger@redhat.com> wrote:
>>
>>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>
>>> Device faults detected by IOMMU can be reported outside IOMMU
>>> subsystem for further processing. This patch intends to provide
>>> a generic device fault data such that device drivers can be
>>> communicated with IOMMU faults without model specific knowledge.
>>>
>>> The proposed format is the result of discussion at:
>>> https://lkml.org/lkml/2017/11/10/291
>>> Part of the code is based on Jean-Philippe Brucker's patchset
>>> (https://patchwork.kernel.org/patch/9989315/).
>>>
>>> The assumption is that model specific IOMMU driver can filter and
>>> handle most of the internal faults if the cause is within IOMMU driver
>>> control. Therefore, the fault reasons can be reported are grouped
>>> and generalized based common specifications such as PCI ATS.
>>>
>>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>> [moved part of the iommu_fault_event struct in the uapi, enriched
>>>  the fault reasons to be able to map unrecoverable SMMUv3 errors]
>>> ---
>>>  include/linux/iommu.h      | 55 ++++++++++++++++++++++++-
>>>  include/uapi/linux/iommu.h | 83
>>> ++++++++++++++++++++++++++++++++++++++ 2 files changed, 136
>>> insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>>> index 244c1a3d5989..1dedc2d247c2 100644
>>> --- a/include/linux/iommu.h
>>> +++ b/include/linux/iommu.h
>>> @@ -49,13 +49,17 @@ struct bus_type;
>>>  struct device;
>>>  struct iommu_domain;
>>>  struct notifier_block;
>>> +struct iommu_fault_event;
>>>  
>>>  /* iommu fault flags */
>>> -#define IOMMU_FAULT_READ	0x0
>>> -#define IOMMU_FAULT_WRITE	0x1
>>> +#define IOMMU_FAULT_READ		(1 << 0)
>>> +#define IOMMU_FAULT_WRITE		(1 << 1)
>>> +#define IOMMU_FAULT_EXEC		(1 << 2)
>>> +#define IOMMU_FAULT_PRIV		(1 << 3)
>>>  
>>>  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
>>>  			struct device *, unsigned long, int, void *);
>>> +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *,
>>> void *); 
>>>  struct iommu_domain_geometry {
>>>  	dma_addr_t aperture_start; /* First address that can be
>>> mapped    */ @@ -255,6 +259,52 @@ struct iommu_device {
>>>  	struct device *dev;
>>>  };
>>>  
>>> +/**
>>> + * struct iommu_fault_event - Generic per device fault data
>>> + *
>>> + * - PCI and non-PCI devices
>>> + * - Recoverable faults (e.g. page request), information based on
>>> PCI ATS
>>> + * and PASID spec.
>>> + * - Un-recoverable faults of device interest
>>> + * - DMA remapping and IRQ remapping faults
>>> + *
>>> + * @fault: fault descriptor
>>> + * @device_private: if present, uniquely identify device-specific
>>> + *                  private data for an individual page request.
>>> + * @iommu_private: used by the IOMMU driver for storing
>>> fault-specific
>>> + *                 data. Users should not modify this field before
>>> + *                 sending the fault response.
>>> + */
>>> +struct iommu_fault_event {
>>> +	struct iommu_fault fault;
>>> +	u64 device_private;
>> I think we want to move device_private to uapi since it gets injected
>> into the guest, then returned by guest in case of page response. For
>> VT-d we also need 128 bits of private data. VT-d spec. 7.7.1
> 
> Ah, I didn't notice the format changed in VT-d rev3. On that topic, how
> do we manage future extensions to the iommu_fault struct? Should we add
> ~48 bytes of padding after device_private, along with some flags telling
> which field is valid, or deal with it using a structure version like we
> do for the invalidate and bind structs? In the first case, iommu_fault
> wouldn't fit in a 64-byte cacheline anymore, but I'm not sure we care.
> 
>> For exception tracking (e.g. unanswered page request), I can add timer
>> and list info later when I include PRQ. sounds ok?
>>> +	u64 iommu_private;
> [...]
>>> +/**
>>> + * struct iommu_fault - Generic fault data
>>> + *
>>> + * @type contains fault type
>>> + * @reason fault reasons if relevant outside IOMMU driver.
>>> + * IOMMU driver internal faults are not reported.
>>> + * @addr: tells the offending page address
>>> + * @fetch_addr: tells the address that caused an abort, if any
>>> + * @pasid: contains process address space ID, used in shared virtual
>>> memory
>>> + * @page_req_group_id: page request group index
>>> + * @last_req: last request in a page request group
>>> + * @pasid_valid: indicates if the PRQ has a valid PASID
>>> + * @prot: page access protection flag:
>>> + *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
>>> + */
>>> +
>>> +struct iommu_fault {
>>> +	__u32	type;   /* enum iommu_fault_type */
>>> +	__u32	reason; /* enum iommu_fault_reason */
>>> +	__u64	addr;
>>> +	__u64	fetch_addr;
>>> +	__u32	pasid;
>>> +	__u32	page_req_group_id;
>>> +	__u32	last_req;
>>> +	__u32	pasid_valid;
>>> +	__u32	prot;
>>> +	__u32	access;
> 
> What does @access contain? Can it be squashed into @prot?
it was related to F_ACCESS event record and was a placeholder for
reporting access attributes of the input transaction (Rnw, InD, PnU
fields). But I wonder whether this is needed to implement such fine
level fault reporting. Do we really care?

Thanks

Eric
> 
> Thanks,
> Jean
> 
>> relocated to uapi, Yi can you confirm?
>> 	__u64 device_private[2];
>>
>>> +};
>>>  #endif /* _UAPI_IOMMU_H */
>>
>> _______________________________________________
>> iommu mailing list
>> iommu@lists.linux-foundation.org
>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>>
>
Jean-Philippe Brucker Jan. 16, 2019, 3:52 p.m. UTC | #4
On 14/01/2019 22:32, Jacob Pan wrote:
>> [...]
>>>> +/**
>>>> + * struct iommu_fault - Generic fault data
>>>> + *
>>>> + * @type contains fault type
>>>> + * @reason fault reasons if relevant outside IOMMU driver.
>>>> + * IOMMU driver internal faults are not reported.
>>>> + * @addr: tells the offending page address
>>>> + * @fetch_addr: tells the address that caused an abort, if any
>>>> + * @pasid: contains process address space ID, used in shared
>>>> virtual memory
>>>> + * @page_req_group_id: page request group index
>>>> + * @last_req: last request in a page request group
>>>> + * @pasid_valid: indicates if the PRQ has a valid PASID
>>>> + * @prot: page access protection flag:
>>>> + *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
>>>> + */
>>>> +
>>>> +struct iommu_fault {
>>>> +	__u32	type;   /* enum iommu_fault_type */
>>>> +	__u32	reason; /* enum iommu_fault_reason */
>>>> +	__u64	addr;
>>>> +	__u64	fetch_addr;
>>>> +	__u32	pasid;
>>>> +	__u32	page_req_group_id;
>>>> +	__u32	last_req;
>>>> +	__u32	pasid_valid;
>>>> +	__u32	prot;
>>>> +	__u32	access;  
>>
>> What does @access contain? Can it be squashed into @prot?
>>
> I agreed.
> 
> how about this?
> #define IOMMU_FAULT_VERSION_V1 0x1
> struct iommu_fault {
> 	__u16 version;

Right, but the version field becomes redundant when we present a batch
of these to userspace, in patch 18 (assuming we don't want to mix fault
structure versions within a batch... I certainly don't).

When introducing IOMMU_FAULT_VERSION_V2, in a distant future, I think we
still need to support a userspace that uses IOMMU_FAULT_VERSION_V1. One
strategy for this:

* We define structs iommu_fault_v1 (the current iommu_fault) and
  iommu_fault_v2.
* Userspace selects IOMMU_FAULT_VERSION_V1 when registering the fault
  queue
* The IOMMU driver fills iommu_fault_v2 and passes it to VFIO
* VFIO does its best to translate this into a iommu_fault_v1 struct

So what we need now, is a way for userspace to tell the kernel which
structure version it expects. I'm not sure we even need to pass the
actual version number we're using back to userspace. Agreeing on one
version at registration should be sufficient.

> 	__u16 type;
> 	__u32 reason;
> 	__u64 addr;

I'm in favor of keeping @fetch_addr as well, it can contain useful
information. For example, while attempting to translate an IOVA
0xfffff000, the IOMMU can't find the PASID table that we installed with
address 0xdead - the guest passed an invalid address to
bind_pasid_table(). We can then report 0xfffff000 in @addr, and 0xdead
in @fetch_addr.

> 	__u32 pasid;
> 	__u32 page_req_group_id;
> 	__u32 last_req : 1;
> 	__u32 pasid_valid : 1;

Agreed, with some explicit padding or combined as a @flag field. In fact
if we do add the @fetch_addr field, I think we need a bit that indicates
its validity as well.

Thanks,
Jean

> 	__u32 prot;
> 	__u64 device_private[2];
> 	__u8 padding[48];
> };
> 
> 
>> Thanks,
>> Jean
>>
>>> relocated to uapi, Yi can you confirm?
>>> 	__u64 device_private[2];
>>>   
>>>> +};
>>>>  #endif /* _UAPI_IOMMU_H */  
>>>
>>> _______________________________________________
>>> iommu mailing list
>>> iommu@lists.linux-foundation.org
>>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>>>   
>>
> 
> [Jacob Pan]
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>
Jean-Philippe Brucker Jan. 16, 2019, 4:54 p.m. UTC | #5
On 15/01/2019 21:27, Auger Eric wrote:
[...]
>>>>  /* iommu fault flags */
>>>> -#define IOMMU_FAULT_READ	0x0
>>>> -#define IOMMU_FAULT_WRITE	0x1
>>>> +#define IOMMU_FAULT_READ		(1 << 0)
>>>> +#define IOMMU_FAULT_WRITE		(1 << 1)
>>>> +#define IOMMU_FAULT_EXEC		(1 << 2)
>>>> +#define IOMMU_FAULT_PRIV		(1 << 3)
>>>>  
>>>>  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
>>>>  			struct device *, unsigned long, int, void *);
>>>> +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *,
>>>> void *); 
>>>>  struct iommu_domain_geometry {
>>>>  	dma_addr_t aperture_start; /* First address that can be
>>>> mapped    */ @@ -255,6 +259,52 @@ struct iommu_device {
>>>>  	struct device *dev;
>>>>  };
>>>>  
>>>> +/**
>>>> + * struct iommu_fault_event - Generic per device fault data
>>>> + *
>>>> + * - PCI and non-PCI devices
>>>> + * - Recoverable faults (e.g. page request), information based on
>>>> PCI ATS
>>>> + * and PASID spec.
>>>> + * - Un-recoverable faults of device interest
>>>> + * - DMA remapping and IRQ remapping faults
>>>> + *
>>>> + * @fault: fault descriptor
>>>> + * @device_private: if present, uniquely identify device-specific
>>>> + *                  private data for an individual page request.
>>>> + * @iommu_private: used by the IOMMU driver for storing
>>>> fault-specific
>>>> + *                 data. Users should not modify this field before
>>>> + *                 sending the fault response.
>>>> + */
>>>> +struct iommu_fault_event {
>>>> +	struct iommu_fault fault;
>>>> +	u64 device_private;
>>> I think we want to move device_private to uapi since it gets injected
>>> into the guest, then returned by guest in case of page response. For
>>> VT-d we also need 128 bits of private data. VT-d spec. 7.7.1
>>
>> Ah, I didn't notice the format changed in VT-d rev3. On that topic, how
>> do we manage future extensions to the iommu_fault struct? Should we add
>> ~48 bytes of padding after device_private, along with some flags telling
>> which field is valid, or deal with it using a structure version like we
>> do for the invalidate and bind structs? In the first case, iommu_fault
>> wouldn't fit in a 64-byte cacheline anymore, but I'm not sure we care.
>>
>>> For exception tracking (e.g. unanswered page request), I can add timer
>>> and list info later when I include PRQ. sounds ok?
>>>> +	u64 iommu_private;
>> [...]
>>>> +/**
>>>> + * struct iommu_fault - Generic fault data
>>>> + *
>>>> + * @type contains fault type
>>>> + * @reason fault reasons if relevant outside IOMMU driver.
>>>> + * IOMMU driver internal faults are not reported.
>>>> + * @addr: tells the offending page address
>>>> + * @fetch_addr: tells the address that caused an abort, if any
>>>> + * @pasid: contains process address space ID, used in shared virtual
>>>> memory
>>>> + * @page_req_group_id: page request group index
>>>> + * @last_req: last request in a page request group
>>>> + * @pasid_valid: indicates if the PRQ has a valid PASID
>>>> + * @prot: page access protection flag:
>>>> + *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
>>>> + */
>>>> +
>>>> +struct iommu_fault {
>>>> +	__u32	type;   /* enum iommu_fault_type */
>>>> +	__u32	reason; /* enum iommu_fault_reason */
>>>> +	__u64	addr;
>>>> +	__u64	fetch_addr;
>>>> +	__u32	pasid;
>>>> +	__u32	page_req_group_id;
>>>> +	__u32	last_req;
>>>> +	__u32	pasid_valid;
>>>> +	__u32	prot;
>>>> +	__u32	access;
>>
>> What does @access contain? Can it be squashed into @prot?
> it was related to F_ACCESS event record and was a placeholder for
> reporting access attributes of the input transaction (Rnw, InD, PnU
> fields). But I wonder whether this is needed to implement such fine
> level fault reporting. Do we really care?

I think we do, to properly inject PRI/Stall later. But RnW, InD and PnU
can already be described with the IOMMU_FAULT_* flags defined above.
We're missing CLASS and S2, which could also be useful for debugging.
CLASS is specific to SMMUv3 but could probably be represented with
@reason. For S2, we could keep printing stage-2 faults in the driver,
and not report them to userspace.

Thanks,
Jean
Eric Auger Jan. 16, 2019, 6:33 p.m. UTC | #6
Hi Jean,

On 1/16/19 4:52 PM, Jean-Philippe Brucker wrote:
> On 14/01/2019 22:32, Jacob Pan wrote:
>>> [...]
>>>>> +/**
>>>>> + * struct iommu_fault - Generic fault data
>>>>> + *
>>>>> + * @type contains fault type
>>>>> + * @reason fault reasons if relevant outside IOMMU driver.
>>>>> + * IOMMU driver internal faults are not reported.
>>>>> + * @addr: tells the offending page address
>>>>> + * @fetch_addr: tells the address that caused an abort, if any
>>>>> + * @pasid: contains process address space ID, used in shared
>>>>> virtual memory
>>>>> + * @page_req_group_id: page request group index
>>>>> + * @last_req: last request in a page request group
>>>>> + * @pasid_valid: indicates if the PRQ has a valid PASID
>>>>> + * @prot: page access protection flag:
>>>>> + *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
>>>>> + */
>>>>> +
>>>>> +struct iommu_fault {
>>>>> +	__u32	type;   /* enum iommu_fault_type */
>>>>> +	__u32	reason; /* enum iommu_fault_reason */
>>>>> +	__u64	addr;
>>>>> +	__u64	fetch_addr;
>>>>> +	__u32	pasid;
>>>>> +	__u32	page_req_group_id;
>>>>> +	__u32	last_req;
>>>>> +	__u32	pasid_valid;
>>>>> +	__u32	prot;
>>>>> +	__u32	access;  
>>>
>>> What does @access contain? Can it be squashed into @prot?
>>>
>> I agreed.
>>
>> how about this?
>> #define IOMMU_FAULT_VERSION_V1 0x1
>> struct iommu_fault {
>> 	__u16 version;
> 
> Right, but the version field becomes redundant when we present a batch
> of these to userspace, in patch 18 (assuming we don't want to mix fault
> structure versions within a batch... I certainly don't).>
> When introducing IOMMU_FAULT_VERSION_V2, in a distant future, I think we
> still need to support a userspace that uses IOMMU_FAULT_VERSION_V1. One
> strategy for this:
> 
> * We define structs iommu_fault_v1 (the current iommu_fault) and
>   iommu_fault_v2.
> * Userspace selects IOMMU_FAULT_VERSION_V1 when registering the fault
>   queue
> * The IOMMU driver fills iommu_fault_v2 and passes it to VFIO
> * VFIO does its best to translate this into a iommu_fault_v1 struct
> 
> So what we need now, is a way for userspace to tell the kernel which
> structure version it expects. I'm not sure we even need to pass the
> actual version number we're using back to userspace. Agreeing on one
> version at registration should be sufficient.

As we expose a VFIO region we can report its size, entry size, max
supported entry version, actual entry version in the region capabilities.

Conveying the version along with the eventfd at registration time will
require to introduce a new flag at vfio_irq_set level (if we still plan
to use this VFIO_DEVICE_SET_IRQS API) but that should be feasible.
> 
>> 	__u16 type;
>> 	__u32 reason;
>> 	__u64 addr;
> 
> I'm in favor of keeping @fetch_addr as well, it can contain useful
> information. For example, while attempting to translate an IOVA
> 0xfffff000, the IOMMU can't find the PASID table that we installed with
> address 0xdead - the guest passed an invalid address to
> bind_pasid_table(). We can then report 0xfffff000 in @addr, and 0xdead
> in @fetch_addr.
agreed
> 
>> 	__u32 pasid;
>> 	__u32 page_req_group_id;
>> 	__u32 last_req : 1;
>> 	__u32 pasid_valid : 1;
> 
> Agreed, with some explicit padding or combined as a @flag field. In fact
> if we do add the @fetch_addr field, I think we need a bit that indicates
> its validity as well.
Can't we simply state fetch_addr is valid for
IOMMU_FAULT_REASON_PASID_FETCH, IOMMU_FAULT_REASON_DEVICE_CONTEXT_FETCH,
IOMMU_FAULT_REASON_WALK_EABT which maps to its usage in SMMU spec.

Thanks

Eric

> 
> Thanks,
> Jean
> 
>> 	__u32 prot;
>> 	__u64 device_private[2];
>> 	__u8 padding[48];
>> };
>>
>>
>>> Thanks,
>>> Jean
>>>
>>>> relocated to uapi, Yi can you confirm?
>>>> 	__u64 device_private[2];
>>>>   
>>>>> +};
>>>>>  #endif /* _UAPI_IOMMU_H */  
>>>>
>>>> _______________________________________________
>>>> iommu mailing list
>>>> iommu@lists.linux-foundation.org
>>>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>>>>   
>>>
>>
>> [Jacob Pan]
>> _______________________________________________
>> iommu mailing list
>> iommu@lists.linux-foundation.org
>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>>
> 
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
>
diff mbox series

Patch

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 244c1a3d5989..1dedc2d247c2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -49,13 +49,17 @@  struct bus_type;
 struct device;
 struct iommu_domain;
 struct notifier_block;
+struct iommu_fault_event;
 
 /* iommu fault flags */
-#define IOMMU_FAULT_READ	0x0
-#define IOMMU_FAULT_WRITE	0x1
+#define IOMMU_FAULT_READ		(1 << 0)
+#define IOMMU_FAULT_WRITE		(1 << 1)
+#define IOMMU_FAULT_EXEC		(1 << 2)
+#define IOMMU_FAULT_PRIV		(1 << 3)
 
 typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
 			struct device *, unsigned long, int, void *);
+typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
 
 struct iommu_domain_geometry {
 	dma_addr_t aperture_start; /* First address that can be mapped    */
@@ -255,6 +259,52 @@  struct iommu_device {
 	struct device *dev;
 };
 
+/**
+ * struct iommu_fault_event - Generic per device fault data
+ *
+ * - PCI and non-PCI devices
+ * - Recoverable faults (e.g. page request), information based on PCI ATS
+ * and PASID spec.
+ * - Un-recoverable faults of device interest
+ * - DMA remapping and IRQ remapping faults
+ *
+ * @fault: fault descriptor
+ * @device_private: if present, uniquely identify device-specific
+ *                  private data for an individual page request.
+ * @iommu_private: used by the IOMMU driver for storing fault-specific
+ *                 data. Users should not modify this field before
+ *                 sending the fault response.
+ */
+struct iommu_fault_event {
+	struct iommu_fault fault;
+	u64 device_private;
+	u64 iommu_private;
+};
+
+/**
+ * struct iommu_fault_param - per-device IOMMU fault data
+ * @dev_fault_handler: Callback function to handle IOMMU faults at device level
+ * @data: handler private data
+ *
+ */
+struct iommu_fault_param {
+	iommu_dev_fault_handler_t handler;
+	void *data;
+};
+
+/**
+ * struct iommu_param - collection of per-device IOMMU data
+ *
+ * @fault_param: IOMMU detected device fault reporting data
+ *
+ * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
+ *	struct iommu_group	*iommu_group;
+ *	struct iommu_fwspec	*iommu_fwspec;
+ */
+struct iommu_param {
+	struct iommu_fault_param *fault_param;
+};
+
 int  iommu_device_register(struct iommu_device *iommu);
 void iommu_device_unregister(struct iommu_device *iommu);
 int  iommu_device_sysfs_add(struct iommu_device *iommu,
@@ -438,6 +488,7 @@  struct iommu_ops {};
 struct iommu_group {};
 struct iommu_fwspec {};
 struct iommu_device {};
+struct iommu_fault_param {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index f28cd9a1aa96..e9b5330a13c8 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -148,4 +148,87 @@  struct iommu_guest_msi_binding {
 	__u64		gpa;
 	__u32		granule;
 };
+
+/*  Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+	IOMMU_FAULT_DMA_UNRECOV = 1,	/* unrecoverable fault */
+	IOMMU_FAULT_PAGE_REQ,		/* page request fault */
+};
+
+enum iommu_fault_reason {
+	IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+	/* IOMMU internal error, no specific reason to report out */
+	IOMMU_FAULT_REASON_INTERNAL,
+
+	/* Could not access the PASID table (fetch caused external abort) */
+	IOMMU_FAULT_REASON_PASID_FETCH,
+
+	/* could not access the device context (fetch caused external abort) */
+	IOMMU_FAULT_REASON_DEVICE_CONTEXT_FETCH,
+
+	/* pasid entry is invalid or has configuration errors */
+	IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+	/* device context entry is invalid or has configuration errors */
+	IOMMU_FAULT_REASON_BAD_DEVICE_CONTEXT_ENTRY,
+	/*
+	 * PASID is out of range (e.g. exceeds the maximum PASID
+	 * supported by the IOMMU) or disabled.
+	 */
+	IOMMU_FAULT_REASON_PASID_INVALID,
+
+	/* source id is out of range */
+	IOMMU_FAULT_REASON_SOURCEID_INVALID,
+
+	/*
+	 * An external abort occurred fetching (or updating) a translation
+	 * table descriptor
+	 */
+	IOMMU_FAULT_REASON_WALK_EABT,
+
+	/*
+	 * Could not access the page table entry (Bad address),
+	 * actual translation fault
+	 */
+	IOMMU_FAULT_REASON_PTE_FETCH,
+
+	/* Protection flag check failed */
+	IOMMU_FAULT_REASON_PERMISSION,
+
+	/* access flag check failed */
+	IOMMU_FAULT_REASON_ACCESS,
+
+	/* Output address of a translation stage caused Address Size fault */
+	IOMMU_FAULT_REASON_OOR_ADDRESS
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ *
+ * @type contains fault type
+ * @reason fault reasons if relevant outside IOMMU driver.
+ * IOMMU driver internal faults are not reported.
+ * @addr: tells the offending page address
+ * @fetch_addr: tells the address that caused an abort, if any
+ * @pasid: contains process address space ID, used in shared virtual memory
+ * @page_req_group_id: page request group index
+ * @last_req: last request in a page request group
+ * @pasid_valid: indicates if the PRQ has a valid PASID
+ * @prot: page access protection flag:
+ *	IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
+ */
+
+struct iommu_fault {
+	__u32	type;   /* enum iommu_fault_type */
+	__u32	reason; /* enum iommu_fault_reason */
+	__u64	addr;
+	__u64	fetch_addr;
+	__u32	pasid;
+	__u32	page_req_group_id;
+	__u32	last_req;
+	__u32	pasid_valid;
+	__u32	prot;
+	__u32	access;
+};
 #endif /* _UAPI_IOMMU_H */