Message ID | 20190408121911.24103-19-eric.auger@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | SMMUv3 Nested Stage Setup | expand |
On 08/04/2019 13:19, Eric Auger wrote: > When a stage 1 related fault event is read from the event queue, > let's propagate it to potential external fault listeners, ie. users > who registered a fault handler. > > Signed-off-by: Eric Auger <eric.auger@redhat.com> > > --- > v4 -> v5: > - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC > --- > drivers/iommu/arm-smmu-v3.c | 169 +++++++++++++++++++++++++++++++++--- > 1 file changed, 158 insertions(+), 11 deletions(-) > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c > index 8044445bc32a..1fd320788dcb 100644 > --- a/drivers/iommu/arm-smmu-v3.c > +++ b/drivers/iommu/arm-smmu-v3.c > @@ -167,6 +167,26 @@ > #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 > #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc > > +/* Events */ > +#define ARM_SMMU_EVT_F_UUT 0x01 > +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 > +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 > +#define ARM_SMMU_EVT_C_BAD_STE 0x04 > +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 > +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 > +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 > +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 > +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 > +#define ARM_SMMU_EVT_C_BAD_CD 0x0a > +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b > +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 > +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 > +#define ARM_SMMU_EVT_F_ACCESS 0x12 > +#define ARM_SMMU_EVT_F_PERMISSION 0x13 > +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 > +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 > +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 > + > /* Common MSI config fields */ > #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) > #define MSI_CFG2_SH GENMASK(5, 4) > @@ -332,6 +352,15 @@ > #define EVTQ_MAX_SZ_SHIFT 7 > > #define EVTQ_0_ID GENMASK_ULL(7, 0) > +#define EVTQ_0_SSV GENMASK_ULL(11, 11) > +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) > +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) > +#define EVTQ_1_PNU GENMASK_ULL(33, 33) > +#define EVTQ_1_IND GENMASK_ULL(34, 34) > +#define EVTQ_1_RNW GENMASK_ULL(35, 35) > +#define EVTQ_1_S2 GENMASK_ULL(39, 39) > +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) > +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) > > /* PRI queue */ > #define PRIQ_ENT_DWORDS 2 > @@ -639,6 +668,64 @@ struct arm_smmu_domain { > spinlock_t devices_lock; > }; > > +/* fault propagation */ > + > +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ > + IOMMU_FAULT_UNRECOV_PERM_VALID | \ > + IOMMU_FAULT_UNRECOV_ADDR_VALID) > + > +struct arm_smmu_fault_propagation_data { > + enum iommu_fault_reason reason; > + bool s1_check; > + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ > +}; > + > +/* > + * Describes how SMMU faults translate into generic IOMMU faults > + * and if they need to be reported externally > + */ > +static const struct arm_smmu_fault_propagation_data fault_propagation[] = { > +[ARM_SMMU_EVT_F_UUT] = { }, > +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, > +[ARM_SMMU_EVT_F_STE_FETCH] = { }, > +[ARM_SMMU_EVT_C_BAD_STE] = { }, > +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, > +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, > +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, > +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = {IOMMU_FAULT_REASON_PASID_INVALID, > + false, > + IOMMU_FAULT_UNRECOV_PASID_VALID > + }, > +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, > + false, > + IOMMU_FAULT_UNRECOV_PASID_VALID | It doesn't make sense to presume validity here, or in any of the faults below... > + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID > + }, > +[ARM_SMMU_EVT_C_BAD_CD] = {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, > + false, > + IOMMU_FAULT_UNRECOV_PASID_VALID > + }, > +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, true, > + IOMMU_FAULT_F_FIELDS | > + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID > + }, > +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, true, > + IOMMU_FAULT_F_FIELDS > + }, > +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, true, > + IOMMU_FAULT_F_FIELDS > + }, > +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, > + IOMMU_FAULT_F_FIELDS > + }, > +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, true, > + IOMMU_FAULT_F_FIELDS > + }, > +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, > +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, > +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, > +}; > + > struct arm_smmu_option_prop { > u32 opt; > const char *prop; > @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) > return 0; > } > > -__maybe_unused > static struct arm_smmu_master_data * > arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) > { > @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) > return master; > } > > +/* Populates the record fields according to the input SMMU event */ > +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, > + struct iommu_fault_unrecoverable *record) > +{ > + const struct arm_smmu_fault_propagation_data *data; > + u32 fields; > + > + if (type >= ARRAY_SIZE(fault_propagation)) > + return false; > + > + data = &fault_propagation[type]; > + if (!data->reason) > + return false; > + > + fields = data->fields; > + > + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) > + return false; /* S2 related fault, don't propagate */ > + > + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { > + if (FIELD_GET(EVTQ_0_SSV, evt[0])) > + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); > + else > + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID; ...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up coming out of here *without* reporting the offending PASID. > + } > + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { > + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) > + record->perm |= IOMMU_FAULT_PERM_WRITE; > + if (FIELD_GET(EVTQ_1_PNU, evt[1])) > + record->perm |= IOMMU_FAULT_PERM_PRIV; > + if (FIELD_GET(EVTQ_1_IND, evt[1])) > + record->perm |= IOMMU_FAULT_PERM_EXEC; > + } > + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) > + record->addr = evt[2]; > + > + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) > + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); > + > + record->flags = fields; > + return true; > +} > + > +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 *evt) > +{ > + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); > + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); > + struct arm_smmu_master_data *master; > + struct iommu_fault_event event = {}; > + int i; > + > + master = arm_smmu_find_master(smmu, sid); > + if (WARN_ON(!master)) > + return; NAK. If I'm getting global faults like C_BAD_STE where a device almost certainly *isn't* configured (because hey, we would have initialised its STEs if we knew), then I sure as hell want to see the actual faults. Spamming a constant stream of stack traces *instead* of showing them is worse than useless. > + > + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; > + > + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { > + iommu_report_device_fault(master->dev, &event); > + return; And again, the vast majority of the time, there won't be a fault handler registered, so unconditionally suppressing the most common and useful stuff like translation and permission faults is very much not OK. Robin. > + } > + > + dev_info(smmu->dev, "event 0x%02x received:\n", type); > + for (i = 0; i < EVTQ_ENT_DWORDS; ++i) { > + dev_info(smmu->dev, "\t0x%016llx\n", > + (unsigned long long)evt[i]); > + } > +} > + > /* IRQ and event handlers */ > static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) > { > - int i; > struct arm_smmu_device *smmu = dev; > struct arm_smmu_queue *q = &smmu->evtq.q; > u64 evt[EVTQ_ENT_DWORDS]; > > do { > - while (!queue_remove_raw(q, evt)) { > - u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); > - > - dev_info(smmu->dev, "event 0x%02x received:\n", id); > - for (i = 0; i < ARRAY_SIZE(evt); ++i) > - dev_info(smmu->dev, "\t0x%016llx\n", > - (unsigned long long)evt[i]); > - > - } > + while (!queue_remove_raw(q, evt)) > + arm_smmu_report_event(smmu, evt); > > /* > * Not much we can do on overflow, so scream and pretend we're >
Hi Robin, On 5/8/19 7:20 PM, Robin Murphy wrote: > On 08/04/2019 13:19, Eric Auger wrote: >> When a stage 1 related fault event is read from the event queue, >> let's propagate it to potential external fault listeners, ie. users >> who registered a fault handler. >> >> Signed-off-by: Eric Auger <eric.auger@redhat.com> >> >> --- >> v4 -> v5: >> - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC >> --- >> drivers/iommu/arm-smmu-v3.c | 169 +++++++++++++++++++++++++++++++++--- >> 1 file changed, 158 insertions(+), 11 deletions(-) >> >> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c >> index 8044445bc32a..1fd320788dcb 100644 >> --- a/drivers/iommu/arm-smmu-v3.c >> +++ b/drivers/iommu/arm-smmu-v3.c >> @@ -167,6 +167,26 @@ >> #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 >> #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc >> +/* Events */ >> +#define ARM_SMMU_EVT_F_UUT 0x01 >> +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 >> +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 >> +#define ARM_SMMU_EVT_C_BAD_STE 0x04 >> +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 >> +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 >> +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 >> +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 >> +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 >> +#define ARM_SMMU_EVT_C_BAD_CD 0x0a >> +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b >> +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 >> +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 >> +#define ARM_SMMU_EVT_F_ACCESS 0x12 >> +#define ARM_SMMU_EVT_F_PERMISSION 0x13 >> +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 >> +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 >> +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 >> + >> /* Common MSI config fields */ >> #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) >> #define MSI_CFG2_SH GENMASK(5, 4) >> @@ -332,6 +352,15 @@ >> #define EVTQ_MAX_SZ_SHIFT 7 >> #define EVTQ_0_ID GENMASK_ULL(7, 0) >> +#define EVTQ_0_SSV GENMASK_ULL(11, 11) >> +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) >> +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) >> +#define EVTQ_1_PNU GENMASK_ULL(33, 33) >> +#define EVTQ_1_IND GENMASK_ULL(34, 34) >> +#define EVTQ_1_RNW GENMASK_ULL(35, 35) >> +#define EVTQ_1_S2 GENMASK_ULL(39, 39) >> +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) >> +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) >> /* PRI queue */ >> #define PRIQ_ENT_DWORDS 2 >> @@ -639,6 +668,64 @@ struct arm_smmu_domain { >> spinlock_t devices_lock; >> }; >> +/* fault propagation */ >> + >> +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ >> + IOMMU_FAULT_UNRECOV_PERM_VALID | \ >> + IOMMU_FAULT_UNRECOV_ADDR_VALID) >> + >> +struct arm_smmu_fault_propagation_data { >> + enum iommu_fault_reason reason; >> + bool s1_check; >> + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ >> +}; >> + >> +/* >> + * Describes how SMMU faults translate into generic IOMMU faults >> + * and if they need to be reported externally >> + */ >> +static const struct arm_smmu_fault_propagation_data >> fault_propagation[] = { >> +[ARM_SMMU_EVT_F_UUT] = { }, >> +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, >> +[ARM_SMMU_EVT_F_STE_FETCH] = { }, >> +[ARM_SMMU_EVT_C_BAD_STE] = { }, >> +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, >> +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, >> +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, >> +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = {IOMMU_FAULT_REASON_PASID_INVALID, >> + false, >> + IOMMU_FAULT_UNRECOV_PASID_VALID >> + }, >> +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, >> + false, >> + IOMMU_FAULT_UNRECOV_PASID_VALID | > > It doesn't make sense to presume validity here, or in any of the faults > below... > >> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >> + }, >> +[ARM_SMMU_EVT_C_BAD_CD] = >> {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, >> + false, >> + IOMMU_FAULT_UNRECOV_PASID_VALID >> + }, >> +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, true, >> + IOMMU_FAULT_F_FIELDS | >> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >> + }, >> +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, >> true, >> + IOMMU_FAULT_F_FIELDS >> + }, >> +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, >> true, >> + IOMMU_FAULT_F_FIELDS >> + }, >> +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, >> + IOMMU_FAULT_F_FIELDS >> + }, >> +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, >> true, >> + IOMMU_FAULT_F_FIELDS >> + }, >> +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, >> +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, >> +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, >> +}; >> + >> struct arm_smmu_option_prop { >> u32 opt; >> const char *prop; >> @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct >> arm_smmu_device *smmu, u32 sid) >> return 0; >> } >> -__maybe_unused >> static struct arm_smmu_master_data * >> arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) >> { >> @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device >> *smmu, u32 sid) >> return master; >> } >> +/* Populates the record fields according to the input SMMU event */ >> +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, >> + struct iommu_fault_unrecoverable *record) >> +{ >> + const struct arm_smmu_fault_propagation_data *data; >> + u32 fields; >> + >> + if (type >= ARRAY_SIZE(fault_propagation)) >> + return false; >> + >> + data = &fault_propagation[type]; >> + if (!data->reason) >> + return false; >> + >> + fields = data->fields; >> + >> + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) >> + return false; /* S2 related fault, don't propagate */ >> + >> + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { >> + if (FIELD_GET(EVTQ_0_SSV, evt[0])) >> + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); >> + else >> + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID; > > ...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up > coming out of here *without* reporting the offending PASID. Correct. > >> + } >> + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { >> + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) >> + record->perm |= IOMMU_FAULT_PERM_WRITE; >> + if (FIELD_GET(EVTQ_1_PNU, evt[1])) >> + record->perm |= IOMMU_FAULT_PERM_PRIV; >> + if (FIELD_GET(EVTQ_1_IND, evt[1])) >> + record->perm |= IOMMU_FAULT_PERM_EXEC; >> + } >> + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) >> + record->addr = evt[2]; >> + >> + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) >> + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); >> + >> + record->flags = fields; >> + return true; >> +} >> + >> +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 >> *evt) >> +{ >> + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); >> + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); >> + struct arm_smmu_master_data *master; >> + struct iommu_fault_event event = {}; >> + int i; >> + >> + master = arm_smmu_find_master(smmu, sid); >> + if (WARN_ON(!master)) >> + return; > > NAK. If I'm getting global faults like C_BAD_STE where a device almost > certainly *isn't* configured (because hey, we would have initialised its > STEs if we knew), then I sure as hell want to see the actual faults. > Spamming a constant stream of stack traces *instead* of showing them is > worse than useless. Sure, if !master I will output the original traces. > >> + >> + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; >> + >> + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { >> + iommu_report_device_fault(master->dev, &event); >> + return; > > And again, the vast majority of the time, there won't be a fault handler > registered, so unconditionally suppressing the most common and useful > stuff like translation and permission faults is very much not OK. Going to test whether we are in nested mode before entering that path. Thanks! Eric > > Robin. > >> + } >> + >> + dev_info(smmu->dev, "event 0x%02x received:\n", type); >> + for (i = 0; i < EVTQ_ENT_DWORDS; ++i) { >> + dev_info(smmu->dev, "\t0x%016llx\n", >> + (unsigned long long)evt[i]); >> + } >> +} >> + >> /* IRQ and event handlers */ >> static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) >> { >> - int i; >> struct arm_smmu_device *smmu = dev; >> struct arm_smmu_queue *q = &smmu->evtq.q; >> u64 evt[EVTQ_ENT_DWORDS]; >> do { >> - while (!queue_remove_raw(q, evt)) { >> - u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); >> - >> - dev_info(smmu->dev, "event 0x%02x received:\n", id); >> - for (i = 0; i < ARRAY_SIZE(evt); ++i) >> - dev_info(smmu->dev, "\t0x%016llx\n", >> - (unsigned long long)evt[i]); >> - >> - } >> + while (!queue_remove_raw(q, evt)) >> + arm_smmu_report_event(smmu, evt); >> /* >> * Not much we can do on overflow, so scream and pretend we're >>
On 13/05/2019 08:46, Auger Eric wrote: > Hi Robin, > > On 5/8/19 7:20 PM, Robin Murphy wrote: >> On 08/04/2019 13:19, Eric Auger wrote: >>> When a stage 1 related fault event is read from the event queue, >>> let's propagate it to potential external fault listeners, ie. users >>> who registered a fault handler. >>> >>> Signed-off-by: Eric Auger <eric.auger@redhat.com> >>> >>> --- >>> v4 -> v5: >>> - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC >>> --- >>> drivers/iommu/arm-smmu-v3.c | 169 +++++++++++++++++++++++++++++++++--- >>> 1 file changed, 158 insertions(+), 11 deletions(-) >>> >>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c >>> index 8044445bc32a..1fd320788dcb 100644 >>> --- a/drivers/iommu/arm-smmu-v3.c >>> +++ b/drivers/iommu/arm-smmu-v3.c >>> @@ -167,6 +167,26 @@ >>> #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 >>> #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc >>> +/* Events */ >>> +#define ARM_SMMU_EVT_F_UUT 0x01 >>> +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 >>> +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 >>> +#define ARM_SMMU_EVT_C_BAD_STE 0x04 >>> +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 >>> +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 >>> +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 >>> +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 >>> +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 >>> +#define ARM_SMMU_EVT_C_BAD_CD 0x0a >>> +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b >>> +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 >>> +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 >>> +#define ARM_SMMU_EVT_F_ACCESS 0x12 >>> +#define ARM_SMMU_EVT_F_PERMISSION 0x13 >>> +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 >>> +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 >>> +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 >>> + >>> /* Common MSI config fields */ >>> #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) >>> #define MSI_CFG2_SH GENMASK(5, 4) >>> @@ -332,6 +352,15 @@ >>> #define EVTQ_MAX_SZ_SHIFT 7 >>> #define EVTQ_0_ID GENMASK_ULL(7, 0) >>> +#define EVTQ_0_SSV GENMASK_ULL(11, 11) >>> +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) >>> +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) >>> +#define EVTQ_1_PNU GENMASK_ULL(33, 33) >>> +#define EVTQ_1_IND GENMASK_ULL(34, 34) >>> +#define EVTQ_1_RNW GENMASK_ULL(35, 35) >>> +#define EVTQ_1_S2 GENMASK_ULL(39, 39) >>> +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) >>> +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) >>> /* PRI queue */ >>> #define PRIQ_ENT_DWORDS 2 >>> @@ -639,6 +668,64 @@ struct arm_smmu_domain { >>> spinlock_t devices_lock; >>> }; >>> +/* fault propagation */ >>> + >>> +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ >>> + IOMMU_FAULT_UNRECOV_PERM_VALID | \ >>> + IOMMU_FAULT_UNRECOV_ADDR_VALID) >>> + >>> +struct arm_smmu_fault_propagation_data { >>> + enum iommu_fault_reason reason; >>> + bool s1_check; >>> + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ >>> +}; >>> + >>> +/* >>> + * Describes how SMMU faults translate into generic IOMMU faults >>> + * and if they need to be reported externally >>> + */ >>> +static const struct arm_smmu_fault_propagation_data >>> fault_propagation[] = { >>> +[ARM_SMMU_EVT_F_UUT] = { }, >>> +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, >>> +[ARM_SMMU_EVT_F_STE_FETCH] = { }, >>> +[ARM_SMMU_EVT_C_BAD_STE] = { }, >>> +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, >>> +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, >>> +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, >>> +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = {IOMMU_FAULT_REASON_PASID_INVALID, >>> + false, >>> + IOMMU_FAULT_UNRECOV_PASID_VALID >>> + }, >>> +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, >>> + false, >>> + IOMMU_FAULT_UNRECOV_PASID_VALID | >> >> It doesn't make sense to presume validity here, or in any of the faults >> below... > > >> >>> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >>> + }, >>> +[ARM_SMMU_EVT_C_BAD_CD] = >>> {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, >>> + false, >>> + IOMMU_FAULT_UNRECOV_PASID_VALID >>> + }, >>> +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, true, >>> + IOMMU_FAULT_F_FIELDS | >>> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >>> + }, >>> +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, >>> true, >>> + IOMMU_FAULT_F_FIELDS >>> + }, >>> +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, >>> true, >>> + IOMMU_FAULT_F_FIELDS >>> + }, >>> +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, >>> + IOMMU_FAULT_F_FIELDS >>> + }, >>> +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, >>> true, >>> + IOMMU_FAULT_F_FIELDS >>> + }, >>> +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, >>> +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, >>> +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, >>> +}; >>> + >>> struct arm_smmu_option_prop { >>> u32 opt; >>> const char *prop; >>> @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct >>> arm_smmu_device *smmu, u32 sid) >>> return 0; >>> } >>> -__maybe_unused >>> static struct arm_smmu_master_data * >>> arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) >>> { >>> @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device >>> *smmu, u32 sid) >>> return master; >>> } >>> +/* Populates the record fields according to the input SMMU event */ >>> +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, >>> + struct iommu_fault_unrecoverable *record) >>> +{ >>> + const struct arm_smmu_fault_propagation_data *data; >>> + u32 fields; >>> + >>> + if (type >= ARRAY_SIZE(fault_propagation)) >>> + return false; >>> + >>> + data = &fault_propagation[type]; >>> + if (!data->reason) >>> + return false; >>> + >>> + fields = data->fields; >>> + >>> + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) >>> + return false; /* S2 related fault, don't propagate */ >>> + >>> + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { >>> + if (FIELD_GET(EVTQ_0_SSV, evt[0])) >>> + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); >>> + else >>> + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID; >> >> ...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up >> coming out of here *without* reporting the offending PASID. > Correct. >> >>> + } >>> + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { >>> + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) >>> + record->perm |= IOMMU_FAULT_PERM_WRITE; >>> + if (FIELD_GET(EVTQ_1_PNU, evt[1])) >>> + record->perm |= IOMMU_FAULT_PERM_PRIV; >>> + if (FIELD_GET(EVTQ_1_IND, evt[1])) >>> + record->perm |= IOMMU_FAULT_PERM_EXEC; >>> + } >>> + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) >>> + record->addr = evt[2]; >>> + >>> + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) >>> + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); >>> + >>> + record->flags = fields; >>> + return true; >>> +} >>> + >>> +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 >>> *evt) >>> +{ >>> + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); >>> + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); >>> + struct arm_smmu_master_data *master; >>> + struct iommu_fault_event event = {}; >>> + int i; >>> + >>> + master = arm_smmu_find_master(smmu, sid); >>> + if (WARN_ON(!master)) >>> + return; >> >> NAK. If I'm getting global faults like C_BAD_STE where a device almost >> certainly *isn't* configured (because hey, we would have initialised its >> STEs if we knew), then I sure as hell want to see the actual faults. >> Spamming a constant stream of stack traces *instead* of showing them is >> worse than useless. > Sure, if !master I will output the original traces. >> >>> + >>> + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; >>> + >>> + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { >>> + iommu_report_device_fault(master->dev, &event); >>> + return; >> >> And again, the vast majority of the time, there won't be a fault handler >> registered, so unconditionally suppressing the most common and useful >> stuff like translation and permission faults is very much not OK. > Going to test whether we are in nested mode before entering that path. I don't think this has to be exclusive to nesting - the generic reporting mechanism feels like it might ultimately be extensible to other things like Rob's case for generalised stalling. It's just that for robustness, even when a fault handler is present, we still want the driver to be able to report if it didn't actually handle a fault. Thanks, Robin.
Hi Robin, On 5/13/19 1:54 PM, Robin Murphy wrote: > On 13/05/2019 08:46, Auger Eric wrote: >> Hi Robin, >> >> On 5/8/19 7:20 PM, Robin Murphy wrote: >>> On 08/04/2019 13:19, Eric Auger wrote: >>>> When a stage 1 related fault event is read from the event queue, >>>> let's propagate it to potential external fault listeners, ie. users >>>> who registered a fault handler. >>>> >>>> Signed-off-by: Eric Auger <eric.auger@redhat.com> >>>> >>>> --- >>>> v4 -> v5: >>>> - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC >>>> --- >>>> drivers/iommu/arm-smmu-v3.c | 169 >>>> +++++++++++++++++++++++++++++++++--- >>>> 1 file changed, 158 insertions(+), 11 deletions(-) >>>> >>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c >>>> index 8044445bc32a..1fd320788dcb 100644 >>>> --- a/drivers/iommu/arm-smmu-v3.c >>>> +++ b/drivers/iommu/arm-smmu-v3.c >>>> @@ -167,6 +167,26 @@ >>>> #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 >>>> #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc >>>> +/* Events */ >>>> +#define ARM_SMMU_EVT_F_UUT 0x01 >>>> +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 >>>> +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 >>>> +#define ARM_SMMU_EVT_C_BAD_STE 0x04 >>>> +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 >>>> +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 >>>> +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 >>>> +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 >>>> +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 >>>> +#define ARM_SMMU_EVT_C_BAD_CD 0x0a >>>> +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b >>>> +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 >>>> +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 >>>> +#define ARM_SMMU_EVT_F_ACCESS 0x12 >>>> +#define ARM_SMMU_EVT_F_PERMISSION 0x13 >>>> +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 >>>> +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 >>>> +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 >>>> + >>>> /* Common MSI config fields */ >>>> #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) >>>> #define MSI_CFG2_SH GENMASK(5, 4) >>>> @@ -332,6 +352,15 @@ >>>> #define EVTQ_MAX_SZ_SHIFT 7 >>>> #define EVTQ_0_ID GENMASK_ULL(7, 0) >>>> +#define EVTQ_0_SSV GENMASK_ULL(11, 11) >>>> +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) >>>> +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) >>>> +#define EVTQ_1_PNU GENMASK_ULL(33, 33) >>>> +#define EVTQ_1_IND GENMASK_ULL(34, 34) >>>> +#define EVTQ_1_RNW GENMASK_ULL(35, 35) >>>> +#define EVTQ_1_S2 GENMASK_ULL(39, 39) >>>> +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) >>>> +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) >>>> /* PRI queue */ >>>> #define PRIQ_ENT_DWORDS 2 >>>> @@ -639,6 +668,64 @@ struct arm_smmu_domain { >>>> spinlock_t devices_lock; >>>> }; >>>> +/* fault propagation */ >>>> + >>>> +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ >>>> + IOMMU_FAULT_UNRECOV_PERM_VALID | \ >>>> + IOMMU_FAULT_UNRECOV_ADDR_VALID) >>>> + >>>> +struct arm_smmu_fault_propagation_data { >>>> + enum iommu_fault_reason reason; >>>> + bool s1_check; >>>> + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ >>>> +}; >>>> + >>>> +/* >>>> + * Describes how SMMU faults translate into generic IOMMU faults >>>> + * and if they need to be reported externally >>>> + */ >>>> +static const struct arm_smmu_fault_propagation_data >>>> fault_propagation[] = { >>>> +[ARM_SMMU_EVT_F_UUT] = { }, >>>> +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, >>>> +[ARM_SMMU_EVT_F_STE_FETCH] = { }, >>>> +[ARM_SMMU_EVT_C_BAD_STE] = { }, >>>> +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, >>>> +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, >>>> +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, >>>> +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = >>>> {IOMMU_FAULT_REASON_PASID_INVALID, >>>> + false, >>>> + IOMMU_FAULT_UNRECOV_PASID_VALID >>>> + }, >>>> +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, >>>> + false, >>>> + IOMMU_FAULT_UNRECOV_PASID_VALID | >>> >>> It doesn't make sense to presume validity here, or in any of the faults >>> below... >> >> >>> >>>> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >>>> + }, >>>> +[ARM_SMMU_EVT_C_BAD_CD] = >>>> {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, >>>> + false, >>>> + IOMMU_FAULT_UNRECOV_PASID_VALID >>>> + }, >>>> +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, >>>> true, >>>> + IOMMU_FAULT_F_FIELDS | >>>> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >>>> + }, >>>> +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, >>>> true, >>>> + IOMMU_FAULT_F_FIELDS >>>> + }, >>>> +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, >>>> true, >>>> + IOMMU_FAULT_F_FIELDS >>>> + }, >>>> +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, >>>> + IOMMU_FAULT_F_FIELDS >>>> + }, >>>> +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, >>>> true, >>>> + IOMMU_FAULT_F_FIELDS >>>> + }, >>>> +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, >>>> +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, >>>> +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, >>>> +}; >>>> + >>>> struct arm_smmu_option_prop { >>>> u32 opt; >>>> const char *prop; >>>> @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct >>>> arm_smmu_device *smmu, u32 sid) >>>> return 0; >>>> } >>>> -__maybe_unused >>>> static struct arm_smmu_master_data * >>>> arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) >>>> { >>>> @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device >>>> *smmu, u32 sid) >>>> return master; >>>> } >>>> +/* Populates the record fields according to the input SMMU event */ >>>> +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, >>>> + struct iommu_fault_unrecoverable *record) >>>> +{ >>>> + const struct arm_smmu_fault_propagation_data *data; >>>> + u32 fields; >>>> + >>>> + if (type >= ARRAY_SIZE(fault_propagation)) >>>> + return false; >>>> + >>>> + data = &fault_propagation[type]; >>>> + if (!data->reason) >>>> + return false; >>>> + >>>> + fields = data->fields; >>>> + >>>> + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) >>>> + return false; /* S2 related fault, don't propagate */ >>>> + >>>> + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { >>>> + if (FIELD_GET(EVTQ_0_SSV, evt[0])) >>>> + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); >>>> + else >>>> + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID; >>> >>> ...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up >>> coming out of here *without* reporting the offending PASID. >> Correct. >>> >>>> + } >>>> + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { >>>> + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) >>>> + record->perm |= IOMMU_FAULT_PERM_WRITE; >>>> + if (FIELD_GET(EVTQ_1_PNU, evt[1])) >>>> + record->perm |= IOMMU_FAULT_PERM_PRIV; >>>> + if (FIELD_GET(EVTQ_1_IND, evt[1])) >>>> + record->perm |= IOMMU_FAULT_PERM_EXEC; >>>> + } >>>> + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) >>>> + record->addr = evt[2]; >>>> + >>>> + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) >>>> + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); >>>> + >>>> + record->flags = fields; >>>> + return true; >>>> +} >>>> + >>>> +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 >>>> *evt) >>>> +{ >>>> + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); >>>> + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); >>>> + struct arm_smmu_master_data *master; >>>> + struct iommu_fault_event event = {}; >>>> + int i; >>>> + >>>> + master = arm_smmu_find_master(smmu, sid); >>>> + if (WARN_ON(!master)) >>>> + return; >>> >>> NAK. If I'm getting global faults like C_BAD_STE where a device almost >>> certainly *isn't* configured (because hey, we would have initialised its >>> STEs if we knew), then I sure as hell want to see the actual faults. >>> Spamming a constant stream of stack traces *instead* of showing them is >>> worse than useless. >> Sure, if !master I will output the original traces. >>> >>>> + >>>> + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; >>>> + >>>> + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { >>>> + iommu_report_device_fault(master->dev, &event); >>>> + return; >>> >>> And again, the vast majority of the time, there won't be a fault handler >>> registered, so unconditionally suppressing the most common and useful >>> stuff like translation and permission faults is very much not OK. >> Going to test whether we are in nested mode before entering that path. > > I don't think this has to be exclusive to nesting - the generic > reporting mechanism feels like it might ultimately be extensible to > other things like Rob's case for generalised stalling. It's just that > for robustness, even when a fault handler is present, we still want the > driver to be able to report if it didn't actually handle a fault. Jean-Philippe pointed out in a previous review (https://patchwork.kernel.org/patch/10751801/#22424047) that the guest can flood the host log with S1 related faults. At the moment we do not check that a fault handler is registered in nested mode. Maybe we should? Even if the fault handler is registered, as it is based on a circular buffer, this latter can be full and lead to a log flood. Thanks Eric > > Thanks, > Robin.
On 13/05/2019 13:32, Auger Eric wrote: > Hi Robin, > On 5/13/19 1:54 PM, Robin Murphy wrote: >> On 13/05/2019 08:46, Auger Eric wrote: >>> Hi Robin, >>> >>> On 5/8/19 7:20 PM, Robin Murphy wrote: >>>> On 08/04/2019 13:19, Eric Auger wrote: >>>>> When a stage 1 related fault event is read from the event queue, >>>>> let's propagate it to potential external fault listeners, ie. users >>>>> who registered a fault handler. >>>>> >>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com> >>>>> >>>>> --- >>>>> v4 -> v5: >>>>> - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC >>>>> --- >>>>> drivers/iommu/arm-smmu-v3.c | 169 >>>>> +++++++++++++++++++++++++++++++++--- >>>>> 1 file changed, 158 insertions(+), 11 deletions(-) >>>>> >>>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c >>>>> index 8044445bc32a..1fd320788dcb 100644 >>>>> --- a/drivers/iommu/arm-smmu-v3.c >>>>> +++ b/drivers/iommu/arm-smmu-v3.c >>>>> @@ -167,6 +167,26 @@ >>>>> #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 >>>>> #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc >>>>> +/* Events */ >>>>> +#define ARM_SMMU_EVT_F_UUT 0x01 >>>>> +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 >>>>> +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 >>>>> +#define ARM_SMMU_EVT_C_BAD_STE 0x04 >>>>> +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 >>>>> +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 >>>>> +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 >>>>> +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 >>>>> +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 >>>>> +#define ARM_SMMU_EVT_C_BAD_CD 0x0a >>>>> +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b >>>>> +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 >>>>> +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 >>>>> +#define ARM_SMMU_EVT_F_ACCESS 0x12 >>>>> +#define ARM_SMMU_EVT_F_PERMISSION 0x13 >>>>> +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 >>>>> +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 >>>>> +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 >>>>> + >>>>> /* Common MSI config fields */ >>>>> #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) >>>>> #define MSI_CFG2_SH GENMASK(5, 4) >>>>> @@ -332,6 +352,15 @@ >>>>> #define EVTQ_MAX_SZ_SHIFT 7 >>>>> #define EVTQ_0_ID GENMASK_ULL(7, 0) >>>>> +#define EVTQ_0_SSV GENMASK_ULL(11, 11) >>>>> +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) >>>>> +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) >>>>> +#define EVTQ_1_PNU GENMASK_ULL(33, 33) >>>>> +#define EVTQ_1_IND GENMASK_ULL(34, 34) >>>>> +#define EVTQ_1_RNW GENMASK_ULL(35, 35) >>>>> +#define EVTQ_1_S2 GENMASK_ULL(39, 39) >>>>> +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) >>>>> +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) >>>>> /* PRI queue */ >>>>> #define PRIQ_ENT_DWORDS 2 >>>>> @@ -639,6 +668,64 @@ struct arm_smmu_domain { >>>>> spinlock_t devices_lock; >>>>> }; >>>>> +/* fault propagation */ >>>>> + >>>>> +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ >>>>> + IOMMU_FAULT_UNRECOV_PERM_VALID | \ >>>>> + IOMMU_FAULT_UNRECOV_ADDR_VALID) >>>>> + >>>>> +struct arm_smmu_fault_propagation_data { >>>>> + enum iommu_fault_reason reason; >>>>> + bool s1_check; >>>>> + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ >>>>> +}; >>>>> + >>>>> +/* >>>>> + * Describes how SMMU faults translate into generic IOMMU faults >>>>> + * and if they need to be reported externally >>>>> + */ >>>>> +static const struct arm_smmu_fault_propagation_data >>>>> fault_propagation[] = { >>>>> +[ARM_SMMU_EVT_F_UUT] = { }, >>>>> +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, >>>>> +[ARM_SMMU_EVT_F_STE_FETCH] = { }, >>>>> +[ARM_SMMU_EVT_C_BAD_STE] = { }, >>>>> +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, >>>>> +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, >>>>> +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, >>>>> +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = >>>>> {IOMMU_FAULT_REASON_PASID_INVALID, >>>>> + false, >>>>> + IOMMU_FAULT_UNRECOV_PASID_VALID >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, >>>>> + false, >>>>> + IOMMU_FAULT_UNRECOV_PASID_VALID | >>>> >>>> It doesn't make sense to presume validity here, or in any of the faults >>>> below... >>> >>> >>>> >>>>> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >>>>> + }, >>>>> +[ARM_SMMU_EVT_C_BAD_CD] = >>>>> {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, >>>>> + false, >>>>> + IOMMU_FAULT_UNRECOV_PASID_VALID >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, >>>>> true, >>>>> + IOMMU_FAULT_F_FIELDS | >>>>> + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, >>>>> true, >>>>> + IOMMU_FAULT_F_FIELDS >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, >>>>> true, >>>>> + IOMMU_FAULT_F_FIELDS >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, >>>>> + IOMMU_FAULT_F_FIELDS >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, >>>>> true, >>>>> + IOMMU_FAULT_F_FIELDS >>>>> + }, >>>>> +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, >>>>> +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, >>>>> +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, >>>>> +}; >>>>> + >>>>> struct arm_smmu_option_prop { >>>>> u32 opt; >>>>> const char *prop; >>>>> @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct >>>>> arm_smmu_device *smmu, u32 sid) >>>>> return 0; >>>>> } >>>>> -__maybe_unused >>>>> static struct arm_smmu_master_data * >>>>> arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) >>>>> { >>>>> @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device >>>>> *smmu, u32 sid) >>>>> return master; >>>>> } >>>>> +/* Populates the record fields according to the input SMMU event */ >>>>> +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, >>>>> + struct iommu_fault_unrecoverable *record) >>>>> +{ >>>>> + const struct arm_smmu_fault_propagation_data *data; >>>>> + u32 fields; >>>>> + >>>>> + if (type >= ARRAY_SIZE(fault_propagation)) >>>>> + return false; >>>>> + >>>>> + data = &fault_propagation[type]; >>>>> + if (!data->reason) >>>>> + return false; >>>>> + >>>>> + fields = data->fields; >>>>> + >>>>> + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) >>>>> + return false; /* S2 related fault, don't propagate */ >>>>> + >>>>> + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { >>>>> + if (FIELD_GET(EVTQ_0_SSV, evt[0])) >>>>> + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); >>>>> + else >>>>> + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID; >>>> >>>> ...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up >>>> coming out of here *without* reporting the offending PASID. >>> Correct. >>>> >>>>> + } >>>>> + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { >>>>> + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) >>>>> + record->perm |= IOMMU_FAULT_PERM_WRITE; >>>>> + if (FIELD_GET(EVTQ_1_PNU, evt[1])) >>>>> + record->perm |= IOMMU_FAULT_PERM_PRIV; >>>>> + if (FIELD_GET(EVTQ_1_IND, evt[1])) >>>>> + record->perm |= IOMMU_FAULT_PERM_EXEC; >>>>> + } >>>>> + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) >>>>> + record->addr = evt[2]; >>>>> + >>>>> + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) >>>>> + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); >>>>> + >>>>> + record->flags = fields; >>>>> + return true; >>>>> +} >>>>> + >>>>> +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 >>>>> *evt) >>>>> +{ >>>>> + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); >>>>> + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); >>>>> + struct arm_smmu_master_data *master; >>>>> + struct iommu_fault_event event = {}; >>>>> + int i; >>>>> + >>>>> + master = arm_smmu_find_master(smmu, sid); >>>>> + if (WARN_ON(!master)) >>>>> + return; >>>> >>>> NAK. If I'm getting global faults like C_BAD_STE where a device almost >>>> certainly *isn't* configured (because hey, we would have initialised its >>>> STEs if we knew), then I sure as hell want to see the actual faults. >>>> Spamming a constant stream of stack traces *instead* of showing them is >>>> worse than useless. >>> Sure, if !master I will output the original traces. >>>> >>>>> + >>>>> + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; >>>>> + >>>>> + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { >>>>> + iommu_report_device_fault(master->dev, &event); >>>>> + return; >>>> >>>> And again, the vast majority of the time, there won't be a fault handler >>>> registered, so unconditionally suppressing the most common and useful >>>> stuff like translation and permission faults is very much not OK. >>> Going to test whether we are in nested mode before entering that path. >> >> I don't think this has to be exclusive to nesting - the generic >> reporting mechanism feels like it might ultimately be extensible to >> other things like Rob's case for generalised stalling. It's just that >> for robustness, even when a fault handler is present, we still want the >> driver to be able to report if it didn't actually handle a fault. > > > Jean-Philippe pointed out in a previous review > (https://patchwork.kernel.org/patch/10751801/#22424047) that the guest > can flood the host log with S1 related faults. At the moment we do not > check that a fault handler is registered in nested mode. Maybe we > should? Even if the fault handler is registered, as it is based on a > circular buffer, this latter can be full and lead to a log flood. Ah, right, I didn't quite have the full picture in mind, thanks for the jog. I guess in that case we want some degree of special-casing for nested configs - anything S1-related that we expect the fault handler to simply inject back into the guest, we can indeed suppress on the host, but I do think it's worth anticipating host-related faults (and entirely host-owned fault-handlers in future) as well. Robin.
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8044445bc32a..1fd320788dcb 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -167,6 +167,26 @@ #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc +/* Events */ +#define ARM_SMMU_EVT_F_UUT 0x01 +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 +#define ARM_SMMU_EVT_C_BAD_STE 0x04 +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 +#define ARM_SMMU_EVT_C_BAD_CD 0x0a +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 +#define ARM_SMMU_EVT_F_ACCESS 0x12 +#define ARM_SMMU_EVT_F_PERMISSION 0x13 +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 + /* Common MSI config fields */ #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) #define MSI_CFG2_SH GENMASK(5, 4) @@ -332,6 +352,15 @@ #define EVTQ_MAX_SZ_SHIFT 7 #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVTQ_0_SSV GENMASK_ULL(11, 11) +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) +#define EVTQ_1_PNU GENMASK_ULL(33, 33) +#define EVTQ_1_IND GENMASK_ULL(34, 34) +#define EVTQ_1_RNW GENMASK_ULL(35, 35) +#define EVTQ_1_S2 GENMASK_ULL(39, 39) +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) /* PRI queue */ #define PRIQ_ENT_DWORDS 2 @@ -639,6 +668,64 @@ struct arm_smmu_domain { spinlock_t devices_lock; }; +/* fault propagation */ + +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ + IOMMU_FAULT_UNRECOV_PERM_VALID | \ + IOMMU_FAULT_UNRECOV_ADDR_VALID) + +struct arm_smmu_fault_propagation_data { + enum iommu_fault_reason reason; + bool s1_check; + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ +}; + +/* + * Describes how SMMU faults translate into generic IOMMU faults + * and if they need to be reported externally + */ +static const struct arm_smmu_fault_propagation_data fault_propagation[] = { +[ARM_SMMU_EVT_F_UUT] = { }, +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, +[ARM_SMMU_EVT_F_STE_FETCH] = { }, +[ARM_SMMU_EVT_C_BAD_STE] = { }, +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = {IOMMU_FAULT_REASON_PASID_INVALID, + false, + IOMMU_FAULT_UNRECOV_PASID_VALID + }, +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, + false, + IOMMU_FAULT_UNRECOV_PASID_VALID | + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID + }, +[ARM_SMMU_EVT_C_BAD_CD] = {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, + false, + IOMMU_FAULT_UNRECOV_PASID_VALID + }, +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, true, + IOMMU_FAULT_F_FIELDS | + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID + }, +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, +}; + struct arm_smmu_option_prop { u32 opt; const char *prop; @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } -__maybe_unused static struct arm_smmu_master_data * arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) { @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) return master; } +/* Populates the record fields according to the input SMMU event */ +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, + struct iommu_fault_unrecoverable *record) +{ + const struct arm_smmu_fault_propagation_data *data; + u32 fields; + + if (type >= ARRAY_SIZE(fault_propagation)) + return false; + + data = &fault_propagation[type]; + if (!data->reason) + return false; + + fields = data->fields; + + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) + return false; /* S2 related fault, don't propagate */ + + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { + if (FIELD_GET(EVTQ_0_SSV, evt[0])) + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); + else + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID; + } + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) + record->perm |= IOMMU_FAULT_PERM_WRITE; + if (FIELD_GET(EVTQ_1_PNU, evt[1])) + record->perm |= IOMMU_FAULT_PERM_PRIV; + if (FIELD_GET(EVTQ_1_IND, evt[1])) + record->perm |= IOMMU_FAULT_PERM_EXEC; + } + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) + record->addr = evt[2]; + + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); + + record->flags = fields; + return true; +} + +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 *evt) +{ + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); + struct arm_smmu_master_data *master; + struct iommu_fault_event event = {}; + int i; + + master = arm_smmu_find_master(smmu, sid); + if (WARN_ON(!master)) + return; + + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; + + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { + iommu_report_device_fault(master->dev, &event); + return; + } + + dev_info(smmu->dev, "event 0x%02x received:\n", type); + for (i = 0; i < EVTQ_ENT_DWORDS; ++i) { + dev_info(smmu->dev, "\t0x%016llx\n", + (unsigned long long)evt[i]); + } +} + /* IRQ and event handlers */ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { - int i; struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; u64 evt[EVTQ_ENT_DWORDS]; do { - while (!queue_remove_raw(q, evt)) { - u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); - - dev_info(smmu->dev, "event 0x%02x received:\n", id); - for (i = 0; i < ARRAY_SIZE(evt); ++i) - dev_info(smmu->dev, "\t0x%016llx\n", - (unsigned long long)evt[i]); - - } + while (!queue_remove_raw(q, evt)) + arm_smmu_report_event(smmu, evt); /* * Not much we can do on overflow, so scream and pretend we're
When a stage 1 related fault event is read from the event queue, let's propagate it to potential external fault listeners, ie. users who registered a fault handler. Signed-off-by: Eric Auger <eric.auger@redhat.com> --- v4 -> v5: - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC --- drivers/iommu/arm-smmu-v3.c | 169 +++++++++++++++++++++++++++++++++--- 1 file changed, 158 insertions(+), 11 deletions(-)