Message ID | 1466747518-54402-7-git-send-email-quan.xu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> From: Xu, Quan > Sent: Friday, June 24, 2016 1:52 PM > > From: Quan Xu <quan.xu@intel.com> > > If Device-TLB flush timed out, we hide the target ATS device > immediately and crash the domain owning this ATS device. If > impacted domain is hardware domain, just throw out a warning. > > By hiding the device, we make sure it can't be assigned to any > domain any longer (see device_assigned). > > Signed-off-by: Quan Xu <quan.xu@intel.com> > > CC: Jan Beulich <jbeulich@suse.com> > CC: Kevin Tian <kevin.tian@intel.com> > CC: Feng Wu <feng.wu@intel.com> > > --- > v12: > 1. a forward declaration struct pci_ats_dev*, instead of > including ats.h. > 2. eliminate the loop. > 3. use the same logic for logging and crashing as I did in > other series (despite I have moved the domain crash logic > up to the generic IOMMU layer, I think I am better still > leave it as is). > 4. enhance dev_invalidate_sync() with ASSERT(). > --- > xen/drivers/passthrough/pci.c | 6 +-- > xen/drivers/passthrough/vtd/extern.h | 5 ++- > xen/drivers/passthrough/vtd/qinval.c | 75 > +++++++++++++++++++++++++++++------ > xen/drivers/passthrough/vtd/x86/ats.c | 9 +---- > xen/include/xen/pci.h | 1 + > 5 files changed, 71 insertions(+), 25 deletions(-) > > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > index 98936f55c..843dc88 100644 > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev) > xfree(pdev); > } > > -static void _pci_hide_device(struct pci_dev *pdev) > +void pci_hide_existing_device(struct pci_dev *pdev) Don't understand what is the meaning of "hiding existing device". Is there case where you may want to hide a non-existing device? > { > if ( pdev->domain ) > return; > @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn) > pdev = alloc_pdev(get_pseg(0), bus, devfn); > if ( pdev ) > { > - _pci_hide_device(pdev); > + pci_hide_existing_device(pdev); > rc = 0; > } > pcidevs_unlock(); > @@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn) > } > > __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map); > - _pci_hide_device(pdev); > + pci_hide_existing_device(pdev); > > return 0; > } > diff --git a/xen/drivers/passthrough/vtd/extern.h > b/xen/drivers/passthrough/vtd/extern.h > index 45357f2..efaff28 100644 > --- a/xen/drivers/passthrough/vtd/extern.h > +++ b/xen/drivers/passthrough/vtd/extern.h > @@ -25,6 +25,7 @@ > > #define VTDPREFIX "[VT-D]" > > +struct pci_ats_dev; > extern bool_t rwbf_quirk; > > void print_iommu_regs(struct acpi_drhd_unit *drhd); > @@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, > u64 addr, unsigned int size_order, u64 type); > > int __must_check qinval_device_iotlb_sync(struct iommu *iommu, > - u32 max_invs_pend, > - u16 sid, u16 size, u64 addr); > + struct pci_ats_dev *ats_dev, > + u16 did, u16 size, u64 addr); > > unsigned int get_cache_line_size(void); > void cacheline_flush(char *); > diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c > index 4492b29..e4e2771 100644 > --- a/xen/drivers/passthrough/vtd/qinval.c > +++ b/xen/drivers/passthrough/vtd/qinval.c > @@ -27,11 +27,11 @@ > #include "dmar.h" > #include "vtd.h" > #include "extern.h" > +#include "../ats.h" Earlier you said: > 1. a forward declaration struct pci_ats_dev*, instead of > including ats.h. But above you still have ats.h included. > > #define VTD_QI_TIMEOUT 1 > > -static int __must_check invalidate_sync(struct iommu *iommu, > - bool_t flush_dev_iotlb); > +static int __must_check invalidate_sync(struct iommu *iommu); I don't understand the rationale behind. In earlier patch you introduce a new parameter which is however just removed later here.... Thanks Kevin
>>> On 24.06.16 at 13:55, <kevin.tian@intel.com> wrote: >> --- a/xen/drivers/passthrough/pci.c >> +++ b/xen/drivers/passthrough/pci.c >> @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev) >> xfree(pdev); >> } >> >> -static void _pci_hide_device(struct pci_dev *pdev) >> +void pci_hide_existing_device(struct pci_dev *pdev) > > Don't understand what is the meaning of "hiding existing device". > Is there case where you may want to hide a non-existing device? The distinction is whether alloc_pdev() needs calling, as can be seen ... >> @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn) >> pdev = alloc_pdev(get_pseg(0), bus, devfn); >> if ( pdev ) >> { >> - _pci_hide_device(pdev); >> + pci_hide_existing_device(pdev); >> rc = 0; >> } ... as the beginning context of this hunk. Jan
On June 24, 2016 7:55 PM, Tian, Kevin <kevin.tian@intel.com> wrote: > > From: Xu, Quan > > Sent: Friday, June 24, 2016 1:52 PM > > diff --git a/xen/drivers/passthrough/vtd/extern.h > > b/xen/drivers/passthrough/vtd/extern.h > > index 45357f2..efaff28 100644 > > --- a/xen/drivers/passthrough/vtd/extern.h > > +++ b/xen/drivers/passthrough/vtd/extern.h > > @@ -25,6 +25,7 @@ > > > > #define VTDPREFIX "[VT-D]" > > > > +struct pci_ats_dev; > > extern bool_t rwbf_quirk; > > > > void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@ > > int dev_invalidate_iotlb(struct iommu *iommu, u16 did, > > u64 addr, unsigned int size_order, u64 > > type); > > > > int __must_check qinval_device_iotlb_sync(struct iommu *iommu, > > - u32 max_invs_pend, > > - u16 sid, u16 size, u64 addr); > > + struct pci_ats_dev *ats_dev, > > + u16 did, u16 size, u64 > > + addr); > > > > unsigned int get_cache_line_size(void); void cacheline_flush(char > > *); diff --git a/xen/drivers/passthrough/vtd/qinval.c > > b/xen/drivers/passthrough/vtd/qinval.c > > index 4492b29..e4e2771 100644 > > --- a/xen/drivers/passthrough/vtd/qinval.c > > +++ b/xen/drivers/passthrough/vtd/qinval.c > > @@ -27,11 +27,11 @@ > > #include "dmar.h" > > #include "vtd.h" > > #include "extern.h" > > +#include "../ats.h" > > Earlier you said: > > 1. a forward declaration struct pci_ats_dev*, instead of > > including ats.h. > This context is 'in extern.h', but.. > But above you still have ats.h included. > .. I really need to include 'ats.h' here, as the 'struct pci_ats_dev*' is used in this file. > > > > #define VTD_QI_TIMEOUT 1 > > > > -static int __must_check invalidate_sync(struct iommu *iommu, > > - bool_t flush_dev_iotlb); > > +static int __must_check invalidate_sync(struct iommu *iommu); > > I don't understand the rationale behind. In earlier patch you introduce a new > parameter which is however just removed later here.... > In earlier patch, refactor invalidate_sync() to indicate whether we need to flush device IOTLB or not. change it back here, as I add a specific function - dev_invalidate_sync() for device IOTLB invalidation.. Quan
>>> On 26.06.16 at 11:18, <quan.xu@intel.com> wrote: > On June 24, 2016 7:55 PM, Tian, Kevin <kevin.tian@intel.com> wrote: >> > From: Xu, Quan >> > Sent: Friday, June 24, 2016 1:52 PM >> > diff --git a/xen/drivers/passthrough/vtd/extern.h >> > b/xen/drivers/passthrough/vtd/extern.h >> > index 45357f2..efaff28 100644 >> > --- a/xen/drivers/passthrough/vtd/extern.h >> > +++ b/xen/drivers/passthrough/vtd/extern.h >> > @@ -25,6 +25,7 @@ >> > >> > #define VTDPREFIX "[VT-D]" >> > >> > +struct pci_ats_dev; >> > extern bool_t rwbf_quirk; >> > >> > void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@ >> > int dev_invalidate_iotlb(struct iommu *iommu, u16 did, >> > u64 addr, unsigned int size_order, u64 >> > type); >> > >> > int __must_check qinval_device_iotlb_sync(struct iommu *iommu, >> > - u32 max_invs_pend, >> > - u16 sid, u16 size, u64 addr); >> > + struct pci_ats_dev *ats_dev, >> > + u16 did, u16 size, u64 >> > + addr); >> > >> > unsigned int get_cache_line_size(void); void cacheline_flush(char >> > *); diff --git a/xen/drivers/passthrough/vtd/qinval.c >> > b/xen/drivers/passthrough/vtd/qinval.c >> > index 4492b29..e4e2771 100644 >> > --- a/xen/drivers/passthrough/vtd/qinval.c >> > +++ b/xen/drivers/passthrough/vtd/qinval.c >> > @@ -27,11 +27,11 @@ >> > #include "dmar.h" >> > #include "vtd.h" >> > #include "extern.h" >> > +#include "../ats.h" >> >> Earlier you said: >> > 1. a forward declaration struct pci_ats_dev*, instead of >> > including ats.h. >> > > This context is 'in extern.h', but.. > >> But above you still have ats.h included. >> > > .. I really need to include 'ats.h' here, as the 'struct pci_ats_dev*' is > used in this file. Used as in "a variable of this type, or a pointer to this type de-referenced", or only as in "a variable/parameter of pointer to this type declared"? In the latter case you don't need the include. Jan
>>> On 24.06.16 at 07:51, <quan.xu@intel.com> wrote: > @@ -199,24 +199,73 @@ static int __must_check queue_invalidate_wait(struct iommu *iommu, > return -EOPNOTSUPP; > } > > -static int __must_check invalidate_sync(struct iommu *iommu, > - bool_t flush_dev_iotlb) > +static int __must_check invalidate_sync(struct iommu *iommu) > { > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); > > ASSERT(qi_ctrl->qinval_maddr); > > - return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); > + return queue_invalidate_wait(iommu, 0, 1, 1, 0); > +} > + > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, > + struct pci_dev *pdev) > +{ > + struct domain *d = NULL; > + > + if ( test_bit(did, iommu->domid_bitmap) ) > + d = rcu_lock_domain_by_id(iommu->domid_map[did]); > + > + /* > + * In case the domain has been freed or the IOMMU domid bitmap is > + * not valid, the device no longer belongs to this domain. > + */ > + if ( d == NULL ) > + return; > + > + pcidevs_lock(); > + ASSERT(pdev->domain); > + list_del(&pdev->domain_list); > + pdev->domain = NULL; > + pci_hide_existing_device(pdev); > + pcidevs_unlock(); > + > + if ( !d->is_shutting_down && printk_ratelimit() ) > + printk(XENLOG_WARNING VTDPREFIX > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", > + d->domain_id, pdev->seg, pdev->bus, > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); > + > + if ( !is_hardware_domain(d) ) > + domain_crash(d); > + > + rcu_unlock_domain(d); > +} So in an earlier patch in this series you (supposedly) moved similar logic up to the vendor independent layer. I think this then would better get moved up too, if at all possible. Jan
On June 27, 2016 4:24 PM, Jan Beulich <JBeulich@suse.com> wrote: > >>> On 24.06.16 at 07:51, <quan.xu@intel.com> wrote: > > @@ -199,24 +199,73 @@ static int __must_check > queue_invalidate_wait(struct iommu *iommu, > > return -EOPNOTSUPP; > > } > > > > -static int __must_check invalidate_sync(struct iommu *iommu, > > - bool_t flush_dev_iotlb) > > +static int __must_check invalidate_sync(struct iommu *iommu) > > { > > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); > > > > ASSERT(qi_ctrl->qinval_maddr); > > > > - return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); > > + return queue_invalidate_wait(iommu, 0, 1, 1, 0); } > > + > > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, > > + struct pci_dev *pdev) { > > + struct domain *d = NULL; > > + > > + if ( test_bit(did, iommu->domid_bitmap) ) > > + d = rcu_lock_domain_by_id(iommu->domid_map[did]); > > + > > + /* > > + * In case the domain has been freed or the IOMMU domid bitmap is > > + * not valid, the device no longer belongs to this domain. > > + */ > > + if ( d == NULL ) > > + return; > > + > > + pcidevs_lock(); > > + ASSERT(pdev->domain); > > + list_del(&pdev->domain_list); > > + pdev->domain = NULL; > > + pci_hide_existing_device(pdev); > > + pcidevs_unlock(); > > + > > + if ( !d->is_shutting_down && printk_ratelimit() ) > > + printk(XENLOG_WARNING VTDPREFIX > > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", > > + d->domain_id, pdev->seg, pdev->bus, > > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); > > + > > + if ( !is_hardware_domain(d) ) > > + domain_crash(d); > > + > > + rcu_unlock_domain(d); > > +} > > So in an earlier patch in this series you (supposedly) moved similar logic up to > the vendor independent layer. I think this then would better get moved up > too, if at all possible. > To be honest, I have not much reason for leaving domain crash here and I was aware of this problem, but crash_domain() here is not harmful (as the 'd->is_shutting_down' is Set when to crash, and once the 'd->is_shutting_down' is Set then return in domain_shutdown() ). In case crash domain directly, it may help us narrow down the 'window' (the domain is still running).. To me, moving the logic up is acceptable. In next version, could I only drop: + if ( !is_hardware_domain(d) ) + domain_crash(d); In this patch, and leave the rest as is ? Quan
>>> On 27.06.16 at 14:56, <quan.xu@intel.com> wrote: > On June 27, 2016 4:24 PM, Jan Beulich <JBeulich@suse.com> wrote: >> >>> On 24.06.16 at 07:51, <quan.xu@intel.com> wrote: >> > @@ -199,24 +199,73 @@ static int __must_check >> queue_invalidate_wait(struct iommu *iommu, >> > return -EOPNOTSUPP; >> > } >> > >> > -static int __must_check invalidate_sync(struct iommu *iommu, >> > - bool_t flush_dev_iotlb) >> > +static int __must_check invalidate_sync(struct iommu *iommu) >> > { >> > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); >> > >> > ASSERT(qi_ctrl->qinval_maddr); >> > >> > - return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); >> > + return queue_invalidate_wait(iommu, 0, 1, 1, 0); } >> > + >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, >> > + struct pci_dev *pdev) { >> > + struct domain *d = NULL; >> > + >> > + if ( test_bit(did, iommu->domid_bitmap) ) >> > + d = rcu_lock_domain_by_id(iommu->domid_map[did]); >> > + >> > + /* >> > + * In case the domain has been freed or the IOMMU domid bitmap is >> > + * not valid, the device no longer belongs to this domain. >> > + */ >> > + if ( d == NULL ) >> > + return; >> > + >> > + pcidevs_lock(); >> > + ASSERT(pdev->domain); >> > + list_del(&pdev->domain_list); >> > + pdev->domain = NULL; >> > + pci_hide_existing_device(pdev); >> > + pcidevs_unlock(); >> > + >> > + if ( !d->is_shutting_down && printk_ratelimit() ) >> > + printk(XENLOG_WARNING VTDPREFIX >> > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", >> > + d->domain_id, pdev->seg, pdev->bus, >> > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); >> > + >> > + if ( !is_hardware_domain(d) ) >> > + domain_crash(d); >> > + >> > + rcu_unlock_domain(d); >> > +} >> >> So in an earlier patch in this series you (supposedly) moved similar logic up to >> the vendor independent layer. I think this then would better get moved up >> too, if at all possible. >> > > To be honest, I have not much reason for leaving domain crash here and I was > aware of this problem, but crash_domain() here is not harmful (as the > 'd->is_shutting_down' is Set when to crash, and once the 'd->is_shutting_down' > is Set then return in domain_shutdown() ). > In case crash domain directly, it may help us narrow down the 'window' (the > domain is still running).. > > To me, moving the logic up is acceptable. > > In next version, could I only drop: > > + if ( !is_hardware_domain(d) ) > + domain_crash(d); > > In this patch, and leave the rest as is ? Not really - the entire function looks like it could move out of vtd/, as I can't see anything VT-d specific in it. Jan
On June 27, 2016 11:21 PM, Jan Beulich <JBeulich@suse.com> wrote: > >>> On 27.06.16 at 14:56, <quan.xu@intel.com> wrote: > > On June 27, 2016 4:24 PM, Jan Beulich <JBeulich@suse.com> wrote: > >> >>> On 24.06.16 at 07:51, <quan.xu@intel.com> wrote: > >> > @@ -199,24 +199,73 @@ static int __must_check > >> queue_invalidate_wait(struct iommu *iommu, > >> > return -EOPNOTSUPP; > >> > } > >> > > >> > -static int __must_check invalidate_sync(struct iommu *iommu, > >> > - bool_t flush_dev_iotlb) > >> > +static int __must_check invalidate_sync(struct iommu *iommu) > >> > { > >> > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); > >> > > >> > ASSERT(qi_ctrl->qinval_maddr); > >> > > >> > - return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); > >> > + return queue_invalidate_wait(iommu, 0, 1, 1, 0); } > >> > + > >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, > >> > + struct pci_dev *pdev) { > >> > + struct domain *d = NULL; > >> > + > >> > + if ( test_bit(did, iommu->domid_bitmap) ) > >> > + d = rcu_lock_domain_by_id(iommu->domid_map[did]); > >> > + > >> > + /* > >> > + * In case the domain has been freed or the IOMMU domid bitmap is > >> > + * not valid, the device no longer belongs to this domain. > >> > + */ > >> > + if ( d == NULL ) > >> > + return; > >> > + > >> > + pcidevs_lock(); > >> > + ASSERT(pdev->domain); > >> > + list_del(&pdev->domain_list); > >> > + pdev->domain = NULL; > >> > + pci_hide_existing_device(pdev); > >> > + pcidevs_unlock(); > >> > + > >> > + if ( !d->is_shutting_down && printk_ratelimit() ) > >> > + printk(XENLOG_WARNING VTDPREFIX > >> > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", > >> > + d->domain_id, pdev->seg, pdev->bus, > >> > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); > >> > + > >> > + if ( !is_hardware_domain(d) ) > >> > + domain_crash(d); > >> > + > >> > + rcu_unlock_domain(d); > >> > +} > >> > >> So in an earlier patch in this series you (supposedly) moved similar > >> logic up to the vendor independent layer. I think this then would > >> better get moved up too, if at all possible. > >> > > > > To be honest, I have not much reason for leaving domain crash here and > > I was aware of this problem, but crash_domain() here is not harmful > > (as the 'd->is_shutting_down' is Set when to crash, and once the 'd- > >is_shutting_down' > > is Set then return in domain_shutdown() ). > > In case crash domain directly, it may help us narrow down the 'window' > > (the domain is still running).. > > > > To me, moving the logic up is acceptable. > > > > In next version, could I only drop: > > > > + if ( !is_hardware_domain(d) ) > > + domain_crash(d); > > > > In this patch, and leave the rest as is ? > > Not really - the entire function looks like it could move out of vtd/, as I can't > see anything VT-d specific in it. > Yes, it could be out of vtd, and then benefit arm/amd IOMMU to hide ATS device. But 'did' and 'iommu->domid_bitmap' are really vtd specific. Both of them are to get domain* structure, not a big deal, and then I can use domain_id instead. IMO, the domain* structure is a must here, As mentioned, not all of call trees of device iotlb flush are under pcidevs_lock, (.i.e ...--iommu_iotlb_flush()-- xenmem_add_to_physmap()... ) In extreme cases, the domain may has been freed or the device may has been detached or even attached to another domain ( I also need to add 'if (pdev->domain == d )' before to hide device). the domain* structure can help us check above cases. Quan
>>> On 28.06.16 at 09:06, <quan.xu@intel.com> wrote: > On June 27, 2016 11:21 PM, Jan Beulich <JBeulich@suse.com> wrote: >> >>> On 27.06.16 at 14:56, <quan.xu@intel.com> wrote: >> > On June 27, 2016 4:24 PM, Jan Beulich <JBeulich@suse.com> wrote: >> >> >>> On 24.06.16 at 07:51, <quan.xu@intel.com> wrote: >> >> > @@ -199,24 +199,73 @@ static int __must_check >> >> queue_invalidate_wait(struct iommu *iommu, >> >> > return -EOPNOTSUPP; >> >> > } >> >> > >> >> > -static int __must_check invalidate_sync(struct iommu *iommu, >> >> > - bool_t flush_dev_iotlb) >> >> > +static int __must_check invalidate_sync(struct iommu *iommu) >> >> > { >> >> > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); >> >> > >> >> > ASSERT(qi_ctrl->qinval_maddr); >> >> > >> >> > - return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); >> >> > + return queue_invalidate_wait(iommu, 0, 1, 1, 0); } >> >> > + >> >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, >> >> > + struct pci_dev *pdev) { >> >> > + struct domain *d = NULL; >> >> > + >> >> > + if ( test_bit(did, iommu->domid_bitmap) ) >> >> > + d = rcu_lock_domain_by_id(iommu->domid_map[did]); >> >> > + >> >> > + /* >> >> > + * In case the domain has been freed or the IOMMU domid bitmap is >> >> > + * not valid, the device no longer belongs to this domain. >> >> > + */ >> >> > + if ( d == NULL ) >> >> > + return; >> >> > + >> >> > + pcidevs_lock(); >> >> > + ASSERT(pdev->domain); >> >> > + list_del(&pdev->domain_list); >> >> > + pdev->domain = NULL; >> >> > + pci_hide_existing_device(pdev); >> >> > + pcidevs_unlock(); >> >> > + >> >> > + if ( !d->is_shutting_down && printk_ratelimit() ) >> >> > + printk(XENLOG_WARNING VTDPREFIX >> >> > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", >> >> > + d->domain_id, pdev->seg, pdev->bus, >> >> > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); >> >> > + >> >> > + if ( !is_hardware_domain(d) ) >> >> > + domain_crash(d); >> >> > + >> >> > + rcu_unlock_domain(d); >> >> > +} >> >> >> >> So in an earlier patch in this series you (supposedly) moved similar >> >> logic up to the vendor independent layer. I think this then would >> >> better get moved up too, if at all possible. >> >> >> > >> > To be honest, I have not much reason for leaving domain crash here and >> > I was aware of this problem, but crash_domain() here is not harmful >> > (as the 'd->is_shutting_down' is Set when to crash, and once the 'd- >> >is_shutting_down' >> > is Set then return in domain_shutdown() ). >> > In case crash domain directly, it may help us narrow down the 'window' >> > (the domain is still running).. >> > >> > To me, moving the logic up is acceptable. >> > >> > In next version, could I only drop: >> > >> > + if ( !is_hardware_domain(d) ) >> > + domain_crash(d); >> > >> > In this patch, and leave the rest as is ? >> >> Not really - the entire function looks like it could move out of vtd/, as I can't >> see anything VT-d specific in it. >> > > Yes, it could be out of vtd, and then benefit arm/amd IOMMU to hide ATS > device. > > But 'did' and 'iommu->domid_bitmap' are really vtd specific. Both of them are > to get domain* structure, not a big deal, and then I can use domain_id > instead. > > IMO, the domain* structure is a must here, I agree, I did overlook that domain lookup. The common code function should be passed a struct domain *, as you suggest. Jan
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 98936f55c..843dc88 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev) xfree(pdev); } -static void _pci_hide_device(struct pci_dev *pdev) +void pci_hide_existing_device(struct pci_dev *pdev) { if ( pdev->domain ) return; @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn) pdev = alloc_pdev(get_pseg(0), bus, devfn); if ( pdev ) { - _pci_hide_device(pdev); + pci_hide_existing_device(pdev); rc = 0; } pcidevs_unlock(); @@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn) } __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map); - _pci_hide_device(pdev); + pci_hide_existing_device(pdev); return 0; } diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h index 45357f2..efaff28 100644 --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -25,6 +25,7 @@ #define VTDPREFIX "[VT-D]" +struct pci_ats_dev; extern bool_t rwbf_quirk; void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); int __must_check qinval_device_iotlb_sync(struct iommu *iommu, - u32 max_invs_pend, - u16 sid, u16 size, u64 addr); + struct pci_ats_dev *ats_dev, + u16 did, u16 size, u64 addr); unsigned int get_cache_line_size(void); void cacheline_flush(char *); diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c index 4492b29..e4e2771 100644 --- a/xen/drivers/passthrough/vtd/qinval.c +++ b/xen/drivers/passthrough/vtd/qinval.c @@ -27,11 +27,11 @@ #include "dmar.h" #include "vtd.h" #include "extern.h" +#include "../ats.h" #define VTD_QI_TIMEOUT 1 -static int __must_check invalidate_sync(struct iommu *iommu, - bool_t flush_dev_iotlb); +static int __must_check invalidate_sync(struct iommu *iommu); static void print_qi_regs(struct iommu *iommu) { @@ -103,7 +103,7 @@ static int __must_check queue_invalidate_context_sync(struct iommu *iommu, unmap_vtd_domain_page(qinval_entries); - return invalidate_sync(iommu, 0); + return invalidate_sync(iommu); } static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu, @@ -140,7 +140,7 @@ static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu, qinval_update_qtail(iommu, index); spin_unlock_irqrestore(&iommu->register_lock, flags); - return invalidate_sync(iommu, 0); + return invalidate_sync(iommu); } static int __must_check queue_invalidate_wait(struct iommu *iommu, @@ -199,24 +199,73 @@ static int __must_check queue_invalidate_wait(struct iommu *iommu, return -EOPNOTSUPP; } -static int __must_check invalidate_sync(struct iommu *iommu, - bool_t flush_dev_iotlb) +static int __must_check invalidate_sync(struct iommu *iommu) { struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); ASSERT(qi_ctrl->qinval_maddr); - return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); + return queue_invalidate_wait(iommu, 0, 1, 1, 0); +} + +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, + struct pci_dev *pdev) +{ + struct domain *d = NULL; + + if ( test_bit(did, iommu->domid_bitmap) ) + d = rcu_lock_domain_by_id(iommu->domid_map[did]); + + /* + * In case the domain has been freed or the IOMMU domid bitmap is + * not valid, the device no longer belongs to this domain. + */ + if ( d == NULL ) + return; + + pcidevs_lock(); + ASSERT(pdev->domain); + list_del(&pdev->domain_list); + pdev->domain = NULL; + pci_hide_existing_device(pdev); + pcidevs_unlock(); + + if ( !d->is_shutting_down && printk_ratelimit() ) + printk(XENLOG_WARNING VTDPREFIX + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", + d->domain_id, pdev->seg, pdev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + if ( !is_hardware_domain(d) ) + domain_crash(d); + + rcu_unlock_domain(d); +} + +static int __must_check dev_invalidate_sync(struct iommu *iommu, u16 did, + struct pci_dev *pdev) +{ + struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); + int rc; + + ASSERT(qi_ctrl->qinval_maddr); + rc = queue_invalidate_wait(iommu, 0, 1, 1, 1); + + if ( rc == -ETIMEDOUT ) + dev_invalidate_iotlb_timeout(iommu, did, pdev); + + return rc; } int qinval_device_iotlb_sync(struct iommu *iommu, - u32 max_invs_pend, - u16 sid, u16 size, u64 addr) + struct pci_ats_dev *ats_dev, + u16 did, u16 size, u64 addr) { unsigned long flags; unsigned int index; u64 entry_base; struct qinval_entry *qinval_entry, *qinval_entries; + struct pci_dev *pdev = ats_dev->pci_dev; spin_lock_irqsave(&iommu->register_lock, flags); index = qinval_next_index(iommu); @@ -227,9 +276,9 @@ int qinval_device_iotlb_sync(struct iommu *iommu, qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB; qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0; - qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend; + qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = ats_dev->ats_queue_depth; qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0; - qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid; + qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = PCI_BDF2(pdev->bus, pdev->devfn); qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0; qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size; @@ -240,7 +289,7 @@ int qinval_device_iotlb_sync(struct iommu *iommu, qinval_update_qtail(iommu, index); spin_unlock_irqrestore(&iommu->register_lock, flags); - return invalidate_sync(iommu, 1); + return dev_invalidate_sync(iommu, did, pdev); } static int __must_check queue_invalidate_iec_sync(struct iommu *iommu, @@ -271,7 +320,7 @@ static int __must_check queue_invalidate_iec_sync(struct iommu *iommu, qinval_update_qtail(iommu, index); spin_unlock_irqrestore(&iommu->register_lock, flags); - ret = invalidate_sync(iommu, 0); + ret = invalidate_sync(iommu); /* * reading vt-d architecture register will ensure diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c index a6c53ea..be2a155 100644 --- a/xen/drivers/passthrough/vtd/x86/ats.c +++ b/xen/drivers/passthrough/vtd/x86/ats.c @@ -121,15 +121,12 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, list_for_each_entry( pdev, &ats_devices, list ) { struct pci_dev *pci_dev = pdev->pci_dev; - u16 sid; bool_t sbit; int rc = 0; if ( !pci_dev ) continue; - sid = PCI_BDF2(pci_dev->bus, pci_dev->devfn); - /* Only invalidate devices that belong to this IOMMU */ if ( pdev->iommu != iommu ) continue; @@ -144,8 +141,7 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */ sbit = 1; addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF; - rc = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth, - sid, sbit, addr); + rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr); break; case DMA_TLB_PSI_FLUSH: if ( !device_in_domain(iommu, pdev, did) ) @@ -164,8 +160,7 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K; } - rc = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth, - sid, sbit, addr); + rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr); break; default: dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n"); diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index 6ed29dd..e4940cd 100644 --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -118,6 +118,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, int pci_remove_device(u16 seg, u8 bus, u8 devfn); int pci_ro_device(int seg, int bus, int devfn); int pci_hide_device(int bus, int devfn); +void pci_hide_existing_device(struct pci_dev *pdev); struct pci_dev *pci_get_pdev(int seg, int bus, int devfn); struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn); struct pci_dev *pci_get_pdev_by_domain(