Message ID | 20240816110820.75672-2-Jiqian.Chen@amd.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Support device passthrough when dom0 is PVH on Xen | expand |
On 16.08.2024 13:08, Jiqian Chen wrote: > @@ -67,6 +68,57 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > break; > } > > + case PHYSDEVOP_pci_device_reset: > + { > + struct pci_device_reset dev_reset; > + struct pci_dev *pdev; > + pci_sbdf_t sbdf; > + > + ret = -EOPNOTSUPP; > + if ( !is_pci_passthrough_enabled() ) > + break; It occurs to me (only now, sorry): Does this case really need to be an error? I.e. do we really need to bother callers by having them find out whether pass-through is supported in the underlying Xen? > + ret = -EFAULT; > + if ( copy_from_guest(&dev_reset, arg, 1) != 0 ) > + break; > + > + sbdf = PCI_SBDF(dev_reset.dev.seg, > + dev_reset.dev.bus, > + dev_reset.dev.devfn); > + > + ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf); > + if ( ret ) > + break; > + > + pcidevs_lock(); > + pdev = pci_get_pdev(NULL, sbdf); > + if ( !pdev ) > + { > + pcidevs_unlock(); > + ret = -ENODEV; > + break; > + } > + > + write_lock(&pdev->domain->pci_lock); > + pcidevs_unlock(); > + switch ( dev_reset.flags & PCI_DEVICE_RESET_MASK ) > + { > + case PCI_DEVICE_RESET_COLD: > + case PCI_DEVICE_RESET_WARM: > + case PCI_DEVICE_RESET_HOT: > + case PCI_DEVICE_RESET_FLR: > + ret = vpci_reset_device(pdev); > + break; > + > + default: > + ret = -EOPNOTSUPP; EINVAL But: What about the other flag bits? You don't check them (anymore; I thought there was a check there before). > --- a/xen/include/public/physdev.h > +++ b/xen/include/public/physdev.h > @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); > */ > #define PHYSDEVOP_prepare_msix 30 > #define PHYSDEVOP_release_msix 31 > +/* > + * Notify the hypervisor that a PCI device has been reset, so that any > + * internally cached state is regenerated. Should be called after any > + * device reset performed by the hardware domain. > + */ > +#define PHYSDEVOP_pci_device_reset 32 Nit: Please pad the 32 to align with the 30 and 31 in context. Jan
On 2024/8/19 17:04, Jan Beulich wrote: > On 16.08.2024 13:08, Jiqian Chen wrote: >> @@ -67,6 +68,57 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >> break; >> } >> >> + case PHYSDEVOP_pci_device_reset: >> + { >> + struct pci_device_reset dev_reset; >> + struct pci_dev *pdev; >> + pci_sbdf_t sbdf; >> + >> + ret = -EOPNOTSUPP; >> + if ( !is_pci_passthrough_enabled() ) >> + break; > > It occurs to me (only now, sorry): Does this case really need to be an > error? I.e. do we really need to bother callers by having them find out > whether pass-through is supported in the underlying Xen? I am not sure, but on x86 pass-through is always enabled, so it doesn't matter there. On Arm, this hypercall is, for now, also used for passthrough devices, so wouldn't it be better to keep the same behavior as the other PHYSDEVOP_pci_device_* operations? > >> + ret = -EFAULT; >> + if ( copy_from_guest(&dev_reset, arg, 1) != 0 ) >> + break; >> + >> + sbdf = PCI_SBDF(dev_reset.dev.seg, >> + dev_reset.dev.bus, >> + dev_reset.dev.devfn); >> + >> + ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf); >> + if ( ret ) >> + break; >> + >> + pcidevs_lock(); >> + pdev = pci_get_pdev(NULL, sbdf); >> + if ( !pdev ) >> + { >> + pcidevs_unlock(); >> + ret = -ENODEV; >> + break; >> + } >> + >> + write_lock(&pdev->domain->pci_lock); >> + pcidevs_unlock(); >> + switch ( dev_reset.flags & PCI_DEVICE_RESET_MASK ) >> + { >> + case PCI_DEVICE_RESET_COLD: >> + case PCI_DEVICE_RESET_WARM: >> + case PCI_DEVICE_RESET_HOT: >> + case PCI_DEVICE_RESET_FLR: >> + ret = vpci_reset_device(pdev); >> + break; >> + >> + default: >> + ret = -EOPNOTSUPP; > > EINVAL > > But: What about the other flag bits? You don't check them (anymore; I > thought there was a check there before). Before this version, the flags field was used as a plain value (like an enum), so there was no check that the other bits are zero. I will add one in the next version. 
> >> --- a/xen/include/public/physdev.h >> +++ b/xen/include/public/physdev.h >> @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); >> */ >> #define PHYSDEVOP_prepare_msix 30 >> #define PHYSDEVOP_release_msix 31 >> +/* >> + * Notify the hypervisor that a PCI device has been reset, so that any >> + * internally cached state is regenerated. Should be called after any >> + * device reset performed by the hardware domain. >> + */ >> +#define PHYSDEVOP_pci_device_reset 32 > > Nit: Please pad the 32 to align with the 30 and 31 in context. Will change in next version. > > Jan
On 20.08.2024 08:00, Chen, Jiqian wrote: > On 2024/8/19 17:04, Jan Beulich wrote: >> On 16.08.2024 13:08, Jiqian Chen wrote: >>> @@ -67,6 +68,57 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >>> break; >>> } >>> >>> + case PHYSDEVOP_pci_device_reset: >>> + { >>> + struct pci_device_reset dev_reset; >>> + struct pci_dev *pdev; >>> + pci_sbdf_t sbdf; >>> + >>> + ret = -EOPNOTSUPP; >>> + if ( !is_pci_passthrough_enabled() ) >>> + break; >> >> It occurs to me (only now, sorry): Does this case really need to be an >> error? I.e. do we really need to bother callers by having them find out >> whether pass-through is supported in the underlying Xen? > I am not sure, but for x86, passthrough is always true, it doesn't matter. > For arm, this hypercall is also used for passthrough devices for now, so it is better to keep the same behavior as other PHYSDEVOP_pci_device_* operation? Despite seeing that I did ack the respective change[1] back at the time, I (now) view this as grossly misnamed, at best. Imo it makes pretty little sense for that predicate helper to return true when there are no IOMMUs in use. Even more so that on an Arm/PCI system without IOMMUs one can use the command line option and then execution will make it past this check. I further question the related part of [2]: Why did the stub need moving? I'm not even sure that part of the change fell under the Suggested-by: there, but I also can't exclude it (I didn't bother trying to find where the suggestion was made). In any event - with [1] PHYSDEVOP_*pci* ended up inconsistent on x86, even if right now only on the surface. Yet as soon as this predicate is changed to take IOMMUs into account, the latent inconsistency would become a real one. An alternative to changing how the function behaves would be to rename it, for name and purpose to actually match - is_pci_passthrough_permitted() maybe? Thoughts anyone, Arm / SMMU maintainers in particular? 
Finally, as to the change here: On an Arm/PCI system where pass-through isn't enabled, the hypervisor will still need to know about resets when vPCI is in use for Dom0. IOW I'd like to refine my earlier comment into suggesting that the conditional be dropped altogether. Jan [1] 15517ed61f55 xen/arm: Add cmdline boot option "pci-passthrough = <boolean>" [2] dec9e02f3190 xen: avoid generation of stub <asm/pci.h> header
On Tue, 2024-08-20 at 09:01 +0200, Jan Beulich wrote: > I further question the related part of [2]: Why did the stub need > moving? The following stub could be returned to Arm's asm/pci.h: ``` static inline bool is_pci_passthrough_enabled(void) { return false; } ``` At the moment it is used only by common code in physdev.c, which is enabled only when CONFIG_HAS_PCI=y, so is_pci_passthrough_enabled() will anyway be defined in arch-specific files when CONFIG_HAS_PCI=y. The reason this stub is here is that, to my understanding, when CONFIG_HAS_PCI=n is_pci_passthrough_enabled() can't return any value except false, and having it here avoids generating the same stub for each newly introduced architecture; but there are architectures which are using this stub. For example, on Arm this stub is used even if CONFIG_HAS_PCI=n, and it would start to fail compilation, as <asm/pci.h> is not included by xen/pci.h anymore. > I'm not even sure that part of the change fell under the Suggested- > by: > there, but I also can't exclude it (I didn't bother trying to find > where > the suggestion was made). IIRC, and if it really matters now, the Suggested-by: was added because you suggested just ifdef-ing asm/pci.h instead of using an arch-specific header or a header in asm-generic. ~ Oleksii
On 8/20/24 03:01, Jan Beulich wrote: > On 20.08.2024 08:00, Chen, Jiqian wrote: >> On 2024/8/19 17:04, Jan Beulich wrote: >>> On 16.08.2024 13:08, Jiqian Chen wrote: >>>> @@ -67,6 +68,57 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >>>> break; >>>> } >>>> >>>> + case PHYSDEVOP_pci_device_reset: >>>> + { >>>> + struct pci_device_reset dev_reset; >>>> + struct pci_dev *pdev; >>>> + pci_sbdf_t sbdf; >>>> + >>>> + ret = -EOPNOTSUPP; >>>> + if ( !is_pci_passthrough_enabled() ) >>>> + break; >>> >>> It occurs to me (only now, sorry): Does this case really need to be an >>> error? I.e. do we really need to bother callers by having them find out >>> whether pass-through is supported in the underlying Xen? >> I am not sure, but for x86, passthrough is always true, it doesn't matter. >> For arm, this hypercall is also used for passthrough devices for now, so it is better to keep the same behavior as other PHYSDEVOP_pci_device_* operation? > > Despite seeing that I did ack the respective change[1] back at the time, I > (now) view this as grossly misnamed, at best. Imo it makes pretty little > sense for that predicate helper to return true when there are no IOMMUs in > use. Even more so that on an Arm/PCI system without IOMMUs one can use the > command line option and then execution will make it past this check. > > I further question the related part of [2]: Why did the stub need moving? > I'm not even sure that part of the change fell under the Suggested-by: > there, but I also can't exclude it (I didn't bother trying to find where > the suggestion was made). > > In any event - with [1] PHYSDEVOP_*pci* ended up inconsistent on x86, > even if right now only on the surface. Yet as soon as this predicate is > changed to take IOMMUs into account, the latent inconsistency would > become a real one. > > An alternative to changing how the function behaves would be to rename it, > for name and purpose to actually match - is_pci_passthrough_permitted() > maybe? 
> > Thoughts anyone, Arm / SMMU maintainers in particular? > > Finally, as to the change here: On an Arm/PCI system where pass-through > isn't enabled, the hypervisor will still need to know about resets when > vPCI is in use for Dom0. IOW I'd like to refine my earlier comment into > suggesting that the conditional be dropped altogether. I agree on removing the condition for the reason you mentioned. I'd like to remove the other instances of the condition in this file as well, but that is the subject of a separate patch in the works [3]. [3] https://lore.kernel.org/xen-devel/20231109182716.367119-9-stewart.hildebrand@amd.com/ > > Jan > > [1] 15517ed61f55 xen/arm: Add cmdline boot option "pci-passthrough = <boolean>" > [2] dec9e02f3190 xen: avoid generation of stub <asm/pci.h> header
On 2024/8/21 05:42, Stewart Hildebrand wrote: > On 8/20/24 03:01, Jan Beulich wrote: >> On 20.08.2024 08:00, Chen, Jiqian wrote: >>> On 2024/8/19 17:04, Jan Beulich wrote: >>>> On 16.08.2024 13:08, Jiqian Chen wrote: >>>>> @@ -67,6 +68,57 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >>>>> break; >>>>> } >>>>> >>>>> + case PHYSDEVOP_pci_device_reset: >>>>> + { >>>>> + struct pci_device_reset dev_reset; >>>>> + struct pci_dev *pdev; >>>>> + pci_sbdf_t sbdf; >>>>> + >>>>> + ret = -EOPNOTSUPP; >>>>> + if ( !is_pci_passthrough_enabled() ) >>>>> + break; >>>> >>>> It occurs to me (only now, sorry): Does this case really need to be an >>>> error? I.e. do we really need to bother callers by having them find out >>>> whether pass-through is supported in the underlying Xen? >>> I am not sure, but for x86, passthrough is always true, it doesn't matter. >>> For arm, this hypercall is also used for passthrough devices for now, so it is better to keep the same behavior as other PHYSDEVOP_pci_device_* operation? >> >> Despite seeing that I did ack the respective change[1] back at the time, I >> (now) view this as grossly misnamed, at best. Imo it makes pretty little >> sense for that predicate helper to return true when there are no IOMMUs in >> use. Even more so that on an Arm/PCI system without IOMMUs one can use the >> command line option and then execution will make it past this check. >> >> I further question the related part of [2]: Why did the stub need moving? >> I'm not even sure that part of the change fell under the Suggested-by: >> there, but I also can't exclude it (I didn't bother trying to find where >> the suggestion was made). >> >> In any event - with [1] PHYSDEVOP_*pci* ended up inconsistent on x86, >> even if right now only on the surface. Yet as soon as this predicate is >> changed to take IOMMUs into account, the latent inconsistency would >> become a real one. 
>> >> An alternative to changing how the function behaves would be to rename it, >> for name and purpose to actually match - is_pci_passthrough_permitted() >> maybe? >> >> Thoughts anyone, Arm / SMMU maintainers in particular? >> >> Finally, as to the change here: On an Arm/PCI system where pass-through >> isn't enabled, the hypervisor will still need to know about resets when >> vPCI is in use for Dom0. IOW I'd like to refine my earlier comment into >> suggesting that the conditional be dropped altogether. > > I agree on removing the condition for the reason you mentioned. I'd > like to remove the other instances of the condition in this file as > well, but that is the subject of a separate patch in the works [3]. > > [3] https://lore.kernel.org/xen-devel/20231109182716.367119-9-stewart.hildebrand@amd.com/ Thanks Stewart and Jan, I will remove this check from my patch in next version. > >> >> Jan >> >> [1] 15517ed61f55 xen/arm: Add cmdline boot option "pci-passthrough = <boolean>" >> [2] dec9e02f3190 xen: avoid generation of stub <asm/pci.h> header >
diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c index c1bd17571e47..68815b03eb25 100644 --- a/xen/arch/x86/hvm/hypercall.c +++ b/xen/arch/x86/hvm/hypercall.c @@ -83,6 +83,7 @@ long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) case PHYSDEVOP_pci_mmcfg_reserved: case PHYSDEVOP_pci_device_add: case PHYSDEVOP_pci_device_remove: + case PHYSDEVOP_pci_device_reset: case PHYSDEVOP_dbgp_op: if ( !is_hardware_domain(currd) ) return -ENOSYS; diff --git a/xen/drivers/pci/physdev.c b/xen/drivers/pci/physdev.c index 42db3e6d133c..980ff1ba3d07 100644 --- a/xen/drivers/pci/physdev.c +++ b/xen/drivers/pci/physdev.c @@ -2,6 +2,7 @@ #include <xen/guest_access.h> #include <xen/hypercall.h> #include <xen/init.h> +#include <xen/vpci.h> #ifndef COMPAT typedef long ret_t; @@ -67,6 +68,57 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) break; } + case PHYSDEVOP_pci_device_reset: + { + struct pci_device_reset dev_reset; + struct pci_dev *pdev; + pci_sbdf_t sbdf; + + ret = -EOPNOTSUPP; + if ( !is_pci_passthrough_enabled() ) + break; + + ret = -EFAULT; + if ( copy_from_guest(&dev_reset, arg, 1) != 0 ) + break; + + sbdf = PCI_SBDF(dev_reset.dev.seg, + dev_reset.dev.bus, + dev_reset.dev.devfn); + + ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf); + if ( ret ) + break; + + pcidevs_lock(); + pdev = pci_get_pdev(NULL, sbdf); + if ( !pdev ) + { + pcidevs_unlock(); + ret = -ENODEV; + break; + } + + write_lock(&pdev->domain->pci_lock); + pcidevs_unlock(); + switch ( dev_reset.flags & PCI_DEVICE_RESET_MASK ) + { + case PCI_DEVICE_RESET_COLD: + case PCI_DEVICE_RESET_WARM: + case PCI_DEVICE_RESET_HOT: + case PCI_DEVICE_RESET_FLR: + ret = vpci_reset_device(pdev); + break; + + default: + ret = -EOPNOTSUPP; + break; + } + write_unlock(&pdev->domain->pci_lock); + + break; + } + default: ret = -ENOSYS; break; diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h index f0c0d4727c0b..3902723ce1db 100644 --- 
a/xen/include/public/physdev.h +++ b/xen/include/public/physdev.h @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 +/* + * Notify the hypervisor that a PCI device has been reset, so that any + * internally cached state is regenerated. Should be called after any + * device reset performed by the hardware domain. + */ +#define PHYSDEVOP_pci_device_reset 32 + struct physdev_pci_device { /* IN */ uint16_t seg; @@ -305,6 +312,16 @@ struct physdev_pci_device { typedef struct physdev_pci_device physdev_pci_device_t; DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); +struct pci_device_reset { + physdev_pci_device_t dev; +#define PCI_DEVICE_RESET_COLD 0x0 +#define PCI_DEVICE_RESET_WARM 0x1 +#define PCI_DEVICE_RESET_HOT 0x2 +#define PCI_DEVICE_RESET_FLR 0x3 +#define PCI_DEVICE_RESET_MASK 0x3 + uint32_t flags; +}; + #define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_DONE 2 diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index da8d0f41e6f4..41e7c3bc2791 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -304,6 +304,12 @@ static inline bool __must_check vpci_process_pending(struct vcpu *v) } #endif +static inline int __must_check vpci_reset_device(struct pci_dev *pdev) +{ + vpci_deassign_device(pdev); + return vpci_assign_device(pdev); +} + #endif /*