Message ID | 20240516095235.64128-2-Jiqian.Chen@amd.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Support device passthrough when dom0 is PVH on Xen | expand |
On 16.05.2024 11:52, Jiqian Chen wrote: > @@ -67,6 +68,41 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > break; > } > > + case PHYSDEVOP_pci_device_state_reset: { > + struct physdev_pci_device dev; > + struct pci_dev *pdev; > + pci_sbdf_t sbdf; > + > + if ( !is_pci_passthrough_enabled() ) > + return -EOPNOTSUPP; > + > + ret = -EFAULT; > + if ( copy_from_guest(&dev, arg, 1) != 0 ) > + break; > + sbdf = PCI_SBDF(dev.seg, dev.bus, dev.devfn); > + > + ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf); > + if ( ret ) > + break; Daniel, is re-use of this hook appropriate here? > + pcidevs_lock(); > + pdev = pci_get_pdev(NULL, sbdf); > + if ( !pdev ) > + { > + pcidevs_unlock(); > + ret = -ENODEV; > + break; > + } > + > + write_lock(&pdev->domain->pci_lock); > + ret = vpci_reset_device_state(pdev); > + write_unlock(&pdev->domain->pci_lock); > + pcidevs_unlock(); Can't this be done right after the write_lock()? > + if ( ret ) > + printk(XENLOG_ERR "%pp: failed to reset PCI device state\n", &sbdf); s/PCI/vPCI/ (at least as long as nothing else is done here) > --- a/xen/drivers/vpci/vpci.c > +++ b/xen/drivers/vpci/vpci.c > @@ -115,6 +115,16 @@ int vpci_assign_device(struct pci_dev *pdev) > > return rc; > } > + > +int vpci_reset_device_state(struct pci_dev *pdev) > +{ > + ASSERT(pcidevs_locked()); Is this necessary for ... > + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); > + > + vpci_deassign_device(pdev); > + return vpci_assign_device(pdev); ... either of these calls? Both functions do themselves only have the 2nd of the asserts you add. > --- a/xen/include/public/physdev.h > +++ b/xen/include/public/physdev.h > @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); > */ > #define PHYSDEVOP_prepare_msix 30 > #define PHYSDEVOP_release_msix 31 > +/* > + * Notify the hypervisor that a PCI device has been reset, so that any > + * internally cached state is regenerated. Should be called after any > + * device reset performed by the hardware domain. > + */ > +#define PHYSDEVOP_pci_device_state_reset 32 Nit: Just a single blank as a separator will do here, for going past the columnar arrangement of other #define-s anyway. > struct physdev_pci_device { > /* IN */ > uint16_t seg; Is re-using this struct for this new sub-op sufficient? IOW are all possible resets equal, and hence it doesn't need specifying what kind of reset was done? For example, other than FLR most reset variants reset all functions in one go aiui. Imo that would better require only a single hypercall, just to avoid possible confusion. It also reads as if FLR would not reset as many registers as other reset variants would. This may seem to not matter right now, but this is a stable interface you add, and hence modifying it later will be cumbersome, if possible at all. Jan
On 2024/5/16 21:08, Jan Beulich wrote: > On 16.05.2024 11:52, Jiqian Chen wrote: >> @@ -67,6 +68,41 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >> break; >> } >> >> + case PHYSDEVOP_pci_device_state_reset: { >> + struct physdev_pci_device dev; >> + struct pci_dev *pdev; >> + pci_sbdf_t sbdf; >> + >> + if ( !is_pci_passthrough_enabled() ) >> + return -EOPNOTSUPP; >> + >> + ret = -EFAULT; >> + if ( copy_from_guest(&dev, arg, 1) != 0 ) >> + break; >> + sbdf = PCI_SBDF(dev.seg, dev.bus, dev.devfn); >> + >> + ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf); >> + if ( ret ) >> + break; > > Daniel, is re-use of this hook appropriate here? In the v2 of this series, Daniel and Roger have agreed that reusing xsm_resource_setup_pci is enough. > >> + pcidevs_lock(); >> + pdev = pci_get_pdev(NULL, sbdf); >> + if ( !pdev ) >> + { >> + pcidevs_unlock(); >> + ret = -ENODEV; >> + break; >> + } >> + >> + write_lock(&pdev->domain->pci_lock); >> + ret = vpci_reset_device_state(pdev); >> + write_unlock(&pdev->domain->pci_lock); >> + pcidevs_unlock(); > > Can't this be done right after the write_lock()? You are right, I will move it in next version. > >> + if ( ret ) >> + printk(XENLOG_ERR "%pp: failed to reset PCI device state\n", &sbdf); > > s/PCI/vPCI/ (at least as long as nothing else is done here) OK, will change in next version. > >> --- a/xen/drivers/vpci/vpci.c >> +++ b/xen/drivers/vpci/vpci.c >> @@ -115,6 +115,16 @@ int vpci_assign_device(struct pci_dev *pdev) >> >> return rc; >> } >> + >> +int vpci_reset_device_state(struct pci_dev *pdev) >> +{ >> + ASSERT(pcidevs_locked()); > > Is this necessary for ... > >> + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); >> + >> + vpci_deassign_device(pdev); >> + return vpci_assign_device(pdev); > > ... either of these calls? Both functions do themselves only have the > 2nd of the asserts you add. I checked codes again; I will remove the two asserts here in next version. > >> --- a/xen/include/public/physdev.h >> +++ b/xen/include/public/physdev.h >> @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); >> */ >> #define PHYSDEVOP_prepare_msix 30 >> #define PHYSDEVOP_release_msix 31 >> +/* >> + * Notify the hypervisor that a PCI device has been reset, so that any >> + * internally cached state is regenerated. Should be called after any >> + * device reset performed by the hardware domain. >> + */ >> +#define PHYSDEVOP_pci_device_state_reset 32 > > Nit: Just a single blank as a separator will do here, for going past the > columnar arrangement of other #define-s anyway. OK. > >> struct physdev_pci_device { >> /* IN */ >> uint16_t seg; > > Is re-using this struct for this new sub-op sufficient? IOW are all > possible resets equal, and hence it doesn't need specifying what kind of > reset was done? For example, other than FLR most reset variants reset all > functions in one go aiui. Imo that would better require only a single > hypercall, just to avoid possible confusion. It also reads as if FLR would > not reset as many registers as other reset variants would. If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? But it can be done for caller to use a cycle to call this reset hypercall for each slot function. > > This may seem to not matter right now, but this is a stable interface you > add, and hence modifying it later will be cumbersome, if possible at all. > > Jan
On 17.05.2024 10:08, Chen, Jiqian wrote: > On 2024/5/16 21:08, Jan Beulich wrote: >> On 16.05.2024 11:52, Jiqian Chen wrote: >>> struct physdev_pci_device { >>> /* IN */ >>> uint16_t seg; >> >> Is re-using this struct for this new sub-op sufficient? IOW are all >> possible resets equal, and hence it doesn't need specifying what kind of >> reset was done? For example, other than FLR most reset variants reset all >> functions in one go aiui. Imo that would better require only a single >> hypercall, just to avoid possible confusion. It also reads as if FLR would >> not reset as many registers as other reset variants would. > If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? > But it can be done for caller to use a cycle to call this reset hypercall for each slot function. It could, yes, but since (aiui) there needs to be an indication of the kind of reset anyway, we can as well avoid relying on the caller doing so (and at the same time simplify what the caller needs to do). Jan
On 2024/5/17 16:20, Jan Beulich wrote: > On 17.05.2024 10:08, Chen, Jiqian wrote: >> On 2024/5/16 21:08, Jan Beulich wrote: >>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>> struct physdev_pci_device { >>>> /* IN */ >>>> uint16_t seg; >>> >>> Is re-using this struct for this new sub-op sufficient? IOW are all >>> possible resets equal, and hence it doesn't need specifying what kind of >>> reset was done? For example, other than FLR most reset variants reset all >>> functions in one go aiui. Imo that would better require only a single >>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>> not reset as many registers as other reset variants would. >> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. > > It could, yes, but since (aiui) there needs to be an indication of the > kind of reset anyway, we can as well avoid relying on the caller doing > so (and at the same time simplify what the caller needs to do). Since the corresponding kernel patch has been merged into linux_next branch https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, if it's not very mandatory and necessary, just let the caller handle it temporarily. Or it can add a new hypercall to reset all functions in one go in future potential requirement, like PHYSDEVOP_pci_device_state_reset_all_func. > > Jan
On 17.05.2024 11:28, Chen, Jiqian wrote: > On 2024/5/17 16:20, Jan Beulich wrote: >> On 17.05.2024 10:08, Chen, Jiqian wrote: >>> On 2024/5/16 21:08, Jan Beulich wrote: >>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>> struct physdev_pci_device { >>>>> /* IN */ >>>>> uint16_t seg; >>>> >>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>> possible resets equal, and hence it doesn't need specifying what kind of >>>> reset was done? For example, other than FLR most reset variants reset all >>>> functions in one go aiui. Imo that would better require only a single >>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>> not reset as many registers as other reset variants would. >>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >> >> It could, yes, but since (aiui) there needs to be an indication of the >> kind of reset anyway, we can as well avoid relying on the caller doing >> so (and at the same time simplify what the caller needs to do). > Since the corresponding kernel patch has been merged into linux_next branch > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, > if it's not very mandatory and necessary, just let the caller handle it temporarily. As also mentioned for the other patch having a corresponding kernel one: The kernel patch would imo better not be merged until the new sub-op is actually finalized. > Or it can add a new hypercall to reset all functions in one go in future potential requirement, like PHYSDEVOP_pci_device_state_reset_all_func. I disagree. We shouldn't introduce incomplete sub-ops. At the very least, if you want to stick to the present form, I'd expect you to supply reasons why distinguishing different reset forms is not necessary (now or later). Jan
On 2024/5/17 17:50, Jan Beulich wrote: > On 17.05.2024 11:28, Chen, Jiqian wrote: >> On 2024/5/17 16:20, Jan Beulich wrote: >>> On 17.05.2024 10:08, Chen, Jiqian wrote: >>>> On 2024/5/16 21:08, Jan Beulich wrote: >>>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>>> struct physdev_pci_device { >>>>>> /* IN */ >>>>>> uint16_t seg; >>>>> >>>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>>> possible resets equal, and hence it doesn't need specifying what kind of >>>>> reset was done? For example, other than FLR most reset variants reset all >>>>> functions in one go aiui. Imo that would better require only a single >>>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>>> not reset as many registers as other reset variants would. >>>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >>> >>> It could, yes, but since (aiui) there needs to be an indication of the >>> kind of reset anyway, we can as well avoid relying on the caller doing >>> so (and at the same time simplify what the caller needs to do). >> Since the corresponding kernel patch has been merged into linux_next branch >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, >> if it's not very mandatory and necessary, just let the caller handle it temporarily. > > As also mentioned for the other patch having a corresponding kernel one: > The kernel patch would imo better not be merged until the new sub-op is > actually finalized. OK, what should I do next step? Upstream a patch to revert the merged patch on kernel side? > >> Or it can add a new hypercall to reset all functions in one go in future potential requirement, like PHYSDEVOP_pci_device_state_reset_all_func. > > I disagree. We shouldn't introduce incomplete sub-ops. At the very least, > if you want to stick to the present form, I'd expect you to supply reasons > why distinguishing different reset forms is not necessary (now or later). OK, if want to distinguish different reset, is it acceptable to add a parameter, like "u8 flag", and reset every function if corresponding bit is 1? > > Jan
On 17.05.24 11:50, Jan Beulich wrote: > On 17.05.2024 11:28, Chen, Jiqian wrote: >> On 2024/5/17 16:20, Jan Beulich wrote: >>> On 17.05.2024 10:08, Chen, Jiqian wrote: >>>> On 2024/5/16 21:08, Jan Beulich wrote: >>>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>>> struct physdev_pci_device { >>>>>> /* IN */ >>>>>> uint16_t seg; >>>>> >>>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>>> possible resets equal, and hence it doesn't need specifying what kind of >>>>> reset was done? For example, other than FLR most reset variants reset all >>>>> functions in one go aiui. Imo that would better require only a single >>>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>>> not reset as many registers as other reset variants would. >>>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >>> >>> It could, yes, but since (aiui) there needs to be an indication of the >>> kind of reset anyway, we can as well avoid relying on the caller doing >>> so (and at the same time simplify what the caller needs to do). >> Since the corresponding kernel patch has been merged into linux_next branch >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, >> if it's not very mandatory and necessary, just let the caller handle it temporarily. > > As also mentioned for the other patch having a corresponding kernel one: > The kernel patch would imo better not be merged until the new sub-op is > actually finalized. Oh, sorry to have overlooked that the interfcae change isn't yet committed on Xen side. I'll drop the patch from my linux-next branch. Juergen
Hi Juergen: On 2024/5/17 18:03, Jürgen Groß wrote: > On 17.05.24 11:50, Jan Beulich wrote: >> On 17.05.2024 11:28, Chen, Jiqian wrote: >>> On 2024/5/17 16:20, Jan Beulich wrote: >>>> On 17.05.2024 10:08, Chen, Jiqian wrote: >>>>> On 2024/5/16 21:08, Jan Beulich wrote: >>>>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>>>> struct physdev_pci_device { >>>>>>> /* IN */ >>>>>>> uint16_t seg; >>>>>> >>>>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>>>> possible resets equal, and hence it doesn't need specifying what kind of >>>>>> reset was done? For example, other than FLR most reset variants reset all >>>>>> functions in one go aiui. Imo that would better require only a single >>>>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>>>> not reset as many registers as other reset variants would. >>>>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>>>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >>>> >>>> It could, yes, but since (aiui) there needs to be an indication of the >>>> kind of reset anyway, we can as well avoid relying on the caller doing >>>> so (and at the same time simplify what the caller needs to do). >>> Since the corresponding kernel patch has been merged into linux_next branch >>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, >>> if it's not very mandatory and necessary, just let the caller handle it temporarily. >> >> As also mentioned for the other patch having a corresponding kernel one: >> The kernel patch would imo better not be merged until the new sub-op is >> actually finalized. > > Oh, sorry to have overlooked that the interfcae change isn't yet committed on > Xen side. > > I'll drop the patch from my linux-next branch. Thanks. I will modify my code according to Jan's requirements and send a new version soon. > > > Juergen >
On 17.05.2024 12:00, Chen, Jiqian wrote: > On 2024/5/17 17:50, Jan Beulich wrote: >> On 17.05.2024 11:28, Chen, Jiqian wrote: >>> On 2024/5/17 16:20, Jan Beulich wrote: >>>> On 17.05.2024 10:08, Chen, Jiqian wrote: >>>>> On 2024/5/16 21:08, Jan Beulich wrote: >>>>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>>>> struct physdev_pci_device { >>>>>>> /* IN */ >>>>>>> uint16_t seg; >>>>>> >>>>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>>>> possible resets equal, and hence it doesn't need specifying what kind of >>>>>> reset was done? For example, other than FLR most reset variants reset all >>>>>> functions in one go aiui. Imo that would better require only a single >>>>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>>>> not reset as many registers as other reset variants would. >>>>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>>>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >>>> >>>> It could, yes, but since (aiui) there needs to be an indication of the >>>> kind of reset anyway, we can as well avoid relying on the caller doing >>>> so (and at the same time simplify what the caller needs to do). >>> Since the corresponding kernel patch has been merged into linux_next branch >>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, >>> if it's not very mandatory and necessary, just let the caller handle it temporarily. >> >> As also mentioned for the other patch having a corresponding kernel one: >> The kernel patch would imo better not be merged until the new sub-op is >> actually finalized. > OK, what should I do next step? > Upstream a patch to revert the merged patch on kernel side? > >> >>> Or it can add a new hypercall to reset all functions in one go in future potential requirement, like PHYSDEVOP_pci_device_state_reset_all_func. >> >> I disagree. We shouldn't introduce incomplete sub-ops. At the very least, >> if you want to stick to the present form, I'd expect you to supply reasons >> why distinguishing different reset forms is not necessary (now or later). > OK, if want to distinguish different reset, is it acceptable to add a parameter, like "u8 flag", and reset every function if corresponding bit is 1? I'm afraid a boolean won't do, at least not long term. I think it wants to be an enumeration (i.e. a set of enumeration-like #define-s). And just to stress it again: The extra argument is _not_ primarily for the looping over all functions. It is to convey the kind of reset that was done. The single vs all function(s) aspect is just a useful side effect this will have. Jan
On 2024/5/17 18:31, Jan Beulich wrote: > On 17.05.2024 12:00, Chen, Jiqian wrote: >> On 2024/5/17 17:50, Jan Beulich wrote: >>> On 17.05.2024 11:28, Chen, Jiqian wrote: >>>> On 2024/5/17 16:20, Jan Beulich wrote: >>>>> On 17.05.2024 10:08, Chen, Jiqian wrote: >>>>>> On 2024/5/16 21:08, Jan Beulich wrote: >>>>>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>>>>> struct physdev_pci_device { >>>>>>>> /* IN */ >>>>>>>> uint16_t seg; >>>>>>> >>>>>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>>>>> possible resets equal, and hence it doesn't need specifying what kind of >>>>>>> reset was done? For example, other than FLR most reset variants reset all >>>>>>> functions in one go aiui. Imo that would better require only a single >>>>>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>>>>> not reset as many registers as other reset variants would. >>>>>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>>>>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >>>>> >>>>> It could, yes, but since (aiui) there needs to be an indication of the >>>>> kind of reset anyway, we can as well avoid relying on the caller doing >>>>> so (and at the same time simplify what the caller needs to do). >>>> Since the corresponding kernel patch has been merged into linux_next branch >>>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, >>>> if it's not very mandatory and necessary, just let the caller handle it temporarily. >>> >>> As also mentioned for the other patch having a corresponding kernel one: >>> The kernel patch would imo better not be merged until the new sub-op is >>> actually finalized. >> OK, what should I do next step? >> Upstream a patch to revert the merged patch on kernel side? >> >>> >>>> Or it can add a new hypercall to reset all functions in one go in future potential requirement, like PHYSDEVOP_pci_device_state_reset_all_func. >>> >>> I disagree. We shouldn't introduce incomplete sub-ops. At the very least, >>> if you want to stick to the present form, I'd expect you to supply reasons >>> why distinguishing different reset forms is not necessary (now or later). >> OK, if want to distinguish different reset, is it acceptable to add a parameter, like "u8 flag", and reset every function if corresponding bit is 1? > > I'm afraid a boolean won't do, at least not long term. I think it wants to > be an enumeration (i.e. a set of enumeration-like #define-s). And just to > stress it again: The extra argument is _not_ primarily for the looping over > all functions. It is to convey the kind of reset that was done. The single > vs all function(s) aspect is just a useful side effect this will have. Do you mean, like: enum RESET_DEVICE_STATE { RESET_DEVICE_SINGLE_FUNC, RESET_DEVICE_ALL_FUNC, Others }; If caller pass in RESET_DEVICE_SINGLE_FUNC, I call what I add in this patch, as for other types call other functions if added in future? > > Jan
On 17.05.2024 13:01, Chen, Jiqian wrote: > On 2024/5/17 18:31, Jan Beulich wrote: >> On 17.05.2024 12:00, Chen, Jiqian wrote: >>> On 2024/5/17 17:50, Jan Beulich wrote: >>>> On 17.05.2024 11:28, Chen, Jiqian wrote: >>>>> On 2024/5/17 16:20, Jan Beulich wrote: >>>>>> On 17.05.2024 10:08, Chen, Jiqian wrote: >>>>>>> On 2024/5/16 21:08, Jan Beulich wrote: >>>>>>>> On 16.05.2024 11:52, Jiqian Chen wrote: >>>>>>>>> struct physdev_pci_device { >>>>>>>>> /* IN */ >>>>>>>>> uint16_t seg; >>>>>>>> >>>>>>>> Is re-using this struct for this new sub-op sufficient? IOW are all >>>>>>>> possible resets equal, and hence it doesn't need specifying what kind of >>>>>>>> reset was done? For example, other than FLR most reset variants reset all >>>>>>>> functions in one go aiui. Imo that would better require only a single >>>>>>>> hypercall, just to avoid possible confusion. It also reads as if FLR would >>>>>>>> not reset as many registers as other reset variants would. >>>>>>> If I understood correctly that you mean in this hypercall it needs to support resetting both one function and all functions of a slot(dev)? >>>>>>> But it can be done for caller to use a cycle to call this reset hypercall for each slot function. >>>>>> >>>>>> It could, yes, but since (aiui) there needs to be an indication of the >>>>>> kind of reset anyway, we can as well avoid relying on the caller doing >>>>>> so (and at the same time simplify what the caller needs to do). >>>>> Since the corresponding kernel patch has been merged into linux_next branch >>>>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20240515&id=b272722511d5e8ae580f01830687b8a6b2717f01, >>>>> if it's not very mandatory and necessary, just let the caller handle it temporarily. >>>> >>>> As also mentioned for the other patch having a corresponding kernel one: >>>> The kernel patch would imo better not be merged until the new sub-op is >>>> actually finalized. >>> OK, what should I do next step? >>> Upstream a patch to revert the merged patch on kernel side? >>> >>>> >>>>> Or it can add a new hypercall to reset all functions in one go in future potential requirement, like PHYSDEVOP_pci_device_state_reset_all_func. >>>> >>>> I disagree. We shouldn't introduce incomplete sub-ops. At the very least, >>>> if you want to stick to the present form, I'd expect you to supply reasons >>>> why distinguishing different reset forms is not necessary (now or later). >>> OK, if want to distinguish different reset, is it acceptable to add a parameter, like "u8 flag", and reset every function if corresponding bit is 1? >> >> I'm afraid a boolean won't do, at least not long term. I think it wants to >> be an enumeration (i.e. a set of enumeration-like #define-s). And just to >> stress it again: The extra argument is _not_ primarily for the looping over >> all functions. It is to convey the kind of reset that was done. The single >> vs all function(s) aspect is just a useful side effect this will have. > Do you mean, like: > enum RESET_DEVICE_STATE { > RESET_DEVICE_SINGLE_FUNC, > RESET_DEVICE_ALL_FUNC, > Others > }; No. What we need to be able to tell apart in the hypervisor is (at least) FLR and Conventional Reset. I can't tell right away whether the sub-forms of the latter also may need telling apart. I expect you to dive into that and make a good proposal. Jan
On 5/17/24 04:08, Chen, Jiqian wrote: > On 2024/5/16 21:08, Jan Beulich wrote: >> On 16.05.2024 11:52, Jiqian Chen wrote: >>> @@ -67,6 +68,41 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >>> + pcidevs_lock(); >>> + pdev = pci_get_pdev(NULL, sbdf); >>> + if ( !pdev ) >>> + { >>> + pcidevs_unlock(); >>> + ret = -ENODEV; >>> + break; >>> + } >>> + >>> + write_lock(&pdev->domain->pci_lock); >>> + ret = vpci_reset_device_state(pdev); >>> + write_unlock(&pdev->domain->pci_lock); >>> + pcidevs_unlock(); >> >> Can't this be done right after the write_lock()? > You are right, I will move it in next version. So that we are clear on the proposed order of calls here: + write_lock(&pdev->domain->pci_lock); + pcidevs_unlock(); + ret = vpci_reset_device_state(pdev); + write_unlock(&pdev->domain->pci_lock); >>> --- a/xen/drivers/vpci/vpci.c >>> +++ b/xen/drivers/vpci/vpci.c >>> @@ -115,6 +115,16 @@ int vpci_assign_device(struct pci_dev *pdev) >>> >>> return rc; >>> } >>> + >>> +int vpci_reset_device_state(struct pci_dev *pdev) >>> +{ >>> + ASSERT(pcidevs_locked()); >> >> Is this necessary for ... >> >>> + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); >>> + >>> + vpci_deassign_device(pdev); >>> + return vpci_assign_device(pdev); >> >> ... either of these calls? Both functions do themselves only have the >> 2nd of the asserts you add. > I checked codes again; I will remove the two asserts here in next version. Nit: I think it's okay to keep the ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c index 14679dd82971..56fbb69ab201 100644 --- a/xen/arch/x86/hvm/hypercall.c +++ b/xen/arch/x86/hvm/hypercall.c @@ -84,6 +84,7 @@ long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) case PHYSDEVOP_pci_mmcfg_reserved: case PHYSDEVOP_pci_device_add: case PHYSDEVOP_pci_device_remove: + case PHYSDEVOP_pci_device_state_reset: case PHYSDEVOP_dbgp_op: if ( !is_hardware_domain(currd) ) return -ENOSYS; diff --git a/xen/drivers/pci/physdev.c b/xen/drivers/pci/physdev.c index 42db3e6d133c..73dc8f058b0e 100644 --- a/xen/drivers/pci/physdev.c +++ b/xen/drivers/pci/physdev.c @@ -2,6 +2,7 @@ #include <xen/guest_access.h> #include <xen/hypercall.h> #include <xen/init.h> +#include <xen/vpci.h> #ifndef COMPAT typedef long ret_t; @@ -67,6 +68,41 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) break; } + case PHYSDEVOP_pci_device_state_reset: { + struct physdev_pci_device dev; + struct pci_dev *pdev; + pci_sbdf_t sbdf; + + if ( !is_pci_passthrough_enabled() ) + return -EOPNOTSUPP; + + ret = -EFAULT; + if ( copy_from_guest(&dev, arg, 1) != 0 ) + break; + sbdf = PCI_SBDF(dev.seg, dev.bus, dev.devfn); + + ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf); + if ( ret ) + break; + + pcidevs_lock(); + pdev = pci_get_pdev(NULL, sbdf); + if ( !pdev ) + { + pcidevs_unlock(); + ret = -ENODEV; + break; + } + + write_lock(&pdev->domain->pci_lock); + ret = vpci_reset_device_state(pdev); + write_unlock(&pdev->domain->pci_lock); + pcidevs_unlock(); + if ( ret ) + printk(XENLOG_ERR "%pp: failed to reset PCI device state\n", &sbdf); + break; + } + default: ret = -ENOSYS; break; diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c index 97e115dc5798..424aec2d5c46 100644 --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -115,6 +115,16 @@ int vpci_assign_device(struct pci_dev *pdev) return rc; } + +int vpci_reset_device_state(struct pci_dev *pdev) +{ + ASSERT(pcidevs_locked()); + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); + + vpci_deassign_device(pdev); + return vpci_assign_device(pdev); +} + #endif /* __XEN__ */ static int vpci_register_cmp(const struct vpci_register *r1, diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h index f0c0d4727c0b..f5bab1f29779 100644 --- a/xen/include/public/physdev.h +++ b/xen/include/public/physdev.h @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 +/* + * Notify the hypervisor that a PCI device has been reset, so that any + * internally cached state is regenerated. Should be called after any + * device reset performed by the hardware domain. + */ +#define PHYSDEVOP_pci_device_state_reset 32 + struct physdev_pci_device { /* IN */ uint16_t seg; diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index 6e4c972f35ed..93b1c1d72c05 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -30,6 +30,7 @@ int __must_check vpci_assign_device(struct pci_dev *pdev); /* Remove all handlers and free vpci related structures. */ void vpci_deassign_device(struct pci_dev *pdev); +int __must_check vpci_reset_device_state(struct pci_dev *pdev); /* Add/remove a register handler. */ int __must_check vpci_add_register_mask(struct vpci *vpci, @@ -266,6 +267,11 @@ static inline int vpci_assign_device(struct pci_dev *pdev) static inline void vpci_deassign_device(struct pci_dev *pdev) { } +static inline int __must_check vpci_reset_device_state(struct pci_dev *pdev) +{ + return 0; +} + static inline void vpci_dump_msi(void) { } static inline uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg,