Message ID | 20210114103140.866141-2-leon@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Dynamically assign MSI-X vectors count | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Guessed tree name to be net-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | warning | 6 maintainers not CCed: luzmaximilian@gmail.com gregkh@linuxfoundation.org idryomov@gmail.com suzuki.poulose@arm.com mchehab+huawei@kernel.org Jonathan.Cameron@huawei.com |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | fail | Errors and warnings before: 9819 this patch: 9820 |
netdev/kdoc | fail | Errors and warnings before: 2 this patch: 3 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | WARNING: line length of 93 exceeds 80 columns |
netdev/build_allmodconfig_warn | fail | Errors and warnings before: 10585 this patch: 10586 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On Thu, 14 Jan 2021 12:31:36 +0200 Leon Romanovsky <leon@kernel.org> wrote: > From: Leon Romanovsky <leonro@nvidia.com> > > Extend PCI sysfs interface with a new callback that allows configure > the number of MSI-X vectors for specific SR-IO VF. This is needed > to optimize the performance of newly bound devices by allocating > the number of vectors based on the administrator knowledge of targeted VM. > > This function is applicable for SR-IOV VF because such devices allocate > their MSI-X table before they will run on the VMs and HW can't guess the > right number of vectors, so the HW allocates them statically and equally. > > The newly added /sys/bus/pci/devices/.../sriov_vf_msix_count file will be seen > for the VFs and it is writable as long as a driver is not bounded to the VF. > > The values accepted are: > * > 0 - this will be number reported by the VF's MSI-X capability > * < 0 - not valid > * = 0 - will reset to the device default value > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > --- > Documentation/ABI/testing/sysfs-bus-pci | 20 +++++++++ > drivers/pci/iov.c | 58 +++++++++++++++++++++++++ > drivers/pci/msi.c | 47 ++++++++++++++++++++ > drivers/pci/pci-sysfs.c | 1 + > drivers/pci/pci.h | 2 + > include/linux/pci.h | 3 ++ > 6 files changed, 131 insertions(+) > > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci > index 25c9c39770c6..34a8c6bcde70 100644 > --- a/Documentation/ABI/testing/sysfs-bus-pci > +++ b/Documentation/ABI/testing/sysfs-bus-pci > @@ -375,3 +375,23 @@ Description: > The value comes from the PCI kernel device state and can be one > of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold". > The file is read only. > + > +What: /sys/bus/pci/devices/.../sriov_vf_msix_count > +Date: December 2020 > +Contact: Leon Romanovsky <leonro@nvidia.com> > +Description: > + This file is associated with the SR-IOV VFs. > + It allows configuration of the number of MSI-X vectors for > + the VF. This is needed to optimize performance of newly bound > + devices by allocating the number of vectors based on the > + administrator knowledge of targeted VM. > + > + The values accepted are: > + * > 0 - this will be number reported by the VF's MSI-X > + capability > + * < 0 - not valid > + * = 0 - will reset to the device default value > + > + The file is writable if the PF is bound to a driver that > + set sriov_vf_total_msix > 0 and there is no driver bound > + to the VF. > diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c > index 4afd4ee4f7f0..5bc496f8ffa3 100644 > --- a/drivers/pci/iov.c > +++ b/drivers/pci/iov.c > @@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id) > return (dev->devfn + dev->sriov->offset + > dev->sriov->stride * vf_id) & 0xff; > } > +EXPORT_SYMBOL(pci_iov_virtfn_devfn); > > /* > * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may > @@ -426,6 +427,63 @@ const struct attribute_group sriov_dev_attr_group = { > .is_visible = sriov_attrs_are_visible, > }; > > +#ifdef CONFIG_PCI_MSI > +static ssize_t sriov_vf_msix_count_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + int count = pci_msix_vec_count(pdev); > + > + if (count < 0) > + return count; > + > + return sprintf(buf, "%d\n", count); > +} > + > +static ssize_t sriov_vf_msix_count_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct pci_dev *vf_dev = to_pci_dev(dev); > + int val, ret; > + > + ret = kstrtoint(buf, 0, &val); > + if (ret) > + return ret; > + > + ret = pci_set_msix_vec_count(vf_dev, val); > + if (ret) > + return ret; > + > + return count; > +} > +static DEVICE_ATTR_RW(sriov_vf_msix_count); > +#endif > + > +static struct attribute *sriov_vf_dev_attrs[] = { > +#ifdef CONFIG_PCI_MSI > + &dev_attr_sriov_vf_msix_count.attr, > +#endif > + NULL, > +}; > + > +static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj, > + struct attribute *a, int n) > +{ > + struct device *dev = kobj_to_dev(kobj); > + > + if (dev_is_pf(dev)) > + return 0; Wouldn't it be cleaner to also hide this on VFs where pci_msix_vec_count() returns an error or where the PF driver doesn't implement .sriov_set_msix_vec_count()? IOW, expose it only where it could actually work. > + > + return a->mode; > +} > + > +const struct attribute_group sriov_vf_dev_attr_group = { > + .attrs = sriov_vf_dev_attrs, > + .is_visible = sriov_vf_attrs_are_visible, > +}; > + > int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) > { > return 0; > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > index 3162f88fe940..5a40200343c9 100644 > --- a/drivers/pci/msi.c > +++ b/drivers/pci/msi.c > @@ -991,6 +991,53 @@ int pci_msix_vec_count(struct pci_dev *dev) > } > EXPORT_SYMBOL(pci_msix_vec_count); > > +/** > + * pci_set_msix_vec_count - change the reported number of MSI-X vectors > + * This function is applicable for SR-IOV VF because such devices allocate > + * their MSI-X table before they will run on the VMs and HW can't guess the > + * right number of vectors, so the HW allocates them statically and equally. Nit, this is an assumption of the VF usage and conjecture of the implementation. > + * @dev: VF device that is going to be changed > + * @count amount of MSI-X vectors > + **/ > +int pci_set_msix_vec_count(struct pci_dev *dev, int count) pci_vf_set_msix_vec_count()? Long, I know, but if it's limited to VFs name it accordingly. Thanks, Alex > +{ > + struct pci_dev *pdev = pci_physfn(dev); > + int ret; > + > + if (!dev->msix_cap || !pdev->msix_cap || count < 0) > + /* > + * We don't support negative numbers for now, > + * but maybe in the future it will make sense. > + */ > + return -EINVAL; > + > + device_lock(&pdev->dev); > + if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) { > + ret = -EOPNOTSUPP; > + goto err_pdev; > + } > + > + device_lock(&dev->dev); > + if (dev->driver) { > + /* > + * Driver already probed this VF and configured itself > + * based on previously configured (or default) MSI-X vector > + * count. It is too late to change this field for this > + * specific VF. > + */ > + ret = -EBUSY; > + goto err_dev; > + } > + > + ret = pdev->driver->sriov_set_msix_vec_count(dev, count); > + > +err_dev: > + device_unlock(&dev->dev); > +err_pdev: > + device_unlock(&pdev->dev); > + return ret; > +} > + > static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, > int nvec, struct irq_affinity *affd, int flags) > { > diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c > index fb072f4b3176..0af2222643c2 100644 > --- a/drivers/pci/pci-sysfs.c > +++ b/drivers/pci/pci-sysfs.c > @@ -1557,6 +1557,7 @@ static const struct attribute_group *pci_dev_attr_groups[] = { > &pci_dev_hp_attr_group, > #ifdef CONFIG_PCI_IOV > &sriov_dev_attr_group, > + &sriov_vf_dev_attr_group, > #endif > &pci_bridge_attr_group, > &pcie_dev_attr_group, > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 5c59365092fa..dbbfa9e73ea8 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -183,6 +183,7 @@ extern unsigned int pci_pm_d3hot_delay; > > #ifdef CONFIG_PCI_MSI > void pci_no_msi(void); > +int pci_set_msix_vec_count(struct pci_dev *dev, int count); > #else > static inline void pci_no_msi(void) { } > #endif > @@ -502,6 +503,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); > void pci_restore_iov_state(struct pci_dev *dev); > int pci_iov_bus_range(struct pci_bus *bus); > extern const struct attribute_group sriov_dev_attr_group; > +extern const struct attribute_group sriov_vf_dev_attr_group; > #else > static inline int pci_iov_init(struct pci_dev *dev) > { > diff --git a/include/linux/pci.h b/include/linux/pci.h > index b32126d26997..6be96d468eda 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -856,6 +856,8 @@ struct module; > * e.g. drivers/net/e100.c. > * @sriov_configure: Optional driver callback to allow configuration of > * number of VFs to enable via sysfs "sriov_numvfs" file. > + * @sriov_set_msix_vec_count: Driver callback to change number of MSI-X vectors > + * exposed by the sysfs "vf_msix_vec" entry. > * @err_handler: See Documentation/PCI/pci-error-recovery.rst > * @groups: Sysfs attribute groups. > * @driver: Driver model structure. > @@ -871,6 +873,7 @@ struct pci_driver { > int (*resume)(struct pci_dev *dev); /* Device woken up */ > void (*shutdown)(struct pci_dev *dev); > int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ > + int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */ > const struct pci_error_handlers *err_handler; > const struct attribute_group **groups; > struct device_driver driver; > -- > 2.29.2 >
On Thu, Jan 14, 2021 at 05:05:43PM -0700, Alex Williamson wrote: > On Thu, 14 Jan 2021 12:31:36 +0200 > Leon Romanovsky <leon@kernel.org> wrote: > > > From: Leon Romanovsky <leonro@nvidia.com> > > > > Extend PCI sysfs interface with a new callback that allows configure > > the number of MSI-X vectors for specific SR-IO VF. This is needed > > to optimize the performance of newly bound devices by allocating > > the number of vectors based on the administrator knowledge of targeted VM. > > > > This function is applicable for SR-IOV VF because such devices allocate > > their MSI-X table before they will run on the VMs and HW can't guess the > > right number of vectors, so the HW allocates them statically and equally. > > > > The newly added /sys/bus/pci/devices/.../sriov_vf_msix_count file will be seen > > for the VFs and it is writable as long as a driver is not bounded to the VF. > > > > The values accepted are: > > * > 0 - this will be number reported by the VF's MSI-X capability > > * < 0 - not valid > > * = 0 - will reset to the device default value > > > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > > --- > > Documentation/ABI/testing/sysfs-bus-pci | 20 +++++++++ > > drivers/pci/iov.c | 58 +++++++++++++++++++++++++ > > drivers/pci/msi.c | 47 ++++++++++++++++++++ > > drivers/pci/pci-sysfs.c | 1 + > > drivers/pci/pci.h | 2 + > > include/linux/pci.h | 3 ++ > > 6 files changed, 131 insertions(+) > > > > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci > > index 25c9c39770c6..34a8c6bcde70 100644 > > --- a/Documentation/ABI/testing/sysfs-bus-pci > > +++ b/Documentation/ABI/testing/sysfs-bus-pci > > @@ -375,3 +375,23 @@ Description: > > The value comes from the PCI kernel device state and can be one > > of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold". > > The file is read only. > > + > > +What: /sys/bus/pci/devices/.../sriov_vf_msix_count > > +Date: December 2020 > > +Contact: Leon Romanovsky <leonro@nvidia.com> > > +Description: > > + This file is associated with the SR-IOV VFs. > > + It allows configuration of the number of MSI-X vectors for > > + the VF. This is needed to optimize performance of newly bound > > + devices by allocating the number of vectors based on the > > + administrator knowledge of targeted VM. > > + > > + The values accepted are: > > + * > 0 - this will be number reported by the VF's MSI-X > > + capability > > + * < 0 - not valid > > + * = 0 - will reset to the device default value > > + > > + The file is writable if the PF is bound to a driver that > > + set sriov_vf_total_msix > 0 and there is no driver bound > > + to the VF. > > diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c > > index 4afd4ee4f7f0..5bc496f8ffa3 100644 > > --- a/drivers/pci/iov.c > > +++ b/drivers/pci/iov.c > > @@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id) > > return (dev->devfn + dev->sriov->offset + > > dev->sriov->stride * vf_id) & 0xff; > > } > > +EXPORT_SYMBOL(pci_iov_virtfn_devfn); > > > > /* > > * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may > > @@ -426,6 +427,63 @@ const struct attribute_group sriov_dev_attr_group = { > > .is_visible = sriov_attrs_are_visible, > > }; > > > > +#ifdef CONFIG_PCI_MSI > > +static ssize_t sriov_vf_msix_count_show(struct device *dev, > > + struct device_attribute *attr, > > + char *buf) > > +{ > > + struct pci_dev *pdev = to_pci_dev(dev); > > + int count = pci_msix_vec_count(pdev); > > + > > + if (count < 0) > > + return count; > > + > > + return sprintf(buf, "%d\n", count); > > +} > > + > > +static ssize_t sriov_vf_msix_count_store(struct device *dev, > > + struct device_attribute *attr, > > + const char *buf, size_t count) > > +{ > > + struct pci_dev *vf_dev = to_pci_dev(dev); > > + int val, ret; > > + > > + ret = kstrtoint(buf, 0, &val); > > + if (ret) > > + return ret; > > + > > + ret = pci_set_msix_vec_count(vf_dev, val); > > + if (ret) > > + return ret; > > + > > + return count; > > +} > > +static DEVICE_ATTR_RW(sriov_vf_msix_count); > > +#endif > > + > > +static struct attribute *sriov_vf_dev_attrs[] = { > > +#ifdef CONFIG_PCI_MSI > > + &dev_attr_sriov_vf_msix_count.attr, > > +#endif > > + NULL, > > +}; > > + > > +static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj, > > + struct attribute *a, int n) > > +{ > > + struct device *dev = kobj_to_dev(kobj); > > + > > + if (dev_is_pf(dev)) > > + return 0; > > Wouldn't it be cleaner to also hide this on VFs where > pci_msix_vec_count() returns an error or where the PF driver doesn't > implement .sriov_set_msix_vec_count()? IOW, expose it only where it > could actually work. I wasn't sure about the policy in PCI/core, but sure will change. > > > + > > + return a->mode; > > +} > > + > > +const struct attribute_group sriov_vf_dev_attr_group = { > > + .attrs = sriov_vf_dev_attrs, > > + .is_visible = sriov_vf_attrs_are_visible, > > +}; > > + > > int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) > > { > > return 0; > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > > index 3162f88fe940..5a40200343c9 100644 > > --- a/drivers/pci/msi.c > > +++ b/drivers/pci/msi.c > > @@ -991,6 +991,53 @@ int pci_msix_vec_count(struct pci_dev *dev) > > } > > EXPORT_SYMBOL(pci_msix_vec_count); > > > > +/** > > + * pci_set_msix_vec_count - change the reported number of MSI-X vectors > > + * This function is applicable for SR-IOV VF because such devices allocate > > + * their MSI-X table before they will run on the VMs and HW can't guess the > > + * right number of vectors, so the HW allocates them statically and equally. > > Nit, this is an assumption of the VF usage and conjecture of the > implementation. This is one of the possible implementations. > > > + * @dev: VF device that is going to be changed > > + * @count amount of MSI-X vectors > > + **/ > > +int pci_set_msix_vec_count(struct pci_dev *dev, int count) > > pci_vf_set_msix_vec_count()? Long, I know, but if it's limited to VFs > name it accordingly. Thanks, I'll do. > > Alex > > > +{ > > + struct pci_dev *pdev = pci_physfn(dev); > > + int ret; > > + > > + if (!dev->msix_cap || !pdev->msix_cap || count < 0) > > + /* > > + * We don't support negative numbers for now, > > + * but maybe in the future it will make sense. > > + */ > > + return -EINVAL; > > + > > + device_lock(&pdev->dev); > > + if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) { > > + ret = -EOPNOTSUPP; > > + goto err_pdev; > > + } > > + > > + device_lock(&dev->dev); > > + if (dev->driver) { > > + /* > > + * Driver already probed this VF and configured itself > > + * based on previously configured (or default) MSI-X vector > > + * count. It is too late to change this field for this > > + * specific VF. > > + */ > > + ret = -EBUSY; > > + goto err_dev; > > + } > > + > > + ret = pdev->driver->sriov_set_msix_vec_count(dev, count); > > + > > +err_dev: > > + device_unlock(&dev->dev); > > +err_pdev: > > + device_unlock(&pdev->dev); > > + return ret; > > +} > > + > > static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, > > int nvec, struct irq_affinity *affd, int flags) > > { > > diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c > > index fb072f4b3176..0af2222643c2 100644 > > --- a/drivers/pci/pci-sysfs.c > > +++ b/drivers/pci/pci-sysfs.c > > @@ -1557,6 +1557,7 @@ static const struct attribute_group *pci_dev_attr_groups[] = { > > &pci_dev_hp_attr_group, > > #ifdef CONFIG_PCI_IOV > > &sriov_dev_attr_group, > > + &sriov_vf_dev_attr_group, > > #endif > > &pci_bridge_attr_group, > > &pcie_dev_attr_group, > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > > index 5c59365092fa..dbbfa9e73ea8 100644 > > --- a/drivers/pci/pci.h > > +++ b/drivers/pci/pci.h > > @@ -183,6 +183,7 @@ extern unsigned int pci_pm_d3hot_delay; > > > > #ifdef CONFIG_PCI_MSI > > void pci_no_msi(void); > > +int pci_set_msix_vec_count(struct pci_dev *dev, int count); > > #else > > static inline void pci_no_msi(void) { } > > #endif > > @@ -502,6 +503,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); > > void pci_restore_iov_state(struct pci_dev *dev); > > int pci_iov_bus_range(struct pci_bus *bus); > > extern const struct attribute_group sriov_dev_attr_group; > > +extern const struct attribute_group sriov_vf_dev_attr_group; > > #else > > static inline int pci_iov_init(struct pci_dev *dev) > > { > > diff --git a/include/linux/pci.h b/include/linux/pci.h > > index b32126d26997..6be96d468eda 100644 > > --- a/include/linux/pci.h > > +++ b/include/linux/pci.h > > @@ -856,6 +856,8 @@ struct module; > > * e.g. drivers/net/e100.c. > > * @sriov_configure: Optional driver callback to allow configuration of > > * number of VFs to enable via sysfs "sriov_numvfs" file. > > + * @sriov_set_msix_vec_count: Driver callback to change number of MSI-X vectors > > + * exposed by the sysfs "vf_msix_vec" entry. > > * @err_handler: See Documentation/PCI/pci-error-recovery.rst > > * @groups: Sysfs attribute groups. > > * @driver: Driver model structure. > > @@ -871,6 +873,7 @@ struct pci_driver { > > int (*resume)(struct pci_dev *dev); /* Device woken up */ > > void (*shutdown)(struct pci_dev *dev); > > int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ > > + int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */ > > const struct pci_error_handlers *err_handler; > > const struct attribute_group **groups; > > struct device_driver driver; > > -- > > 2.29.2 > > >
On Sat, Jan 16, 2021 at 10:23:31AM +0200, Leon Romanovsky wrote: > On Thu, Jan 14, 2021 at 05:05:43PM -0700, Alex Williamson wrote: > > On Thu, 14 Jan 2021 12:31:36 +0200 > > Leon Romanovsky <leon@kernel.org> wrote: > > > > > From: Leon Romanovsky <leonro@nvidia.com> > > > > > > Extend PCI sysfs interface with a new callback that allows configure > > > the number of MSI-X vectors for specific SR-IO VF. This is needed > > > to optimize the performance of newly bound devices by allocating > > > the number of vectors based on the administrator knowledge of targeted VM. > > > > > > This function is applicable for SR-IOV VF because such devices allocate > > > their MSI-X table before they will run on the VMs and HW can't guess the > > > right number of vectors, so the HW allocates them statically and equally. > > > > > > The newly added /sys/bus/pci/devices/.../sriov_vf_msix_count file will be seen > > > for the VFs and it is writable as long as a driver is not bounded to the VF. > > > > > > The values accepted are: > > > * > 0 - this will be number reported by the VF's MSI-X capability > > > * < 0 - not valid > > > * = 0 - will reset to the device default value > > > > > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > > > --- > > > Documentation/ABI/testing/sysfs-bus-pci | 20 +++++++++ > > > drivers/pci/iov.c | 58 +++++++++++++++++++++++++ > > > drivers/pci/msi.c | 47 ++++++++++++++++++++ > > > drivers/pci/pci-sysfs.c | 1 + > > > drivers/pci/pci.h | 2 + > > > include/linux/pci.h | 3 ++ > > > 6 files changed, 131 insertions(+) <...> > > > +static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj, > > > + struct attribute *a, int n) > > > +{ > > > + struct device *dev = kobj_to_dev(kobj); > > > + > > > + if (dev_is_pf(dev)) > > > + return 0; > > > > Wouldn't it be cleaner to also hide this on VFs where > > pci_msix_vec_count() returns an error or where the PF driver doesn't > > implement .sriov_set_msix_vec_count()? IOW, expose it only where it > > could actually work. > > I wasn't sure about the policy in PCI/core, but sure will change. I ended adding checks of msix_cap, but can't check .sriov_set_msix_vec_count. The latter will require to hold device_lock on PF that can disappear later, it is too racy. Thanks
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 25c9c39770c6..34a8c6bcde70 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -375,3 +375,23 @@ Description: The value comes from the PCI kernel device state and can be one of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold". The file is read only. + +What: /sys/bus/pci/devices/.../sriov_vf_msix_count +Date: December 2020 +Contact: Leon Romanovsky <leonro@nvidia.com> +Description: + This file is associated with the SR-IOV VFs. + It allows configuration of the number of MSI-X vectors for + the VF. This is needed to optimize performance of newly bound + devices by allocating the number of vectors based on the + administrator knowledge of targeted VM. + + The values accepted are: + * > 0 - this will be number reported by the VF's MSI-X + capability + * < 0 - not valid + * = 0 - will reset to the device default value + + The file is writable if the PF is bound to a driver that + set sriov_vf_total_msix > 0 and there is no driver bound + to the VF. diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 4afd4ee4f7f0..5bc496f8ffa3 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id) return (dev->devfn + dev->sriov->offset + dev->sriov->stride * vf_id) & 0xff; } +EXPORT_SYMBOL(pci_iov_virtfn_devfn); /* * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may @@ -426,6 +427,63 @@ const struct attribute_group sriov_dev_attr_group = { .is_visible = sriov_attrs_are_visible, }; +#ifdef CONFIG_PCI_MSI +static ssize_t sriov_vf_msix_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int count = pci_msix_vec_count(pdev); + + if (count < 0) + return count; + + return sprintf(buf, "%d\n", count); +} + +static ssize_t sriov_vf_msix_count_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *vf_dev = to_pci_dev(dev); + int val, ret; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + ret = pci_set_msix_vec_count(vf_dev, val); + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR_RW(sriov_vf_msix_count); +#endif + +static struct attribute *sriov_vf_dev_attrs[] = { +#ifdef CONFIG_PCI_MSI + &dev_attr_sriov_vf_msix_count.attr, +#endif + NULL, +}; + +static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + + if (dev_is_pf(dev)) + return 0; + + return a->mode; +} + +const struct attribute_group sriov_vf_dev_attr_group = { + .attrs = sriov_vf_dev_attrs, + .is_visible = sriov_vf_attrs_are_visible, +}; + int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { return 0; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 3162f88fe940..5a40200343c9 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -991,6 +991,53 @@ int pci_msix_vec_count(struct pci_dev *dev) } EXPORT_SYMBOL(pci_msix_vec_count); +/** + * pci_set_msix_vec_count - change the reported number of MSI-X vectors + * This function is applicable for SR-IOV VF because such devices allocate + * their MSI-X table before they will run on the VMs and HW can't guess the + * right number of vectors, so the HW allocates them statically and equally. + * @dev: VF device that is going to be changed + * @count amount of MSI-X vectors + **/ +int pci_set_msix_vec_count(struct pci_dev *dev, int count) +{ + struct pci_dev *pdev = pci_physfn(dev); + int ret; + + if (!dev->msix_cap || !pdev->msix_cap || count < 0) + /* + * We don't support negative numbers for now, + * but maybe in the future it will make sense. + */ + return -EINVAL; + + device_lock(&pdev->dev); + if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) { + ret = -EOPNOTSUPP; + goto err_pdev; + } + + device_lock(&dev->dev); + if (dev->driver) { + /* + * Driver already probed this VF and configured itself + * based on previously configured (or default) MSI-X vector + * count. It is too late to change this field for this + * specific VF. + */ + ret = -EBUSY; + goto err_dev; + } + + ret = pdev->driver->sriov_set_msix_vec_count(dev, count); + +err_dev: + device_unlock(&dev->dev); +err_pdev: + device_unlock(&pdev->dev); + return ret; +} + static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd, int flags) { diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index fb072f4b3176..0af2222643c2 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1557,6 +1557,7 @@ static const struct attribute_group *pci_dev_attr_groups[] = { &pci_dev_hp_attr_group, #ifdef CONFIG_PCI_IOV &sriov_dev_attr_group, + &sriov_vf_dev_attr_group, #endif &pci_bridge_attr_group, &pcie_dev_attr_group, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 5c59365092fa..dbbfa9e73ea8 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -183,6 +183,7 @@ extern unsigned int pci_pm_d3hot_delay; #ifdef CONFIG_PCI_MSI void pci_no_msi(void); +int pci_set_msix_vec_count(struct pci_dev *dev, int count); #else static inline void pci_no_msi(void) { } #endif @@ -502,6 +503,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); extern const struct attribute_group sriov_dev_attr_group; +extern const struct attribute_group sriov_vf_dev_attr_group; #else static inline int pci_iov_init(struct pci_dev *dev) { diff --git a/include/linux/pci.h b/include/linux/pci.h index b32126d26997..6be96d468eda 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -856,6 +856,8 @@ struct module; * e.g. drivers/net/e100.c. * @sriov_configure: Optional driver callback to allow configuration of * number of VFs to enable via sysfs "sriov_numvfs" file. + * @sriov_set_msix_vec_count: Driver callback to change number of MSI-X vectors + * exposed by the sysfs "vf_msix_vec" entry. * @err_handler: See Documentation/PCI/pci-error-recovery.rst * @groups: Sysfs attribute groups. * @driver: Driver model structure. @@ -871,6 +873,7 @@ struct pci_driver { int (*resume)(struct pci_dev *dev); /* Device woken up */ void (*shutdown)(struct pci_dev *dev); int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ + int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */ const struct pci_error_handlers *err_handler; const struct attribute_group **groups; struct device_driver driver;