Message ID | 1571245488-3549-3-git-send-email-jonathan.derrick@intel.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Lorenzo Pieralisi |
Headers | show |
Series | Expose VMD BIOS domain info | expand |
On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > When some VMDs are enabled and others are not, it's difficult to > determine which IIO stack corresponds to the enabled VMD. > > To assist userspace with management tasks, VMD BIOS will write the VMD > instance number and socket number into the first enabled root port's IO > Base/Limit registers prior to OS handoff. VMD driver can capture this > information and expose it to userspace. > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> > --- > drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c > index 959c7c7..dbe1bff 100644 > --- a/drivers/pci/controller/vmd.c > +++ b/drivers/pci/controller/vmd.c > @@ -98,6 +98,8 @@ struct vmd_dev { > struct irq_domain *irq_domain; > struct pci_bus *bus; > u8 busn_start; > + u8 socket_nr; > + u8 instance_nr; > > struct dma_map_ops dma_ops; > struct dma_domain dma_domain; > @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, > .write = vmd_pci_write, > }; > > +/** > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > + * @vmd: &struct vmd_dev VMD device descriptor > + * @rp: int iterator cursor > + * @temp: u32 temporary value for config read > + * > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > + * space can be determinately accessed through the VMD Config BAR. Because VMD > + * Root Ports can be individually disabled, it's important to iterate for the > + * first enabled Root Port as determined by reading the Vendor/Device register. 
> + */ > +#define for_each_vmd_root_port(vmd, rp, temp) \ > + for (rp = 0; rp < 4; rp++) \ > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > + PCI_VENDOR_ID, 4, &temp) || \ > + temp == 0xffffffff) {} else Nit: I do not think this macro is particularly helpful or easy to read. I leave it up to you but I would turn this code (plus the inner loop in vmd_parse_domain()) into a function, eg: struct vmd_dev *vmd_find_first_root_port(..) with the code in the macro above inlined. Up to you. Thanks, Lorenzo > +static int vmd_parse_domain(struct vmd_dev *vmd) > +{ > + int root_port, ret; > + u32 temp, iobase; > + > + vmd->socket_nr = -1; > + vmd->instance_nr = -1; > + > + for_each_vmd_root_port(vmd, root_port, temp) { > + ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), > + PCI_IO_BASE, 2, &iobase); > + if (ret) > + return ret; > + > + vmd->socket_nr = (iobase >> 4) & 0xf; > + vmd->instance_nr = (iobase >> 14) & 0x3; > + > + /* First available will be used */ > + break; > + } > + > + return 0; > +} > + > +static ssize_t socket_nr_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > + > + return sprintf(buf, "%u\n", vmd->socket_nr); > +} > +static DEVICE_ATTR_RO(socket_nr); > + > +static ssize_t instance_nr_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > + > + return sprintf(buf, "%u\n", vmd->instance_nr); > +} > +static DEVICE_ATTR_RO(instance_nr); > + > +static struct attribute *vmd_dev_attrs[] = { > + &dev_attr_socket_nr.attr, > + &dev_attr_instance_nr.attr, > + NULL > +}; > +ATTRIBUTE_GROUPS(vmd_dev); > + > static void vmd_attach_resources(struct vmd_dev *vmd) > { > vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; > @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > 
resource_size_t offset[2] = {0}; > resource_size_t membar2_offset = 0x2000; > struct pci_bus *child; > + int ret; > + > + ret = vmd_parse_domain(vmd); > + if (ret) > + return ret; > > /* > * Shadow registers may exist in certain VMD device ids which allow > @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > */ > if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) { > u32 vmlock; > - int ret; > > membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE; > ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock); > @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev) > .probe = vmd_probe, > .remove = vmd_remove, > .driver = { > - .pm = &vmd_dev_pm_ops, > + .pm = &vmd_dev_pm_ops, > + .dev_groups = vmd_dev_groups, > }, > }; > module_pci_driver(vmd_drv); > -- > 1.8.3.1 >
On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > When some VMDs are enabled and others are not, it's difficult to > determine which IIO stack corresponds to the enabled VMD. > > To assist userspace with management tasks, VMD BIOS will write the VMD > instance number and socket number into the first enabled root port's IO > Base/Limit registers prior to OS handoff. VMD driver can capture this > information and expose it to userspace. > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> > --- > drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c > index 959c7c7..dbe1bff 100644 > --- a/drivers/pci/controller/vmd.c > +++ b/drivers/pci/controller/vmd.c > @@ -98,6 +98,8 @@ struct vmd_dev { > struct irq_domain *irq_domain; > struct pci_bus *bus; > u8 busn_start; > + u8 socket_nr; > + u8 instance_nr; > > struct dma_map_ops dma_ops; > struct dma_domain dma_domain; > @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, > .write = vmd_pci_write, > }; > > +/** > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > + * @vmd: &struct vmd_dev VMD device descriptor > + * @rp: int iterator cursor > + * @temp: u32 temporary value for config read > + * > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > + * space can be determinately accessed through the VMD Config BAR. Because VMD > + * Root Ports can be individually disabled, it's important to iterate for the > + * first enabled Root Port as determined by reading the Vendor/Device register. 
> + */ > +#define for_each_vmd_root_port(vmd, rp, temp) \ > + for (rp = 0; rp < 4; rp++) \ > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > + PCI_VENDOR_ID, 4, &temp) || \ > + temp == 0xffffffff) {} else You may want to consider using PCI_ERROR_RESPONSE here instead of 0xffffffff. Though this hasn't yet been merged: https://patchwork.ozlabs.org/project/linux-pci/list/?series=126820 > + > +static int vmd_parse_domain(struct vmd_dev *vmd) > +{ > + int root_port, ret; > + u32 temp, iobase; > + > + vmd->socket_nr = -1; > + vmd->instance_nr = -1; > + > + for_each_vmd_root_port(vmd, root_port, temp) { > + ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), > + PCI_IO_BASE, 2, &iobase); > + if (ret) > + return ret; > + > + vmd->socket_nr = (iobase >> 4) & 0xf; > + vmd->instance_nr = (iobase >> 14) & 0x3; I'm not familiar with VMD - however how can you be sure that the VMD BIOS will always populate these values here? Is it possible that earlier BIOS's won't do this and something will go wrong here? Is there any sanity checking that can happen here? 
> + > + /* First available will be used */ > + break; > + } > + > + return 0; > +} > + > +static ssize_t socket_nr_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > + > + return sprintf(buf, "%u\n", vmd->socket_nr); > +} > +static DEVICE_ATTR_RO(socket_nr); > + > +static ssize_t instance_nr_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > + > + return sprintf(buf, "%u\n", vmd->instance_nr); > +} > +static DEVICE_ATTR_RO(instance_nr); > + > +static struct attribute *vmd_dev_attrs[] = { > + &dev_attr_socket_nr.attr, > + &dev_attr_instance_nr.attr, > + NULL > +}; > +ATTRIBUTE_GROUPS(vmd_dev); > + > static void vmd_attach_resources(struct vmd_dev *vmd) > { > vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; > @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > resource_size_t offset[2] = {0}; > resource_size_t membar2_offset = 0x2000; > struct pci_bus *child; > + int ret; > + > + ret = vmd_parse_domain(vmd); > + if (ret) > + return ret; This always will succeed. But what happens if this function returns yet socket_nr/instance_nr hasn't been written to? Is that OK? 
Thanks, Andrew Murray > > /* > * Shadow registers may exist in certain VMD device ids which allow > @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > */ > if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) { > u32 vmlock; > - int ret; > > membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE; > ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock); > @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev) > .probe = vmd_probe, > .remove = vmd_remove, > .driver = { > - .pm = &vmd_dev_pm_ops, > + .pm = &vmd_dev_pm_ops, > + .dev_groups = vmd_dev_groups, > }, > }; > module_pci_driver(vmd_drv); > -- > 1.8.3.1 >
Hi Andrew, Thanks for the review On Fri, 2019-11-01 at 13:16 +0000, Andrew Murray wrote: > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > When some VMDs are enabled and others are not, it's difficult to > > determine which IIO stack corresponds to the enabled VMD. > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > instance number and socket number into the first enabled root port's IO > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > information and expose it to userspace. > > > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> > > --- > > drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 77 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c > > index 959c7c7..dbe1bff 100644 > > --- a/drivers/pci/controller/vmd.c > > +++ b/drivers/pci/controller/vmd.c > > @@ -98,6 +98,8 @@ struct vmd_dev { > > struct irq_domain *irq_domain; > > struct pci_bus *bus; > > u8 busn_start; > > + u8 socket_nr; > > + u8 instance_nr; > > > > struct dma_map_ops dma_ops; > > struct dma_domain dma_domain; > > @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, > > .write = vmd_pci_write, > > }; > > > > +/** > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > > + * @vmd: &struct vmd_dev VMD device descriptor > > + * @rp: int iterator cursor > > + * @temp: u32 temporary value for config read > > + * > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > > + * space can be determinately accessed through the VMD Config BAR. Because VMD > > + * Root Ports can be individually disabled, it's important to iterate for the > > + * first enabled Root Port as determined by reading the Vendor/Device register. 
> > + */ > > +#define for_each_vmd_root_port(vmd, rp, temp) \ > > + for (rp = 0; rp < 4; rp++) \ > > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > > + PCI_VENDOR_ID, 4, &temp) || \ > > + temp == 0xffffffff) {} else > > You may want to consider using PCI_ERROR_RESPONSE here instead of 0xffffffff. > Though this hasn't yet been merged: > > https://patchwork.ozlabs.org/project/linux-pci/list/?series=126820 > Sure it will fit this case perfectly once it's merged > > + > > +static int vmd_parse_domain(struct vmd_dev *vmd) > > +{ > > + int root_port, ret; > > + u32 temp, iobase; > > + > > + vmd->socket_nr = -1; > > + vmd->instance_nr = -1; > > + > > + for_each_vmd_root_port(vmd, root_port, temp) { > > + ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), > > + PCI_IO_BASE, 2, &iobase); > > + if (ret) > > + return ret; > > + > > + vmd->socket_nr = (iobase >> 4) & 0xf; > > + vmd->instance_nr = (iobase >> 14) & 0x3; > > I'm not familiar with VMD - however how can you be sure that the VMD BIOS > will always populate these values here? Is it possible that earlier BIOS's > won't do this and something will go wrong here? > > Is there any sanity checking that can happen here? Yes that's entirely possible and would show indeterminate values in that case. It would be up to the user to understand if the BIOS supports the mode before relying on the data. I am investigating to see if we can do a dmi_match to verify the data before publishing. 
> > > + > > + /* First available will be used */ > > + break; > > + } > > + > > + return 0; > > +} > > + > > +static ssize_t socket_nr_show(struct device *dev, > > + struct device_attribute *attr, char *buf) > > +{ > > + struct pci_dev *pdev = to_pci_dev(dev); > > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > > + > > + return sprintf(buf, "%u\n", vmd->socket_nr); > > +} > > +static DEVICE_ATTR_RO(socket_nr); > > + > > +static ssize_t instance_nr_show(struct device *dev, > > + struct device_attribute *attr, char *buf) > > +{ > > + struct pci_dev *pdev = to_pci_dev(dev); > > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > > + > > + return sprintf(buf, "%u\n", vmd->instance_nr); > > +} > > +static DEVICE_ATTR_RO(instance_nr); > > + > > +static struct attribute *vmd_dev_attrs[] = { > > + &dev_attr_socket_nr.attr, > > + &dev_attr_instance_nr.attr, > > + NULL > > +}; > > +ATTRIBUTE_GROUPS(vmd_dev); > > + > > static void vmd_attach_resources(struct vmd_dev *vmd) > > { > > vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; > > @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > > resource_size_t offset[2] = {0}; > > resource_size_t membar2_offset = 0x2000; > > struct pci_bus *child; > > + int ret; > > + > > + ret = vmd_parse_domain(vmd); > > + if (ret) > > + return ret; > > This always will succeed. But what happens if this function returns yet > socket_nr/instance_nr hasn't been written to? Is that OK? > Basically only one possibility that could occur and that's if the VMD is enabled without any VMD Root Ports being enabled on the VMD domain. It's an odd configuration but is technically valid, although the domain becomes useless until the user reboots and enables the VMD Root Ports. So it's more-or-less implied either socket_nr/instance_nr will have data or the domain won't be usable. 
Thanks, Jon > Thanks, > > Andrew Murray > > > > > /* > > * Shadow registers may exist in certain VMD device ids which allow > > @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > > */ > > if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) { > > u32 vmlock; > > - int ret; > > > > membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE; > > ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock); > > @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev) > > .probe = vmd_probe, > > .remove = vmd_remove, > > .driver = { > > - .pm = &vmd_dev_pm_ops, > > + .pm = &vmd_dev_pm_ops, > > + .dev_groups = vmd_dev_groups, > > }, > > }; > > module_pci_driver(vmd_drv); > > -- > > 1.8.3.1 > >
On Fri, Nov 01, 2019 at 02:24:02PM +0000, Derrick, Jonathan wrote: > Hi ANdrew, > > Thanks for the review > > On Fri, 2019-11-01 at 13:16 +0000, Andrew Murray wrote: > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > > When some VMDs are enabled and others are not, it's difficult to > > > determine which IIO stack corresponds to the enabled VMD. > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > > instance number and socket number into the first enabled root port's IO > > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > > information and expose it to userspace. > > > > > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > > > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> > > > --- > > > drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- > > > 1 file changed, 77 insertions(+), 2 deletions(-) > > > > > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c > > > index 959c7c7..dbe1bff 100644 > > > --- a/drivers/pci/controller/vmd.c > > > +++ b/drivers/pci/controller/vmd.c > > > @@ -98,6 +98,8 @@ struct vmd_dev { > > > struct irq_domain *irq_domain; > > > struct pci_bus *bus; > > > u8 busn_start; > > > + u8 socket_nr; > > > + u8 instance_nr; > > > > > > struct dma_map_ops dma_ops; > > > struct dma_domain dma_domain; > > > @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, > > > .write = vmd_pci_write, > > > }; > > > > > > +/** > > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > > > + * @vmd: &struct vmd_dev VMD device descriptor > > > + * @rp: int iterator cursor > > > + * @temp: u32 temporary value for config read > > > + * > > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > > > + * space can be determinately accessed through the VMD Config BAR. 
Because VMD > > > + * Root Ports can be individually disabled, it's important to iterate for the > > > + * first enabled Root Port as determined by reading the Vendor/Device register. > > > + */ > > > +#define for_each_vmd_root_port(vmd, rp, temp) \ > > > + for (rp = 0; rp < 4; rp++) \ > > > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > > > + PCI_VENDOR_ID, 4, &temp) || \ > > > + temp == 0xffffffff) {} else > > > > You may want to consider using PCI_ERROR_RESPONSE here instead of 0xffffffff. > > Though this hasn't yet been merged: > > > > https://patchwork.ozlabs.org/project/linux-pci/list/?series=126820 > > > > Sure it will fit this case perfectly once it's merged > > > > + > > > +static int vmd_parse_domain(struct vmd_dev *vmd) > > > +{ > > > + int root_port, ret; > > > + u32 temp, iobase; > > > + > > > + vmd->socket_nr = -1; > > > + vmd->instance_nr = -1; > > > + > > > + for_each_vmd_root_port(vmd, root_port, temp) { > > > + ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), > > > + PCI_IO_BASE, 2, &iobase); > > > + if (ret) > > > + return ret; > > > + > > > + vmd->socket_nr = (iobase >> 4) & 0xf; > > > + vmd->instance_nr = (iobase >> 14) & 0x3; > > > > I'm not familiar with VMD - however how can you be sure that the VMD BIOS > > will always populate these values here? Is it possible that earlier BIOS's > > won't do this and something will go wrong here? > > > > Is there any sanity checking that can happen here? > > Yes that's entirely possible and would show indeterminate values in > that case. It would be up to the user to understand if the BIOS > supports the mode before relying on the data. > > I am investigating to see if we can do a dmi_match to verify the data > before publishing. I think that would be helpful if possible as it would simplify the user software - and also prevent the user ever getting garbage data. 
> > > > > > > + > > > + /* First available will be used */ > > > + break; > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +static ssize_t socket_nr_show(struct device *dev, > > > + struct device_attribute *attr, char *buf) > > > +{ > > > + struct pci_dev *pdev = to_pci_dev(dev); > > > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > > > + > > > + return sprintf(buf, "%u\n", vmd->socket_nr); > > > +} > > > +static DEVICE_ATTR_RO(socket_nr); > > > + > > > +static ssize_t instance_nr_show(struct device *dev, > > > + struct device_attribute *attr, char *buf) > > > +{ > > > + struct pci_dev *pdev = to_pci_dev(dev); > > > + struct vmd_dev *vmd = pci_get_drvdata(pdev); > > > + > > > + return sprintf(buf, "%u\n", vmd->instance_nr); > > > +} > > > +static DEVICE_ATTR_RO(instance_nr); > > > + > > > +static struct attribute *vmd_dev_attrs[] = { > > > + &dev_attr_socket_nr.attr, > > > + &dev_attr_instance_nr.attr, > > > + NULL > > > +}; > > > +ATTRIBUTE_GROUPS(vmd_dev); > > > + > > > static void vmd_attach_resources(struct vmd_dev *vmd) > > > { > > > vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; > > > @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > > > resource_size_t offset[2] = {0}; > > > resource_size_t membar2_offset = 0x2000; > > > struct pci_bus *child; > > > + int ret; > > > + > > > + ret = vmd_parse_domain(vmd); > > > + if (ret) > > > + return ret; > > > > This always will succeed. But what happens if this function returns yet > > socket_nr/instance_nr hasn't been written to? Is that OK? > > > > Basically only one possibility that could occur and that's if the VMD > is enabled without any VMD Root Ports being enabled on the VMD domain. > It's an odd configuration but is technically valid, although the domain > becomes useless until the user reboots and enables the VMD Root Ports. > > So it's more-or-less implied either socket_nr/instance_nr will have > data or the domain won't be usable. 
Of course in this case, the default value will be -1, which should be quite obvious to a user that this isn't a valid value. Thanks, Andrew Murray > > Thanks, > Jon > > > > Thanks, > > > > Andrew Murray > > > > > > > > /* > > > * Shadow registers may exist in certain VMD device ids which allow > > > @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) > > > */ > > > if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) { > > > u32 vmlock; > > > - int ret; > > > > > > membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE; > > > ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock); > > > @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev) > > > .probe = vmd_probe, > > > .remove = vmd_remove, > > > .driver = { > > > - .pm = &vmd_dev_pm_ops, > > > + .pm = &vmd_dev_pm_ops, > > > + .dev_groups = vmd_dev_groups, > > > }, > > > }; > > > module_pci_driver(vmd_drv); > > > -- > > > 1.8.3.1 > > >
[+cc Andrew] On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > When some VMDs are enabled and others are not, it's difficult to > determine which IIO stack corresponds to the enabled VMD. > > To assist userspace with management tasks, VMD BIOS will write the VMD > instance number and socket number into the first enabled root port's IO > Base/Limit registers prior to OS handoff. VMD driver can capture this > information and expose it to userspace. Hmmm, I'm not sure I understand this, but it sounds possibly fragile. Are these Root Ports visible to the generic PCI core device enumeration? If so, it will find them and read these I/O window registers. Maybe today the PCI core doesn't change them, but I'm not sure we should rely on them always being preserved until the vmd driver can claim the device. But I guess you're using a special config accessor (vmd_cfg_read()), so these are probably invisible to the generic enumeration? > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > + * @vmd: &struct vmd_dev VMD device descriptor > + * @rp: int iterator cursor > + * @temp: u32 temporary value for config read > + * > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > + * space can be determinately accessed through the VMD Config BAR. Because VMD I'm not sure how to parse "determinately accessed". Maybe this config space can *only* be accessed via the VMD Config BAR? > + * Root Ports can be individually disabled, it's important to iterate for the > + * first enabled Root Port as determined by reading the Vendor/Device register. > + */ > +#define for_each_vmd_root_port(vmd, rp, temp) \ > + for (rp = 0; rp < 4; rp++) \ > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > + PCI_VENDOR_ID, 4, &temp) || \ > + temp == 0xffffffff) {} else
Hi Bjorn, On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote: > [+cc Andrew] > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > When some VMDs are enabled and others are not, it's difficult to > > determine which IIO stack corresponds to the enabled VMD. > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > instance number and socket number into the first enabled root port's IO > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > information and expose it to userspace. > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile. > Are these Root Ports visible to the generic PCI core device > enumeration? If so, it will find them and read these I/O window > registers. Maybe today the PCI core doesn't change them, but I'm not > sure we should rely on them always being preserved until the vmd > driver can claim the device. > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT parsing occurs before this PCI domain is exposed to the generic PCI scancode with pci_scan_child_bus(). Until that point the VMD PCI domain is invisible to the kernel outside of /dev/mem or resource0. However, yes, it is somewhat fragile in that a third-party driver could attach to the VMD endpoint prior to the VMD driver and modify the values. A /dev/mem or resource0 user could also do this on an unattached VMD endpoint. I'm wondering if this would also be better suited for a special reset in quirks.c, but it would need to expose a bit of VMD config accessing in quirks.c to do that. > But I guess you're using a special config accessor (vmd_cfg_read()), > so these are probably invisible to the generic enumeration? > Yes the VMD domain is invisible to generic PCI until the domain is enumerated late in vmd_enable_domain(). 
> > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > > + * @vmd: &struct vmd_dev VMD device descriptor > > + * @rp: int iterator cursor > > + * @temp: u32 temporary value for config read > > + * > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > > + * space can be determinately accessed through the VMD Config BAR. Because VMD > > I'm not sure how to parse "determinately accessed". Maybe this config > space can *only* be accessed via the VMD Config BAR? Perhaps it should instead say determinately addressed, as each Root Port config space is addressable at some offset of N * 0x8000 from the base of the VMD endpoint config bar. I can see the comment may not be helpful as that detail is abstracted using the vmd_cfg_read() helper. > > > + * Root Ports can be individually disabled, it's important to iterate for the > > + * first enabled Root Port as determined by reading the Vendor/Device register. > > + */ > > +#define for_each_vmd_root_port(vmd, rp, temp) \ > > + for (rp = 0; rp < 4; rp++) \ > > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > > + PCI_VENDOR_ID, 4, &temp) || \ > > + temp == 0xffffffff) {} else
On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote: > Hi Bjorn, > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote: > > [+cc Andrew] > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > > When some VMDs are enabled and others are not, it's difficult to > > > determine which IIO stack corresponds to the enabled VMD. > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > > instance number and socket number into the first enabled root port's IO > > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > > information and expose it to userspace. > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile. > > Are these Root Ports visible to the generic PCI core device > > enumeration? If so, it will find them and read these I/O window > > registers. Maybe today the PCI core doesn't change them, but I'm not > > sure we should rely on them always being preserved until the vmd > > driver can claim the device. > > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT > parsing occurs before this PCI domain is exposed to the generic PCI > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain > is invisible to the kernel outside of /dev/mem or resource0. That's because the VMD controller is a PCI device itself and its BARs values are used to configure the VMD host controller. Interesting. To add to Bjorn's question, this reasoning assumes that whatever code enumerates the PCI device representing the VMD host controller does not overwrite its BARs upon bus enumeration otherwise the VMD controller configuration would be lost. Am I reading the current code correctly ? I assume there is not anything you can do to add firmware bindings to the VMD host controller PCI device to describe these properties you are exporting, so config space is the only available conduit to report them to an OS. 
Lorenzo > However, yes, it is somewhat fragile in that a third-party driver could > attach to the VMD endpoint prior to the VMD driver and modify the > values. A /dev/mem or resource0 user could also do this on an > unattached VMD endpoint. > > I'm wondering if this would also be better suited for a special reset > in quirks.c, but it would need to expose a bit of VMD config accessing > in quirks.c to do that. > > > But I guess you're using a special config accessor (vmd_cfg_read()), > > so these are probably invisible to the generic enumeration? > > > > Yes the VMD domain is invisible to generic PCI until the domain is > enumerated late in vmd_enable_domain(). > > > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > > > + * @vmd: &struct vmd_dev VMD device descriptor > > > + * @rp: int iterator cursor > > > + * @temp: u32 temporary value for config read > > > + * > > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > > > + * space can be determinately accessed through the VMD Config BAR. Because VMD > > > > I'm not sure how to parse "determinately accessed". Maybe this config > > space can *only* be accessed via the VMD Config BAR? > > Perhaps it should instead say determinately addressed, as each Root > Port config space is addressable at some offset of N * 0x8000 from the > base of the VMD endpoint config bar. I can see the comment may not be > helpful as that detail is abstracted using the vmd_cfg_read() helper. > > > > > > > + * Root Ports can be individually disabled, it's important to iterate for the > > > + * first enabled Root Port as determined by reading the Vendor/Device register. > > > + */ > > > +#define for_each_vmd_root_port(vmd, rp, temp) \ > > > + for (rp = 0; rp < 4; rp++) \ > > > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > > > + PCI_VENDOR_ID, 4, &temp) || \ > > > + temp == 0xffffffff) {} else
On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote: > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote: > > Hi Bjorn, > > > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote: > > > [+cc Andrew] > > > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > > > When some VMDs are enabled and others are not, it's difficult to > > > > determine which IIO stack corresponds to the enabled VMD. > > > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > > > instance number and socket number into the first enabled root port's IO > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > > > information and expose it to userspace. > > > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile. > > > Are these Root Ports visible to the generic PCI core device > > > enumeration? If so, it will find them and read these I/O window > > > registers. Maybe today the PCI core doesn't change them, but I'm not > > > sure we should rely on them always being preserved until the vmd > > > driver can claim the device. > > > > > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT > > parsing occurs before this PCI domain is exposed to the generic PCI > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain > > is invisible to the kernel outside of /dev/mem or resource0. > > That's because the VMD controller is a PCI device itself and its > BARs values are used to configure the VMD host controller. > > Interesting. > > To add to Bjorn's question, this reasoning assumes that whatever > code enumerates the PCI device representing the VMD host controller > does not overwrite its BARs upon bus enumeration otherwise the VMD > controller configuration would be lost. Am I reading the current > code correctly ? 
Sorry, I just went through the code again, I think the VMD controller PCI device BARs can and are allowed to be reassigned by the PCI enumeration code - I misread the code, so I raised a non-existent issue here, they are like any other PCI device MEM/IO BARs in this respect. Lorenzo > I assume there is not anything you can do to add firmware bindings to > the VMD host controller PCI device to describe these properties you are > exporting, so config space is the only available conduit to report them > to an OS. > > Lorenzo > > > However, yes, it is somewhat fragile in that a third-party driver could > > attach to the VMD endpoint prior to the VMD driver and modify the > > values. A /dev/mem or resource0 user could also do this on an > > unattached VMD endpoint. > > > > I'm wondering if this would also be better suited for a special reset > > in quirks.c, but it would need to expose a bit of VMD config accessing > > in quirks.c to do that. > > > > > But I guess you're using a special config accessor (vmd_cfg_read()), > > > so these are probably invisible to the generic enumeration? > > > > > > > Yes the VMD domain is invisible to generic PCI until the domain is > > enumerated late in vmd_enable_domain(). > > > > > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports > > > > + * @vmd: &struct vmd_dev VMD device descriptor > > > > + * @rp: int iterator cursor > > > > + * @temp: u32 temporary value for config read > > > > + * > > > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config > > > > + * space can be determinately accessed through the VMD Config BAR. Because VMD > > > > > > I'm not sure how to parse "determinately accessed". Maybe this config > > > space can *only* be accessed via the VMD Config BAR? > > > > Perhaps it should instead say determinately addressed, as each Root > > Port config space is addressable at some offset of N * 0x8000 from the > > base of the VMD endpoint config bar. 
I can see the comment may not be > > helpful as that detail is abstracted using the vmd_cfg_read() helper. > > > > > > > > > > > + * Root Ports can be individually disabled, it's important to iterate for the > > > > + * first enabled Root Port as determined by reading the Vendor/Device register. > > > > + */ > > > > +#define for_each_vmd_root_port(vmd, rp, temp) \ > > > > + for (rp = 0; rp < 4; rp++) \ > > > > + if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0), \ > > > > + PCI_VENDOR_ID, 4, &temp) || \ > > > > + temp == 0xffffffff) {} else
On Tue, 2019-11-05 at 10:12 +0000, Lorenzo Pieralisi wrote: > On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote: > > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote: > > > Hi Bjorn, > > > > > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote: > > > > [+cc Andrew] > > > > > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > > > > When some VMDs are enabled and others are not, it's difficult to > > > > > determine which IIO stack corresponds to the enabled VMD. > > > > > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > > > > instance number and socket number into the first enabled root port's IO > > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > > > > information and expose it to userspace. > > > > > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile. > > > > Are these Root Ports visible to the generic PCI core device > > > > enumeration? If so, it will find them and read these I/O window > > > > registers. Maybe today the PCI core doesn't change them, but I'm not > > > > sure we should rely on them always being preserved until the vmd > > > > driver can claim the device. > > > > > > > > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT > > > parsing occurs before this PCI domain is exposed to the generic PCI > > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain > > > is invisible to the kernel outside of /dev/mem or resource0. > > > > That's because the VMD controller is a PCI device itself and its > > BARs values are used to configure the VMD host controller. > > > > Interesting. > > > > To add to Bjorn's question, this reasoning assumes that whatever > > code enumerates the PCI device representing the VMD host controller > > does not overwrite its BARs upon bus enumeration otherwise the VMD > > controller configuration would be lost. 
Am I reading the current > > code correctly ? > > Sorry, I just went through the code again, I think the VMD controller > PCI device BARs can and are allowed to be reassigned by the PCI > enumeration code - I misread the code, so I raised a non-existent issue > here, they are like any other PCI device MEM/IO BARs in this respect. > > Lorenzo > Yes the VMD endpoint itself exposes the domain containing the Root Ports. It's the Root Ports which get enumerated by generic PCI scancode, and also the Root Port config space where this domain info is supplied. Without a VMD driver, the only aperture to access the Root Port config space is MMIO through the VMD endpoint's 'Config' BAR (aka MEMBAR0). Without this patch, a /dev/mem, resource0, or third-party driver could overwrite these values if they don't also restore them on close/unbind. I imagine a kexec user would also overwrite these values. This is one of the reasons I was also thinking it could live in device specific reset code as long as it can call into VMD for the specifics. Many kernel vendors already ship with VMD=y, so I am tempted to simply make that permanent and export a reset call to a dev specific reset in quirks.c.
On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> Without this patch, a /dev/mem, resource0, or third-party driver could
> overwrite these values if they don't also restore them on close/unbind.
> I imagine a kexec user would also overwrite these values.

Don't you have the same problem with the in-kernel driver? It
looks like pci core will clear the PCI_IO_BASE config registers in
pci_setup_bridge_io() because VMD doesn't provide an IORESOURCE_IO
resource. If you reload the driver, it'll read the wrong values on the
second probing.
On Wed, 2019-11-06 at 07:22 +0900, Keith Busch wrote:
> On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> > Without this patch, a /dev/mem, resource0, or third-party driver could
> > overwrite these values if they don't also restore them on close/unbind.
> > I imagine a kexec user would also overwrite these values.
>
> Don't you have the same problem with the in-kernel driver? It
> looks like pci core will clear the PCI_IO_BASE config registers in
> pci_setup_bridge_io() because VMD doesn't provide an IORESOURCE_IO
> resource. If you reload the driver, it'll read the wrong values on the
> second probing.

Is there a corner case I am missing with patch 3/3 that restores on
unload?
On Tue, Nov 05, 2019 at 10:38:05PM +0000, Derrick, Jonathan wrote:
> On Wed, 2019-11-06 at 07:22 +0900, Keith Busch wrote:
> > On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> > > Without this patch, a /dev/mem, resource0, or third-party driver could
> > > overwrite these values if they don't also restore them on close/unbind.
> > > I imagine a kexec user would also overwrite these values.
> >
> > Don't you have the same problem with the in-kernel driver? It
> > looks like pci core will clear the PCI_IO_BASE config registers in
> > pci_setup_bridge_io() because VMD doesn't provide an IORESOURCE_IO
> > resource. If you reload the driver, it'll read the wrong values on the
> > second probing.
>
> Is there a corner case I am missing with patch 3/3 that restores on
> unload?

Nothing wrong with that. I just hadn't read that far :/
On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote: > On Tue, 2019-11-05 at 10:12 +0000, Lorenzo Pieralisi wrote: > > On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote: > > > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote: > > > > Hi Bjorn, > > > > > > > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote: > > > > > [+cc Andrew] > > > > > > > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > > > > > When some VMDs are enabled and others are not, it's difficult to > > > > > > determine which IIO stack corresponds to the enabled VMD. > > > > > > > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > > > > > instance number and socket number into the first enabled root port's IO > > > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > > > > > information and expose it to userspace. > > > > > > > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile. > > > > > Are these Root Ports visible to the generic PCI core device > > > > > enumeration? If so, it will find them and read these I/O window > > > > > registers. Maybe today the PCI core doesn't change them, but I'm not > > > > > sure we should rely on them always being preserved until the vmd > > > > > driver can claim the device. > > > > > > > > > > > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT > > > > parsing occurs before this PCI domain is exposed to the generic PCI > > > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain > > > > is invisible to the kernel outside of /dev/mem or resource0. > > > > > > That's because the VMD controller is a PCI device itself and its > > > BARs values are used to configure the VMD host controller. > > > > > > Interesting. 
> > > > > > To add to Bjorn's question, this reasoning assumes that whatever > > > code enumerates the PCI device representing the VMD host controller > > > does not overwrite its BARs upon bus enumeration otherwise the VMD > > > controller configuration would be lost. Am I reading the current > > > code correctly ? > > > > Sorry, I just went through the code again, I think the VMD controller > > PCI device BARs can and are allowed to be reassigned by the PCI > > enumeration code - I misread the code, so I raised a non-existent issue > > here, they are like any other PCI device MEM/IO BARs in this respect. > > > > Lorenzo > > > > Yes the VMD endpoint itself exposes the domain containing the Root > Ports. It's the Root Ports which get enumerated by generic PCI > scancode, and also the Root Port config space where this domain info is > supplied. Without a VMD driver, the only aperture to access the Root > Port config space is MMIO through the VMD endpoint's 'Config' BAR (aka > MEMBAR0). > > Without this patch, a /dev/mem, resource0, or third-party driver could > overwrite these values if they don't also restore them on close/unbind. > I imagine a kexec user would also overwrite these values. > > This is one of the reasons I was also thinking it could live in device > specific reset code as long as it can call into VMD for the specifics. > Many kernel vendors already ship with VMD=y, so I am tempted to simply > make that permanent and export a reset call to a dev specific reset in > quirks.c. Hi Jon, just wanted to ask you what's the plan with this series. Thanks, Lorenzo
On Mon, 2020-01-27 at 10:38 +0000, Lorenzo Pieralisi wrote: > On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote: > > On Tue, 2019-11-05 at 10:12 +0000, Lorenzo Pieralisi wrote: > > > On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote: > > > > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote: > > > > > Hi Bjorn, > > > > > > > > > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote: > > > > > > [+cc Andrew] > > > > > > > > > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote: > > > > > > > When some VMDs are enabled and others are not, it's difficult to > > > > > > > determine which IIO stack corresponds to the enabled VMD. > > > > > > > > > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD > > > > > > > instance number and socket number into the first enabled root port's IO > > > > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this > > > > > > > information and expose it to userspace. > > > > > > > > > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile. > > > > > > Are these Root Ports visible to the generic PCI core device > > > > > > enumeration? If so, it will find them and read these I/O window > > > > > > registers. Maybe today the PCI core doesn't change them, but I'm not > > > > > > sure we should rely on them always being preserved until the vmd > > > > > > driver can claim the device. > > > > > > > > > > > > > > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT > > > > > parsing occurs before this PCI domain is exposed to the generic PCI > > > > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain > > > > > is invisible to the kernel outside of /dev/mem or resource0. > > > > > > > > That's because the VMD controller is a PCI device itself and its > > > > BARs values are used to configure the VMD host controller. > > > > > > > > Interesting. 
> > > > > > > > To add to Bjorn's question, this reasoning assumes that whatever > > > > code enumerates the PCI device representing the VMD host controller > > > > does not overwrite its BARs upon bus enumeration otherwise the VMD > > > > controller configuration would be lost. Am I reading the current > > > > code correctly ? > > > > > > Sorry, I just went through the code again, I think the VMD controller > > > PCI device BARs can and are allowed to be reassigned by the PCI > > > enumeration code - I misread the code, so I raised a non-existent issue > > > here, they are like any other PCI device MEM/IO BARs in this respect. > > > > > > Lorenzo > > > > > > > Yes the VMD endpoint itself exposes the domain containing the Root > > Ports. It's the Root Ports which get enumerated by generic PCI > > scancode, and also the Root Port config space where this domain info is > > supplied. Without a VMD driver, the only aperture to access the Root > > Port config space is MMIO through the VMD endpoint's 'Config' BAR (aka > > MEMBAR0). > > > > Without this patch, a /dev/mem, resource0, or third-party driver could > > overwrite these values if they don't also restore them on close/unbind. > > I imagine a kexec user would also overwrite these values. > > > > This is one of the reasons I was also thinking it could live in device > > specific reset code as long as it can call into VMD for the specifics. > > Many kernel vendors already ship with VMD=y, so I am tempted to simply > > make that permanent and export a reset call to a dev specific reset in > > quirks.c. > > Hi Jon, > > just wanted to ask you what's the plan with this series. > > Thanks, > Lorenzo Please drop. We've implemented a different solution. Thanks again, Jon
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index 959c7c7..dbe1bff 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -98,6 +98,8 @@ struct vmd_dev {
 	struct irq_domain	*irq_domain;
 	struct pci_bus		*bus;
 	u8			busn_start;
+	u8			socket_nr;
+	u8			instance_nr;
 
 	struct dma_map_ops	dma_ops;
 	struct dma_domain	dma_domain;
@@ -543,6 +545,83 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
 	.write		= vmd_pci_write,
 };
 
+/**
+ * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
+ * @vmd: &struct vmd_dev VMD device descriptor
+ * @rp: int iterator cursor
+ * @temp: u32 temporary value for config read
+ *
+ * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0. Root Ports
+ * can be individually disabled, so the loop body only runs for Root Ports
+ * whose Vendor/Device register reads successfully through vmd_cfg_read() and
+ * is not all-ones (0xffffffff).
+ */
+#define for_each_vmd_root_port(vmd, rp, temp)				\
+	for ((rp) = 0; (rp) < 4; (rp)++)				\
+		if (vmd_cfg_read((vmd), 0, PCI_DEVFN((rp), 0),		\
+				 PCI_VENDOR_ID, 4, &(temp)) ||		\
+		    (temp) == 0xffffffff) {} else
+
+/*
+ * Capture the socket and instance numbers from the first enabled Root Port.
+ *
+ * A 16-bit read at PCI_IO_BASE is decoded as: bits 7:4 -> socket_nr,
+ * bits 15:14 -> instance_nr. Both fields are left at 0xff when no enabled
+ * Root Port is found. Returns 0, or the error from reading PCI_IO_BASE.
+ */
+static int vmd_parse_domain(struct vmd_dev *vmd)
+{
+	int root_port, ret;
+	u32 temp, iobase;
+
+	vmd->socket_nr = 0xff;
+	vmd->instance_nr = 0xff;
+
+	for_each_vmd_root_port(vmd, root_port, temp) {
+		ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),
+				   PCI_IO_BASE, 2, &iobase);
+		if (ret)
+			return ret;
+
+		vmd->socket_nr = (iobase >> 4) & 0xf;
+		vmd->instance_nr = (iobase >> 14) & 0x3;
+
+		/* First available will be used */
+		break;
+	}
+
+	return 0;
+}
+
+/* sysfs: socket number from vmd_parse_domain(); 255 if none was found */
+static ssize_t socket_nr_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+
+	return sprintf(buf, "%u\n", vmd->socket_nr);
+}
+static DEVICE_ATTR_RO(socket_nr);
+
+/* sysfs: instance number from vmd_parse_domain(); 255 if none was found */
+static ssize_t instance_nr_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+
+	return sprintf(buf, "%u\n", vmd->instance_nr);
+}
+static DEVICE_ATTR_RO(instance_nr);
+
+static struct attribute *vmd_dev_attrs[] = {
+	&dev_attr_socket_nr.attr,
+	&dev_attr_instance_nr.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(vmd_dev);
+
 static void vmd_attach_resources(struct vmd_dev *vmd)
 {
 	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
@@ -582,6 +661,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	resource_size_t offset[2] = {0};
 	resource_size_t membar2_offset = 0x2000;
 	struct pci_bus *child;
+	int ret;
+
+	ret = vmd_parse_domain(vmd);
+	if (ret)
+		return ret;
 
 	/*
 	 * Shadow registers may exist in certain VMD device ids which allow
@@ -591,7 +675,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	 */
 	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
 		u32 vmlock;
-		int ret;
 
 		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
 		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
@@ -876,7 +959,8 @@ static int vmd_resume(struct device *dev)
 	.probe		= vmd_probe,
 	.remove		= vmd_remove,
 	.driver		= {
-		.pm	= &vmd_dev_pm_ops,
+		.pm		= &vmd_dev_pm_ops,
+		.dev_groups	= vmd_dev_groups,
 	},
 };
 module_pci_driver(vmd_drv);