[2/3] PCI: vmd: Expose VMD details from BIOS

Message ID 1571245488-3549-3-git-send-email-jonathan.derrick@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Lorenzo Pieralisi
Series Expose VMD BIOS domain info

Commit Message

Jon Derrick Oct. 16, 2019, 5:04 p.m. UTC
When some VMDs are enabled and others are not, it's difficult to
determine which IIO stack corresponds to the enabled VMD.

To assist userspace with management tasks, VMD BIOS will write the VMD
instance number and socket number into the first enabled root port's IO
Base/Limit registers prior to OS handoff. VMD driver can capture this
information and expose it to userspace.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 2 deletions(-)

Comments

Lorenzo Pieralisi Oct. 31, 2019, 11:37 a.m. UTC | #1
On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> When some VMDs are enabled and others are not, it's difficult to
> determine which IIO stack corresponds to the enabled VMD.
> 
> To assist userspace with management tasks, VMD BIOS will write the VMD
> instance number and socket number into the first enabled root port's IO
> Base/Limit registers prior to OS handoff. VMD driver can capture this
> information and expose it to userspace.
> 
> Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
>  drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 77 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index 959c7c7..dbe1bff 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -98,6 +98,8 @@ struct vmd_dev {
>  	struct irq_domain	*irq_domain;
>  	struct pci_bus		*bus;
>  	u8			busn_start;
> +	u8			socket_nr;
> +	u8			instance_nr;
>  
>  	struct dma_map_ops	dma_ops;
>  	struct dma_domain	dma_domain;
> @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
>  	.write		= vmd_pci_write,
>  };
>  
> +/**
> + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> + * @vmd: &struct vmd_dev VMD device descriptor
> + * @rp: int iterator cursor
> + * @temp: u32 temporary value for config read
> + *
> + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> + * space can be determinately accessed through the VMD Config BAR. Because VMD
> + * Root Ports can be individually disabled, it's important to iterate for the
> + * first enabled Root Port as determined by reading the Vendor/Device register.
> + */
> +#define for_each_vmd_root_port(vmd, rp, temp)				\
> +	for (rp = 0; rp < 4; rp++)					\
> +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> +				 PCI_VENDOR_ID, 4, &temp) ||		\
> +		    temp == 0xffffffff) {} else

Nit: I do not think this macro is particularly helpful or easy to read.

I leave it up to you but I would turn this code (plus the inner loop in
vmd_parse_domain()) into a function, e.g.:

struct vmd_dev *vmd_find_first_root_port(..)

with the code in the macro above inlined. Up to you.
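
Something along these lines, as a rough sketch (returning the first
enabled Root Port index here; names and details purely illustrative):

static int vmd_find_first_root_port(struct vmd_dev *vmd)
{
	int rp;
	u32 temp;

	for (rp = 0; rp < 4; rp++) {
		/* Skip Root Ports whose config space can't be read */
		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(rp, 0),
				 PCI_VENDOR_ID, 4, &temp))
			continue;
		/* First readable, enabled Root Port wins */
		if (temp != 0xffffffff)
			return rp;
	}

	return -ENODEV;
}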

Thanks,
Lorenzo

> +static int vmd_parse_domain(struct vmd_dev *vmd)
> +{
> +	int root_port, ret;
> +	u32 temp, iobase;
> +
> +	vmd->socket_nr = -1;
> +	vmd->instance_nr = -1;
> +
> +	for_each_vmd_root_port(vmd, root_port, temp) {
> +		ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),
> +				   PCI_IO_BASE, 2, &iobase);
> +		if (ret)
> +			return ret;
> +
> +		vmd->socket_nr = (iobase >> 4) & 0xf;
> +		vmd->instance_nr = (iobase >> 14) & 0x3;
> +
> +		/* First available will be used */
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
> +static ssize_t socket_nr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> +
> +	return sprintf(buf, "%u\n", vmd->socket_nr);
> +}
> +static DEVICE_ATTR_RO(socket_nr);
> +
> +static ssize_t instance_nr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> +
> +	return sprintf(buf, "%u\n", vmd->instance_nr);
> +}
> +static DEVICE_ATTR_RO(instance_nr);
> +
> +static struct attribute *vmd_dev_attrs[] = {
> +	&dev_attr_socket_nr.attr,
> +	&dev_attr_instance_nr.attr,
> +	NULL
> +};
> +ATTRIBUTE_GROUPS(vmd_dev);
> +
>  static void vmd_attach_resources(struct vmd_dev *vmd)
>  {
>  	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
> @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  	resource_size_t offset[2] = {0};
>  	resource_size_t membar2_offset = 0x2000;
>  	struct pci_bus *child;
> +	int ret;
> +
> +	ret = vmd_parse_domain(vmd);
> +	if (ret)
> +		return ret;
>  
>  	/*
>  	 * Shadow registers may exist in certain VMD device ids which allow
> @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  	 */
>  	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
>  		u32 vmlock;
> -		int ret;
>  
>  		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
>  		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
> @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev)
>  	.probe		= vmd_probe,
>  	.remove		= vmd_remove,
>  	.driver		= {
> -		.pm	= &vmd_dev_pm_ops,
> +		.pm		= &vmd_dev_pm_ops,
> +		.dev_groups	= vmd_dev_groups,
>  	},
>  };
>  module_pci_driver(vmd_drv);
> -- 
> 1.8.3.1
>
Andrew Murray Nov. 1, 2019, 1:16 p.m. UTC | #2
On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> When some VMDs are enabled and others are not, it's difficult to
> determine which IIO stack corresponds to the enabled VMD.
> 
> To assist userspace with management tasks, VMD BIOS will write the VMD
> instance number and socket number into the first enabled root port's IO
> Base/Limit registers prior to OS handoff. VMD driver can capture this
> information and expose it to userspace.
> 
> Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
>  drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 77 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index 959c7c7..dbe1bff 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -98,6 +98,8 @@ struct vmd_dev {
>  	struct irq_domain	*irq_domain;
>  	struct pci_bus		*bus;
>  	u8			busn_start;
> +	u8			socket_nr;
> +	u8			instance_nr;
>  
>  	struct dma_map_ops	dma_ops;
>  	struct dma_domain	dma_domain;
> @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
>  	.write		= vmd_pci_write,
>  };
>  
> +/**
> + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> + * @vmd: &struct vmd_dev VMD device descriptor
> + * @rp: int iterator cursor
> + * @temp: u32 temporary value for config read
> + *
> + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> + * space can be determinately accessed through the VMD Config BAR. Because VMD
> + * Root Ports can be individually disabled, it's important to iterate for the
> + * first enabled Root Port as determined by reading the Vendor/Device register.
> + */
> +#define for_each_vmd_root_port(vmd, rp, temp)				\
> +	for (rp = 0; rp < 4; rp++)					\
> +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> +				 PCI_VENDOR_ID, 4, &temp) ||		\
> +		    temp == 0xffffffff) {} else

You may want to consider using PCI_ERROR_RESPONSE here instead of 0xffffffff.
Though this hasn't yet been merged:

https://patchwork.ozlabs.org/project/linux-pci/list/?series=126820
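
i.e. the check in the macro would then read something like (assuming the
definition from that series lands as a value comparable against a 32-bit
config read):

		    temp == (u32)PCI_ERROR_RESPONSE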

> +
> +static int vmd_parse_domain(struct vmd_dev *vmd)
> +{
> +	int root_port, ret;
> +	u32 temp, iobase;
> +
> +	vmd->socket_nr = -1;
> +	vmd->instance_nr = -1;
> +
> +	for_each_vmd_root_port(vmd, root_port, temp) {
> +		ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),
> +				   PCI_IO_BASE, 2, &iobase);
> +		if (ret)
> +			return ret;
> +
> +		vmd->socket_nr = (iobase >> 4) & 0xf;
> +		vmd->instance_nr = (iobase >> 14) & 0x3;

I'm not familiar with VMD - however how can you be sure that the VMD BIOS
will always populate these values here? Is it possible that earlier BIOS's
won't do this and something will go wrong here?

Is there any sanity checking that can happen here?

> +
> +		/* First available will be used */
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
> +static ssize_t socket_nr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> +
> +	return sprintf(buf, "%u\n", vmd->socket_nr);
> +}
> +static DEVICE_ATTR_RO(socket_nr);
> +
> +static ssize_t instance_nr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> +
> +	return sprintf(buf, "%u\n", vmd->instance_nr);
> +}
> +static DEVICE_ATTR_RO(instance_nr);
> +
> +static struct attribute *vmd_dev_attrs[] = {
> +	&dev_attr_socket_nr.attr,
> +	&dev_attr_instance_nr.attr,
> +	NULL
> +};
> +ATTRIBUTE_GROUPS(vmd_dev);
> +
>  static void vmd_attach_resources(struct vmd_dev *vmd)
>  {
>  	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
> @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  	resource_size_t offset[2] = {0};
>  	resource_size_t membar2_offset = 0x2000;
>  	struct pci_bus *child;
> +	int ret;
> +
> +	ret = vmd_parse_domain(vmd);
> +	if (ret)
> +		return ret;

This always will succeed. But what happens if this function returns yet
socket_nr/instance_nr hasn't been written to? Is that OK?

Thanks,

Andrew Murray

>  
>  	/*
>  	 * Shadow registers may exist in certain VMD device ids which allow
> @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  	 */
>  	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
>  		u32 vmlock;
> -		int ret;
>  
>  		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
>  		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
> @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev)
>  	.probe		= vmd_probe,
>  	.remove		= vmd_remove,
>  	.driver		= {
> -		.pm	= &vmd_dev_pm_ops,
> +		.pm		= &vmd_dev_pm_ops,
> +		.dev_groups	= vmd_dev_groups,
>  	},
>  };
>  module_pci_driver(vmd_drv);
> -- 
> 1.8.3.1
>
Jon Derrick Nov. 1, 2019, 2:24 p.m. UTC | #3
Hi Andrew,

Thanks for the review

On Fri, 2019-11-01 at 13:16 +0000, Andrew Murray wrote:
> On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > When some VMDs are enabled and others are not, it's difficult to
> > determine which IIO stack corresponds to the enabled VMD.
> > 
> > To assist userspace with management tasks, VMD BIOS will write the VMD
> > instance number and socket number into the first enabled root port's IO
> > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > information and expose it to userspace.
> > 
> > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
> > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> > ---
> >  drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 77 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> > index 959c7c7..dbe1bff 100644
> > --- a/drivers/pci/controller/vmd.c
> > +++ b/drivers/pci/controller/vmd.c
> > @@ -98,6 +98,8 @@ struct vmd_dev {
> >  	struct irq_domain	*irq_domain;
> >  	struct pci_bus		*bus;
> >  	u8			busn_start;
> > +	u8			socket_nr;
> > +	u8			instance_nr;
> >  
> >  	struct dma_map_ops	dma_ops;
> >  	struct dma_domain	dma_domain;
> > @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
> >  	.write		= vmd_pci_write,
> >  };
> >  
> > +/**
> > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> > + * @vmd: &struct vmd_dev VMD device descriptor
> > + * @rp: int iterator cursor
> > + * @temp: u32 temporary value for config read
> > + *
> > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> > + * space can be determinately accessed through the VMD Config BAR. Because VMD
> > + * Root Ports can be individually disabled, it's important to iterate for the
> > + * first enabled Root Port as determined by reading the Vendor/Device register.
> > + */
> > +#define for_each_vmd_root_port(vmd, rp, temp)				\
> > +	for (rp = 0; rp < 4; rp++)					\
> > +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> > +				 PCI_VENDOR_ID, 4, &temp) ||		\
> > +		    temp == 0xffffffff) {} else
> 
> You may want to consider using PCI_ERROR_RESPONSE here instead of 0xffffffff.
> Though this hasn't yet been merged:
> 
> https://patchwork.ozlabs.org/project/linux-pci/list/?series=126820
> 

Sure, it will fit this case perfectly once it's merged.

> > +
> > +static int vmd_parse_domain(struct vmd_dev *vmd)
> > +{
> > +	int root_port, ret;
> > +	u32 temp, iobase;
> > +
> > +	vmd->socket_nr = -1;
> > +	vmd->instance_nr = -1;
> > +
> > +	for_each_vmd_root_port(vmd, root_port, temp) {
> > +		ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),
> > +				   PCI_IO_BASE, 2, &iobase);
> > +		if (ret)
> > +			return ret;
> > +
> > +		vmd->socket_nr = (iobase >> 4) & 0xf;
> > +		vmd->instance_nr = (iobase >> 14) & 0x3;
> 
> I'm not familiar with VMD - however how can you be sure that the VMD BIOS
> will always populate these values here? Is it possible that earlier BIOS's
> won't do this and something will go wrong here?
> 
> Is there any sanity checking that can happen here?

Yes that's entirely possible and would show indeterminate values in
that case. It would be up to the user to understand if the BIOS
supports the mode before relying on the data.

I am investigating to see if we can do a dmi_match to verify the data
before publishing.
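
For reference, a rough sketch of what that gate could look like (the DMI
strings below are placeholders, not real platform identifiers):

#include <linux/dmi.h>

static const struct dmi_system_id vmd_domain_info_dmi_table[] = {
	{
		/*
		 * Placeholder entry: a platform whose BIOS is known to
		 * populate the IO Base/Limit encoding.
		 */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
			DMI_MATCH(DMI_BIOS_VERSION, "EXAMPLE.BIOS.VER"),
		},
	},
	{ }
};

static bool vmd_bios_provides_domain_info(void)
{
	return dmi_check_system(vmd_domain_info_dmi_table);
}

vmd_parse_domain() could then bail out early, leaving socket_nr and
instance_nr at -1, when this returns false.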


> 
> > +
> > +		/* First available will be used */
> > +		break;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static ssize_t socket_nr_show(struct device *dev,
> > +			      struct device_attribute *attr, char *buf)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> > +
> > +	return sprintf(buf, "%u\n", vmd->socket_nr);
> > +}
> > +static DEVICE_ATTR_RO(socket_nr);
> > +
> > +static ssize_t instance_nr_show(struct device *dev,
> > +			      struct device_attribute *attr, char *buf)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> > +
> > +	return sprintf(buf, "%u\n", vmd->instance_nr);
> > +}
> > +static DEVICE_ATTR_RO(instance_nr);
> > +
> > +static struct attribute *vmd_dev_attrs[] = {
> > +	&dev_attr_socket_nr.attr,
> > +	&dev_attr_instance_nr.attr,
> > +	NULL
> > +};
> > +ATTRIBUTE_GROUPS(vmd_dev);
> > +
> >  static void vmd_attach_resources(struct vmd_dev *vmd)
> >  {
> >  	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
> > @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> >  	resource_size_t offset[2] = {0};
> >  	resource_size_t membar2_offset = 0x2000;
> >  	struct pci_bus *child;
> > +	int ret;
> > +
> > +	ret = vmd_parse_domain(vmd);
> > +	if (ret)
> > +		return ret;
> 
> This always will succeed. But what happens if this function returns yet
> socket_nr/instance_nr hasn't been written to? Is that OK?
> 

Basically there's only one possibility that could occur, and that's if the
VMD is enabled without any VMD Root Ports being enabled on the VMD domain.
It's an odd configuration but is technically valid, although the domain
becomes useless until the user reboots and enables the VMD Root Ports. 

So it's more-or-less implied either socket_nr/instance_nr will have
data or the domain won't be usable.

Thanks,
Jon


> Thanks,
> 
> Andrew Murray
> 
> >  
> >  	/*
> >  	 * Shadow registers may exist in certain VMD device ids which allow
> > @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> >  	 */
> >  	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
> >  		u32 vmlock;
> > -		int ret;
> >  
> >  		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
> >  		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
> > @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev)
> >  	.probe		= vmd_probe,
> >  	.remove		= vmd_remove,
> >  	.driver		= {
> > -		.pm	= &vmd_dev_pm_ops,
> > +		.pm		= &vmd_dev_pm_ops,
> > +		.dev_groups	= vmd_dev_groups,
> >  	},
> >  };
> >  module_pci_driver(vmd_drv);
> > -- 
> > 1.8.3.1
> >
Andrew Murray Nov. 1, 2019, 2:44 p.m. UTC | #4
On Fri, Nov 01, 2019 at 02:24:02PM +0000, Derrick, Jonathan wrote:
> Hi ANdrew,
> 
> Thanks for the review
> 
> On Fri, 2019-11-01 at 13:16 +0000, Andrew Murray wrote:
> > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > > When some VMDs are enabled and others are not, it's difficult to
> > > determine which IIO stack corresponds to the enabled VMD.
> > > 
> > > To assist userspace with management tasks, VMD BIOS will write the VMD
> > > instance number and socket number into the first enabled root port's IO
> > > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > > information and expose it to userspace.
> > > 
> > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
> > > Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> > > ---
> > >  drivers/pci/controller/vmd.c | 79 ++++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 77 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> > > index 959c7c7..dbe1bff 100644
> > > --- a/drivers/pci/controller/vmd.c
> > > +++ b/drivers/pci/controller/vmd.c
> > > @@ -98,6 +98,8 @@ struct vmd_dev {
> > >  	struct irq_domain	*irq_domain;
> > >  	struct pci_bus		*bus;
> > >  	u8			busn_start;
> > > +	u8			socket_nr;
> > > +	u8			instance_nr;
> > >  
> > >  	struct dma_map_ops	dma_ops;
> > >  	struct dma_domain	dma_domain;
> > > @@ -543,6 +545,74 @@ static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
> > >  	.write		= vmd_pci_write,
> > >  };
> > >  
> > > +/**
> > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> > > + * @vmd: &struct vmd_dev VMD device descriptor
> > > + * @rp: int iterator cursor
> > > + * @temp: u32 temporary value for config read
> > > + *
> > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> > > + * space can be determinately accessed through the VMD Config BAR. Because VMD
> > > + * Root Ports can be individually disabled, it's important to iterate for the
> > > + * first enabled Root Port as determined by reading the Vendor/Device register.
> > > + */
> > > +#define for_each_vmd_root_port(vmd, rp, temp)				\
> > > +	for (rp = 0; rp < 4; rp++)					\
> > > +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> > > +				 PCI_VENDOR_ID, 4, &temp) ||		\
> > > +		    temp == 0xffffffff) {} else
> > 
> > You may want to consider using PCI_ERROR_RESPONSE here instead of 0xffffffff.
> > Though this hasn't yet been merged:
> > 
> > https://patchwork.ozlabs.org/project/linux-pci/list/?series=126820
> > 
> 
> Sure it will fit this case perfectly once it's merged
> 
> > > +
> > > +static int vmd_parse_domain(struct vmd_dev *vmd)
> > > +{
> > > +	int root_port, ret;
> > > +	u32 temp, iobase;
> > > +
> > > +	vmd->socket_nr = -1;
> > > +	vmd->instance_nr = -1;
> > > +
> > > +	for_each_vmd_root_port(vmd, root_port, temp) {
> > > +		ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),
> > > +				   PCI_IO_BASE, 2, &iobase);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		vmd->socket_nr = (iobase >> 4) & 0xf;
> > > +		vmd->instance_nr = (iobase >> 14) & 0x3;
> > 
> > I'm not familiar with VMD - however how can you be sure that the VMD BIOS
> > will always populate these values here? Is it possible that earlier BIOS's
> > won't do this and something will go wrong here?
> > 
> > Is there any sanity checking that can happen here?
> 
> Yes that's entirely possible and would show indeterminate values in
> that case. It would be up to the user to understand if the BIOS
> supports the mode before relying on the data.
> 
> I am investigating to see if we can do a dmi_match to verify the data
> before publishing.

I think that would be helpful if possible as it would simplify the
user software - and also prevent the user ever getting garbage data.

> 
> 
> > 
> > > +
> > > +		/* First available will be used */
> > > +		break;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static ssize_t socket_nr_show(struct device *dev,
> > > +			      struct device_attribute *attr, char *buf)
> > > +{
> > > +	struct pci_dev *pdev = to_pci_dev(dev);
> > > +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> > > +
> > > +	return sprintf(buf, "%u\n", vmd->socket_nr);
> > > +}
> > > +static DEVICE_ATTR_RO(socket_nr);
> > > +
> > > +static ssize_t instance_nr_show(struct device *dev,
> > > +			      struct device_attribute *attr, char *buf)
> > > +{
> > > +	struct pci_dev *pdev = to_pci_dev(dev);
> > > +	struct vmd_dev *vmd = pci_get_drvdata(pdev);
> > > +
> > > +	return sprintf(buf, "%u\n", vmd->instance_nr);
> > > +}
> > > +static DEVICE_ATTR_RO(instance_nr);
> > > +
> > > +static struct attribute *vmd_dev_attrs[] = {
> > > +	&dev_attr_socket_nr.attr,
> > > +	&dev_attr_instance_nr.attr,
> > > +	NULL
> > > +};
> > > +ATTRIBUTE_GROUPS(vmd_dev);
> > > +
> > >  static void vmd_attach_resources(struct vmd_dev *vmd)
> > >  {
> > >  	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
> > > @@ -582,6 +652,11 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >  	resource_size_t offset[2] = {0};
> > >  	resource_size_t membar2_offset = 0x2000;
> > >  	struct pci_bus *child;
> > > +	int ret;
> > > +
> > > +	ret = vmd_parse_domain(vmd);
> > > +	if (ret)
> > > +		return ret;
> > 
> > This always will succeed. But what happens if this function returns yet
> > socket_nr/instance_nr hasn't been written to? Is that OK?
> > 
> 
> Basically only one possibility that could occur and that's if the VMD
> is enabled without any VMD Root Ports being enabled on the VMD domain.
> It's an odd configuration but is technically valid, although the domain
> becomes useless until the user reboots and enables the VMD Root Ports. 
> 
> So it's more-or-less implied either socket_nr/instance_nr will have
> data or the domain won't be usable.

Of course in this case the default value will be -1, which should make it
quite obvious to a user that this isn't a valid value.

Thanks,

Andrew Murray

> 
> Thanks,
> Jon
> 
> 
> > Thanks,
> > 
> > Andrew Murray
> > 
> > >  
> > >  	/*
> > >  	 * Shadow registers may exist in certain VMD device ids which allow
> > > @@ -591,7 +666,6 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >  	 */
> > >  	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
> > >  		u32 vmlock;
> > > -		int ret;
> > >  
> > >  		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
> > >  		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
> > > @@ -876,7 +950,8 @@ static int vmd_resume(struct device *dev)
> > >  	.probe		= vmd_probe,
> > >  	.remove		= vmd_remove,
> > >  	.driver		= {
> > > -		.pm	= &vmd_dev_pm_ops,
> > > +		.pm		= &vmd_dev_pm_ops,
> > > +		.dev_groups	= vmd_dev_groups,
> > >  	},
> > >  };
> > >  module_pci_driver(vmd_drv);
> > > -- 
> > > 1.8.3.1
> > >
Bjorn Helgaas Nov. 1, 2019, 9:53 p.m. UTC | #5
[+cc Andrew]

On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> When some VMDs are enabled and others are not, it's difficult to
> determine which IIO stack corresponds to the enabled VMD.
> 
> To assist userspace with management tasks, VMD BIOS will write the VMD
> instance number and socket number into the first enabled root port's IO
> Base/Limit registers prior to OS handoff. VMD driver can capture this
> information and expose it to userspace.

Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
Are these Root Ports visible to the generic PCI core device
enumeration?  If so, it will find them and read these I/O window
registers.  Maybe today the PCI core doesn't change them, but I'm not
sure we should rely on them always being preserved until the vmd
driver can claim the device.

But I guess you're using a special config accessor (vmd_cfg_read()),
so these are probably invisible to the generic enumeration?

> + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> + * @vmd: &struct vmd_dev VMD device descriptor
> + * @rp: int iterator cursor
> + * @temp: u32 temporary value for config read
> + *
> + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> + * space can be determinately accessed through the VMD Config BAR. Because VMD

I'm not sure how to parse "determinately accessed".  Maybe this config
space can *only* be accessed via the VMD Config BAR?

> + * Root Ports can be individually disabled, it's important to iterate for the
> + * first enabled Root Port as determined by reading the Vendor/Device register.
> + */
> +#define for_each_vmd_root_port(vmd, rp, temp)				\
> +	for (rp = 0; rp < 4; rp++)					\
> +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> +				 PCI_VENDOR_ID, 4, &temp) ||		\
> +		    temp == 0xffffffff) {} else
Jon Derrick Nov. 1, 2019, 10:16 p.m. UTC | #6
Hi Bjorn,

On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote:
> [+cc Andrew]
> 
> On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > When some VMDs are enabled and others are not, it's difficult to
> > determine which IIO stack corresponds to the enabled VMD.
> > 
> > To assist userspace with management tasks, VMD BIOS will write the VMD
> > instance number and socket number into the first enabled root port's IO
> > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > information and expose it to userspace.
> 
> Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
> Are these Root Ports visible to the generic PCI core device
> enumeration?  If so, it will find them and read these I/O window
> registers.  Maybe today the PCI core doesn't change them, but I'm not
> sure we should rely on them always being preserved until the vmd
> driver can claim the device.
> 

The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT
parsing occurs before this PCI domain is exposed to the generic PCI
scan code with pci_scan_child_bus(). Until that point the VMD PCI domain
is invisible to the kernel outside of /dev/mem or resource0.

However, yes, it is somewhat fragile in that a third-party driver could
attach to the VMD endpoint prior to the VMD driver and modify the
values. A /dev/mem or resource0 user could also do this on an
unattached VMD endpoint.

I'm wondering if this would also be better suited for a special reset
in quirks.c, but it would need to expose a bit of VMD config accessing
in quirks.c to do that.

> But I guess you're using a special config accessor (vmd_cfg_read()),
> so these are probably invisible to the generic enumeration?
> 

Yes the VMD domain is invisible to generic PCI until the domain is
enumerated late in vmd_enable_domain().

> > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> > + * @vmd: &struct vmd_dev VMD device descriptor
> > + * @rp: int iterator cursor
> > + * @temp: u32 temporary value for config read
> > + *
> > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> > + * space can be determinately accessed through the VMD Config BAR. Because VMD
> 
> I'm not sure how to parse "determinately accessed".  Maybe this config
> space can *only* be accessed via the VMD Config BAR?

Perhaps it should instead say determinately addressed, as each Root
Port config space is addressable at some offset of N * 0x8000 from the
base of the VMD endpoint config bar. I can see the comment may not be
helpful as that detail is abstracted using the vmd_cfg_read() helper.
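
(As a purely illustrative sketch of that addressing - not the driver's
actual accessor - it boils down to:

	/* Root Port N config space lives at Config BAR + N * 0x8000 */
	void __iomem *rp_cfg = vmd->cfgbar + root_port * 0x8000 + reg;

with vmd_cfg_read() hiding that arithmetic behind the usual bus/devfn/reg
interface.)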


> 
> > + * Root Ports can be individually disabled, it's important to iterate for the
> > + * first enabled Root Port as determined by reading the Vendor/Device register.
> > + */
> > +#define for_each_vmd_root_port(vmd, rp, temp)				\
> > +	for (rp = 0; rp < 4; rp++)					\
> > +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> > +				 PCI_VENDOR_ID, 4, &temp) ||		\
> > +		    temp == 0xffffffff) {} else
Lorenzo Pieralisi Nov. 4, 2019, 6:07 p.m. UTC | #7
On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote:
> Hi Bjorn,
> 
> On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote:
> > [+cc Andrew]
> > 
> > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > > When some VMDs are enabled and others are not, it's difficult to
> > > determine which IIO stack corresponds to the enabled VMD.
> > > 
> > > To assist userspace with management tasks, VMD BIOS will write the VMD
> > > instance number and socket number into the first enabled root port's IO
> > > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > > information and expose it to userspace.
> > 
> > Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
> > Are these Root Ports visible to the generic PCI core device
> > enumeration?  If so, it will find them and read these I/O window
> > registers.  Maybe today the PCI core doesn't change them, but I'm not
> > sure we should rely on them always being preserved until the vmd
> > driver can claim the device.
> > 
> 
> The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT
> parsing occurs before this PCI domain is exposed to the generic PCI
> scancode with pci_scan_child_bus(). Until that point the VMD PCI domain
> is invisible to the kernel outside of /dev/mem or resource0.

That's because the VMD controller is a PCI device itself and its
BARs values are used to configure the VMD host controller.

Interesting.

To add to Bjorn's question, this reasoning assumes that whatever
code enumerates the PCI device representing the VMD host controller
does not overwrite its BARs upon bus enumeration otherwise the VMD
controller configuration would be lost. Am I reading the current
code correctly ?

I assume there is not anything you can do to add firmware bindings to
the VMD host controller PCI device to describe these properties you are
exporting, so config space is the only available conduit to report them
to an OS.

Lorenzo

> However, yes, it is somewhat fragile in that a third-party driver could
> attach to the VMD endpoint prior to the VMD driver and modify the
> values. A /dev/mem or resource0 user could also do this on an
> unattached VMD endpoint.
> 
> I'm wondering if this would also be better suited for a special reset
> in quirks.c, but it would need to expose a bit of VMD config accessing
> in quirks.c to do that.
> 
> > But I guess you're using a special config accessor (vmd_cfg_read()),
> > so these are probably invisible to the generic enumeration?
> > 
> 
> Yes the VMD domain is invisible to generic PCI until the domain is
> enumerated late in vmd_enable_domain().
> 
> > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> > > + * @vmd: &struct vmd_dev VMD device descriptor
> > > + * @rp: int iterator cursor
> > > + * @temp: u32 temporary value for config read
> > > + *
> > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> > > + * space can be determinately accessed through the VMD Config BAR. Because VMD
> > 
> > I'm not sure how to parse "determinately accessed".  Maybe this config
> > space can *only* be accessed via the VMD Config BAR?
> 
> Perhaps it should instead say determinately addressed, as each Root
> Port config space is addressable at some offset of N * 0x8000 from the
> base of the VMD endpoint config bar. I can see the comment may not be
> helpful as that detail is abstracted using the vmd_cfg_read() helper.
> 
> 
> > 
> > > + * Root Ports can be individually disabled, it's important to iterate for the
> > > + * first enabled Root Port as determined by reading the Vendor/Device register.
> > > + */
> > > +#define for_each_vmd_root_port(vmd, rp, temp)				\
> > > +	for (rp = 0; rp < 4; rp++)					\
> > > +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> > > +				 PCI_VENDOR_ID, 4, &temp) ||		\
> > > +		    temp == 0xffffffff) {} else
Lorenzo Pieralisi Nov. 5, 2019, 10:12 a.m. UTC | #8
On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote:
> On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote:
> > Hi Bjorn,
> > 
> > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote:
> > > [+cc Andrew]
> > > 
> > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > > > When some VMDs are enabled and others are not, it's difficult to
> > > > determine which IIO stack corresponds to the enabled VMD.
> > > > 
> > > > To assist userspace with management tasks, VMD BIOS will write the VMD
> > > > instance number and socket number into the first enabled root port's IO
> > > > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > > > information and expose it to userspace.
> > > 
> > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
> > > Are these Root Ports visible to the generic PCI core device
> > > enumeration?  If so, it will find them and read these I/O window
> > > registers.  Maybe today the PCI core doesn't change them, but I'm not
> > > sure we should rely on them always being preserved until the vmd
> > > driver can claim the device.
> > > 
> > 
> > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT
> > parsing occurs before this PCI domain is exposed to the generic PCI
> > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain
> > is invisible to the kernel outside of /dev/mem or resource0.
> 
> That's because the VMD controller is a PCI device itself and its
> BARs values are used to configure the VMD host controller.
> 
> Interesting.
> 
> To add to Bjorn's question, this reasoning assumes that whatever
> code enumerates the PCI device representing the VMD host controller
> does not overwrite its BARs upon bus enumeration otherwise the VMD
> controller configuration would be lost. Am I reading the current
> code correctly ?

Sorry, I just went through the code again; I think the VMD controller
PCI device BARs can be and are allowed to be reassigned by the PCI
enumeration code. I misread the code, so I raised a non-existent issue
here - they are like any other PCI device MEM/IO BARs in this respect.

Lorenzo

> I assume there is not anything you can do to add firmware bindings to
> the VMD host controller PCI device to describe these properties you are
> exporting, so config space is the only available conduit to report them
> to an OS.
> 
> Lorenzo
> 
> > However, yes, it is somewhat fragile in that a third-party driver could
> > attach to the VMD endpoint prior to the VMD driver and modify the
> > values. A /dev/mem or resource0 user could also do this on an
> > unattached VMD endpoint.
> > 
> > I'm wondering if this would also be better suited for a special reset
> > in quirks.c, but it would need to expose a bit of VMD config accessing
> > in quirks.c to do that.
> > 
> > > But I guess you're using a special config accessor (vmd_cfg_read()),
> > > so these are probably invisible to the generic enumeration?
> > > 
> > 
> > Yes the VMD domain is invisible to generic PCI until the domain is
> > enumerated late in vmd_enable_domain().
> > 
> > > > + * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
> > > > + * @vmd: &struct vmd_dev VMD device descriptor
> > > > + * @rp: int iterator cursor
> > > > + * @temp: u32 temporary value for config read
> > > > + *
> > > > + * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
> > > > + * space can be determinately accessed through the VMD Config BAR. Because VMD
> > > 
> > > I'm not sure how to parse "determinately accessed".  Maybe this config
> > > space can *only* be accessed via the VMD Config BAR?
> > 
> > Perhaps it should instead say determinately addressed, as each Root
> > Port config space is addressable at some offset of N * 0x8000 from the
> > base of the VMD endpoint config bar. I can see the comment may not be
> > helpful as that detail is abstracted using the vmd_cfg_read() helper.
> > 
> > 
> > > 
> > > > + * Root Ports can be individually disabled, it's important to iterate for the
> > > > + * first enabled Root Port as determined by reading the Vendor/Device register.
> > > > + */
> > > > +#define for_each_vmd_root_port(vmd, rp, temp)				\
> > > > +	for (rp = 0; rp < 4; rp++)					\
> > > > +		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
> > > > +				 PCI_VENDOR_ID, 4, &temp) ||		\
> > > > +		    temp == 0xffffffff) {} else
Jon Derrick Nov. 5, 2019, 9:32 p.m. UTC | #9
On Tue, 2019-11-05 at 10:12 +0000, Lorenzo Pieralisi wrote:
> On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote:
> > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote:
> > > Hi Bjorn,
> > > 
> > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote:
> > > > [+cc Andrew]
> > > > 
> > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > > > > When some VMDs are enabled and others are not, it's difficult to
> > > > > determine which IIO stack corresponds to the enabled VMD.
> > > > > 
> > > > > To assist userspace with management tasks, VMD BIOS will write the VMD
> > > > > instance number and socket number into the first enabled root port's IO
> > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > > > > information and expose it to userspace.
> > > > 
> > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
> > > > Are these Root Ports visible to the generic PCI core device
> > > > enumeration?  If so, it will find them and read these I/O window
> > > > registers.  Maybe today the PCI core doesn't change them, but I'm not
> > > > sure we should rely on them always being preserved until the vmd
> > > > driver can claim the device.
> > > > 
> > > 
> > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT
> > > parsing occurs before this PCI domain is exposed to the generic PCI
> > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain
> > > is invisible to the kernel outside of /dev/mem or resource0.
> > 
> > That's because the VMD controller is a PCI device itself and its
> > BARs values are used to configure the VMD host controller.
> > 
> > Interesting.
> > 
> > To add to Bjorn's question, this reasoning assumes that whatever
> > code enumerates the PCI device representing the VMD host controller
> > does not overwrite its BARs upon bus enumeration otherwise the VMD
> > controller configuration would be lost. Am I reading the current
> > code correctly ?
> 
> Sorry, I just went through the code again, I think the VMD controller
> PCI device BARs can and are allowed to be reassigned by the PCI
> enumeration code - I misread the code, so I raised a non-existent issue
> here, they are like any other PCI device MEM/IO BARs in this respect.
> 
> Lorenzo
> 

Yes, the VMD endpoint itself exposes the domain containing the Root
Ports. It's the Root Ports which get enumerated by the generic PCI
scan code, and it's also the Root Port config space where this domain info
is supplied. Without a VMD driver, the only aperture to access the Root
Port config space is MMIO through the VMD endpoint's 'Config' BAR (aka
MEMBAR0).

Without this patch, a /dev/mem, resource0, or third-party driver could
overwrite these values if they don't also restore them on close/unbind.
I imagine a kexec user would also overwrite these values.

This is one of the reasons I was also thinking it could live in
device-specific reset code as long as it can call into VMD for the
specifics. Many kernel vendors already ship with VMD=y, so I am tempted to
simply make that permanent and export a reset call to a device-specific
reset in quirks.c.
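
Roughly what I have in mind for the quirks.c side, as a sketch only
(vmd_rootport_reset() would be a new export from the VMD driver, and the
device ID is just illustrative):

static int reset_intel_vmd(struct pci_dev *dev, int probe)
{
	if (probe)
		return 0;

	/*
	 * Call back into the VMD driver to restore the Root Port
	 * registers living behind the VMD Config BAR.
	 */
	return vmd_rootport_reset(dev);
}

plus the corresponding entry in pci_dev_reset_methods[]:

	{ PCI_VENDOR_ID_INTEL, 0x201d, reset_intel_vmd },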
Keith Busch Nov. 5, 2019, 10:22 p.m. UTC | #10
On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> Without this patch, a /dev/mem, resource0, or third-party driver could
> overwrite these values if they don't also restore them on close/unbind.
> I imagine a kexec user would also overwrite these values.

Don't you have the same problem with the in-kernel driver? It
looks like pci core will clear the PCI_IO_BASE config registers in
pci_setup_bridge_io() because VMD doesn't provide an IORESOURCE_IO
resource. If you reload the driver, it'll read the wrong values on the
second probing.
Jon Derrick Nov. 5, 2019, 10:38 p.m. UTC | #11
On Wed, 2019-11-06 at 07:22 +0900, Keith Busch wrote:
> On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> > Without this patch, a /dev/mem, resource0, or third-party driver could
> > overwrite these values if they don't also restore them on close/unbind.
> > I imagine a kexec user would also overwrite these values.
> 
> Don't you have the same problem with the in-kernel driver? It
> looks like pci core will clear the PCI_IO_BASE config registers in
> pci_setup_bridge_io() because VMD doesn't provide an IORESOURCE_IO
> resource. If you reload the driver, it'll read the wrong values on the
> second probing.

Is there a corner case I am missing with patch 3/3 that restores on
unload?
Keith Busch Nov. 5, 2019, 10:45 p.m. UTC | #12
On Tue, Nov 05, 2019 at 10:38:05PM +0000, Derrick, Jonathan wrote:
> On Wed, 2019-11-06 at 07:22 +0900, Keith Busch wrote:
> > On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> > > Without this patch, a /dev/mem, resource0, or third-party driver could
> > > overwrite these values if they don't also restore them on close/unbind.
> > > I imagine a kexec user would also overwrite these values.
> > 
> > Don't you have the same problem with the in-kernel driver? It
> > looks like pci core will clear the PCI_IO_BASE config registers in
> > pci_setup_bridge_io() because VMD doesn't provide an IORESOURCE_IO
> > resource. If you reload the driver, it'll read the wrong values on the
> > second probing.
> 
> Is there a corner case I am missing with patch 3/3 that restores on
> unload?

Nothing wrong with that. I just hadn't read that far :/
Lorenzo Pieralisi Jan. 27, 2020, 10:38 a.m. UTC | #13
On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> On Tue, 2019-11-05 at 10:12 +0000, Lorenzo Pieralisi wrote:
> > On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote:
> > > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote:
> > > > Hi Bjorn,
> > > > 
> > > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote:
> > > > > [+cc Andrew]
> > > > > 
> > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > > > > > When some VMDs are enabled and others are not, it's difficult to
> > > > > > determine which IIO stack corresponds to the enabled VMD.
> > > > > > 
> > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD
> > > > > > instance number and socket number into the first enabled root port's IO
> > > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > > > > > information and expose it to userspace.
> > > > > 
> > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
> > > > > Are these Root Ports visible to the generic PCI core device
> > > > > enumeration?  If so, it will find them and read these I/O window
> > > > > registers.  Maybe today the PCI core doesn't change them, but I'm not
> > > > > sure we should rely on them always being preserved until the vmd
> > > > > driver can claim the device.
> > > > > 
> > > > 
> > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT
> > > > parsing occurs before this PCI domain is exposed to the generic PCI
> > > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain
> > > > is invisible to the kernel outside of /dev/mem or resource0.
> > > 
> > > That's because the VMD controller is a PCI device itself and its
> > > BARs values are used to configure the VMD host controller.
> > > 
> > > Interesting.
> > > 
> > > To add to Bjorn's question, this reasoning assumes that whatever
> > > code enumerates the PCI device representing the VMD host controller
> > > does not overwrite its BARs upon bus enumeration otherwise the VMD
> > > controller configuration would be lost. Am I reading the current
> > > code correctly ?
> > 
> > Sorry, I just went through the code again, I think the VMD controller
> > PCI device BARs can and are allowed to be reassigned by the PCI
> > enumeration code - I misread the code, so I raised a non-existent issue
> > here, they are like any other PCI device MEM/IO BARs in this respect.
> > 
> > Lorenzo
> > 
> 
> Yes the VMD endpoint itself exposes the domain containing the Root
> Ports. It's the Root Ports which get enumerated by generic PCI
> scancode, and also the Root Port config space where this domain info is
> supplied. Without a VMD driver, the only aperture to access the Root
> Port config space is MMIO through the VMD endpoint's 'Config' BAR (aka
> MEMBAR0).
> 
> Without this patch, a /dev/mem, resource0, or third-party driver could
> overwrite these values if they don't also restore them on close/unbind.
> I imagine a kexec user would also overwrite these values.
> 
> This is one of the reasons I was also thinking it could live in device
> specific reset code as long as it can call into VMD for the specifics.
> Many kernel vendors already ship with VMD=y, so I am tempted to simply
> make that permanent and export a reset call to a dev specific reset in
> quirks.c.

Hi Jon,

just wanted to ask you what's the plan with this series.

Thanks,
Lorenzo
Jon Derrick Jan. 27, 2020, 11:48 p.m. UTC | #14
On Mon, 2020-01-27 at 10:38 +0000, Lorenzo Pieralisi wrote:
> On Tue, Nov 05, 2019 at 09:32:07PM +0000, Derrick, Jonathan wrote:
> > On Tue, 2019-11-05 at 10:12 +0000, Lorenzo Pieralisi wrote:
> > > On Mon, Nov 04, 2019 at 06:07:00PM +0000, Lorenzo Pieralisi wrote:
> > > > On Fri, Nov 01, 2019 at 10:16:39PM +0000, Derrick, Jonathan wrote:
> > > > > Hi Bjorn,
> > > > > 
> > > > > On Fri, 2019-11-01 at 16:53 -0500, Bjorn Helgaas wrote:
> > > > > > [+cc Andrew]
> > > > > > 
> > > > > > On Wed, Oct 16, 2019 at 11:04:47AM -0600, Jon Derrick wrote:
> > > > > > > When some VMDs are enabled and others are not, it's difficult to
> > > > > > > determine which IIO stack corresponds to the enabled VMD.
> > > > > > > 
> > > > > > > To assist userspace with management tasks, VMD BIOS will write the VMD
> > > > > > > instance number and socket number into the first enabled root port's IO
> > > > > > > Base/Limit registers prior to OS handoff. VMD driver can capture this
> > > > > > > information and expose it to userspace.
> > > > > > 
> > > > > > Hmmm, I'm not sure I understand this, but it sounds possibly fragile.
> > > > > > Are these Root Ports visible to the generic PCI core device
> > > > > > enumeration?  If so, it will find them and read these I/O window
> > > > > > registers.  Maybe today the PCI core doesn't change them, but I'm not
> > > > > > sure we should rely on them always being preserved until the vmd
> > > > > > driver can claim the device.
> > > > > > 
> > > > > 
> > > > > The Root Ports are on the VMD PCI domain, and this IO BASE/LIMIT
> > > > > parsing occurs before this PCI domain is exposed to the generic PCI
> > > > > scancode with pci_scan_child_bus(). Until that point the VMD PCI domain
> > > > > is invisible to the kernel outside of /dev/mem or resource0.
> > > > 
> > > > That's because the VMD controller is a PCI device itself and its
> > > > BARs values are used to configure the VMD host controller.
> > > > 
> > > > Interesting.
> > > > 
> > > > To add to Bjorn's question, this reasoning assumes that whatever
> > > > code enumerates the PCI device representing the VMD host controller
> > > > does not overwrite its BARs upon bus enumeration otherwise the VMD
> > > > controller configuration would be lost. Am I reading the current
> > > > code correctly ?
> > > 
> > > Sorry, I just went through the code again, I think the VMD controller
> > > PCI device BARs can and are allowed to be reassigned by the PCI
> > > enumeration code - I misread the code, so I raised a non-existent issue
> > > here, they are like any other PCI device MEM/IO BARs in this respect.
> > > 
> > > Lorenzo
> > > 
> > 
> > Yes the VMD endpoint itself exposes the domain containing the Root
> > Ports. It's the Root Ports which get enumerated by generic PCI
> > scancode, and also the Root Port config space where this domain info is
> > supplied. Without a VMD driver, the only aperture to access the Root
> > Port config space is MMIO through the VMD endpoint's 'Config' BAR (aka
> > MEMBAR0).
> > 
> > Without this patch, a /dev/mem, resource0, or third-party driver could
> > overwrite these values if they don't also restore them on close/unbind.
> > I imagine a kexec user would also overwrite these values.
> > 
> > This is one of the reasons I was also thinking it could live in device
> > specific reset code as long as it can call into VMD for the specifics.
> > Many kernel vendors already ship with VMD=y, so I am tempted to simply
> > make that permanent and export a reset call to a dev specific reset in
> > quirks.c.
> 
> Hi Jon,
> 
> just wanted to ask you what's the plan with this series.
> 
> Thanks,
> Lorenzo



Please drop. We've implemented a different solution.

Thanks again,
Jon

Patch

diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index 959c7c7..dbe1bff 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -98,6 +98,8 @@  struct vmd_dev {
 	struct irq_domain	*irq_domain;
 	struct pci_bus		*bus;
 	u8			busn_start;
+	u8			socket_nr;
+	u8			instance_nr;
 
 	struct dma_map_ops	dma_ops;
 	struct dma_domain	dma_domain;
@@ -543,6 +545,74 @@  static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
 	.write		= vmd_pci_write,
 };
 
+/**
+ * for_each_vmd_root_port - iterate over all enabled VMD Root Ports
+ * @vmd: &struct vmd_dev VMD device descriptor
+ * @rp: int iterator cursor
+ * @temp: u32 temporary value for config read
+ *
+ * VMD Root Ports are located in the VMD PCIe Domain at 00:[0-3].0, and config
+ * space can be determinately accessed through the VMD Config BAR. Because VMD
+ * Root Ports can be individually disabled, it's important to iterate for the
+ * first enabled Root Port as determined by reading the Vendor/Device register.
+ */
+#define for_each_vmd_root_port(vmd, rp, temp)				\
+	for (rp = 0; rp < 4; rp++)					\
+		if (vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),	\
+				 PCI_VENDOR_ID, 4, &temp) ||		\
+		    temp == 0xffffffff) {} else
+
+static int vmd_parse_domain(struct vmd_dev *vmd)
+{
+	int root_port, ret;
+	u32 temp, iobase;
+
+	vmd->socket_nr = -1;
+	vmd->instance_nr = -1;
+
+	for_each_vmd_root_port(vmd, root_port, temp) {
+		ret = vmd_cfg_read(vmd, 0, PCI_DEVFN(root_port, 0),
+				   PCI_IO_BASE, 2, &iobase);
+		if (ret)
+			return ret;
+
+		vmd->socket_nr = (iobase >> 4) & 0xf;
+		vmd->instance_nr = (iobase >> 14) & 0x3;
+
+		/* First available will be used */
+		break;
+	}
+
+	return 0;
+}
+
+static ssize_t socket_nr_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+
+	return sprintf(buf, "%u\n", vmd->socket_nr);
+}
+static DEVICE_ATTR_RO(socket_nr);
+
+static ssize_t instance_nr_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+
+	return sprintf(buf, "%u\n", vmd->instance_nr);
+}
+static DEVICE_ATTR_RO(instance_nr);
+
+static struct attribute *vmd_dev_attrs[] = {
+	&dev_attr_socket_nr.attr,
+	&dev_attr_instance_nr.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(vmd_dev);
+
 static void vmd_attach_resources(struct vmd_dev *vmd)
 {
 	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
@@ -582,6 +652,11 @@  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	resource_size_t offset[2] = {0};
 	resource_size_t membar2_offset = 0x2000;
 	struct pci_bus *child;
+	int ret;
+
+	ret = vmd_parse_domain(vmd);
+	if (ret)
+		return ret;
 
 	/*
 	 * Shadow registers may exist in certain VMD device ids which allow
@@ -591,7 +666,6 @@  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	 */
 	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
 		u32 vmlock;
-		int ret;
 
 		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
 		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
@@ -876,7 +950,8 @@  static int vmd_resume(struct device *dev)
 	.probe		= vmd_probe,
 	.remove		= vmd_remove,
 	.driver		= {
-		.pm	= &vmd_dev_pm_ops,
+		.pm		= &vmd_dev_pm_ops,
+		.dev_groups	= vmd_dev_groups,
 	},
 };
 module_pci_driver(vmd_drv);