diff mbox series

[v2,5/7] driver core: Add device location to "struct device" and expose it in sysfs

Message ID 20200630044943.3425049-6-rajatja@google.com (mailing list archive)
State Superseded, archived
Delegated to: Bjorn Helgaas
Headers show
Series Tighten PCI security, expose dev location in sysfs | expand

Commit Message

Rajat Jain June 30, 2020, 4:49 a.m. UTC
Add a new (optional) field to denote the physical location of a device
in the system, and expose it in sysfs. This was discussed here:
https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/

(The primary choice for attribute name i.e. "location" is already
exposed as an ABI elsewhere, so settled for "site"). Individual buses
that want to support this new attribute can opt-in by setting a flag in
bus_type, and then populating the location of device while enumerating
it.

Signed-off-by: Rajat Jain <rajatja@google.com>
---
v2: (Initial version)

 drivers/base/core.c        | 35 +++++++++++++++++++++++++++++++
 include/linux/device.h     | 42 ++++++++++++++++++++++++++++++++++++++
 include/linux/device/bus.h |  8 ++++++++
 3 files changed, 85 insertions(+)

Comments

Greg KH June 30, 2020, 8:01 a.m. UTC | #1
On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> Add a new (optional) field to denote the physical location of a device
> in the system, and expose it in sysfs. This was discussed here:
> https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> 
> (The primary choice for attribute name i.e. "location" is already
> exposed as an ABI elsewhere, so settled for "site").

Where is "location" exported?  I see one USB port sysfs attribute, is
that what you are worried about here?

> Individual buses
> that want to support this new attribute can opt-in by setting a flag in
> bus_type, and then populating the location of device while enumerating
> it.
> 
> Signed-off-by: Rajat Jain <rajatja@google.com>
> ---
> v2: (Initial version)
> 
>  drivers/base/core.c        | 35 +++++++++++++++++++++++++++++++
>  include/linux/device.h     | 42 ++++++++++++++++++++++++++++++++++++++
>  include/linux/device/bus.h |  8 ++++++++
>  3 files changed, 85 insertions(+)


No Documentation/ABI/ update for this new attribute?  Why not?

> 
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 67d39a90b45c7..14c815526b7fa 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -1778,6 +1778,32 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr,
>  }
>  static DEVICE_ATTR_RW(online);
>  
> +static ssize_t site_show(struct device *dev, struct device_attribute *attr,
> +			 char *buf)
> +{
> +	const char *site;
> +
> +	device_lock(dev);
> +	switch (dev->site) {
> +	case SITE_INTERNAL:
> +		site = "INTERNAL";
> +		break;
> +	case SITE_EXTENDED:
> +		site = "EXTENDED";
> +		break;
> +	case SITE_EXTERNAL:
> +		site = "EXTERNAL";
> +		break;
> +	case SITE_UNKNOWN:
> +	default:
> +		site = "UNKNOWN";
> +		break;
> +	}
> +	device_unlock(dev);

Why are you locking/unlocking a device here?

You have a reference count on the structure, are you worried about
something else changing here on it?  If so, what?  You aren't locking it
when the state is set (which is fine, really, you shouldn't need to.)


> +	return sprintf(buf, "%s\n", site);
> +}
> +static DEVICE_ATTR_RO(site);
> +
>  int device_add_groups(struct device *dev, const struct attribute_group **groups)
>  {
>  	return sysfs_create_groups(&dev->kobj, groups);
> @@ -1949,8 +1975,16 @@ static int device_add_attrs(struct device *dev)
>  			goto err_remove_dev_groups;
>  	}
>  
> +	if (bus_supports_site(dev->bus)) {
> +		error = device_create_file(dev, &dev_attr_site);
> +		if (error)
> +			goto err_remove_dev_attr_online;
> +	}
> +
>  	return 0;
>  
> + err_remove_dev_attr_online:
> +	device_remove_file(dev, &dev_attr_online);
>   err_remove_dev_groups:
>  	device_remove_groups(dev, dev->groups);
>   err_remove_type_groups:
> @@ -1968,6 +2002,7 @@ static void device_remove_attrs(struct device *dev)
>  	struct class *class = dev->class;
>  	const struct device_type *type = dev->type;
>  
> +	device_remove_file(dev, &dev_attr_site);
>  	device_remove_file(dev, &dev_attr_online);
>  	device_remove_groups(dev, dev->groups);
>  
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 15460a5ac024a..a4143735ae712 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -428,6 +428,31 @@ enum dl_dev_state {
>  	DL_DEV_UNBINDING,
>  };
>  
> +/**
> + * enum device_site - Physical location of the device in the system.
> + * The semantics of values depend on subsystem / bus:
> + *
> + * @SITE_UNKNOWN:  Location is Unknown (default)
> + *
> + * @SITE_INTERNAL: Device is internal to the system, and cannot be (easily)
> + *                 removed. E.g. SoC internal devices, onboard soldered
> + *                 devices, internal M.2 cards (that cannot be removed
> + *                 without opening the chassis).
> + * @SITE_EXTENDED: Device sits an extension of the system. E.g. devices
> + *                 on external PCIe trays, docking stations etc. These
> + *                 devices may be removable, but are generally housed
> + *                 internally on an extension board, so they are removed
> + *                 only when that whole extension board is removed.
> + * @SITE_EXTERNAL: Devices truly external to the system (i.e. plugged on
> + *                 an external port) that may be removed or added frequently.
> + */
> +enum device_site {
> +	SITE_UNKNOWN = 0,
> +	SITE_INTERNAL,
> +	SITE_EXTENDED,
> +	SITE_EXTERNAL,
> +};
> +
>  /**
>   * struct dev_links_info - Device data related to device links.
>   * @suppliers: List of links to supplier devices.
> @@ -513,6 +538,7 @@ struct dev_links_info {
>   * 		device (i.e. the bus driver that discovered the device).
>   * @iommu_group: IOMMU group the device belongs to.
>   * @iommu:	Per device generic IOMMU runtime data
> + * @site:	Physical location of the device w.r.t. the system
>   *
>   * @offline_disabled: If set, the device is permanently online.
>   * @offline:	Set after successful invocation of bus type's .offline().
> @@ -613,6 +639,8 @@ struct device {
>  	struct iommu_group	*iommu_group;
>  	struct dev_iommu	*iommu;
>  
> +	enum device_site	site;	/* Device physical location */
> +
>  	bool			offline_disabled:1;
>  	bool			offline:1;
>  	bool			of_node_reused:1;
> @@ -806,6 +834,20 @@ static inline bool dev_has_sync_state(struct device *dev)
>  	return false;
>  }
>  
> +static inline int dev_set_site(struct device *dev, enum device_site site)
> +{
> +	if (site < SITE_UNKNOWN || site > SITE_EXTERNAL)
> +		return -EINVAL;

It's an enum, why check the range?

thanks,

greg k-h
Heikki Krogerus June 30, 2020, 10:49 a.m. UTC | #2
On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> Add a new (optional) field to denote the physical location of a device
> in the system, and expose it in sysfs. This was discussed here:
> https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> 
> (The primary choice for attribute name i.e. "location" is already
> exposed as an ABI elsewhere, so settled for "site"). Individual buses
> that want to support this new attribute can opt-in by setting a flag in
> bus_type, and then populating the location of device while enumerating
> it.

So why not just call it "physical_location"?


thanks,
Greg KH June 30, 2020, 12:52 p.m. UTC | #3
On Tue, Jun 30, 2020 at 01:49:48PM +0300, Heikki Krogerus wrote:
> On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> > Add a new (optional) field to denote the physical location of a device
> > in the system, and expose it in sysfs. This was discussed here:
> > https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> > 
> > (The primary choice for attribute name i.e. "location" is already
> > exposed as an ABI elsewhere, so settled for "site"). Individual buses
> > that want to support this new attribute can opt-in by setting a flag in
> > bus_type, and then populating the location of device while enumerating
> > it.
> 
> So why not just call it "physical_location"?

That's better, and will allow us to put "3rd blue plug from the left,
4th row down" in there someday :)

All of this is "relative" to the CPU, right?  But what CPU?  Again, how
are the systems with drawers of PCI and CPUs and memory that can be
added/removed at any point in time being handled here?  What is
"internal" and "external" for them?

What exactly is the physical boundary here that is attempting to be
described?

thanks,

greg "not all the world is your laptop" k-h
Rafael J. Wysocki June 30, 2020, 1 p.m. UTC | #4
On Tue, Jun 30, 2020 at 2:52 PM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Tue, Jun 30, 2020 at 01:49:48PM +0300, Heikki Krogerus wrote:
> > On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> > > Add a new (optional) field to denote the physical location of a device
> > > in the system, and expose it in sysfs. This was discussed here:
> > > https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> > >
> > > (The primary choice for attribute name i.e. "location" is already
> > > exposed as an ABI elsewhere, so settled for "site"). Individual buses
> > > that want to support this new attribute can opt-in by setting a flag in
> > > bus_type, and then populating the location of device while enumerating
> > > it.
> >
> > So why not just call it "physical_location"?
>
> That's better, and will allow us to put "3rd blue plug from the left,
> 4th row down" in there someday :)
>
> All of this is "relative" to the CPU, right?  But what CPU?  Again, how
> are the systems with drawers of PCI and CPUs and memory that can be
> added/removed at any point in time being handled here?  What is
> "internal" and "external" for them?
>
> What exactly is the physical boundry here that is attempting to be
> described?

Also, where is the "physical location" information going to come from?

If that is the platform firmware (which I suspect is the anticipated
case), there may be problems with reliability related to that.
Greg KH June 30, 2020, 3:38 p.m. UTC | #5
On Tue, Jun 30, 2020 at 03:00:34PM +0200, Rafael J. Wysocki wrote:
> On Tue, Jun 30, 2020 at 2:52 PM Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> >
> > On Tue, Jun 30, 2020 at 01:49:48PM +0300, Heikki Krogerus wrote:
> > > On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> > > > Add a new (optional) field to denote the physical location of a device
> > > > in the system, and expose it in sysfs. This was discussed here:
> > > > https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> > > >
> > > > (The primary choice for attribute name i.e. "location" is already
> > > > exposed as an ABI elsewhere, so settled for "site"). Individual buses
> > > > that want to support this new attribute can opt-in by setting a flag in
> > > > bus_type, and then populating the location of device while enumerating
> > > > it.
> > >
> > > So why not just call it "physical_location"?
> >
> > That's better, and will allow us to put "3rd blue plug from the left,
> > 4th row down" in there someday :)
> >
> > All of this is "relative" to the CPU, right?  But what CPU?  Again, how
> > are the systems with drawers of PCI and CPUs and memory that can be
> > added/removed at any point in time being handled here?  What is
> > "internal" and "external" for them?
> >
> > What exactly is the physical boundry here that is attempting to be
> > described?
> 
> Also, where is the "physical location" information going to come from?

Who knows?  :)

Some BIOS seem to provide this, but do you trust that?

> If that is the platform firmware (which I suspect is the anticipated
> case), there may be problems with reliability related to that.

s/may/will/

which means making the kernel enact a policy like this patch series
tries to add, will result in a lot of broken systems, which is why I
keep saying that it needs to be done in userspace.

It's as if some of us haven't been down this road before and just keep
being ignored...

{sigh}

greg k-h
Rafael J. Wysocki June 30, 2020, 4:08 p.m. UTC | #6
On Tue, Jun 30, 2020 at 5:38 PM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Tue, Jun 30, 2020 at 03:00:34PM +0200, Rafael J. Wysocki wrote:
> > On Tue, Jun 30, 2020 at 2:52 PM Greg Kroah-Hartman
> > <gregkh@linuxfoundation.org> wrote:
> > >
> > > On Tue, Jun 30, 2020 at 01:49:48PM +0300, Heikki Krogerus wrote:
> > > > On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> > > > > Add a new (optional) field to denote the physical location of a device
> > > > > in the system, and expose it in sysfs. This was discussed here:
> > > > > https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> > > > >
> > > > > (The primary choice for attribute name i.e. "location" is already
> > > > > exposed as an ABI elsewhere, so settled for "site"). Individual buses
> > > > > that want to support this new attribute can opt-in by setting a flag in
> > > > > bus_type, and then populating the location of device while enumerating
> > > > > it.
> > > >
> > > > So why not just call it "physical_location"?
> > >
> > > That's better, and will allow us to put "3rd blue plug from the left,
> > > 4th row down" in there someday :)
> > >
> > > All of this is "relative" to the CPU, right?  But what CPU?  Again, how
> > > are the systems with drawers of PCI and CPUs and memory that can be
> > > added/removed at any point in time being handled here?  What is
> > > "internal" and "external" for them?
> > >
> > > What exactly is the physical boundry here that is attempting to be
> > > described?
> >
> > Also, where is the "physical location" information going to come from?
>
> Who knows?  :)
>
> Some BIOS seem to provide this, but do you trust that?
>
> > If that is the platform firmware (which I suspect is the anticipated
> > case), there may be problems with reliability related to that.
>
> s/may/will/
>
> which means making the kernel inact a policy like this patch series
> tries to add, will result in a lot of broken systems, which is why I
> keep saying that it needs to be done in userspace.
>
> It's as if some of us haven't been down this road before and just keep
> being ignored...
>
> {sigh}

Well, to be honest, if you are a "vertical" vendor and you control the
entire stack, *including* the platform firmware, it would be kind of
OK for you to do that in a product kernel.

However, this is not a practical thing to do in the mainline kernel
which must work for everybody, including people who happen to use
systems with broken or even actively unfriendly firmware on them.

So I'm inclined to say that IMO this series "as is" would not be an
improvement from the mainline perspective.

I guess it would make sense to have an attribute for user space to
write to in order to make the kernel reject device plug-in events
coming from a given port or connector, but the kernel has no reliable
means to determine *which* ports or connectors are "safe", and even if
there was a way for it to do that, it still may not agree with user
space on which ports or connectors should be regarded as "safe".

Cheers!
Greg KH June 30, 2020, 5 p.m. UTC | #7
On Tue, Jun 30, 2020 at 06:08:31PM +0200, Rafael J. Wysocki wrote:
> On Tue, Jun 30, 2020 at 5:38 PM Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> >
> > On Tue, Jun 30, 2020 at 03:00:34PM +0200, Rafael J. Wysocki wrote:
> > > On Tue, Jun 30, 2020 at 2:52 PM Greg Kroah-Hartman
> > > <gregkh@linuxfoundation.org> wrote:
> > > >
> > > > On Tue, Jun 30, 2020 at 01:49:48PM +0300, Heikki Krogerus wrote:
> > > > > On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> > > > > > Add a new (optional) field to denote the physical location of a device
> > > > > > in the system, and expose it in sysfs. This was discussed here:
> > > > > > https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> > > > > >
> > > > > > (The primary choice for attribute name i.e. "location" is already
> > > > > > exposed as an ABI elsewhere, so settled for "site"). Individual buses
> > > > > > that want to support this new attribute can opt-in by setting a flag in
> > > > > > bus_type, and then populating the location of device while enumerating
> > > > > > it.
> > > > >
> > > > > So why not just call it "physical_location"?
> > > >
> > > > That's better, and will allow us to put "3rd blue plug from the left,
> > > > 4th row down" in there someday :)
> > > >
> > > > All of this is "relative" to the CPU, right?  But what CPU?  Again, how
> > > > are the systems with drawers of PCI and CPUs and memory that can be
> > > > added/removed at any point in time being handled here?  What is
> > > > "internal" and "external" for them?
> > > >
> > > > What exactly is the physical boundry here that is attempting to be
> > > > described?
> > >
> > > Also, where is the "physical location" information going to come from?
> >
> > Who knows?  :)
> >
> > Some BIOS seem to provide this, but do you trust that?
> >
> > > If that is the platform firmware (which I suspect is the anticipated
> > > case), there may be problems with reliability related to that.
> >
> > s/may/will/
> >
> > which means making the kernel inact a policy like this patch series
> > tries to add, will result in a lot of broken systems, which is why I
> > keep saying that it needs to be done in userspace.
> >
> > It's as if some of us haven't been down this road before and just keep
> > being ignored...
> >
> > {sigh}
> 
> Well, to be honest, if you are a "vertical" vendor and you control the
> entire stack, *including* the platform firmware, it would be kind of
> OK for you to do that in a product kernel.
> 
> However, this is not a practical thing to do in the mainline kernel
> which must work for everybody, including people who happen to use
> systems with broken or even actively unfriendly firmware on them.
> 
> So I'm inclined to say that IMO this series "as is" would not be an
> improvement from the mainline perspective.

It can be, we have been using this for USB devices for many many years
now, quite successfully.  The key is not to trust that the platform
firmware got it right :)

> I guess it would make sense to have an attribute for user space to
> write to in order to make the kernel reject device plug-in events
> coming from a given port or connector, but the kernel has no reliable
> means to determine *which* ports or connectors are "safe", and even if
> there was a way for it to do that, it still may not agree with user
> space on which ports or connectors should be regarded as "safe".

Again, we have been doing this for USB devices for a very long time, PCI
shouldn't be any different.  Why people keep ignoring working solutions
is beyond me, there's nothing "special" about PCI devices here for this
type of "worry" or reasoning to try to create new solutions.

So, again, I ask, go do what USB does, and to do that, take the logic
out of the USB core, make it bus-agnostic, and _THEN_ add it to the PCI
code.  Why the original submitter keeps ignoring my request to do this
is beyond me, I guess they like making patches that will get rejected :(

thanks,

greg k-h
Saravana Kannan June 30, 2020, 5:43 p.m. UTC | #8
On Mon, Jun 29, 2020 at 9:49 PM Rajat Jain <rajatja@google.com> wrote:
>
> Add a new (optional) field to denote the physical location of a device
> in the system, and expose it in sysfs. This was discussed here:
> https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
>
> (The primary choice for attribute name i.e. "location" is already
> exposed as an ABI elsewhere, so settled for "site"). Individual buses
> that want to support this new attribute can opt-in by setting a flag in
> bus_type, and then populating the location of device while enumerating
> it.
>
> Signed-off-by: Rajat Jain <rajatja@google.com>
> ---
> v2: (Initial version)
>
>  drivers/base/core.c        | 35 +++++++++++++++++++++++++++++++
>  include/linux/device.h     | 42 ++++++++++++++++++++++++++++++++++++++
>  include/linux/device/bus.h |  8 ++++++++
>  3 files changed, 85 insertions(+)
>

<snip> I'm not CC'ed in 4/7, so just replying

> diff --git a/include/linux/device.h b/include/linux/device.h
> index 15460a5ac024a..a4143735ae712 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -428,6 +428,31 @@ enum dl_dev_state {
>         DL_DEV_UNBINDING,
>  };
>
> +/**
> + * enum device_site - Physical location of the device in the system.
> + * The semantics of values depend on subsystem / bus:
> + *
> + * @SITE_UNKNOWN:  Location is Unknown (default)
> + *
> + * @SITE_INTERNAL: Device is internal to the system, and cannot be (easily)
> + *                 removed. E.g. SoC internal devices, onboard soldered
> + *                 devices, internal M.2 cards (that cannot be removed
> + *                 without opening the chassis).
> + * @SITE_EXTENDED: Device sits an extension of the system. E.g. devices
> + *                 on external PCIe trays, docking stations etc. These
> + *                 devices may be removable, but are generally housed
> + *                 internally on an extension board, so they are removed
> + *                 only when that whole extension board is removed.
> + * @SITE_EXTERNAL: Devices truly external to the system (i.e. plugged on
> + *                 an external port) that may be removed or added frequently.
> + */
> +enum device_site {
> +       SITE_UNKNOWN = 0,
> +       SITE_INTERNAL,
> +       SITE_EXTENDED,
> +       SITE_EXTERNAL,
> +};
> +
>  /**
>   * struct dev_links_info - Device data related to device links.
>   * @suppliers: List of links to supplier devices.
> @@ -513,6 +538,7 @@ struct dev_links_info {
>   *             device (i.e. the bus driver that discovered the device).
>   * @iommu_group: IOMMU group the device belongs to.
>   * @iommu:     Per device generic IOMMU runtime data
> + * @site:      Physical location of the device w.r.t. the system
>   *
>   * @offline_disabled: If set, the device is permanently online.
>   * @offline:   Set after successful invocation of bus type's .offline().
> @@ -613,6 +639,8 @@ struct device {
>         struct iommu_group      *iommu_group;
>         struct dev_iommu        *iommu;
>
> +       enum device_site        site;   /* Device physical location */
> +
>         bool                    offline_disabled:1;
>         bool                    offline:1;
>         bool                    of_node_reused:1;
> @@ -806,6 +834,20 @@ static inline bool dev_has_sync_state(struct device *dev)
>         return false;
>  }
>
> +static inline int dev_set_site(struct device *dev, enum device_site site)
> +{
> +       if (site < SITE_UNKNOWN || site > SITE_EXTERNAL)
> +               return -EINVAL;
> +
> +       dev->site = site;
> +       return 0;
> +}
> +
> +static inline bool dev_is_external(struct device *dev)
> +{
> +       return dev->site == SITE_EXTERNAL;
> +}

I'm not CC'ed in the rest of the patches in this series, so just
responding here. I see you use this function in patch 6/7 to decide if
the PCI device is trusted. Anything other than EXTERNAL is being
treated as trusted. I'd argue that anything that's not internal should
be distrusted. For example, I can have a hacked up laptop dock that I
can share with you when you visit my home/office and now you are
trusting it when you shouldn't be.

Also, "UNKNOWN" is treated as trusted in patch 6/7. I'm guessing this
is because some of the devices might not have the info in their
firmware? At which point, this feature isn't even protecting all the
PCI ports properly? This adds to Greg's point that this should be a
userspace policy so that it can override whatever is wrong/missing in
the firmware.

-Saravana
Rajat Jain July 1, 2020, 6:06 p.m. UTC | #9
Hello,

On Tue, Jun 30, 2020 at 10:00 AM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Tue, Jun 30, 2020 at 06:08:31PM +0200, Rafael J. Wysocki wrote:
> > On Tue, Jun 30, 2020 at 5:38 PM Greg Kroah-Hartman
> > <gregkh@linuxfoundation.org> wrote:
> > >
> > > On Tue, Jun 30, 2020 at 03:00:34PM +0200, Rafael J. Wysocki wrote:
> > > > On Tue, Jun 30, 2020 at 2:52 PM Greg Kroah-Hartman
> > > > <gregkh@linuxfoundation.org> wrote:
> > > > >
> > > > > On Tue, Jun 30, 2020 at 01:49:48PM +0300, Heikki Krogerus wrote:
> > > > > > On Mon, Jun 29, 2020 at 09:49:41PM -0700, Rajat Jain wrote:
> > > > > > > Add a new (optional) field to denote the physical location of a device
> > > > > > > in the system, and expose it in sysfs. This was discussed here:
> > > > > > > https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/
> > > > > > >
> > > > > > > (The primary choice for attribute name i.e. "location" is already
> > > > > > > exposed as an ABI elsewhere, so settled for "site"). Individual buses
> > > > > > > that want to support this new attribute can opt-in by setting a flag in
> > > > > > > bus_type, and then populating the location of device while enumerating
> > > > > > > it.
> > > > > >
> > > > > > So why not just call it "physical_location"?
> > > > >
> > > > > That's better, and will allow us to put "3rd blue plug from the left,
> > > > > 4th row down" in there someday :)
> > > > >
> > > > > All of this is "relative" to the CPU, right?  But what CPU?  Again, how
> > > > > are the systems with drawers of PCI and CPUs and memory that can be
> > > > > added/removed at any point in time being handled here?  What is
> > > > > "internal" and "external" for them?
> > > > >
> > > > > What exactly is the physical boundry here that is attempting to be
> > > > > described?
> > > >
> > > > Also, where is the "physical location" information going to come from?
> > >
> > > Who knows?  :)
> > >
> > > Some BIOS seem to provide this, but do you trust that?
> > >
> > > > If that is the platform firmware (which I suspect is the anticipated
> > > > case), there may be problems with reliability related to that.
> > >
> > > s/may/will/
> > >
> > > which means making the kernel inact a policy like this patch series
> > > tries to add, will result in a lot of broken systems, which is why I
> > > keep saying that it needs to be done in userspace.
> > >
> > > It's as if some of us haven't been down this road before and just keep
> > > being ignored...
> > >
> > > {sigh}
> >
> > Well, to be honest, if you are a "vertical" vendor and you control the
> > entire stack, *including* the platform firmware, it would be kind of
> > OK for you to do that in a product kernel.
> >
> > However, this is not a practical thing to do in the mainline kernel
> > which must work for everybody, including people who happen to use
> > systems with broken or even actively unfriendly firmware on them.
> >
> > So I'm inclined to say that IMO this series "as is" would not be an
> > improvement from the mainline perspective.
>
> It can be, we have been using this for USB devices for many many years
> now, quite successfully.  The key is not to trust that the platform
> firmware got it right :)
>
> > I guess it would make sense to have an attribute for user space to
> > write to in order to make the kernel reject device plug-in events
> > coming from a given port or connector, but the kernel has no reliable
> > means to determine *which* ports or connectors are "safe", and even if
> > there was a way for it to do that, it still may not agree with user
> > space on which ports or connectors should be regarded as "safe".
>
> Again, we have been doing this for USB devices for a very long time, PCI
> shouldn't be any different.  Why people keep ignoring working solutions
> is beyond me, there's nothing "special" about PCI devices here for this
> type of "worry" or reasoning to try to create new solutions.
>
> So, again, I ask, go do what USB does, and to do that, take the logic
> out of the USB core, make it bus-agnositic, and _THEN_ add it to the PCI
> code. Why the original submitter keeps ignoring my request to do this
> is beyond me, I guess they like making patches that will get rejected :(

IMHO I'm actually trying to precisely do what I think was the
conclusion of our discussion, and then some changes because of the
further feedback I received on those patches. Let's take a step back
and please allow me to explain how I got here (my apologies but this
spans a couple of threads, and I'm trying to tie them all together
here):

GOAL: To allow user space to control which (PCI) drivers it wants to
allow on external (thunderbolt) ports. There was a lot of debate about
the need for such a policy at
https://lore.kernel.org/linux-pci/CACK8Z6GR7-wseug=TtVyRarVZX_ao2geoLDNBwjtB+5Y7VWNEQ@mail.gmail.com/
with the final conclusion that it should be OK to implement such a
policy in userspace, as long as the policy is not implemented in the
kernel. The kernel only needs to expose bits & info that is needed by
the userspace to implement such a policy, and it can be used in
conjunction with "drivers_autoprobe" to implement this policy:
--------------------------------------------------------------------
....
That's an odd thing, but sure, if you want to write up such a policy for
your systems, great.  But that policy does not belong in the kernel, it
belongs in userspace.
....
--------------------------------------------------------------------

1) The post https://lore.kernel.org/linux-pci/20200609210400.GA1461839@bjorn-Precision-5520/
lists out the approach that was agreed on. Replicating it here:
-----------------------------------------------------------------------
  - Expose the PCI pdev->untrusted bit in sysfs.  We don't expose this
    today, but doing so would be trivial.  I think I would prefer a
    sysfs name like "external" so it's more descriptive and less of a
    judgment.

    This comes from either the DT "external-facing" property or the
    ACPI "ExternalFacingPort" property.

  - All devices present at boot are enumerated.  Any statically built
    drivers will bind to them before any userspace code runs.

    If you want to keep statically built drivers from binding, you'd
    need to invent some mechanism so pci_driver_init() could clear
    drivers_autoprobe after registering pci_bus_type.

  - Early userspace code prevents modular drivers from automatically
    binding to PCI devices:

      echo 0 > /sys/bus/pci/drivers_autoprobe

    This prevents modular drivers from binding to all devices, whether
    present at boot or hot-added.

  - Userspace code uses the sysfs "bind" file to control which drivers
    are loaded and can bind to each device, e.g.,

      echo 0000:02:00.0 > /sys/bus/pci/drivers/nvme/bind
-----------------------------------------------------------------------

2) As part of implementing the above agreed approach, when I exposed
PCI "untrusted" attribute to userspace, it ran into discussion that
concluded that instead of this, the device core should be enhanced
with a location attribute.
https://lore.kernel.org/linux-pci/20200618184621.GA446639@kroah.com/
-----------------------------------------------------------------------
...
The attribute should be called something like "location" or something
like that (naming is hard), as you don't always know if something is
external or not (it could be internal, it could be unknown, it could be
internal to an external device that you trust (think PCI drawers for
"super" computers that are hot pluggable but yet really part of the
internal bus).
....
"trust" has no direct relation to the location, except in a policy of
what you wish to do with that device, so as long as you keep them
separate that way, I am fine with it.
...
-----------------------------------------------------------------------

And hence this patch. I don't see an attribute in USB comparable to
this new attribute, except for the boolean "removable" maybe. Are you
suggesting to pull that into the device core instead of adding this
"physical_location" attribute?

3) The one deviation from the agreed approach in (1) is
https://patchwork.kernel.org/patch/11633095/ . The reason is I
realized that contrary to what I earlier believed, we might not be
able to disable the PCI link to all external PCI devices at boot. So
external PCI devices may actually bind to drivers before userspace
comes up and does "echo 0 > /sys/bus/pci/drivers_autoprobe".

I'm really happy to do what you think is the right way as long as it
helps achieve my goal above. Really looking for clear directions here.

Thanks & Best Regards,

Rajat


> thanks,
>
> greg k-h
Oliver O'Halloran July 2, 2020, 5:23 a.m. UTC | #10
On Thu, Jul 2, 2020 at 4:07 AM Rajat Jain <rajatja@google.com> wrote:
>
> *snip*
>
> > > I guess it would make sense to have an attribute for user space to
> > > write to in order to make the kernel reject device plug-in events
> > > coming from a given port or connector, but the kernel has no reliable
> > > means to determine *which* ports or connectors are "safe", and even if
> > > there was a way for it to do that, it still may not agree with user
> > > space on which ports or connectors should be regarded as "safe".
> >
> > Again, we have been doing this for USB devices for a very long time, PCI
> > shouldn't be any different.  Why people keep ignoring working solutions
> > is beyond me, there's nothing "special" about PCI devices here for this
> > type of "worry" or reasoning to try to create new solutions.
> >
> > So, again, I ask, go do what USB does, and to do that, take the logic
> > > out of the USB core, make it bus-agnostic, and _THEN_ add it to the PCI
> > code. Why the original submitter keeps ignoring my request to do this
> > is beyond me, I guess they like making patches that will get rejected :(
>
> IMHO I'm actually trying to precisely do what I think was the
> conclusion of our discussion, and then some changes because of the
> further feedback I received on those patches. Let's take a step back
> and please allow me to explain how I got here (my apologies but this
> spans a couple of threads, and I'm trying to tie them all together
> here):

The previous thread had some suggestions, but no real conclusions.
That's probably why we're still arguing about it...

> GOAL: To allow user space to control what (PCI) drivers he wants to
> allow on external (thunderbolt) ports. There was a lot of debate about
> the need for such a policy at
> https://lore.kernel.org/linux-pci/CACK8Z6GR7-wseug=TtVyRarVZX_ao2geoLDNBwjtB+5Y7VWNEQ@mail.gmail.com/
> with the final conclusion that it should be OK to implement such a
> policy in userspace, as long as the policy is not implemented in the
> kernel. The kernel only needs to expose bits & info that is needed by
> the userspace to implement such a policy, and it can be used in
> conjunction with "drivers_autoprobe" to implement this policy:
> --------------------------------------------------------------------
> ....
> That's an odd thing, but sure, if you want to write up such a policy for
> your systems, great.  But that policy does not belong in the kernel, it
> belongs in userspace.
> ....
> --------------------------------------------------------------------
> 1) The post https://lore.kernel.org/linux-pci/20200609210400.GA1461839@bjorn-Precision-5520/
> lists out the approach that was agreed on. Replicating it here:
> -----------------------------------------------------------------------
>   - Expose the PCI pdev->untrusted bit in sysfs.  We don't expose this
>     today, but doing so would be trivial.  I think I would prefer a
>     sysfs name like "external" so it's more descriptive and less of a
>     judgment.
>
>     This comes from either the DT "external-facing" property or the
>     ACPI "ExternalFacingPort" property.
>
>   - All devices present at boot are enumerated.  Any statically built
>     drivers will bind to them before any userspace code runs.
>
>     If you want to keep statically built drivers from binding, you'd
>     need to invent some mechanism so pci_driver_init() could clear
>     drivers_autoprobe after registering pci_bus_type.
>
>   - Early userspace code prevents modular drivers from automatically
>     binding to PCI devices:
>
>       echo 0 > /sys/bus/pci/drivers_autoprobe
>
>     This prevents modular drivers from binding to all devices, whether
>     present at boot or hot-added.
>
>   - Userspace code uses the sysfs "bind" file to control which drivers
>     are loaded and can bind to each device, e.g.,
>
>       echo 0000:02:00.0 > /sys/bus/pci/drivers/nvme/bind

I think this is a reasonable suggestion. However, as Greg pointed out
it's gratuitously different to what USB does for no real reason.

> -----------------------------------------------------------------------
> 2) As part of implementing the above agreed approach, when I exposed
> PCI "untrusted" attribute to userspace, it ran into discussion that
> concluded that instead of this, the device core should be enhanced
> with a location attribute.
> https://lore.kernel.org/linux-pci/20200618184621.GA446639@kroah.com/
> -----------------------------------------------------------------------
> ...
> The attribute should be called something like "location" or something
> like that (naming is hard), as you don't always know if something is
> external or not (it could be internal, it could be unknown, it could be
> internal to an external device that you trust (think PCI drawers for
> "super" computers that are hot pluggable but yet really part of the
> internal bus).
> ....
> "trust" has no direct relation to the location, except in a policy of
> what you wish to do with that device, so as long as you keep them
> separate that way, I am fine with it.
> ...
> -----------------------------------------------------------------------
>
> And hence this patch. I don't see an attribute in USB comparable to
> this new attribute, except for the boolean "removable" may be. Are you
> suggesting to pull that into the device core instead of adding this
> "physical_location" attribute?

He's suggesting you pull the "authorized" attribute into the driver
core. That's the mechanism USB uses to block drivers binding unless
userspace authorizes them. I don't see any reason why we can't re-use
that sysfs interface for PCI devices since the problem being solved is
fundamentally the same. The main question is what we should do as a
default policy in the kernel. For USB the default comes from the
"authorized_default" module param of usbcore:

> /* authorized_default behaviour:
>  * -1 is authorized for all devices except wireless (old behaviour)
>  * 0 is unauthorized for all devices
>  * 1 is authorized for all devices
>  * 2 is authorized for internal devices
>  */
> #define USB_AUTHORIZE_WIRED   -1
> #define USB_AUTHORIZE_NONE    0
> #define USB_AUTHORIZE_ALL     1
> #define USB_AUTHORIZE_INTERNAL        2
>
> static int authorized_default = USB_AUTHORIZE_WIRED;
> module_param(authorized_default, int, S_IRUGO|S_IWUSR);

So the default policy for USB is to authorize any wired USB device and
we can optionally restrict that to just integrated devices. Sounding
familiar?

The internal / external status is still useful to know so we might
want to make a sysfs attribute for that too. However, I'd like to
point out that internal / external isn't the whole story. As I
mentioned in the last thread, if I have a BMC device I *really* don't
want it to be authorized by default even though it's an internal
device. Similarly, if I know all my internal cards support PCIe
Component Authentication then I might choose not to trust any PCI
devices unless they authenticate successfully.

> 3) The one deviation from the agreed approach in (1) is
> https://patchwork.kernel.org/patch/11633095/ . The reason is I
> realized that contrary to what I earlier believed, we might not be
> able to disable the PCI link to all external PCI devices at boot. So
> external PCI devices may actually bind to drivers before userspace
> comes up and does "echo 0 > /sys/bus/pci/drivers_autoprobe").

Yep, that's a problem. If we want to provide a useful mechanism to
userspace then the default behaviour of the kernel can't undermine
that mechanism. If that means we need another kernel command line
parameter then I guess we just have to live with it.

Oliver
Greg KH July 2, 2020, 7:32 a.m. UTC | #11
On Thu, Jul 02, 2020 at 03:23:23PM +1000, Oliver O'Halloran wrote:
> On Thu, Jul 2, 2020 at 4:07 AM Rajat Jain <rajatja@google.com> wrote:
> >
> > *snip*
> >
> > > > I guess it would make sense to have an attribute for user space to
> > > > write to in order to make the kernel reject device plug-in events
> > > > coming from a given port or connector, but the kernel has no reliable
> > > > means to determine *which* ports or connectors are "safe", and even if
> > > > there was a way for it to do that, it still may not agree with user
> > > > space on which ports or connectors should be regarded as "safe".
> > >
> > > Again, we have been doing this for USB devices for a very long time, PCI
> > > shouldn't be any different.  Why people keep ignoring working solutions
> > > is beyond me, there's nothing "special" about PCI devices here for this
> > > type of "worry" or reasoning to try to create new solutions.
> > >
> > > So, again, I ask, go do what USB does, and to do that, take the logic
> > > > out of the USB core, make it bus-agnostic, and _THEN_ add it to the PCI
> > > code. Why the original submitter keeps ignoring my request to do this
> > > is beyond me, I guess they like making patches that will get rejected :(
> >
> > IMHO I'm actually trying to precisely do what I think was the
> > conclusion of our discussion, and then some changes because of the
> > further feedback I received on those patches. Let's take a step back
> > and please allow me to explain how I got here (my apologies but this
> > > spans a couple of threads, and I'm trying to tie them all together
> > here):
> 
> The previous thread had some suggestions, but no real conclusions.
> That's probably why we're still arguing about it...
> 
> > GOAL: To allow user space to control what (PCI) drivers he wants to
> > allow on external (thunderbolt) ports. There was a lot of debate about
> > the need for such a policy at
> > https://lore.kernel.org/linux-pci/CACK8Z6GR7-wseug=TtVyRarVZX_ao2geoLDNBwjtB+5Y7VWNEQ@mail.gmail.com/
> > with the final conclusion that it should be OK to implement such a
> > policy in userspace, as long as the policy is not implemented in the
> > kernel. The kernel only needs to expose bits & info that is needed by
> > the userspace to implement such a policy, and it can be used in
> > conjunction with "drivers_autoprobe" to implement this policy:
> > --------------------------------------------------------------------
> > ....
> > That's an odd thing, but sure, if you want to write up such a policy for
> > your systems, great.  But that policy does not belong in the kernel, it
> > belongs in userspace.
> > ....
> > --------------------------------------------------------------------
> > 1) The post https://lore.kernel.org/linux-pci/20200609210400.GA1461839@bjorn-Precision-5520/
> > lists out the approach that was agreed on. Replicating it here:
> > -----------------------------------------------------------------------
> >   - Expose the PCI pdev->untrusted bit in sysfs.  We don't expose this
> >     today, but doing so would be trivial.  I think I would prefer a
> >     sysfs name like "external" so it's more descriptive and less of a
> >     judgment.
> >
> >     This comes from either the DT "external-facing" property or the
> >     ACPI "ExternalFacingPort" property.
> >
> >   - All devices present at boot are enumerated.  Any statically built
> >     drivers will bind to them before any userspace code runs.
> >
> >     If you want to keep statically built drivers from binding, you'd
> >     need to invent some mechanism so pci_driver_init() could clear
> >     drivers_autoprobe after registering pci_bus_type.
> >
> >   - Early userspace code prevents modular drivers from automatically
> >     binding to PCI devices:
> >
> >       echo 0 > /sys/bus/pci/drivers_autoprobe
> >
> >     This prevents modular drivers from binding to all devices, whether
> >     present at boot or hot-added.
> >
> >   - Userspace code uses the sysfs "bind" file to control which drivers
> >     are loaded and can bind to each device, e.g.,
> >
> >       echo 0000:02:00.0 > /sys/bus/pci/drivers/nvme/bind
> 
> I think this is a reasonable suggestion. However, as Greg pointed out
> it's gratuitously different to what USB does for no real reason.

Agreed.

> > -----------------------------------------------------------------------
> > 2) As part of implementing the above agreed approach, when I exposed
> > PCI "untrusted" attribute to userspace, it ran into discussion that
> > concluded that instead of this, the device core should be enhanced
> > with a location attribute.
> > https://lore.kernel.org/linux-pci/20200618184621.GA446639@kroah.com/
> > -----------------------------------------------------------------------
> > ...
> > The attribute should be called something like "location" or something
> > like that (naming is hard), as you don't always know if something is
> > external or not (it could be internal, it could be unknown, it could be
> > internal to an external device that you trust (think PCI drawers for
> > "super" computers that are hot pluggable but yet really part of the
> > internal bus).
> > ....
> > "trust" has no direct relation to the location, except in a policy of
> > what you wish to do with that device, so as long as you keep them
> > separate that way, I am fine with it.
> > ...
> > -----------------------------------------------------------------------
> >
> > And hence this patch. I don't see an attribute in USB comparable to
> > this new attribute, except for the boolean "removable" may be. Are you
> > suggesting to pull that into the device core instead of adding this
> > "physical_location" attribute?
> 
> He's suggesting you pull the "authorized" attribute into the driver
> core. That's the mechanism USB uses to block drivers binding unless
> userspace authorizes them. I don't see any reason why we can't re-use
> that sysfs interface for PCI devices since the problem being solved is
> fundamentally the same. The main question is what we should do as a
> default policy in the kernel. For USB the default comes from the
> "authorized_default" module param of usbcore:
> 
> > /* authorized_default behaviour:
> >  * -1 is authorized for all devices except wireless (old behaviour)
> >  * 0 is unauthorized for all devices
> >  * 1 is authorized for all devices
> >  * 2 is authorized for internal devices
> >  */
> > #define USB_AUTHORIZE_WIRED   -1
> > #define USB_AUTHORIZE_NONE    0
> > #define USB_AUTHORIZE_ALL     1
> > #define USB_AUTHORIZE_INTERNAL        2
> >
> > static int authorized_default = USB_AUTHORIZE_WIRED;
> > module_param(authorized_default, int, S_IRUGO|S_IWUSR);
> 
> So the default policy for USB is to authorize any wired USB device and
> we can optionally restrict that to just integrated devices. Sounding
> familiar?

Thank you, that is what I have been trying to get across here, obviously
I didn't do a good job.  :)

Thanks for the summary.

> The internal / external status is still useful to know so we might
> want to make a sysfs attribute for that too. However, I'd like to
> point out that internal / external isn't the whole story. As I
> mentioned in the last thread if I have a BMC device I *really* don't
> want it to be authorized by default even though it's an internal
> device. Similarly, if I know all my internal cards support PCIe
> Component Authentication then I might choose not to trust any PCI
> devices unless they authenticate successfully.

Agreed.

> > 3) The one deviation from the agreed approach in (1) is
> > https://patchwork.kernel.org/patch/11633095/ . The reason is I
> > realized that contrary to what I earlier believed, we might not be
> > able to disable the PCI link to all external PCI devices at boot. So
> > external PCI devices may actually bind to drivers before userspace
> > comes up and does "echo 0 > /sys/bus/pci/drivers_autoprobe").
> 
> Yep, that's a problem. If we want to provide a useful mechanism to
> userspace then the default behaviour of the kernel can't undermine
> that mechanism. If that means we need another kernel command line
> parameter then I guess we just have to live with it.

I really do not want yet-another-kernel-command-line-option if we can
help it at all.  Sane defaults are the best thing to do here.  Userspace
comes up really early, put your policy in there, not in blobs passed
from your bootloader.

thanks,

greg k-h
Oliver O'Halloran July 2, 2020, 8:40 a.m. UTC | #12
On Thu, 2020-07-02 at 09:32 +0200, Greg Kroah-Hartman wrote:
> On Thu, Jul 02, 2020 at 03:23:23PM +1000, Oliver O'Halloran wrote:
> > Yep, that's a problem. If we want to provide a useful mechanism to
> > userspace then the default behaviour of the kernel can't undermine
> > that mechanism. If that means we need another kernel command line
> > parameter then I guess we just have to live with it.
> 
> I really do not want yet-another-kernel-command-line-option if we can
> help it at all.  Sane defaults are the best thing to do here.  Userspace
> comes up really early, put your policy in there, not in blobs passed
> from your bootloader.

Userspace comes up early, but builtin drivers will bind before init is
started. e.g.

# dmesg | egrep '0002:01:00.0|/init'
[    0.976800][    T1] pci 0002:01:00.0: [8086:1589] type 00 class 0x020000
[    0.976923][    T1] pci 0002:01:00.0: reg 0x10: [mem 0x220000000000-0x2200007fffff 64bit pref]
[    0.977004][    T1] pci 0002:01:00.0: reg 0x1c: [mem 0x220002000000-0x220002007fff 64bit pref]
[    0.977068][    T1] pci 0002:01:00.0: reg 0x30: [mem 0x00000000-0x0007ffff pref]
[    0.977122][    T1] pci 0002:01:00.0: BAR3 [mem size 0x00008000 64bit pref]: requesting alignment to 0x10000
[    0.977401][    T1] pci 0002:01:00.0: PME# supported from D0 D3hot
[    1.011929][    T1] pci 0002:01:00.0: BAR 0: assigned [mem 0x220000000000-0x2200007fffff 64bit pref]
[    1.012085][    T1] pci 0002:01:00.0: BAR 6: assigned [mem 0x3fe100000000-0x3fe10007ffff pref]
[    1.012127][    T1] pci 0002:01:00.0: BAR 3: assigned [mem 0x220002000000-0x220002007fff 64bit pref]
[    4.399588][   T12] i40e 0002:01:00.0: enabling device (0140 -> 0142)
[    4.410891][   T12] i40e 0002:01:00.0: fw 5.1.40981 api 1.5 nvm 5.03 0x80002469 1.1313.0 [8086:1589] [15d9:0000]
[    4.647524][   T12] i40e 0002:01:00.0: MAC address: 0c:c4:7a:b7:fc:74
[    4.647685][   T12] i40e 0002:01:00.0: FW LLDP is enabled
[    4.653918][   T12] i40e 0002:01:00.0 eth0: NIC Link is Up, 1000 Mbps Full Duplex, Flow Control: None
[    4.655552][   T12] i40e 0002:01:00.0: PCI-Express: Speed 8.0GT/s Width x8
[    4.656071][   T12] i40e 0002:01:00.0: Features: PF-id[0] VSIs: 34 QP: 80 RSS FD_ATR FD_SB NTUPLE VxLAN Geneve PTP VEPA
[   13.803709][    T1] Run /init as init process
[   13.963242][  T711] i40e 0002:01:00.0 enP2p1s0f0: renamed from eth0

Building everything into the kernel is admittedly pretty niche. I only
do it to avoid re-building the initramfs for my test kernels. It does
seem relatively common on embedded systems, but I'm not sure how many
of those care about PCIe. It would be nice to provide *something* to
cover that case for the people who care.

Oliver
Greg KH July 2, 2020, 8:52 a.m. UTC | #13
On Thu, Jul 02, 2020 at 06:40:09PM +1000, Oliver O'Halloran wrote:
> On Thu, 2020-07-02 at 09:32 +0200, Greg Kroah-Hartman wrote:
> > On Thu, Jul 02, 2020 at 03:23:23PM +1000, Oliver O'Halloran wrote:
> > > Yep, that's a problem. If we want to provide a useful mechanism to
> > > userspace then the default behaviour of the kernel can't undermine
> > > that mechanism. If that means we need another kernel command line
> > > parameter then I guess we just have to live with it.
> > 
> > I really do not want yet-another-kernel-command-line-option if we can
> > help it at all.  Sane defaults are the best thing to do here.  Userspace
> > comes up really early, put your policy in there, not in blobs passed
> > from your bootloader.
> 
> Userspace comes up early, but builtin drivers will bind before init is
> started. e.g.
> 
> # dmesg | egrep '0002:01:00.0|/init'
> [    0.976800][    T1] pci 0002:01:00.0: [8086:1589] type 00 class 0x020000
> [    0.976923][    T1] pci 0002:01:00.0: reg 0x10: [mem 0x220000000000-0x2200007fffff 64bit pref]
> [    0.977004][    T1] pci 0002:01:00.0: reg 0x1c: [mem 0x220002000000-0x220002007fff 64bit pref]
> [    0.977068][    T1] pci 0002:01:00.0: reg 0x30: [mem 0x00000000-0x0007ffff pref]
> [    0.977122][    T1] pci 0002:01:00.0: BAR3 [mem size 0x00008000 64bit pref]: requesting alignment to 0x10000
> [    0.977401][    T1] pci 0002:01:00.0: PME# supported from D0 D3hot
> [    1.011929][    T1] pci 0002:01:00.0: BAR 0: assigned [mem 0x220000000000-0x2200007fffff 64bit pref]
> [    1.012085][    T1] pci 0002:01:00.0: BAR 6: assigned [mem 0x3fe100000000-0x3fe10007ffff pref]
> [    1.012127][    T1] pci 0002:01:00.0: BAR 3: assigned [mem 0x220002000000-0x220002007fff 64bit pref]
> [    4.399588][   T12] i40e 0002:01:00.0: enabling device (0140 -> 0142)
> [    4.410891][   T12] i40e 0002:01:00.0: fw 5.1.40981 api 1.5 nvm 5.03 0x80002469 1.1313.0 [8086:1589] [15d9:0000]
> [    4.647524][   T12] i40e 0002:01:00.0: MAC address: 0c:c4:7a:b7:fc:74
> [    4.647685][   T12] i40e 0002:01:00.0: FW LLDP is enabled
> [    4.653918][   T12] i40e 0002:01:00.0 eth0: NIC Link is Up, 1000 Mbps Full Duplex, Flow Control: None
> [    4.655552][   T12] i40e 0002:01:00.0: PCI-Express: Speed 8.0GT/s Width x8
> [    4.656071][   T12] i40e 0002:01:00.0: Features: PF-id[0] VSIs: 34 QP: 80 RSS FD_ATR FD_SB NTUPLE VxLAN Geneve PTP VEPA
> [   13.803709][    T1] Run /init as init process
> [   13.963242][  T711] i40e 0002:01:00.0 enP2p1s0f0: renamed from eth0
> 
> Building everything into the kernel is admittedly pretty niche. I only
> do it to avoid re-building the initramfs for my test kernels. It does
> seem relatively common on embedded systems, but I'm not sure how many
> of those care about PCIe. It would be nice to provide *something* to
> cover that case for the people who care.

Those people who care should not build those drivers into their kernel :)
Greg KH July 2, 2020, 8:53 a.m. UTC | #14
On Thu, Jul 02, 2020 at 10:52:12AM +0200, Greg Kroah-Hartman wrote:
> On Thu, Jul 02, 2020 at 06:40:09PM +1000, Oliver O'Halloran wrote:
> > On Thu, 2020-07-02 at 09:32 +0200, Greg Kroah-Hartman wrote:
> > > On Thu, Jul 02, 2020 at 03:23:23PM +1000, Oliver O'Halloran wrote:
> > > > Yep, that's a problem. If we want to provide a useful mechanism to
> > > > userspace then the default behaviour of the kernel can't undermine
> > > > that mechanism. If that means we need another kernel command line
> > > > parameter then I guess we just have to live with it.
> > > 
> > > I really do not want yet-another-kernel-command-line-option if we can
> > > help it at all.  Sane defaults are the best thing to do here.  Userspace
> > > comes up really early, put your policy in there, not in blobs passed
> > > from your bootloader.
> > 
> > Userspace comes up early, but builtin drivers will bind before init is
> > started. e.g.
> > 
> > # dmesg | egrep '0002:01:00.0|/init'
> > [    0.976800][    T1] pci 0002:01:00.0: [8086:1589] type 00 class 0x020000
> > [    0.976923][    T1] pci 0002:01:00.0: reg 0x10: [mem 0x220000000000-0x2200007fffff 64bit pref]
> > [    0.977004][    T1] pci 0002:01:00.0: reg 0x1c: [mem 0x220002000000-0x220002007fff 64bit pref]
> > [    0.977068][    T1] pci 0002:01:00.0: reg 0x30: [mem 0x00000000-0x0007ffff pref]
> > [    0.977122][    T1] pci 0002:01:00.0: BAR3 [mem size 0x00008000 64bit pref]: requesting alignment to 0x10000
> > [    0.977401][    T1] pci 0002:01:00.0: PME# supported from D0 D3hot
> > [    1.011929][    T1] pci 0002:01:00.0: BAR 0: assigned [mem 0x220000000000-0x2200007fffff 64bit pref]
> > [    1.012085][    T1] pci 0002:01:00.0: BAR 6: assigned [mem 0x3fe100000000-0x3fe10007ffff pref]
> > [    1.012127][    T1] pci 0002:01:00.0: BAR 3: assigned [mem 0x220002000000-0x220002007fff 64bit pref]
> > [    4.399588][   T12] i40e 0002:01:00.0: enabling device (0140 -> 0142)
> > [    4.410891][   T12] i40e 0002:01:00.0: fw 5.1.40981 api 1.5 nvm 5.03 0x80002469 1.1313.0 [8086:1589] [15d9:0000]
> > [    4.647524][   T12] i40e 0002:01:00.0: MAC address: 0c:c4:7a:b7:fc:74
> > [    4.647685][   T12] i40e 0002:01:00.0: FW LLDP is enabled
> > [    4.653918][   T12] i40e 0002:01:00.0 eth0: NIC Link is Up, 1000 Mbps Full Duplex, Flow Control: None
> > [    4.655552][   T12] i40e 0002:01:00.0: PCI-Express: Speed 8.0GT/s Width x8
> > [    4.656071][   T12] i40e 0002:01:00.0: Features: PF-id[0] VSIs: 34 QP: 80 RSS FD_ATR FD_SB NTUPLE VxLAN Geneve PTP VEPA
> > [   13.803709][    T1] Run /init as init process
> > [   13.963242][  T711] i40e 0002:01:00.0 enP2p1s0f0: renamed from eth0
> > 
> > Building everything into the kernel is admittedly pretty niche. I only
> > do it to avoid re-building the initramfs for my test kernels. It does
> > seem relatively common on embedded systems, but I'm not sure how many
> > of those care about PCIe. It would be nice to provide *something* to
> > cover that case for the people who care.
> 
> Those people who care should not build those drivers into their kernel :)

That being said, that is the _last_ thing to worry about in this type of
patchset; lots of work needs to be done before we can care about this.
In fact, that should just be a totally separate patch after all of the
real work is done here first.

thanks,

greg k-h
Rajat Jain July 7, 2020, 6:03 a.m. UTC | #15
On Wed, Jul 1, 2020 at 10:23 PM Oliver O'Halloran <oohall@gmail.com> wrote:
>
> On Thu, Jul 2, 2020 at 4:07 AM Rajat Jain <rajatja@google.com> wrote:
> >
> > *snip*
> >
> > > > I guess it would make sense to have an attribute for user space to
> > > > write to in order to make the kernel reject device plug-in events
> > > > coming from a given port or connector, but the kernel has no reliable
> > > > means to determine *which* ports or connectors are "safe", and even if
> > > > there was a way for it to do that, it still may not agree with user
> > > > space on which ports or connectors should be regarded as "safe".
> > >
> > > Again, we have been doing this for USB devices for a very long time, PCI
> > > shouldn't be any different.  Why people keep ignoring working solutions
> > > is beyond me, there's nothing "special" about PCI devices here for this
> > > type of "worry" or reasoning to try to create new solutions.
> > >
> > > So, again, I ask, go do what USB does, and to do that, take the logic
> > > > out of the USB core, make it bus-agnostic, and _THEN_ add it to the PCI
> > > code. Why the original submitter keeps ignoring my request to do this
> > > is beyond me, I guess they like making patches that will get rejected :(
> >
> > IMHO I'm actually trying to precisely do what I think was the
> > conclusion of our discussion, and then some changes because of the
> > further feedback I received on those patches. Let's take a step back
> > and please allow me to explain how I got here (my apologies but this
> > > spans a couple of threads, and I'm trying to tie them all together
> > here):
>
> The previous thread had some suggestions, but no real conclusions.
> That's probably why we're still arguing about it...
>
> > GOAL: To allow user space to control what (PCI) drivers he wants to
> > allow on external (thunderbolt) ports. There was a lot of debate about
> > the need for such a policy at
> > https://lore.kernel.org/linux-pci/CACK8Z6GR7-wseug=TtVyRarVZX_ao2geoLDNBwjtB+5Y7VWNEQ@mail.gmail.com/
> > with the final conclusion that it should be OK to implement such a
> > policy in userspace, as long as the policy is not implemented in the
> > kernel. The kernel only needs to expose bits & info that is needed by
> > the userspace to implement such a policy, and it can be used in
> > conjunction with "drivers_autoprobe" to implement this policy:
> > --------------------------------------------------------------------
> > ....
> > That's an odd thing, but sure, if you want to write up such a policy for
> > your systems, great.  But that policy does not belong in the kernel, it
> > belongs in userspace.
> > ....
> > --------------------------------------------------------------------
> > 1) The post https://lore.kernel.org/linux-pci/20200609210400.GA1461839@bjorn-Precision-5520/
> > lists out the approach that was agreed on. Replicating it here:
> > -----------------------------------------------------------------------
> >   - Expose the PCI pdev->untrusted bit in sysfs.  We don't expose this
> >     today, but doing so would be trivial.  I think I would prefer a
> >     sysfs name like "external" so it's more descriptive and less of a
> >     judgment.
> >
> >     This comes from either the DT "external-facing" property or the
> >     ACPI "ExternalFacingPort" property.
> >
> >   - All devices present at boot are enumerated.  Any statically built
> >     drivers will bind to them before any userspace code runs.
> >
> >     If you want to keep statically built drivers from binding, you'd
> >     need to invent some mechanism so pci_driver_init() could clear
> >     drivers_autoprobe after registering pci_bus_type.
> >
> >   - Early userspace code prevents modular drivers from automatically
> >     binding to PCI devices:
> >
> >       echo 0 > /sys/bus/pci/drivers_autoprobe
> >
> >     This prevents modular drivers from binding to all devices, whether
> >     present at boot or hot-added.
> >
> >   - Userspace code uses the sysfs "bind" file to control which drivers
> >     are loaded and can bind to each device, e.g.,
> >
> >       echo 0000:02:00.0 > /sys/bus/pci/drivers/nvme/bind
>
> I think this is a reasonable suggestion. However, as Greg pointed out
> it's gratuitously different to what USB does for no real reason.
>
> > -----------------------------------------------------------------------
> > 2) As part of implementing the above agreed approach, when I exposed
> > PCI "untrusted" attribute to userspace, it ran into discussion that
> > concluded that instead of this, the device core should be enhanced
> > with a location attribute.
> > https://lore.kernel.org/linux-pci/20200618184621.GA446639@kroah.com/
> > -----------------------------------------------------------------------
> > ...
> > The attribute should be called something like "location" or something
> > like that (naming is hard), as you don't always know if something is
> > external or not (it could be internal, it could be unknown, it could be
> > internal to an external device that you trust (think PCI drawers for
> > "super" computers that are hot pluggable but yet really part of the
> > internal bus).
> > ....
> > "trust" has no direct relation to the location, except in a policy of
> > what you wish to do with that device, so as long as you keep them
> > separate that way, I am fine with it.
> > ...
> > -----------------------------------------------------------------------
> >
> > And hence this patch. I don't see an attribute in USB comparable to
> > this new attribute, except for the boolean "removable" may be. Are you
> > suggesting to pull that into the device core instead of adding this
> > "physical_location" attribute?
>
> He's suggesting you pull the "authorized" attribute into the driver
> core. That's the mechanism USB uses to block drivers binding unless
> userspace authorizes them. I don't see any reason why we can't re-use
> that sysfs interface for PCI devices since the problem being solved is
> fundamentally the same. The main question is what we should do as a
> default policy in the kernel. For USB the default comes from the
> "authorized_default" module param of usbcore:
>
> > /* authorized_default behaviour:
> >  * -1 is authorized for all devices except wireless (old behaviour)
> >  * 0 is unauthorized for all devices
> >  * 1 is authorized for all devices
> >  * 2 is authorized for internal devices
> >  */
> > #define USB_AUTHORIZE_WIRED   -1
> > #define USB_AUTHORIZE_NONE    0
> > #define USB_AUTHORIZE_ALL     1
> > #define USB_AUTHORIZE_INTERNAL        2
> >
> > static int authorized_default = USB_AUTHORIZE_WIRED;
> > module_param(authorized_default, int, S_IRUGO|S_IWUSR);
>
> So the default policy for USB is to authorize any wired USB device and
> we can optionally restrict that to just integrated devices. Sounding
> familiar?

Thank you for explaining! It is a lot more clear now :-)

I have separated out the PCI portions of this patchset (patches 1-4
i.e. ones not related to this controversial change) into its own
patchset. W.r.t patches 5-7, I think I'd like to collect my thoughts
and send out a fresh RFC once I am ready (I'm running out of time on
my deliverables so may have to carry some patches internally for the
time being). But 2 quick points:

1) Currently there are already at least 2 existing buses with their
own versions of "authorized": usb and thunderbolt, and the UAPI /
semantics of "authorized" differ between them.

Documentation/ABI/testing/sysfs-bus-thunderbolt - "authorized" is boolean
Documentation/usb/authorization.rst  - "authorized" is 0/1/2

(Side note: usb also has additional "authorized"-related attributes,
e.g. interface_authorized_default, which might not have a sensible
equivalent on other buses, so we may still have to leave those in
USB.)

So my question is, assuming we do not want to change or break existing
UAPI, if I move the "authorized" attribute to the device core, who
defines the semantics of the values it can take? It seems to me like
individual buses should define that. And if so, then device core
cannot use "authorized" value to decide to prevent drivers from
binding to it?

2) It seemed to me
(https://lore.kernel.org/linux-acpi/20200618184621.GA446639@kroah.com/)
that we had at least some agreement that the location of a device
is a useful piece of info for userspace to have. The point I'm
trying to make is that "exporting the location of device in sysfs"
seems independent of "move untrusted attribute to the device core".
Like you said below, the location of a device is still useful (though it
may not be sufficient, e.g. in the BMC case you mention) for userspace
to have, in order to decide whether to allow a device.  So why object to
this patch?

Thanks,

Rajat



>
> The internal / external status is still useful to know so we might
> want to make a sysfs attribute for that too. However, I'd like to
> point out that internal / external isn't the whole story. As I
> mentioned in the last thread if I have a BMC device I *really* don't
> want it to be authorized by default even though it's an internal
> device. Similarly, if I know all my internal cards support PCIe
> Component Authentication then I might choose not to trust any PCI
> devices unless they authenticate successfully.
>
> > 3) The one deviation from the agreed approach in (1) is
> > https://patchwork.kernel.org/patch/11633095/ . The reason is I
> > realized that contrary to what I earlier believed, we might not be
> > able to disable the PCI link to all external PCI devices at boot. So
> > external PCI devices may actually bind to drivers before userspace
> > comes up and does "echo 0 > /sys/bus/pci/drivers_autoprobe".
>
> Yep, that's a problem. If we want to provide a useful mechanism to
> userspace then the default behaviour of the kernel can't undermine
> that mechanism. If that means we need another kernel command line
> parameter then I guess we just have to live with it.
>
> Oliver
diff mbox series

Patch

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 67d39a90b45c7..14c815526b7fa 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1778,6 +1778,32 @@  static ssize_t online_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(online);
 
+static ssize_t site_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	const char *site;
+
+	device_lock(dev);
+	switch (dev->site) {
+	case SITE_INTERNAL:
+		site = "INTERNAL";
+		break;
+	case SITE_EXTENDED:
+		site = "EXTENDED";
+		break;
+	case SITE_EXTERNAL:
+		site = "EXTERNAL";
+		break;
+	case SITE_UNKNOWN:
+	default:
+		site = "UNKNOWN";
+		break;
+	}
+	device_unlock(dev);
+	return sprintf(buf, "%s\n", site);
+}
+static DEVICE_ATTR_RO(site);
+
 int device_add_groups(struct device *dev, const struct attribute_group **groups)
 {
 	return sysfs_create_groups(&dev->kobj, groups);
@@ -1949,8 +1975,16 @@  static int device_add_attrs(struct device *dev)
 			goto err_remove_dev_groups;
 	}
 
+	if (bus_supports_site(dev->bus)) {
+		error = device_create_file(dev, &dev_attr_site);
+		if (error)
+			goto err_remove_dev_attr_online;
+	}
+
 	return 0;
 
+ err_remove_dev_attr_online:
+	device_remove_file(dev, &dev_attr_online);
  err_remove_dev_groups:
 	device_remove_groups(dev, dev->groups);
  err_remove_type_groups:
@@ -1968,6 +2002,7 @@  static void device_remove_attrs(struct device *dev)
 	struct class *class = dev->class;
 	const struct device_type *type = dev->type;
 
+	device_remove_file(dev, &dev_attr_site);
 	device_remove_file(dev, &dev_attr_online);
 	device_remove_groups(dev, dev->groups);
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 15460a5ac024a..a4143735ae712 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -428,6 +428,31 @@  enum dl_dev_state {
 	DL_DEV_UNBINDING,
 };
 
+/**
+ * enum device_site - Physical location of the device in the system.
+ * The semantics of values depend on subsystem / bus:
+ *
+ * @SITE_UNKNOWN:  Location is Unknown (default)
+ *
+ * @SITE_INTERNAL: Device is internal to the system, and cannot be (easily)
+ *                 removed. E.g. SoC internal devices, onboard soldered
+ *                 devices, internal M.2 cards (that cannot be removed
+ *                 without opening the chassis).
+ * @SITE_EXTENDED: Device sits on an extension of the system. E.g. devices
+ *                 on external PCIe trays, docking stations etc. These
+ *                 devices may be removable, but are generally housed
+ *                 internally on an extension board, so they are removed
+ *                 only when that whole extension board is removed.
+ * @SITE_EXTERNAL: Devices truly external to the system (i.e. plugged on
+ *                 an external port) that may be removed or added frequently.
+ */
+enum device_site {
+	SITE_UNKNOWN = 0,
+	SITE_INTERNAL,
+	SITE_EXTENDED,
+	SITE_EXTERNAL,
+};
+
 /**
  * struct dev_links_info - Device data related to device links.
  * @suppliers: List of links to supplier devices.
@@ -513,6 +538,7 @@  struct dev_links_info {
  * 		device (i.e. the bus driver that discovered the device).
  * @iommu_group: IOMMU group the device belongs to.
  * @iommu:	Per device generic IOMMU runtime data
+ * @site:	Physical location of the device w.r.t. the system
  *
  * @offline_disabled: If set, the device is permanently online.
  * @offline:	Set after successful invocation of bus type's .offline().
@@ -613,6 +639,8 @@  struct device {
 	struct iommu_group	*iommu_group;
 	struct dev_iommu	*iommu;
 
+	enum device_site	site;	/* Device physical location */
+
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
@@ -806,6 +834,20 @@  static inline bool dev_has_sync_state(struct device *dev)
 	return false;
 }
 
+static inline int dev_set_site(struct device *dev, enum device_site site)
+{
+	if (site < SITE_UNKNOWN || site > SITE_EXTERNAL)
+		return -EINVAL;
+
+	dev->site = site;
+	return 0;
+}
+
+static inline bool dev_is_external(struct device *dev)
+{
+	return dev->site == SITE_EXTERNAL;
+}
+
 /*
  * High level routines for use by the bus drivers
  */
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 1ea5e1d1545bd..e1079772e45af 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -69,6 +69,8 @@  struct fwnode_handle;
  * @lock_key:	Lock class key for use by the lock validator
  * @need_parent_lock:	When probing or removing a device on this bus, the
  *			device core should lock the device's parent.
+ * @supports_site:	Bus can differentiate between internal/external devices
+ *			and thus supports the device "site" attribute.
  *
  * A bus is a channel between the processor and one or more devices. For the
  * purposes of the device model, all devices are connected via a bus, even if
@@ -112,6 +114,7 @@  struct bus_type {
 	struct lock_class_key lock_key;
 
 	bool need_parent_lock;
+	bool supports_site;
 };
 
 extern int __must_check bus_register(struct bus_type *bus);
@@ -246,6 +249,11 @@  bus_find_device_by_acpi_dev(struct bus_type *bus, const void *adev)
 }
 #endif
 
+static inline bool bus_supports_site(struct bus_type *bus)
+{
+	return bus && bus->supports_site;
+}
+
 struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
 					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,