diff mbox series

[v2,1/6] pci: implement power state

Message ID 20211111130859.1171890-2-kraxel@redhat.com (mailing list archive)
State New, archived
Headers show
Series try improve native hotplug for pcie root ports | expand

Commit Message

Gerd Hoffmann Nov. 11, 2021, 1:08 p.m. UTC
This allows to power off pci devices.  In "off" state the devices will
not be visible.  No pci config space access, no pci bar access, no dma.

Default state is "on", so this patch (alone) should not change behavior.

Use case:  Allows hotplug controllers implement slot power.  Hotplug
controllers doing so should set the inital power state for devices in
the ->plug callback.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/hw/pci/pci.h |  2 ++
 hw/pci/pci.c         | 25 +++++++++++++++++++++++--
 hw/pci/pci_host.c    |  6 ++++--
 3 files changed, 29 insertions(+), 4 deletions(-)

Comments

Michael S. Tsirkin Nov. 15, 2021, 2:46 p.m. UTC | #1
On Thu, Nov 11, 2021 at 02:08:54PM +0100, Gerd Hoffmann wrote:
> This allows to power off pci devices.  In "off" state the devices will
> not be visible.  No pci config space access, no pci bar access, no dma.
> 
> Default state is "on", so this patch (alone) should not change behavior.
> 
> Use case:  Allows hotplug controllers implement slot power.  Hotplug
> controllers doing so should set the inital power state for devices in
> the ->plug callback.
> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  include/hw/pci/pci.h |  2 ++
>  hw/pci/pci.c         | 25 +++++++++++++++++++++++--
>  hw/pci/pci_host.c    |  6 ++++--
>  3 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 5c4016b9954a..e7cdf2d5ec5d 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
>  struct PCIDevice {
>      DeviceState qdev;
>      bool partially_hotplugged;
> +    bool has_power;
>  
>      /* PCI config space */
>      uint8_t *config;
> @@ -908,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device;
>  }
>  
>  MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
> +void pci_set_power(PCIDevice *pci_dev, bool state);
>  
>  #endif
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 4a84e478cef5..e5993c1ef52b 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d)
>              continue;
>  
>          new_addr = pci_bar_address(d, i, r->type, r->size);
> +        if (!d->has_power) {
> +            new_addr = PCI_BAR_UNMAPPED;
> +        }
>  
>          /* This bar isn't changed */
>          if (new_addr == r->addr)

I am a bit confused about why this is necessary.
When power is removed device is reset, does not
this disable device memory automatically?


> @@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
>      if (range_covers_byte(addr, l, PCI_COMMAND)) {
>          pci_update_irq_disabled(d, was_irq_disabled);
>          memory_region_set_enabled(&d->bus_master_enable_region,
> -                                  pci_get_word(d->config + PCI_COMMAND)
> -                                    & PCI_COMMAND_MASTER);
> +                                  (pci_get_word(d->config + PCI_COMMAND)
> +                                   & PCI_COMMAND_MASTER) && d->has_power);
>      }
>  
>      msi_write_config(d, addr, val_in, l);
> @@ -2182,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>          pci_qdev_unrealize(DEVICE(pci_dev));
>          return;
>      }
> +
> +    pci_set_power(pci_dev, true);
>  }
>  
>  PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
> @@ -2853,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
>      return msg;
>  }
>  
> +void pci_set_power(PCIDevice *d, bool state)
> +{
> +    if (d->has_power == state) {
> +        return;
> +    }
> +
> +    d->has_power = state;
> +    pci_update_mappings(d);
> +    memory_region_set_enabled(&d->bus_master_enable_region,
> +                              (pci_get_word(d->config + PCI_COMMAND)
> +                               & PCI_COMMAND_MASTER) && d->has_power);
> +    if (!d->has_power) {
> +        pci_device_reset(d);
> +    }
> +}
> +
>  static const TypeInfo pci_device_type_info = {
>      .name = TYPE_PCI_DEVICE,
>      .parent = TYPE_DEVICE,
> diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
> index cf02f0d6a501..7beafd40a8e9 100644
> --- a/hw/pci/pci_host.c
> +++ b/hw/pci/pci_host.c
> @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
>      /* non-zero functions are only exposed when function 0 is present,
>       * allowing direct removal of unexposed functions.
>       */
> -    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
> +    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
> +        !pci_dev->has_power) {
>          return;
>      }
>  
> @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
>      /* non-zero functions are only exposed when function 0 is present,
>       * allowing direct removal of unexposed functions.
>       */
> -    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
> +    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
> +        !pci_dev->has_power) {
>          return ~0x0;
>      }
>  
> -- 
> 2.33.1
Michael S. Tsirkin Nov. 15, 2021, 2:52 p.m. UTC | #2
On Thu, Nov 11, 2021 at 02:08:54PM +0100, Gerd Hoffmann wrote:
> This allows to power off pci devices.  In "off" state the devices will
> not be visible.  No pci config space access, no pci bar access, no dma.
> 
> Default state is "on", so this patch (alone) should not change behavior.
> 
> Use case:  Allows hotplug controllers implement slot power.  Hotplug
> controllers doing so should set the inital power state for devices in
> the ->plug callback.
> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  include/hw/pci/pci.h |  2 ++
>  hw/pci/pci.c         | 25 +++++++++++++++++++++++--
>  hw/pci/pci_host.c    |  6 ++++--
>  3 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 5c4016b9954a..e7cdf2d5ec5d 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
>  struct PCIDevice {
>      DeviceState qdev;
>      bool partially_hotplugged;
> +    bool has_power;
>  
>      /* PCI config space */
>      uint8_t *config;
> @@ -908,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device;
>  }
>  
>  MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
> +void pci_set_power(PCIDevice *pci_dev, bool state);
>  
>  #endif
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 4a84e478cef5..e5993c1ef52b 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d)
>              continue;
>  
>          new_addr = pci_bar_address(d, i, r->type, r->size);
> +        if (!d->has_power) {
> +            new_addr = PCI_BAR_UNMAPPED;
> +        }
>  
>          /* This bar isn't changed */
>          if (new_addr == r->addr)
> @@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
>      if (range_covers_byte(addr, l, PCI_COMMAND)) {
>          pci_update_irq_disabled(d, was_irq_disabled);
>          memory_region_set_enabled(&d->bus_master_enable_region,
> -                                  pci_get_word(d->config + PCI_COMMAND)
> -                                    & PCI_COMMAND_MASTER);
> +                                  (pci_get_word(d->config + PCI_COMMAND)
> +                                   & PCI_COMMAND_MASTER) && d->has_power);
>      }
>  
>      msi_write_config(d, addr, val_in, l);

Here, too - reset will clear bus master enable. Why do we need to
special-case has_power?


> @@ -2182,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>          pci_qdev_unrealize(DEVICE(pci_dev));
>          return;
>      }
> +
> +    pci_set_power(pci_dev, true);
>  }
>  
>  PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
> @@ -2853,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
>      return msg;
>  }
>  
> +void pci_set_power(PCIDevice *d, bool state)
> +{
> +    if (d->has_power == state) {
> +        return;
> +    }
> +
> +    d->has_power = state;
> +    pci_update_mappings(d);
> +    memory_region_set_enabled(&d->bus_master_enable_region,
> +                              (pci_get_word(d->config + PCI_COMMAND)
> +                               & PCI_COMMAND_MASTER) && d->has_power);
> +    if (!d->has_power) {
> +        pci_device_reset(d);
> +    }
> +}
> +
>  static const TypeInfo pci_device_type_info = {
>      .name = TYPE_PCI_DEVICE,
>      .parent = TYPE_DEVICE,
> diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
> index cf02f0d6a501..7beafd40a8e9 100644
> --- a/hw/pci/pci_host.c
> +++ b/hw/pci/pci_host.c
> @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
>      /* non-zero functions are only exposed when function 0 is present,
>       * allowing direct removal of unexposed functions.
>       */
> -    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
> +    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
> +        !pci_dev->has_power) {
>          return;
>      }
>  
> @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
>      /* non-zero functions are only exposed when function 0 is present,
>       * allowing direct removal of unexposed functions.
>       */
> -    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
> +    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
> +        !pci_dev->has_power) {
>          return ~0x0;
>      }
>  
> -- 
> 2.33.1
Gerd Hoffmann Nov. 16, 2021, 7:16 a.m. UTC | #3
Hi,

> >          new_addr = pci_bar_address(d, i, r->type, r->size);
> > +        if (!d->has_power) {
> > +            new_addr = PCI_BAR_UNMAPPED;
> > +        }
> >  
> >          /* This bar isn't changed */
> >          if (new_addr == r->addr)
> 
> I am a bit confused about why this is necessary.
> When power is removed device is reset, does not
> this disable device memory automatically?

Hmm.   While it clearly doesn't hurt it might not be needed indeed.  The
reset-on-poweroff should make sure both bars and dma are off, and due to
config space access being blocked the guest shouldn't be able to turn
them on while device power is off.

So, yes, in theory we should be able to drop this.  Assuming nothing in
qemu ever touches the bar mappings (host access to config space is not
blocked).

I'll have a look.

take care,
  Gerd
diff mbox series

Patch

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 5c4016b9954a..e7cdf2d5ec5d 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -268,6 +268,7 @@  typedef struct PCIReqIDCache PCIReqIDCache;
 struct PCIDevice {
     DeviceState qdev;
     bool partially_hotplugged;
+    bool has_power;
 
     /* PCI config space */
     uint8_t *config;
@@ -908,5 +909,6 @@  extern const VMStateDescription vmstate_pci_device;
 }
 
 MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
+void pci_set_power(PCIDevice *pci_dev, bool state);
 
 #endif
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4a84e478cef5..e5993c1ef52b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1380,6 +1380,9 @@  static void pci_update_mappings(PCIDevice *d)
             continue;
 
         new_addr = pci_bar_address(d, i, r->type, r->size);
+        if (!d->has_power) {
+            new_addr = PCI_BAR_UNMAPPED;
+        }
 
         /* This bar isn't changed */
         if (new_addr == r->addr)
@@ -1464,8 +1467,8 @@  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
     if (range_covers_byte(addr, l, PCI_COMMAND)) {
         pci_update_irq_disabled(d, was_irq_disabled);
         memory_region_set_enabled(&d->bus_master_enable_region,
-                                  pci_get_word(d->config + PCI_COMMAND)
-                                    & PCI_COMMAND_MASTER);
+                                  (pci_get_word(d->config + PCI_COMMAND)
+                                   & PCI_COMMAND_MASTER) && d->has_power);
     }
 
     msi_write_config(d, addr, val_in, l);
@@ -2182,6 +2185,8 @@  static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_qdev_unrealize(DEVICE(pci_dev));
         return;
     }
+
+    pci_set_power(pci_dev, true);
 }
 
 PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
@@ -2853,6 +2858,22 @@  MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
     return msg;
 }
 
+void pci_set_power(PCIDevice *d, bool state)
+{
+    if (d->has_power == state) {
+        return;
+    }
+
+    d->has_power = state;
+    pci_update_mappings(d);
+    memory_region_set_enabled(&d->bus_master_enable_region,
+                              (pci_get_word(d->config + PCI_COMMAND)
+                               & PCI_COMMAND_MASTER) && d->has_power);
+    if (!d->has_power) {
+        pci_device_reset(d);
+    }
+}
+
 static const TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index cf02f0d6a501..7beafd40a8e9 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -74,7 +74,8 @@  void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
     /* non-zero functions are only exposed when function 0 is present,
      * allowing direct removal of unexposed functions.
      */
-    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+        !pci_dev->has_power) {
         return;
     }
 
@@ -97,7 +98,8 @@  uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
     /* non-zero functions are only exposed when function 0 is present,
      * allowing direct removal of unexposed functions.
      */
-    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+        !pci_dev->has_power) {
         return ~0x0;
     }