diff mbox

[v4,5/5] Add param Error ** for msi_init()

Message ID 1459855602-16727-6-git-send-email-caoj.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Cao jin April 5, 2016, 11:26 a.m. UTC
Add param Error **errp, and change pci_add_capability() to
pci_add_capability2(), because pci_add_capability() report error, and
msi_init() is widely used in realize(), so it is not suitable for realize()

Also fix all the callers who should deal with the msi_init() failure
but actually not.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
---
 hw/audio/intel-hda.c               | 11 +++++++---
 hw/ide/ich.c                       |  2 +-
 hw/net/vmxnet3.c                   | 41 +++++++++++++++-----------------------
 hw/pci-bridge/ioh3420.c            |  4 +++-
 hw/pci-bridge/pci_bridge_dev.c     |  4 +++-
 hw/pci-bridge/xio3130_downstream.c |  4 +++-
 hw/pci-bridge/xio3130_upstream.c   |  4 +++-
 hw/pci/msi.c                       |  9 +++++++--
 hw/scsi/megasas.c                  | 12 +++++++----
 hw/scsi/mptsas.c                   | 15 +++++++++-----
 hw/scsi/vmw_pvscsi.c               |  6 +++++-
 hw/usb/hcd-xhci.c                  | 10 +++++++---
 hw/vfio/pci.c                      |  6 ++++--
 include/hw/pci/msi.h               |  3 ++-
 14 files changed, 80 insertions(+), 51 deletions(-)

Comments

Markus Armbruster April 8, 2016, 8:44 a.m. UTC | #1
Cao jin <caoj.fnst@cn.fujitsu.com> writes:

> Add param Error **errp, and change pci_add_capability() to
> pci_add_capability2(), because pci_add_capability() report error, and
> msi_init() is widely used in realize(), so it is not suitable for realize()

Suggest:

    pci: Convert msi_init() to Error and fix callers to check it

    msi_init() reports errors with error_report(), which is wrong when
    it's used in realize().

    Fix by converting it to Error.

But see the discussion of the msi_init() failure modes below; commit
message may need further work for that.

Same issue in msix_init().  Please fix that as well, if it's not too
much trouble.

> Also fix all the callers who should deal with the msi_init() failure
> but actually not.

Grammar: "but actually don't" (you need a verb).

You neglect to explain the bug's impact.  Something like

    Fix its callers to handle failure instead of ignoring it.
    [Description on what goes wrong because of that goes here]

>
> Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
> ---
>  hw/audio/intel-hda.c               | 11 +++++++---
>  hw/ide/ich.c                       |  2 +-
>  hw/net/vmxnet3.c                   | 41 +++++++++++++++-----------------------
>  hw/pci-bridge/ioh3420.c            |  4 +++-
>  hw/pci-bridge/pci_bridge_dev.c     |  4 +++-
>  hw/pci-bridge/xio3130_downstream.c |  4 +++-
>  hw/pci-bridge/xio3130_upstream.c   |  4 +++-
>  hw/pci/msi.c                       |  9 +++++++--
>  hw/scsi/megasas.c                  | 12 +++++++----
>  hw/scsi/mptsas.c                   | 15 +++++++++-----
>  hw/scsi/vmw_pvscsi.c               |  6 +++++-
>  hw/usb/hcd-xhci.c                  | 10 +++++++---
>  hw/vfio/pci.c                      |  6 ++++--
>  include/hw/pci/msi.h               |  3 ++-
>  14 files changed, 80 insertions(+), 51 deletions(-)
>
> diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
> index d372d4a..c3856cc 100644
> --- a/hw/audio/intel-hda.c
> +++ b/hw/audio/intel-hda.c
> @@ -1139,12 +1139,17 @@ static void intel_hda_realize(PCIDevice *pci, Error **errp)
>      /* HDCTL off 0x40 bit 0 selects signaling mode (1-HDA, 0 - Ac97) 18.1.19 */
>      conf[0x40] = 0x01;
>  
> +    if (d->msi) {
> +        msi_init(&d->pci, d->old_msi_addr ? 0x50 : 0x60, 1,
> +                true, false, errp);
> +        if (*errp) {

Crash bug if errp is null.  I guess it's never null here right now, but
let's not rely on that for robustness, and to avoid setting a bad
example.  Bad examples multiply like rabbits.

The big comment in error.h explains how to receive an error correctly:

 * Receive an error and pass it on to the caller:
 *     Error *err = NULL;
 *     foo(arg, &err);
 *     if (err) {
 *         handle the error...
 *         error_propagate(errp, err);
 *     }
 * where Error **errp is a parameter, by convention the last one.

This is what you should do here.

 *
 * Do *not* "optimize" this to
 *     foo(arg, errp);
 *     if (*errp) { // WRONG!
 *         handle the error...
 *     }
 * because errp may be NULL!

This is what your patch does.

 *
 * But when all you do with the error is pass it on, please use
 *     foo(arg, errp);
 * for readability.
 *

> +            return;
> +        }
> +    }
> +
>      memory_region_init_io(&d->mmio, OBJECT(d), &intel_hda_mmio_ops, d,
>                            "intel-hda", 0x4000);
>      pci_register_bar(&d->pci, 0, 0, &d->mmio);
> -    if (d->msi) {
> -        msi_init(&d->pci, d->old_msi_addr ? 0x50 : 0x60, 1, true, false);
> -    }
>  
>      hda_codec_bus_init(DEVICE(pci), &d->codecs, sizeof(d->codecs),
>                         intel_hda_response, intel_hda_xfer);
> diff --git a/hw/ide/ich.c b/hw/ide/ich.c
> index 0a13334..db4fdb5 100644
> --- a/hw/ide/ich.c
> +++ b/hw/ide/ich.c
> @@ -146,7 +146,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
>      /* Although the AHCI 1.3 specification states that the first capability
>       * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
>       * AHCI device puts the MSI capability first, pointing to 0x80. */
> -    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
> +    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false, errp);

Sure there's nothing to undo on error?  Instead of undoing, you may want
to move msi_init() before the stuff that needs undoing.

>  }
>  
>  static void pci_ich9_uninit(PCIDevice *dev)
> diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
> index 7a38e47..d8dbb0b 100644
> --- a/hw/net/vmxnet3.c
> +++ b/hw/net/vmxnet3.c
> @@ -2189,27 +2189,6 @@ vmxnet3_cleanup_msix(VMXNET3State *s)
>      }
>  }
>  
> -#define VMXNET3_USE_64BIT         (true)
> -#define VMXNET3_PER_VECTOR_MASK   (false)
> -
> -static bool
> -vmxnet3_init_msi(VMXNET3State *s)
> -{
> -    PCIDevice *d = PCI_DEVICE(s);
> -    int res;
> -
> -    res = msi_init(d, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
> -                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
> -    if (0 > res) {
> -        VMW_WRPRN("Failed to initialize MSI, error %d", res);
> -        s->msi_used = false;
> -    } else {
> -        s->msi_used = true;
> -    }
> -
> -    return s->msi_used;
> -}
> -
>  static void
>  vmxnet3_cleanup_msi(VMXNET3State *s)
>  {
> @@ -2271,13 +2250,29 @@ static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s)
>      return dsnp;
>  }
>  
> +
> +#define VMXNET3_USE_64BIT         (true)
> +#define VMXNET3_PER_VECTOR_MASK   (false)
> +
>  static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
>  {
>      DeviceState *dev = DEVICE(pci_dev);
>      VMXNET3State *s = VMXNET3(pci_dev);
> +    int ret;
>  
>      VMW_CBPRN("Starting init...");
>  
> +    ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
> +                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, errp);
> +    if (ret < 0) {
> +        error_free_or_abort(errp);

Aborts when errp is null.

> +        VMW_WRPRN("Failed to initialize MSI, error = %d."
> +                  " Configuration is inconsistent.", ret);

For friendlier debug messages, you could do

       ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
                      VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, &err);
       if (ret < 0) {
           VMW_WRPRN("Failed to initialize MSI: %s", error_get_pretty(err));
           error_free(err);

However, this begs the question of why we let realize succeed after
msi_init() failure for this device, but not for others.  See discussion
of msi_init() failure modes below.

> +        s->msi_used = false;
> +    } else {
> +        s->msi_used = true;
> +    }
> +
>      memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
>                            "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
>      pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
> @@ -2302,10 +2297,6 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
>          VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
>      }
>  
> -    if (!vmxnet3_init_msi(s)) {
> -        VMW_WRPRN("Failed to initialize MSI, configuration is inconsistent.");
> -    }
> -
>      vmxnet3_net_init(s);
>  
>      if (pci_is_express(pci_dev)) {
> diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> index b4a7806..d752e62 100644
> --- a/hw/pci-bridge/ioh3420.c
> +++ b/hw/pci-bridge/ioh3420.c
> @@ -97,6 +97,7 @@ static int ioh3420_initfn(PCIDevice *d)
>      PCIEPort *p = PCIE_PORT(d);
>      PCIESlot *s = PCIE_SLOT(d);
>      int rc;
> +    Error *err = NULL;
>  
>      pci_bridge_initfn(d, TYPE_PCIE_BUS);
>      pcie_port_init_reg(d);
> @@ -109,8 +110,9 @@ static int ioh3420_initfn(PCIDevice *d)
>  
>      rc = msi_init(d, IOH_EP_MSI_OFFSET, IOH_EP_MSI_NR_VECTOR,
>                    IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
> -                  IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
> +                  IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, &err);
>      if (rc < 0) {
> +        error_report_err(err);
>          goto err_bridge;
>      }
>  
> diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
> index 32f4daa..07c7bf8 100644
> --- a/hw/pci-bridge/pci_bridge_dev.c
> +++ b/hw/pci-bridge/pci_bridge_dev.c
> @@ -52,6 +52,7 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
>      PCIBridge *br = PCI_BRIDGE(dev);
>      PCIBridgeDev *bridge_dev = PCI_BRIDGE_DEV(dev);
>      int err;
> +    Error *local_err = NULL;
>  
>      pci_bridge_initfn(dev, TYPE_PCI_BUS);
>  
> @@ -75,8 +76,9 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
>  
>      if ((bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_MSI_REQ)) &&
>          msi_nonbroken) {
> -        err = msi_init(dev, 0, 1, true, true);
> +        err = msi_init(dev, 0, 1, true, true, &local_err);
>          if (err < 0) {
> +            error_report_err(local_err);
>              goto msi_error;
>          }
>      }
> diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
> index e6d653d..0982801 100644
> --- a/hw/pci-bridge/xio3130_downstream.c
> +++ b/hw/pci-bridge/xio3130_downstream.c
> @@ -60,14 +60,16 @@ static int xio3130_downstream_initfn(PCIDevice *d)
>      PCIEPort *p = PCIE_PORT(d);
>      PCIESlot *s = PCIE_SLOT(d);
>      int rc;
> +    Error *err = NULL;
>  
>      pci_bridge_initfn(d, TYPE_PCIE_BUS);
>      pcie_port_init_reg(d);
>  
>      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
>                    XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
> -                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
> +                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, &err);
>      if (rc < 0) {
> +        error_report_err(err);
>          goto err_bridge;
>      }
>  
> diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
> index d976844..1d2c597 100644
> --- a/hw/pci-bridge/xio3130_upstream.c
> +++ b/hw/pci-bridge/xio3130_upstream.c
> @@ -56,14 +56,16 @@ static int xio3130_upstream_initfn(PCIDevice *d)
>  {
>      PCIEPort *p = PCIE_PORT(d);
>      int rc;
> +    Error *err = NULL;
>  
>      pci_bridge_initfn(d, TYPE_PCIE_BUS);
>      pcie_port_init_reg(d);
>  
>      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
>                    XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
> -                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
> +                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, &err);
>      if (rc < 0) {
> +        error_report_err(err);
>          goto err_bridge;
>      }
>  
> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
> index e2a701b..bf7a3b9 100644
> --- a/hw/pci/msi.c
> +++ b/hw/pci/msi.c
> @@ -179,14 +179,17 @@ bool msi_enabled(const PCIDevice *dev)
>   * -ENOTSUP means lacking msi support for a msi-capable platform.
>   */
>  int msi_init(struct PCIDevice *dev, uint8_t offset,
> -             unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask)
> +             unsigned int nr_vectors, bool msi64bit,
> +             bool msi_per_vector_mask, Error **errp)
>  {
>      unsigned int vectors_order;
>      uint16_t flags;
>      uint8_t cap_size;
>      int config_offset;
> +    Error *err = NULL;
>  
>      if (!msi_nonbroken) {
> +        error_setg(errp, "MSI is not supported by interrupt controller");
>          return -ENOTSUP;
>      }
>  
> @@ -210,8 +213,10 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
>      }
>  
>      cap_size = msi_cap_sizeof(flags);
> -    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, cap_size);
> +    config_offset = pci_add_capability2(dev, PCI_CAP_ID_MSI, offset,
> +                                        cap_size, &err);
>      if (config_offset < 0) {
> +        error_propagate(errp, err);
>          return config_offset;
>      }
>  

msi_init() has three failure modes:

* -ENOTSUP

  Board's MSI emulation is not known to work: !msi_nonbroken.

  This is not necessarily an error.

  It is when the device model requires MSI.

  It isn't when a non-MSI variant of the device model exists.  Then the
  caller should silently switch to the non-MSI variant[*].

* -ENOSPC

  Out of PCI config space.  Can happen only when offset == 0.  I believe
  this is a programming error, and therefore should be an assertion
  failure.  But changing pci_add_capability2() that way is outside this
  patch's scope, and up to the PCI maintainers.

* -EINVAL

  PCI capabilities overlap.  Can happen only when offset != 0.  Also a
  programming error, except when assigning a physical device.  There,
  it's a broken physical device.

So, for devices with a non-MSI variant, realize() should use msi_init()
like this:

    ret = msi_init(..., &err);
    if (ret == -ENOTSUP) {
        error_free(err);
        [switch off MSI]
    } else if (ret < 0) {
        error_propagate(errp, err);
        [handle error]
    }

Your patch lacks the special -ENOTSUP case.

init() should error_report_err() + return -1 instead of
error_propagate(), of course.

[handle error] needs to take care to revert previous side effects.

For devices that require MSI, it's either

    msi_init(..., &err);
    if (err) {
        error_propagate(errp, err);
        [handle error]
    }

or

    if (msi_init(..., errp)) {
        [handle error]
    }

I don't have the time to review the rest of the patch now, but I hope
this is enough for a productive v5.

[...]

[*] Inappropriate when the user ordered msi=on, but that's outside this
patch's scope.
Cao jin April 9, 2016, 12:19 p.m. UTC | #2
Hi

On 04/08/2016 04:44 PM, Markus Armbruster wrote:

>> diff --git a/hw/ide/ich.c b/hw/ide/ich.c
>> index 0a13334..db4fdb5 100644
>> --- a/hw/ide/ich.c
>> +++ b/hw/ide/ich.c
>> @@ -146,7 +146,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
>>       /* Although the AHCI 1.3 specification states that the first capability
>>        * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
>>        * AHCI device puts the MSI capability first, pointing to 0x80. */
>> -    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
>> +    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false, errp);
>
> Sure there's nothing to undo on error?  Instead of undoing, you may want
> to move msi_init() before the stuff that needs undoing.
>

ich9-ahci is an on-board device of Q35. As the cover letter says, when it
fails, QEMU will exit. So, is it necessary to undo on error?

maybe you saw, I did move msi_init() for some other devices.

>> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
>
> msi_init() has three failure modes:
>
> * -ENOTSUP
>
>    Board's MSI emulation is not known to work: !msi_nonbroken.
>
>    This is not necessarily an error.
>
>    It is when the device model requires MSI.
>
>    It isnt' when a non-MSI variant of the device model exists.  Then
>    caller should silently switch to the non-MSI variant[*].
>

Several questions on this topic:
1. How can we tell whether a device model has a non-MSI variant? AFAICT, it
is the ones that have an msi property.

2. For devices that have a non-MSI variant (i.e. have an msi property), as I
see in the code, they all have it on by default, so we won't know whether
the user asked for it or didn't set it at all.

If the user doesn't know about msi and didn't set it on, I think it is
acceptable to silently create the non-msi variant for the user. But if the
user asked for it, then like you said, it is an error.

So, how about this: tell the user to switch msi off and try again when we
encounter -ENOTSUP, regardless of whether the user asked for msi or didn't
set it at all?

Actually, in this v4 I did check whether the device has an msi property,
as the cover letter said:

   3. most devices have msi/msix(except vmxnet3 & pvscsi) property as a 
   switch, if it has and is switched on, then msi_init() failure should 
   results in return directly. So in this version, mptsas is updated
Cao jin April 9, 2016, 1 p.m. UTC | #3
On 04/09/2016 08:19 PM, Cao jin wrote:
> Hi
>

>>
>
> Several questions on this topic:
> 1. How to confirm whether a device model has non-MSI variant? AFAICT, it
> is these who have msi property.
>
> 2. For those have non-MSI variant devices(have msi property), as I see
> in the code, they all have it on by default, So we won`t know it is user

Sorry, that was not accurate: not 'all', it should be 'most' (megasas has
it off by default).
Marcel Apfelbaum April 10, 2016, 8:20 a.m. UTC | #4
On 04/09/2016 03:19 PM, Cao jin wrote:
> Hi
>
> On 04/08/2016 04:44 PM, Markus Armbruster wrote:
>
>>> diff --git a/hw/ide/ich.c b/hw/ide/ich.c
>>> index 0a13334..db4fdb5 100644
>>> --- a/hw/ide/ich.c
>>> +++ b/hw/ide/ich.c
>>> @@ -146,7 +146,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
>>>       /* Although the AHCI 1.3 specification states that the first capability
>>>        * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
>>>        * AHCI device puts the MSI capability first, pointing to 0x80. */
>>> -    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
>>> +    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false, errp);
>>
>> Sure there's nothing to undo on error?  Instead of undoing, you may want
>> to move msi_init() before the stuff that needs undoing.
>>
>
> ich9-ahci is a on-board device of Q35, like cover-letter says: when it fail, qemu will exit. So, is it necessary to undo on error?
>
> maybe you saw, I did move msi_init() for some other devices.
>
>>> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
>>
>> msi_init() has three failure modes:
>>
>> * -ENOTSUP
>>
>>    Board's MSI emulation is not known to work: !msi_nonbroken.
>>
>>    This is not necessarily an error.
>>
>>    It is when the device model requires MSI.
>>
>>    It isnt' when a non-MSI variant of the device model exists.  Then
>>    caller should silently switch to the non-MSI variant[*].
>>
>

Hi,
I'll let Markus continue the review; it brings very valuable information.
I will only try to answer the questions below.

> Several questions on this topic:
> 1. How to confirm whether a device model has non-MSI variant? AFAICT, it is these who have msi property.
>

MSI is required for PCI Express devices, optional for PCI devices.
Even if a PCI device supports MSI, it is strongly advised to support
legacy INTx for backward compatibility.
Bottom line, as far as I know, almost all PCI devices support legacy interrupts.
(an exception is the ivshmem device that requires MSI)

> 2. For those have non-MSI variant devices(have msi property), as I see in the code, they all have it on by default, So we won`t know it is user order, or user don`t set it at all.
>

I didn't quite understand the sentence, but some devices have a "use_msi" property that can be set by the user. If no such property exists,
we can assume the user "prefers" the msi version.


> If user don`t know msi and don`t set it on, I think it is acceptable to create the non-msi variant for user silently. But if it is user order, like you said, it is an error.
>

I am not sure about this. At least a warning should be given IMHO.

> So, how about: inform user to swich msi off and try again when encounter -ENOTSUP, no matter it is user order, or user doesn`t set it at all?
>

Not all devices have an "msi" switch. If the board has MSI broken and the device supports legacy interrupts, it's OK to continue without MSI.

> Actually in this v4, I do checked whether device has a msi property, like cover-letter said:
>
>    3. most devices have msi/msix(except vmxnet3 & pvscsi) property as a   switch, if it has and is switched on, then msi_init() failure should   results in return directly. So in this version, mptsas
> is updated
>

I don't see an "msi" property on the PCIDevice class or VirtioPCIClass; are you sure we have an msi switch for most of the PCI devices?

Thanks for looking into this,
Marcel
Cao jin April 10, 2016, 9:38 a.m. UTC | #5
On 04/10/2016 04:20 PM, Marcel Apfelbaum wrote:

>
> Hi,
> I'll let Markus to continue the review, it brings very valuable
> information,
> I will only try to answer the questions below.
>
>> Several questions on this topic:
>> 1. How to confirm whether a device model has non-MSI variant? AFAICT,
>> it is these who have msi property.
>>
>
> MSI is required for PCI Express devices, optional for PCI devices.
> Even if a PCI device supports MSI, it is strongly advised to support
> legacy INTx for backward compatibility.
> Bottom line, as far as I know, almost all PCI devices support legacy
> interrupts.
> (an exception is the ivshmem device that requires MSI)
>
>> 2. For those have non-MSI variant devices(have msi property), as I see
>> in the code, they all have it on by default, So we won`t know it is
>> user order, or user don`t set it at all.
>>
>
> I didn't quite understand the sentence, but some devices have a
> "use_msi" property that can be set by the user. If no such property exists,
> we can assume the user "prefers" the msi version.
>
Hi,
Sorry for my bad description. Let me explain myself again.
I think (guess) that if a device model has an msi property, it means this
device model has a non-msi variant. For those devices that have an msi
property, I see most of them have it on by default. So when these devices
initialize msi, qemu won't know whether the user asked for it or not.

If I understand Markus right:
1). If the user orders msi on and msi_init returns -ENOTSUP, it is an error.
Then I suggest informing the user: "set msi=off and try again".
2). If the user doesn't order msi on (so the device has msi on by default)
and msi_init returns -ENOTSUP, I am OK with Markus's suggestion: *caller
should silently switch to the non-MSI variant*

But the situation now is that qemu can't distinguish whether the user ordered
msi or not, so for case 2) above, my suggestion is the same as for 1).

>
>> If user don`t know msi and don`t set it on, I think it is acceptable
>> to create the non-msi variant for user silently. But if it is user
>> order, like you said, it is an error.
>>
>
> I am not sure about this. At least a warning should be given IMHO.
>
>> So, how about: inform user to swich msi off and try again when
>> encounter -ENOTSUP, no matter it is user order, or user doesn`t set it
>> at all?
>>
>
> Not all devices have an "msi" switch. If the board has msi broken and
> the devices supports legacy interrupts, its OK to continue without MSI.
>
>> Actually in this v4, I do checked whether device has a msi property,
>> like cover-letter said:
>>
>>    3. most devices have msi/msix(except vmxnet3 & pvscsi) property as
>> a   switch, if it has and is switched on, then msi_init() failure
>> should   results in return directly. So in this version, mptsas
>> is updated
>>
>
> I don't see a "msi" properties on PCIDevice class or VirtioPCIClass, are
> you sure we have an msi switch for most of the PCI devices?
>

My bad, I didn't limit the scope. I mean the devices that call
msi_init; they mostly have an msi (or msix, or both) property.
Marcel Apfelbaum April 11, 2016, 10 a.m. UTC | #6
On 04/10/2016 12:38 PM, Cao jin wrote:
>
>
> On 04/10/2016 04:20 PM, Marcel Apfelbaum wrote:
>
>>
>> Hi,
>> I'll let Markus to continue the review, it brings very valuable
>> information,
>> I will only try to answer the questions below.
>>
>>> Several questions on this topic:
>>> 1. How to confirm whether a device model has non-MSI variant? AFAICT,
>>> it is these who have msi property.
>>>
>>
>> MSI is required for PCI Express devices, optional for PCI devices.
>> Even if a PCI device supports MSI, it is strongly advised to support
>> legacy INTx for backward compatibility.
>> Bottom line, as far as I know, almost all PCI devices support legacy
>> interrupts.
>> (an exception is the ivshmem device that requires MSI)
>>
>>> 2. For those have non-MSI variant devices(have msi property), as I see
>>> in the code, they all have it on by default, So we won`t know it is
>>> user order, or user don`t set it at all.
>>>
>>
>> I didn't quite understand the sentence, but some devices have a
>> "use_msi" property that can be set by the user. If no such property exists,
>> we can assume the user "prefers" the msi version.
>>
> Hi,
> Sorry for my bad description. let me explain myself again.
> I think(guess), if a device model has msi property, it means this device model has non-msi variant. For those devices who has msi property, I see most of them will have it on by default. So when these
> devices initialize msi, qemu won`t know whether it is user order or not.
>
> If I understand Markus right:
> 1). If user orders msi on, when msi_init returns -ENOTSUP, It is error. Then I suggest to inform user "set msi=off and try again"

If the user *specifically* asked for msi=on (by setting a property), it is OK to fail the device init process.

> 2). If user doesn`t order msi on(so device have msi on by default), when msi_init returns -ENOTSUP, I am ok with Markus`s suggestion: *caller should silently switch to the non-MSI variant*
>
> But now the condition is, qemu can`t distinguish whether user ordered msi or not, so for the condition 2) above, my suggestion is the same as 1)
>

If we can't distinguish between the cases, 2 would not be user-friendly. Once it is possible to differentiate, we could go for 2.


Thanks,
Marcel

>>
>>> If user don`t know msi and don`t set it on, I think it is acceptable
>>> to create the non-msi variant for user silently. But if it is user
>>> order, like you said, it is an error.
>>>
>>
>> I am not sure about this. At least a warning should be given IMHO.
>>
>>> So, how about: inform user to swich msi off and try again when
>>> encounter -ENOTSUP, no matter it is user order, or user doesn`t set it
>>> at all?
>>>
>>
>> Not all devices have an "msi" switch. If the board has msi broken and
>> the devices supports legacy interrupts, its OK to continue without MSI.
>>
>>> Actually in this v4, I do checked whether device has a msi property,
>>> like cover-letter said:
>>>
>>>    3. most devices have msi/msix(except vmxnet3 & pvscsi) property as
>>> a   switch, if it has and is switched on, then msi_init() failure
>>> should   results in return directly. So in this version, mptsas
>>> is updated
>>>
>>
>> I don't see a "msi" properties on PCIDevice class or VirtioPCIClass, are
>> you sure we have an msi switch for most of the PCI devices?
>>
>
> My bad, I didn`t limit the range. I mean the devices who will call msi_init, they mostly have msi(or msix, or both) property
>
>
>
Cao jin April 11, 2016, 12:02 p.m. UTC | #7
On 04/11/2016 06:00 PM, Marcel Apfelbaum wrote:
>
>> 2). If user doesn`t order msi on(so device have msi on by default),
>> when msi_init returns -ENOTSUP, I am ok with Markus`s suggestion:
>> *caller should silently switch to the non-MSI variant*
>>
>> But now the condition is, qemu can`t distinguish whether user ordered
>> msi or not, so for the condition 2) above, my suggestion is the same
>> as 1)
>>
>
> If we can't distinguish between the cases, 2 would not be user-friendly.
> Once it would be possible to differentiate we could go for 2.
>

Hi,

How do we differentiate? Do you mean: make these msi (msix) properties default
to 'off'? It seems most devices have it on, and a few have it off.

>
> Thanks,
> Marcel
>
Markus Armbruster April 12, 2016, 8:34 a.m. UTC | #8
Cao jin <caoj.fnst@cn.fujitsu.com> writes:

> Hi
>
> On 04/08/2016 04:44 PM, Markus Armbruster wrote:
>
>>> diff --git a/hw/ide/ich.c b/hw/ide/ich.c
>>> index 0a13334..db4fdb5 100644
>>> --- a/hw/ide/ich.c
>>> +++ b/hw/ide/ich.c
>>> @@ -146,7 +146,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
>>>       /* Although the AHCI 1.3 specification states that the first capability
>>>        * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
>>>        * AHCI device puts the MSI capability first, pointing to 0x80. */
>>> -    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
>>> +    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false, errp);
>>
>> Sure there's nothing to undo on error?  Instead of undoing, you may want
>> to move msi_init() before the stuff that needs undoing.
>>
>
> ich9-ahci is a on-board device of Q35, like cover-letter says: when it
> fail, qemu will exit. So, is it necessary to undo on error?

Yes, it's a Q35 onboard device, but it's not only that: QEMU happily
accepts -device ich9-ahci.

A realize() method should fail cleanly unless its device has
cannot_instantiate_with_device_add_yet set.

> maybe you saw, I did move msi_init() for some other devices.

Yes.  Appreciated!

[...]
Markus Armbruster April 12, 2016, 11:50 a.m. UTC | #9
Marcel Apfelbaum <marcel@redhat.com> writes:

> On 04/10/2016 12:38 PM, Cao jin wrote:
>>
>>
>> On 04/10/2016 04:20 PM, Marcel Apfelbaum wrote:
>>
>>>
>>> Hi,
>>> I'll let Markus to continue the review, it brings very valuable
>>> information,
>>> I will only try to answer the questions below.
>>>
>>>> Several questions on this topic:
>>>> 1. How to confirm whether a device model has non-MSI variant? AFAICT,
>>>> it is these who have msi property.

Examine how it uses msi_init().  That's how we give a PCI device
capability MSI.  If the device model treats msi_init() failure as fatal,
it doesn't have a non-MSI variant.

Some device models let the user ask for MSI with a property, and make
the msi_init() call depend on the property.  The property is commonly
called "msi" or "use_msi", with values "on" and "off".

msi=off then works as expected: you get the variant without MSI.

The meaning of msi=on depends on how msi_init() failure is handled.  If
it's fatal, then msi=on works as expected: you get the variant with MSI.
But if it's not fatal, then you may or may not get it, which I consider
a grossly misleading user interface.

To clean this up, we could add msi=auto, and move the non-fatal behavior
from msi=on to msi=auto.

Same for MSI-X with msix_init() and property "msix".

>>> MSI is required for PCI Express devices, optional for PCI devices.
>>> Even if a PCI device supports MSI, it is strongly advised to support
>>> legacy INTx for backward compatibility.

Devices commonly do, but the standards certainly permit devices that
only do MSI or MSI-X.

PCI Local Bus Specification Revision 3.0 (2004) section 6.8: "It is
recommended that devices implement interrupt pins to provide
compatibility in systems that do not support MSI (devices default to
interrupt pins).  However, it is expected that the need for interrupt
pins will diminish over time.  Devices that do not support interrupt
pins due to pin constraints (rely on polling for device service) may
implement messages to increase performance without adding additional
pins.  Therefore, system configuration software must not assume that a
message capable device has an interrupt pin."

PCIe emulates INTx for software compatibility with PCI.  PCI Express
Base Specification Revision 3.0 (2010) section 6.1.2: "The legacy INTx
emulation mechanism may be deprecated in a future version of this
specification."

>>> Bottom line, as far as I know, almost all PCI devices support legacy
>>> interrupts.
>>> (an exception is the ivshmem device that requires MSI)

Virtual devices modelled after a physical device should follow the
physical device's example as closely as practical.

For reasons discussed recently, we don't actually do that when it comes
to MSI and MSI-X.  Instead, we tend to provide up to four variants of a
virtual device: with/without MSI/MSI-X capability.  Typically they all
support legacy INTx.

ivshmem doesn't have a physical model, so we can do whatever we want.

The deprecated "ivshmem" device comes in four variants: with/without
interrupts, with/without MSI-X capability.  The variant without
interrupts but with MSI-X capability is of course silly.  The variant
with interrupts and with MSI-X capability can't do legacy INTx.  It
could, but it's not worth changing now.

The modern "ivshmem-plain" and "ivshmem-doorbell" devices don't come in
variants.  The former doesn't have interrupts (guest-compatible to
"ivshmem" without interrupts and without MSI-X), and the latter only
supports MSI-X interrupts (guest-compatible to "ivshmem" with interrupts
and with MSI-X).

>>>> 2. For devices that have a non-MSI variant (i.e. have an msi property), as I
>>>> see in the code, they all have it on by default, so we won't know whether
>>>> it was the user's order, or the user didn't set it at all.

Correct.

Note that if we add value "auto" and make it the default, then value
"on" is known to be the user's order.

>>> I didn't quite understand the sentence, but some devices have a
>>> "use_msi" property that can be set by the user. If no such property exists,
>>> we can assume the user "prefers" the msi version.
>>>
>> Hi,
>> Sorry for my bad description. let me explain myself again.
>> I think (guess) that if a device model has an msi property, it means this device model has a non-MSI variant. For those devices that have an msi property, I see most of them have it on by default. So when these
>> devices initialize MSI, QEMU won't know whether it is the user's order or not.
>>
>> If I understand Markus right:
>> 1). If the user orders msi on and msi_init returns -ENOTSUP, it is an error. Then I suggest informing the user: "set msi=off and try again"
>
> If the user *specifically* asked for msi=on (setting a property), it is OK to fail the device init process.

Yes, failing is the only sane behavior when the user asked for msi=on
but we can't provide it.

However, in the device model code, we can't know whether the user asked
for msi=on, or whether the property defaults to on.  That's why I'd like
us to add msi=auto and make it the default.

>> 2). If the user doesn't order msi on (so the device has msi on by default) and msi_init returns -ENOTSUP, I am OK with Markus's suggestion: *caller should silently switch to the non-MSI variant*
>>
>> But the situation now is that QEMU can't distinguish whether the user ordered msi or not, so for case 2) above, my suggestion is the same as for 1)
>>
>
> If we can't distinguish between the cases, 2 would not be user-friendly. Once it is possible to differentiate, we could go for 2.
>
>
> Thanks,
> Marcel
>
>>>
>>>> If the user doesn't know about msi and doesn't set it on, I think it is
>>>> acceptable to create the non-MSI variant for the user silently. But if it
>>>> is the user's order, like you said, it is an error.
>>>>
>>>
>>> I am not sure about this. At least a warning should be given IMHO.
>>>
>>>> So, how about: inform the user to switch msi off and try again when
>>>> encountering -ENOTSUP, no matter whether it is the user's order, or the
>>>> user didn't set it at all?
>>>>
>>>
>>> Not all devices have an "msi" switch. If the board has MSI broken and
>>> the device supports legacy interrupts, it's OK to continue without MSI.
>>>
>>>> Actually, in this v4 I did check whether the device has an msi property,
>>>> as the cover letter said:
>>>>
>>>>    3. most devices (except vmxnet3 & pvscsi) have an msi/msix property
>>>> as a switch; if a device has it and it is switched on, then msi_init()
>>>> failure should result in returning directly. So in this version, mptsas
>>>> is updated
>>>>
>>>
>>> I don't see an "msi" property on the PCIDevice class or VirtioPCIClass; are
>>> you sure we have an msi switch for most of the PCI devices?
>>>
>>
>> My bad, I didn't limit the scope. I mean that the devices which call msi_init mostly have an msi (or msix, or both) property

Further questions?
Cao jin April 29, 2016, 9:28 a.m. UTC | #10
Hi Markus,
     sorry for replying so late; I have been stuck on other tasks for a while.

On 04/12/2016 07:50 PM, Markus Armbruster wrote:
>
> Examine how it uses msi_init().  That's how we give a PCI device
> capability MSI.  If the device model treats msi_init() failure as fatal,
> it doesn't have a non-MSI variant.
>
> Some device models let the user ask for MSI with a property, and make
> the msi_init() call depend on the property.  The property is commonly
> called "msi" or "use_msi", with values "on" and "off".
>
> msi=off then works as expected: you get the variant without MSI.
>
> The meaning of msi=on depends on how msi_init() failure is handled.  If
> it's fatal, then msi=on works as expected: you get the variant with MSI.
> But if it's not fatal, then you may or may not get it, which I consider
> a grossly misleading user interface.
>
> To clean this up, we could add msi=auto, and move the non-fatal behavior
> from msi=on to msi=auto.
>
> Same for MSI-X with msix_init() and property "msix".
>
Let me try to understand your meaning, correct me pls if I am wrong:

Replace the msi property type from bit/int with enum OnOffAuto (it seems
this needs a little surgery on the device structure), and default to auto.
Then the process is going to look like this:

(msi property = auto) means enable MSI by default. If msi_init fails, we
switch to the non-MSI variant; if msi_init succeeds, we get the MSI variant.

Another case I want to mention: ahci, pvscsi & vmxnet3 don't have an
msi property, and when msi_init fails, they all will use INTx. One thing
needs to be confirmed: do they need an msi property or not?
Markus Armbruster April 29, 2016, 12:46 p.m. UTC | #11
Cao jin <caoj.fnst@cn.fujitsu.com> writes:

> Hi Markus,
>     sorry for replying so late; I have been stuck on other tasks for a while.
>
> On 04/12/2016 07:50 PM, Markus Armbruster wrote:
>>
>> Examine how it uses msi_init().  That's how we give a PCI device
>> capability MSI.  If the device model treats msi_init() failure as fatal,
>> it doesn't have a non-MSI variant.
>>
>> Some device models let the user ask for MSI with a property, and make
>> the msi_init() call depend on the property.  The property is commonly
>> called "msi" or "use_msi", with values "on" and "off".
>>
>> msi=off then works as expected: you get the variant without MSI.
>>
>> The meaning of msi=on depends on how msi_init() failure is handled.  If
>> it's fatal, then msi=on works as expected: you get the variant with MSI.
>> But if it's not fatal, then you may or may not get it, which I consider
>> a grossly misleading user interface.
>>
>> To clean this up, we could add msi=auto, and move the non-fatal behavior
>> from msi=on to msi=auto.
>>
>> Same for MSI-X with msix_init() and property "msix".
>>
> Let me try to understand your meaning, correct me pls if I am wrong:
>
> Replace the msi property type from bit/int with enum OnOffAuto (it seems
> this needs a little surgery on the device structure), and default to auto.
> Then the process is going to look like this:
>
> (msi property = auto) means enable MSI by default. If msi_init fails,
> we switch to the non-MSI variant; if msi_init succeeds, we get the MSI
> variant.

You got it.

> Another case I want to mention: ahci, pvscsi & vmxnet3 don't have an
> msi property, and when msi_init fails, they all will use INTx. One
> thing needs to be confirmed: do they need an msi property or not?

While I don't particularly like additional configuration knobs, I like
the "try to use MSI, fall back to INTx silently" behavior even less.
I'd welcome such a property.  Can't guarantee the respective maintainers
will agree with me, of course.
diff mbox

Patch

diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index d372d4a..c3856cc 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -1139,12 +1139,17 @@  static void intel_hda_realize(PCIDevice *pci, Error **errp)
     /* HDCTL off 0x40 bit 0 selects signaling mode (1-HDA, 0 - Ac97) 18.1.19 */
     conf[0x40] = 0x01;
 
+    if (d->msi) {
+        msi_init(&d->pci, d->old_msi_addr ? 0x50 : 0x60, 1,
+                true, false, errp);
+        if (*errp) {
+            return;
+        }
+    }
+
     memory_region_init_io(&d->mmio, OBJECT(d), &intel_hda_mmio_ops, d,
                           "intel-hda", 0x4000);
     pci_register_bar(&d->pci, 0, 0, &d->mmio);
-    if (d->msi) {
-        msi_init(&d->pci, d->old_msi_addr ? 0x50 : 0x60, 1, true, false);
-    }
 
     hda_codec_bus_init(DEVICE(pci), &d->codecs, sizeof(d->codecs),
                        intel_hda_response, intel_hda_xfer);
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 0a13334..db4fdb5 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -146,7 +146,7 @@  static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp)
     /* Although the AHCI 1.3 specification states that the first capability
      * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
      * AHCI device puts the MSI capability first, pointing to 0x80. */
-    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
+    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false, errp);
 }
 
 static void pci_ich9_uninit(PCIDevice *dev)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 7a38e47..d8dbb0b 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2189,27 +2189,6 @@  vmxnet3_cleanup_msix(VMXNET3State *s)
     }
 }
 
-#define VMXNET3_USE_64BIT         (true)
-#define VMXNET3_PER_VECTOR_MASK   (false)
-
-static bool
-vmxnet3_init_msi(VMXNET3State *s)
-{
-    PCIDevice *d = PCI_DEVICE(s);
-    int res;
-
-    res = msi_init(d, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
-                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
-    if (0 > res) {
-        VMW_WRPRN("Failed to initialize MSI, error %d", res);
-        s->msi_used = false;
-    } else {
-        s->msi_used = true;
-    }
-
-    return s->msi_used;
-}
-
 static void
 vmxnet3_cleanup_msi(VMXNET3State *s)
 {
@@ -2271,13 +2250,29 @@  static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s)
     return dsnp;
 }
 
+
+#define VMXNET3_USE_64BIT         (true)
+#define VMXNET3_PER_VECTOR_MASK   (false)
+
 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
 {
     DeviceState *dev = DEVICE(pci_dev);
     VMXNET3State *s = VMXNET3(pci_dev);
+    int ret;
 
     VMW_CBPRN("Starting init...");
 
+    ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
+                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, errp);
+    if (ret < 0) {
+        error_free_or_abort(errp);
+        VMW_WRPRN("Failed to initialize MSI, error = %d."
+                  " Configuration is inconsistent.", ret);
+        s->msi_used = false;
+    } else {
+        s->msi_used = true;
+    }
+
     memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
                           "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
     pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
@@ -2302,10 +2297,6 @@  static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
         VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
     }
 
-    if (!vmxnet3_init_msi(s)) {
-        VMW_WRPRN("Failed to initialize MSI, configuration is inconsistent.");
-    }
-
     vmxnet3_net_init(s);
 
     if (pci_is_express(pci_dev)) {
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index b4a7806..d752e62 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -97,6 +97,7 @@  static int ioh3420_initfn(PCIDevice *d)
     PCIEPort *p = PCIE_PORT(d);
     PCIESlot *s = PCIE_SLOT(d);
     int rc;
+    Error *err = NULL;
 
     pci_bridge_initfn(d, TYPE_PCIE_BUS);
     pcie_port_init_reg(d);
@@ -109,8 +110,9 @@  static int ioh3420_initfn(PCIDevice *d)
 
     rc = msi_init(d, IOH_EP_MSI_OFFSET, IOH_EP_MSI_NR_VECTOR,
                   IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
-                  IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
+                  IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, &err);
     if (rc < 0) {
+        error_report_err(err);
         goto err_bridge;
     }
 
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 32f4daa..07c7bf8 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -52,6 +52,7 @@  static int pci_bridge_dev_initfn(PCIDevice *dev)
     PCIBridge *br = PCI_BRIDGE(dev);
     PCIBridgeDev *bridge_dev = PCI_BRIDGE_DEV(dev);
     int err;
+    Error *local_err = NULL;
 
     pci_bridge_initfn(dev, TYPE_PCI_BUS);
 
@@ -75,8 +76,9 @@  static int pci_bridge_dev_initfn(PCIDevice *dev)
 
     if ((bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_MSI_REQ)) &&
         msi_nonbroken) {
-        err = msi_init(dev, 0, 1, true, true);
+        err = msi_init(dev, 0, 1, true, true, &local_err);
         if (err < 0) {
+            error_report_err(local_err);
             goto msi_error;
         }
     }
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index e6d653d..0982801 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -60,14 +60,16 @@  static int xio3130_downstream_initfn(PCIDevice *d)
     PCIEPort *p = PCIE_PORT(d);
     PCIESlot *s = PCIE_SLOT(d);
     int rc;
+    Error *err = NULL;
 
     pci_bridge_initfn(d, TYPE_PCIE_BUS);
     pcie_port_init_reg(d);
 
     rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
                   XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
-                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
+                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, &err);
     if (rc < 0) {
+        error_report_err(err);
         goto err_bridge;
     }
 
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index d976844..1d2c597 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -56,14 +56,16 @@  static int xio3130_upstream_initfn(PCIDevice *d)
 {
     PCIEPort *p = PCIE_PORT(d);
     int rc;
+    Error *err = NULL;
 
     pci_bridge_initfn(d, TYPE_PCIE_BUS);
     pcie_port_init_reg(d);
 
     rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
                   XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
-                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
+                  XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, &err);
     if (rc < 0) {
+        error_report_err(err);
         goto err_bridge;
     }
 
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index e2a701b..bf7a3b9 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -179,14 +179,17 @@  bool msi_enabled(const PCIDevice *dev)
  * -ENOTSUP means lacking msi support for a msi-capable platform.
  */
 int msi_init(struct PCIDevice *dev, uint8_t offset,
-             unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask)
+             unsigned int nr_vectors, bool msi64bit,
+             bool msi_per_vector_mask, Error **errp)
 {
     unsigned int vectors_order;
     uint16_t flags;
     uint8_t cap_size;
     int config_offset;
+    Error *err = NULL;
 
     if (!msi_nonbroken) {
+        error_setg(errp, "MSI is not supported by interrupt controller");
         return -ENOTSUP;
     }
 
@@ -210,8 +213,10 @@  int msi_init(struct PCIDevice *dev, uint8_t offset,
     }
 
     cap_size = msi_cap_sizeof(flags);
-    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, cap_size);
+    config_offset = pci_add_capability2(dev, PCI_CAP_ID_MSI, offset,
+                                        cap_size, &err);
     if (config_offset < 0) {
+        error_propagate(errp, err);
         return config_offset;
     }
 
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 56fb645..0aaf3af 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2340,6 +2340,14 @@  static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
     /* Interrupt pin 1 */
     pci_conf[PCI_INTERRUPT_PIN] = 0x01;
 
+    if (megasas_use_msi(s)) {
+        msi_init(dev, 0x50, 1, true, false, errp);
+        if (*errp) {
+            s->flags &= ~MEGASAS_MASK_USE_MSI;
+            return;
+        }
+    }
+
     memory_region_init_io(&s->mmio_io, OBJECT(s), &megasas_mmio_ops, s,
                           "megasas-mmio", 0x4000);
     memory_region_init_io(&s->port_io, OBJECT(s), &megasas_port_ops, s,
@@ -2347,10 +2355,6 @@  static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
     memory_region_init_io(&s->queue_io, OBJECT(s), &megasas_queue_ops, s,
                           "megasas-queue", 0x40000);
 
-    if (megasas_use_msi(s) &&
-        msi_init(dev, 0x50, 1, true, false) < 0) {
-        s->flags &= ~MEGASAS_MASK_USE_MSI;
-    }
     if (megasas_use_msix(s) &&
         msix_init(dev, 15, &s->mmio_io, b->mmio_bar, 0x2000,
                   &s->mmio_io, b->mmio_bar, 0x3800, 0x68)) {
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index 1c18c84..2009fa1 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -1274,10 +1274,20 @@  static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
 {
     DeviceState *d = DEVICE(dev);
     MPTSASState *s = MPT_SAS(dev);
+    Error *err = NULL;
 
     dev->config[PCI_LATENCY_TIMER] = 0;
     dev->config[PCI_INTERRUPT_PIN] = 0x01;
 
+    if (s->msi_available) {
+        if (msi_init(dev, 0, 1, true, false, &err) >= 0) {
+            s->msi_in_use = true;
+        }
+        else {
+            return;
+        }
+    }
+
     memory_region_init_io(&s->mmio_io, OBJECT(s), &mptsas_mmio_ops, s,
                           "mptsas-mmio", 0x4000);
     memory_region_init_io(&s->port_io, OBJECT(s), &mptsas_port_ops, s,
@@ -1285,11 +1295,6 @@  static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
     memory_region_init_io(&s->diag_io, OBJECT(s), &mptsas_diag_ops, s,
                           "mptsas-diag", 0x10000);
 
-    if (s->msi_available &&
-        msi_init(dev, 0, 1, true, false) >= 0) {
-        s->msi_in_use = true;
-    }
-
     pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->port_io);
     pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY |
                                  PCI_BASE_ADDRESS_MEM_TYPE_32, &s->mmio_io);
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index 4ce3581..2d38d6c 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -1043,12 +1043,16 @@  static void
 pvscsi_init_msi(PVSCSIState *s)
 {
     int res;
+    Error *err = NULL;
     PCIDevice *d = PCI_DEVICE(s);
 
     res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS,
-                   PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK);
+                   PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, &err);
     if (res < 0) {
         trace_pvscsi_init_msi_fail(res);
+        error_append_hint(&err, "MSI capability fail to init,"
+                " will use INTx intead\n");
+        error_report_err(err);
         s->msi_used = false;
     } else {
         s->msi_used = true;
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index bcde8a2..f132a57 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -3588,6 +3588,13 @@  static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
 
     usb_xhci_init(xhci);
 
+    if (xhci_get_flag(xhci, XHCI_FLAG_USE_MSI)) {
+        ret = msi_init(dev, 0x70, xhci->numintrs, true, false, errp);
+        if (ret < 0) {
+            return;
+        }
+    }
+
     if (xhci->numintrs > MAXINTRS) {
         xhci->numintrs = MAXINTRS;
     }
@@ -3645,9 +3652,6 @@  static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
         assert(ret >= 0);
     }
 
-    if (xhci_get_flag(xhci, XHCI_FLAG_USE_MSI)) {
-        msi_init(dev, 0x70, xhci->numintrs, true, false);
-    }
     if (xhci_get_flag(xhci, XHCI_FLAG_USE_MSI_X)) {
         msix_init(dev, xhci->numintrs,
                   &xhci->mem, 0, OFF_MSIX_TABLE,
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d091d8c..55ceb67 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1171,6 +1171,7 @@  static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
     uint16_t ctrl;
     bool msi_64bit, msi_maskbit;
     int ret, entries;
+    Error *err = NULL;
 
     if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
               vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
@@ -1184,12 +1185,13 @@  static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
 
     trace_vfio_msi_setup(vdev->vbasedev.name, pos);
 
-    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
+    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err);
     if (ret < 0) {
         if (ret == -ENOTSUP) {
             return 0;
         }
-        error_report("vfio: msi_init failed");
+        error_prepend(&err, "vfio: msi_init failed: ");
+        error_report_err(err);
         return ret;
     }
     vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h
index 8124908..4837bcf 100644
--- a/include/hw/pci/msi.h
+++ b/include/hw/pci/msi.h
@@ -35,7 +35,8 @@  void msi_set_message(PCIDevice *dev, MSIMessage msg);
 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
-             unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
+             unsigned int nr_vectors, bool msi64bit,
+             bool msi_per_vector_mask, Error **errp);
 void msi_uninit(struct PCIDevice *dev);
 void msi_reset(PCIDevice *dev);
 void msi_notify(PCIDevice *dev, unsigned int vector);