diff mbox series

[v3] vpci/msix: fix PBA accesses

Message ID 20220307125347.71814-1-roger.pau@citrix.com (mailing list archive)
State Superseded
Headers show
Series [v3] vpci/msix: fix PBA accesses | expand

Commit Message

Roger Pau Monné March 7, 2022, 12:53 p.m. UTC
Map the PBA in order to access it from the MSI-X read and write
handlers. Note that previously the handlers would pass the physical
host address into the {read,write}{l,q} handlers, which is wrong as
those expect a linear address.

Map the PBA using ioremap when the first access is performed. Note
that 32bit arches might want to abstract the call to ioremap into a
vPCI arch handler, so they can use a fixmap range to map the PBA.

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Alex Olson <this.is.a0lson@gmail.com>
---
Changes since v2:
 - Use helper function to map PBA.
 - Mark memory as iomem.

Changes since v1:
 - Also handle writes.

I don't seem to have a box with a driver that will try to access the
PBA, so I would consider this specific code path only build tested. At
least it doesn't seem to regress the current state of vPCI.
---
 xen/drivers/vpci/msix.c | 59 ++++++++++++++++++++++++++++++++++++++---
 xen/drivers/vpci/vpci.c |  2 ++
 xen/include/xen/vpci.h  |  2 ++
 3 files changed, 59 insertions(+), 4 deletions(-)

Comments

Jan Beulich March 7, 2022, 2:12 p.m. UTC | #1
On 07.03.2022 13:53, Roger Pau Monne wrote:
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -182,6 +182,33 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>  }
>  
> +static void __iomem *get_pba(struct vpci *vpci)
> +{
> +    struct vpci_msix *msix = vpci->msix;
> +    void __iomem *pba;
> +
> +    /*
> +     * PBA will only be unmapped when the device is deassigned, so access it
> +     * without holding the vpci lock.
> +     */
> +    if ( likely(msix->pba) )
> +        return msix->pba;
> +
> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> +    if ( !pba )
> +        return msix->pba;

For this particular purpose you may want to consider using ACCESS_ONCE() for
all accesses to this field.

> +    spin_lock(&vpci->lock);
> +    if ( !msix->pba )
> +        msix->pba = pba;
> +    else
> +        iounmap(pba);
> +    spin_unlock(&vpci->lock);

Whenever possible I think we should try to do things, in particular ones
involving further locks, with as few locks held as possible. IOW I'd like
to ask that you pull out the iounmap().

> @@ -200,6 +227,10 @@ static int cf_check msix_read(
>  
>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>      {
> +        struct vpci *vpci = msix->pdev->vpci;
> +        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
> +        void __iomem *pba = get_pba(vpci);

const?

Jan
Roger Pau Monné March 7, 2022, 4:06 p.m. UTC | #2
On Mon, Mar 07, 2022 at 03:12:32PM +0100, Jan Beulich wrote:
> On 07.03.2022 13:53, Roger Pau Monne wrote:
> > --- a/xen/drivers/vpci/msix.c
> > +++ b/xen/drivers/vpci/msix.c
> > @@ -182,6 +182,33 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
> >      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
> >  }
> >  
> > +static void __iomem *get_pba(struct vpci *vpci)
> > +{
> > +    struct vpci_msix *msix = vpci->msix;
> > +    void __iomem *pba;
> > +
> > +    /*
> > +     * PBA will only be unmapped when the device is deassigned, so access it
> > +     * without holding the vpci lock.
> > +     */
> > +    if ( likely(msix->pba) )
> > +        return msix->pba;
> > +
> > +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> > +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> > +    if ( !pba )
> > +        return msix->pba;
> 
> For this particular purpose may want to consider using ACCESS_ONCE() for
> all accesses to this field.

Hm, I think I've asked before, but we do assume that ACCESS_ONCE will
generate a single instruction, or else we would have to use
read_atomic.

> > +    spin_lock(&vpci->lock);
> > +    if ( !msix->pba )
> > +        msix->pba = pba;

Here we would then use write_atomic.

> > +    else
> > +        iounmap(pba);
> > +    spin_unlock(&vpci->lock);
> 
> Whenever possible I think we should try to do things, in particular ones
> involving further locks, with as few locks held as possible. IOW I'd like
> to ask that you pull out the iounmap().
> 
> > @@ -200,6 +227,10 @@ static int cf_check msix_read(
> >  
> >      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
> >      {
> > +        struct vpci *vpci = msix->pdev->vpci;
> > +        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
> > +        void __iomem *pba = get_pba(vpci);
> 
> const?

Sure.

Thanks, Roger.
Jan Beulich March 7, 2022, 4:11 p.m. UTC | #3
On 07.03.2022 17:06, Roger Pau Monné wrote:
> On Mon, Mar 07, 2022 at 03:12:32PM +0100, Jan Beulich wrote:
>> On 07.03.2022 13:53, Roger Pau Monne wrote:
>>> --- a/xen/drivers/vpci/msix.c
>>> +++ b/xen/drivers/vpci/msix.c
>>> @@ -182,6 +182,33 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>>>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>>>  }
>>>  
>>> +static void __iomem *get_pba(struct vpci *vpci)
>>> +{
>>> +    struct vpci_msix *msix = vpci->msix;
>>> +    void __iomem *pba;
>>> +
>>> +    /*
>>> +     * PBA will only be unmapped when the device is deassigned, so access it
>>> +     * without holding the vpci lock.
>>> +     */
>>> +    if ( likely(msix->pba) )
>>> +        return msix->pba;
>>> +
>>> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
>>> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
>>> +    if ( !pba )
>>> +        return msix->pba;
>>
>> For this particular purpose may want to consider using ACCESS_ONCE() for
>> all accesses to this field.
> 
> Hm, I think I've asked before, but we do assume that ACCESS_ONCE will
> generate a single instruction, or else we would have to use
> read_atomic.

Yeah, that looks to be the assumption. It has become my understanding
that ACCESS_ONCE() is generally favored over {read,write}_atomic().
Personally I prefer the latter when the goal is to have single insns.

Jan
Roger Pau Monné March 7, 2022, 4:15 p.m. UTC | #4
On Mon, Mar 07, 2022 at 05:11:34PM +0100, Jan Beulich wrote:
> On 07.03.2022 17:06, Roger Pau Monné wrote:
> > On Mon, Mar 07, 2022 at 03:12:32PM +0100, Jan Beulich wrote:
> >> On 07.03.2022 13:53, Roger Pau Monne wrote:
> >>> --- a/xen/drivers/vpci/msix.c
> >>> +++ b/xen/drivers/vpci/msix.c
> >>> @@ -182,6 +182,33 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
> >>>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
> >>>  }
> >>>  
> >>> +static void __iomem *get_pba(struct vpci *vpci)
> >>> +{
> >>> +    struct vpci_msix *msix = vpci->msix;
> >>> +    void __iomem *pba;
> >>> +
> >>> +    /*
> >>> +     * PBA will only be unmapped when the device is deassigned, so access it
> >>> +     * without holding the vpci lock.
> >>> +     */
> >>> +    if ( likely(msix->pba) )
> >>> +        return msix->pba;
> >>> +
> >>> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> >>> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> >>> +    if ( !pba )
> >>> +        return msix->pba;
> >>
> >> For this particular purpose may want to consider using ACCESS_ONCE() for
> >> all accesses to this field.
> > 
> > Hm, I think I've asked before, but we do assume that ACCESS_ONCE will
> > generate a single instruction, or else we would have to use
> > read_atomic.
> 
> Yeah, that looks to be the assumption. It has become my understanding
> that ACCESS_ONCE() is generally favored over {read,write}_atomic().
> Personally I prefer the latter when the goal is to have single insns.

Oh, OK, so I should use 'ACCESS_ONCE(msix->pba) = pba;' rather than
write_atomic then.

Thanks, Roger.
Jan Beulich March 7, 2022, 4:17 p.m. UTC | #5
On 07.03.2022 17:15, Roger Pau Monné wrote:
> On Mon, Mar 07, 2022 at 05:11:34PM +0100, Jan Beulich wrote:
>> On 07.03.2022 17:06, Roger Pau Monné wrote:
>>> On Mon, Mar 07, 2022 at 03:12:32PM +0100, Jan Beulich wrote:
>>>> On 07.03.2022 13:53, Roger Pau Monne wrote:
>>>>> --- a/xen/drivers/vpci/msix.c
>>>>> +++ b/xen/drivers/vpci/msix.c
>>>>> @@ -182,6 +182,33 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>>>>>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>>>>>  }
>>>>>  
>>>>> +static void __iomem *get_pba(struct vpci *vpci)
>>>>> +{
>>>>> +    struct vpci_msix *msix = vpci->msix;
>>>>> +    void __iomem *pba;
>>>>> +
>>>>> +    /*
>>>>> +     * PBA will only be unmapped when the device is deassigned, so access it
>>>>> +     * without holding the vpci lock.
>>>>> +     */
>>>>> +    if ( likely(msix->pba) )
>>>>> +        return msix->pba;
>>>>> +
>>>>> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
>>>>> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
>>>>> +    if ( !pba )
>>>>> +        return msix->pba;
>>>>
>>>> For this particular purpose may want to consider using ACCESS_ONCE() for
>>>> all accesses to this field.
>>>
>>> Hm, I think I've asked before, but we do assume that ACCESS_ONCE will
>>> generate a single instruction, or else we would have to use
>>> read_atomic.
>>
>> Yeah, that looks to be the assumption. It has become my understanding
>> that ACCESS_ONCE() is generally favored over {read,write}_atomic().
>> Personally I prefer the latter when the goal is to have single insns.
> 
> Oh, OK, so I should use 'ACCESS_ONCE(msix->pba) = pba;' rather than
> write_atomic then.

To please others, perhaps. As said, I'd be fine with you using
{read,write}_atomic().

Jan
Roger Pau Monné March 7, 2022, 4:28 p.m. UTC | #6
On Mon, Mar 07, 2022 at 05:17:59PM +0100, Jan Beulich wrote:
> On 07.03.2022 17:15, Roger Pau Monné wrote:
> > On Mon, Mar 07, 2022 at 05:11:34PM +0100, Jan Beulich wrote:
> >> On 07.03.2022 17:06, Roger Pau Monné wrote:
> >>> On Mon, Mar 07, 2022 at 03:12:32PM +0100, Jan Beulich wrote:
> >>>> On 07.03.2022 13:53, Roger Pau Monne wrote:
> >>>>> --- a/xen/drivers/vpci/msix.c
> >>>>> +++ b/xen/drivers/vpci/msix.c
> >>>>> @@ -182,6 +182,33 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
> >>>>>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
> >>>>>  }
> >>>>>  
> >>>>> +static void __iomem *get_pba(struct vpci *vpci)
> >>>>> +{
> >>>>> +    struct vpci_msix *msix = vpci->msix;
> >>>>> +    void __iomem *pba;
> >>>>> +
> >>>>> +    /*
> >>>>> +     * PBA will only be unmapped when the device is deassigned, so access it
> >>>>> +     * without holding the vpci lock.
> >>>>> +     */
> >>>>> +    if ( likely(msix->pba) )
> >>>>> +        return msix->pba;
> >>>>> +
> >>>>> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> >>>>> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> >>>>> +    if ( !pba )
> >>>>> +        return msix->pba;
> >>>>
> >>>> For this particular purpose may want to consider using ACCESS_ONCE() for
> >>>> all accesses to this field.
> >>>
> >>> Hm, I think I've asked before, but we do assume that ACCESS_ONCE will
> >>> generate a single instruction, or else we would have to use
> >>> read_atomic.
> >>
> >> Yeah, that looks to be the assumption. It has become my understanding
> >> that ACCESS_ONCE() is generally favored over {read,write}_atomic().
> >> Personally I prefer the latter when the goal is to have single insns.
> > 
> > Oh, OK, so I should use 'ACCESS_ONCE(msix->pba) = pba;' rather than
> > write_atomic then.
> 
> To please others, perhaps. As said, I'd be fine with you using
> {read,write}_atomic().

Well, given that you are the only one that has provided review I'm
fine with using {read,write}_atomic if that's also your preference.

Thanks, Roger.
diff mbox series

Patch

diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index a1fa7a5f13..fdd406e309 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -182,6 +182,33 @@  static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
     return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
 }
 
+static void __iomem *get_pba(struct vpci *vpci)
+{
+    struct vpci_msix *msix = vpci->msix;
+    void __iomem *pba;
+
+    /*
+     * PBA will only be unmapped when the device is deassigned, so access it
+     * without holding the vpci lock.
+     */
+    if ( likely(msix->pba) )
+        return msix->pba;
+
+    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
+                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
+    if ( !pba )
+        return msix->pba;
+
+    spin_lock(&vpci->lock);
+    if ( !msix->pba )
+        msix->pba = pba;
+    else
+        iounmap(pba);
+    spin_unlock(&vpci->lock);
+
+    return msix->pba;
+}
+
 static int cf_check msix_read(
     struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
 {
@@ -200,6 +227,10 @@  static int cf_check msix_read(
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        void __iomem *pba = get_pba(vpci);
+
         /*
          * Access to PBA.
          *
@@ -207,14 +238,22 @@  static int cf_check msix_read(
          * guest address space. If this changes the address will need to be
          * translated.
          */
+        if ( !pba )
+        {
+            gprintk(XENLOG_WARNING,
+                    "%pp: unable to map MSI-X PBA, report all pending\n",
+                    msix->pdev);
+            return X86EMUL_OKAY;
+        }
+
         switch ( len )
         {
         case 4:
-            *data = readl(addr);
+            *data = readl(pba + idx);
             break;
 
         case 8:
-            *data = readq(addr);
+            *data = readq(pba + idx);
             break;
 
         default:
@@ -275,19 +314,31 @@  static int cf_check msix_write(
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        void __iomem *pba = get_pba(vpci);
 
         if ( !is_hardware_domain(d) )
             /* Ignore writes to PBA for DomUs, it's behavior is undefined. */
             return X86EMUL_OKAY;
 
+        if ( !pba )
+        {
+            /* Unable to map the PBA, ignore write. */
+            gprintk(XENLOG_WARNING,
+                    "%pp: unable to map MSI-X PBA, write ignored\n",
+                    msix->pdev);
+            return X86EMUL_OKAY;
+        }
+
         switch ( len )
         {
         case 4:
-            writel(data, addr);
+            writel(data, pba + idx);
             break;
 
         case 8:
-            writeq(data, addr);
+            writeq(data, pba + idx);
             break;
 
         default:
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index f3b32d66cb..9fb3c05b2b 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -51,6 +51,8 @@  void vpci_remove_device(struct pci_dev *pdev)
         xfree(r);
     }
     spin_unlock(&pdev->vpci->lock);
+    if ( pdev->vpci->msix && pdev->vpci->msix->pba )
+        iounmap(pdev->vpci->msix->pba);
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index bcad1516ae..67c9a0c631 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -127,6 +127,8 @@  struct vpci {
         bool enabled         : 1;
         /* Masked? */
         bool masked          : 1;
+        /* PBA map */
+        void __iomem *pba;
         /* Entries. */
         struct vpci_msix_entry {
             uint64_t addr;