diff mbox series

igb: Add Function Level Reset to PF and VF

Message ID 20230526173035.69055-1-clg@redhat.com (mailing list archive)
State New, archived
Headers show
Series igb: Add Function Level Reset to PF and VF | expand

Commit Message

Cédric Le Goater May 26, 2023, 5:30 p.m. UTC
The Intel 82576EB GbE Controller say that the Physical and Virtual
Functions support Function Level Reset. Add the capability to each
device model.

Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 hw/net/igb.c   | 3 +++
 hw/net/igbvf.c | 3 +++
 2 files changed, 6 insertions(+)

Comments

Sriram Yagnaraman May 28, 2023, 10:50 a.m. UTC | #1
> -----Original Message-----
> From: Cédric Le Goater <clg@redhat.com>
> Sent: Friday, 26 May 2023 19:31
> To: qemu-devel@nongnu.org
> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; Cédric
> Le Goater <clg@redhat.com>
> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
> 
> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
> support Function Level Reset. Add the capability to each device model.
> 
> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
>  hw/net/igb.c   | 3 +++
>  hw/net/igbvf.c | 3 +++
>  2 files changed, 6 insertions(+)
> 
> diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d767725..08e389338dca
> 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t
> addr,
> 
>      trace_igb_write_config(addr, val, len);
>      pci_default_write_config(dev, addr, val, len);
> +    pcie_cap_flr_write_config(dev, addr, val, len);
> 
>      if (range_covers_byte(addr, len, PCI_COMMAND) &&
>          (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>      }
> 
>      /* PCIe extended capabilities (in order) */
> +    pcie_cap_flr_init(pci_dev);
> +
>      if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>          hw_error("Failed to initialize AER capability");
>      }
> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
> 284ea611848b..0a58dad06802 100644
> --- a/hw/net/igbvf.c
> +++ b/hw/net/igbvf.c
> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
> uint32_t addr, uint32_t val,  {
>      trace_igbvf_write_config(addr, val, len);
>      pci_default_write_config(dev, addr, val, len);
> +    pcie_cap_flr_write_config(dev, addr, val, len);
>  }
> 
>  static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
> **errp)
>          hw_error("Failed to initialize PCIe capability");
>      }
> 
> +    pcie_cap_flr_init(dev);

Sorry for my naive question, and perhaps not related to your patch, IGBVF device class doesn't seem to have any reset functions registered via igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), which is probably what we want.

> +
>      if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>          hw_error("Failed to initialize AER capability");
>      }
> --
> 2.40.1
Sriram Yagnaraman May 28, 2023, 11:25 a.m. UTC | #2
> -----Original Message-----
> From: qemu-devel-bounces+sriram.yagnaraman=est.tech@nongnu.org
> <qemu-devel-bounces+sriram.yagnaraman=est.tech@nongnu.org> On Behalf
> Of Sriram Yagnaraman
> Sent: Sunday, 28 May 2023 12:51
> To: Cédric Le Goater <clg@redhat.com>; qemu-devel@nongnu.org
> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Jason Wang
> <jasowang@redhat.com>
> Subject: RE: [PATCH] igb: Add Function Level Reset to PF and VF
> 
> 
> > -----Original Message-----
> > From: Cédric Le Goater <clg@redhat.com>
> > Sent: Friday, 26 May 2023 19:31
> > To: qemu-devel@nongnu.org
> > Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
> > <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>;
> Cédric
> > Le Goater <clg@redhat.com>
> > Subject: [PATCH] igb: Add Function Level Reset to PF and VF
> >
> > The Intel 82576EB GbE Controller say that the Physical and Virtual
> > Functions support Function Level Reset. Add the capability to each device
> model.
> >
> > Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
> > Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> > Signed-off-by: Cédric Le Goater <clg@redhat.com>
> > ---
> >  hw/net/igb.c   | 3 +++
> >  hw/net/igbvf.c | 3 +++
> >  2 files changed, 6 insertions(+)
> >
> > diff --git a/hw/net/igb.c b/hw/net/igb.c index
> > 1c989d767725..08e389338dca
> > 100644
> > --- a/hw/net/igb.c
> > +++ b/hw/net/igb.c
> > @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev,
> > uint32_t addr,
> >
> >      trace_igb_write_config(addr, val, len);
> >      pci_default_write_config(dev, addr, val, len);
> > +    pcie_cap_flr_write_config(dev, addr, val, len);
> >
> >      if (range_covers_byte(addr, len, PCI_COMMAND) &&
> >          (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -
> 427,6
> > +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error
> > +**errp)
> >      }
> >
> >      /* PCIe extended capabilities (in order) */
> > +    pcie_cap_flr_init(pci_dev);
> > +
> >      if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
> >          hw_error("Failed to initialize AER capability");
> >      }
> > diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
> > 284ea611848b..0a58dad06802 100644
> > --- a/hw/net/igbvf.c
> > +++ b/hw/net/igbvf.c
> > @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
> > uint32_t addr, uint32_t val,  {
> >      trace_igbvf_write_config(addr, val, len);
> >      pci_default_write_config(dev, addr, val, len);
> > +    pcie_cap_flr_write_config(dev, addr, val, len);
> >  }
> >
> >  static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned
> > size) @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice
> > *dev, Error
> > **errp)
> >          hw_error("Failed to initialize PCIe capability");
> >      }
> >
> > +    pcie_cap_flr_init(dev);
> 
> Sorry for my naive question, and perhaps not related to your patch, IGBVF
> device class doesn't seem to have any reset functions registered via
> igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), which
> is probably what we want.
> 

Something like this perhaps? Not compile tested, just an idea.
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea61184..9f07983bc9 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -283,9 +283,17 @@ static void igbvf_pci_uninit(PCIDevice *dev)
     msix_uninit(dev, &s->msix, &s->msix);
 }
 
+static void igbvf_qdev_reset_hold(Object *obj)
+{
+    trace_e1000e_cb_qdev_reset_hold();
+
+    igbvf_mmio_write(obj, E1000_CTRL, E1000_CTRL_RST, 0x4); /* Write to VTCTRL to trigger VF reset */
+}
+
 static void igbvf_class_init(ObjectClass *class, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(class);
+    ResettableClass *rc = RESETTABLE_CLASS(class);
     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
 
     c->realize = igbvf_pci_realize;
@@ -295,6 +303,8 @@ static void igbvf_class_init(ObjectClass *class, void *data)
     c->revision = 1;
     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
 
+    rc->phases.hold = igbvf_qdev_reset_hold;
+
     dc->desc = "Intel 82576 Virtual Function";
     dc->user_creatable = false;

> > +
> >      if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
> >          hw_error("Failed to initialize AER capability");
> >      }
> > --
> > 2.40.1
Akihiko Odaki May 29, 2023, 2:45 a.m. UTC | #3
On 2023/05/28 19:50, Sriram Yagnaraman wrote:
> 
>> -----Original Message-----
>> From: Cédric Le Goater <clg@redhat.com>
>> Sent: Friday, 26 May 2023 19:31
>> To: qemu-devel@nongnu.org
>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; Cédric
>> Le Goater <clg@redhat.com>
>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>
>> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
>> support Function Level Reset. Add the capability to each device model.
>>
>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> ---
>>   hw/net/igb.c   | 3 +++
>>   hw/net/igbvf.c | 3 +++
>>   2 files changed, 6 insertions(+)
>>
>> diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d767725..08e389338dca
>> 100644
>> --- a/hw/net/igb.c
>> +++ b/hw/net/igb.c
>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t
>> addr,
>>
>>       trace_igb_write_config(addr, val, len);
>>       pci_default_write_config(dev, addr, val, len);
>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>
>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>>       }
>>
>>       /* PCIe extended capabilities (in order) */
>> +    pcie_cap_flr_init(pci_dev);
>> +
>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>           hw_error("Failed to initialize AER capability");
>>       }
>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>> 284ea611848b..0a58dad06802 100644
>> --- a/hw/net/igbvf.c
>> +++ b/hw/net/igbvf.c
>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
>> uint32_t addr, uint32_t val,  {
>>       trace_igbvf_write_config(addr, val, len);
>>       pci_default_write_config(dev, addr, val, len);
>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>   }
>>
>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
>> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
>> **errp)
>>           hw_error("Failed to initialize PCIe capability");
>>       }
>>
>> +    pcie_cap_flr_init(dev);
> 
> Sorry for my naive question, and perhaps not related to your patch, IGBVF device class doesn't seem to have any reset functions registered via igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), which is probably what we want.

You're right. Advertising FLR capability without implementing it can 
confuse the guest though such probability is quite a low in practice. 
The reset should be implemented first.

> 
>> +
>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>           hw_error("Failed to initialize AER capability");
>>       }
>> --
>> 2.40.1
>
Cédric Le Goater May 29, 2023, 7:01 a.m. UTC | #4
On 5/29/23 04:45, Akihiko Odaki wrote:
> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>
>>> -----Original Message-----
>>> From: Cédric Le Goater <clg@redhat.com>
>>> Sent: Friday, 26 May 2023 19:31
>>> To: qemu-devel@nongnu.org
>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; Cédric
>>> Le Goater <clg@redhat.com>
>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>
>>> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
>>> support Function Level Reset. Add the capability to each device model.
>>>
>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>> ---
>>>   hw/net/igb.c   | 3 +++
>>>   hw/net/igbvf.c | 3 +++
>>>   2 files changed, 6 insertions(+)
>>>
>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d767725..08e389338dca
>>> 100644
>>> --- a/hw/net/igb.c
>>> +++ b/hw/net/igb.c
>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t
>>> addr,
>>>
>>>       trace_igb_write_config(addr, val, len);
>>>       pci_default_write_config(dev, addr, val, len);
>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>
>>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>>>       }
>>>
>>>       /* PCIe extended capabilities (in order) */
>>> +    pcie_cap_flr_init(pci_dev);
>>> +
>>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>           hw_error("Failed to initialize AER capability");
>>>       }
>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>> 284ea611848b..0a58dad06802 100644
>>> --- a/hw/net/igbvf.c
>>> +++ b/hw/net/igbvf.c
>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
>>> uint32_t addr, uint32_t val,  {
>>>       trace_igbvf_write_config(addr, val, len);
>>>       pci_default_write_config(dev, addr, val, len);
>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>   }
>>>
>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
>>> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
>>> **errp)
>>>           hw_error("Failed to initialize PCIe capability");
>>>       }
>>>
>>> +    pcie_cap_flr_init(dev);
>>
>> Sorry for my naive question, and perhaps not related to your patch, IGBVF device class doesn't seem to have any reset functions registered via igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), which is probably what we want.

It does through the VTCTRL registers.

> You're right. Advertising FLR capability without implementing it can confuse the guest though such probability is quite a low in practice. The reset should be implemented first.


I was looking at an issue from a VFIO perspective which does a FLR
on a device when pass through. Software and FLR are equivalent for
a VF.

I am not sure a VF needs more really, since it is all controlled
by the PF.

C.

> 
>>
>>> +
>>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>>           hw_error("Failed to initialize AER capability");
>>>       }
>>> -- 
>>> 2.40.1
>>
>
Akihiko Odaki May 29, 2023, 7:45 a.m. UTC | #5
On 2023/05/29 16:01, Cédric Le Goater wrote:
> On 5/29/23 04:45, Akihiko Odaki wrote:
>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>>
>>>> -----Original Message-----
>>>> From: Cédric Le Goater <clg@redhat.com>
>>>> Sent: Friday, 26 May 2023 19:31
>>>> To: qemu-devel@nongnu.org
>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>>> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; Cédric
>>>> Le Goater <clg@redhat.com>
>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>>
>>>> The Intel 82576EB GbE Controller say that the Physical and Virtual 
>>>> Functions
>>>> support Function Level Reset. Add the capability to each device model.
>>>>
>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>> ---
>>>>   hw/net/igb.c   | 3 +++
>>>>   hw/net/igbvf.c | 3 +++
>>>>   2 files changed, 6 insertions(+)
>>>>
>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index 
>>>> 1c989d767725..08e389338dca
>>>> 100644
>>>> --- a/hw/net/igb.c
>>>> +++ b/hw/net/igb.c
>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, 
>>>> uint32_t
>>>> addr,
>>>>
>>>>       trace_igb_write_config(addr, val, len);
>>>>       pci_default_write_config(dev, addr, val, len);
>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>
>>>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>>>>       }
>>>>
>>>>       /* PCIe extended capabilities (in order) */
>>>> +    pcie_cap_flr_init(pci_dev);
>>>> +
>>>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>>           hw_error("Failed to initialize AER capability");
>>>>       }
>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>>> 284ea611848b..0a58dad06802 100644
>>>> --- a/hw/net/igbvf.c
>>>> +++ b/hw/net/igbvf.c
>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
>>>> uint32_t addr, uint32_t val,  {
>>>>       trace_igbvf_write_config(addr, val, len);
>>>>       pci_default_write_config(dev, addr, val, len);
>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>   }
>>>>
>>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, 
>>>> unsigned size)
>>>> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
>>>> **errp)
>>>>           hw_error("Failed to initialize PCIe capability");
>>>>       }
>>>>
>>>> +    pcie_cap_flr_init(dev);
>>>
>>> Sorry for my naive question, and perhaps not related to your patch, 
>>> IGBVF device class doesn't seem to have any reset functions 
>>> registered via igbvf_class_init(). So, I am guessing an FLR will not 
>>> trigger igb_vf_reset(), which is probably what we want.
> 
> It does through the VTCTRL registers.
> 
>> You're right. Advertising FLR capability without implementing it can 
>> confuse the guest though such probability is quite a low in practice. 
>> The reset should be implemented first.
> 
> 
> I was looking at an issue from a VFIO perspective which does a FLR
> on a device when pass through. Software and FLR are equivalent for
> a VF.

They should be equivalent according to the datasheet, but unfortunately 
current igbvf implementation does nothing when reset. What Sriram 
proposes is to add code to actually write VTCTRL when FLR occurred and 
make FLR and software reset equivalent. And I think that should be done 
before this change; you should advertise FLR capability after the reset 
is actually implemented.

Regards,
Akihiko Odaki

> 
> I am not sure a VF needs more really, since it is all controlled
> by the PF. >
> C.
> 
>>
>>>
>>>> +
>>>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>>>           hw_error("Failed to initialize AER capability");
>>>>       }
>>>> -- 
>>>> 2.40.1
>>>
>>
>
Cédric Le Goater May 29, 2023, 3:07 p.m. UTC | #6
On 5/29/23 09:45, Akihiko Odaki wrote:
> On 2023/05/29 16:01, Cédric Le Goater wrote:
>> On 5/29/23 04:45, Akihiko Odaki wrote:
>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>>>
>>>>> -----Original Message-----
>>>>> From: Cédric Le Goater <clg@redhat.com>
>>>>> Sent: Friday, 26 May 2023 19:31
>>>>> To: qemu-devel@nongnu.org
>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>>>> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; Cédric
>>>>> Le Goater <clg@redhat.com>
>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>>>
>>>>> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
>>>>> support Function Level Reset. Add the capability to each device model.
>>>>>
>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>> ---
>>>>>   hw/net/igb.c   | 3 +++
>>>>>   hw/net/igbvf.c | 3 +++
>>>>>   2 files changed, 6 insertions(+)
>>>>>
>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d767725..08e389338dca
>>>>> 100644
>>>>> --- a/hw/net/igb.c
>>>>> +++ b/hw/net/igb.c
>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t
>>>>> addr,
>>>>>
>>>>>       trace_igb_write_config(addr, val, len);
>>>>>       pci_default_write_config(dev, addr, val, len);
>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>
>>>>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>>>>>       }
>>>>>
>>>>>       /* PCIe extended capabilities (in order) */
>>>>> +    pcie_cap_flr_init(pci_dev);
>>>>> +
>>>>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>           hw_error("Failed to initialize AER capability");
>>>>>       }
>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>>>> 284ea611848b..0a58dad06802 100644
>>>>> --- a/hw/net/igbvf.c
>>>>> +++ b/hw/net/igbvf.c
>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
>>>>> uint32_t addr, uint32_t val,  {
>>>>>       trace_igbvf_write_config(addr, val, len);
>>>>>       pci_default_write_config(dev, addr, val, len);
>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>   }
>>>>>
>>>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
>>>>> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
>>>>> **errp)
>>>>>           hw_error("Failed to initialize PCIe capability");
>>>>>       }
>>>>>
>>>>> +    pcie_cap_flr_init(dev);
>>>>
>>>> Sorry for my naive question, and perhaps not related to your patch, IGBVF device class doesn't seem to have any reset functions registered via igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), which is probably what we want.
>>
>> It does through the VTCTRL registers.
>>
>>> You're right. Advertising FLR capability without implementing it can confuse the guest though such probability is quite a low in practice. The reset should be implemented first.
>>
>>
>> I was looking at an issue from a VFIO perspective which does a FLR
>> on a device when pass through. Software and FLR are equivalent for
>> a VF.
> 
> They should be equivalent according to the datasheet, but unfortunately current igbvf implementation does nothing when reset. What Sriram proposes is to add code to actually write VTCTRL when FLR occurred and make FLR and software reset equivalent. 
> And I think that should be done before this change; you should advertise FLR capability after the reset is actually implemented.


AFAICT, the VFs are reset correctly by the OS when created or probed and
by QEMU when they are passthrough in a nested guest OS (with this patch).
igb_vf_reset() is clearly called in QEMU, see routine e1000_reset_hw_vf()
in Linux.

I don't think a reset op is necessary because VFs are software constructs
but I don't mind really. If so, then, I wouldn't mimic what the OS does
by writing the RST bit in the CTRL register of the VF, I would simply
install igb_vf_reset() as a reset handler.

Thanks,

C.



> 
> Regards,
> Akihiko Odaki
> 
>>
>> I am not sure a VF needs more really, since it is all controlled
>> by the PF. >
>> C.
>>
>>>
>>>>
>>>>> +
>>>>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>           hw_error("Failed to initialize AER capability");
>>>>>       }
>>>>> -- 
>>>>> 2.40.1
>>>>
>>>
>>
>
Akihiko Odaki May 30, 2023, 2:02 a.m. UTC | #7
On 2023/05/30 0:07, Cédric Le Goater wrote:
> On 5/29/23 09:45, Akihiko Odaki wrote:
>> On 2023/05/29 16:01, Cédric Le Goater wrote:
>>> On 5/29/23 04:45, Akihiko Odaki wrote:
>>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Cédric Le Goater <clg@redhat.com>
>>>>>> Sent: Friday, 26 May 2023 19:31
>>>>>> To: qemu-devel@nongnu.org
>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>>>>> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; 
>>>>>> Cédric
>>>>>> Le Goater <clg@redhat.com>
>>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>>>>
>>>>>> The Intel 82576EB GbE Controller say that the Physical and Virtual 
>>>>>> Functions
>>>>>> support Function Level Reset. Add the capability to each device 
>>>>>> model.
>>>>>>
>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>> ---
>>>>>>   hw/net/igb.c   | 3 +++
>>>>>>   hw/net/igbvf.c | 3 +++
>>>>>>   2 files changed, 6 insertions(+)
>>>>>>
>>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index 
>>>>>> 1c989d767725..08e389338dca
>>>>>> 100644
>>>>>> --- a/hw/net/igb.c
>>>>>> +++ b/hw/net/igb.c
>>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, 
>>>>>> uint32_t
>>>>>> addr,
>>>>>>
>>>>>>       trace_igb_write_config(addr, val, len);
>>>>>>       pci_default_write_config(dev, addr, val, len);
>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>
>>>>>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>>>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ 
>>>>>> -427,6
>>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error 
>>>>>> **errp)
>>>>>>       }
>>>>>>
>>>>>>       /* PCIe extended capabilities (in order) */
>>>>>> +    pcie_cap_flr_init(pci_dev);
>>>>>> +
>>>>>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>           hw_error("Failed to initialize AER capability");
>>>>>>       }
>>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>>>>> 284ea611848b..0a58dad06802 100644
>>>>>> --- a/hw/net/igbvf.c
>>>>>> +++ b/hw/net/igbvf.c
>>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
>>>>>> uint32_t addr, uint32_t val,  {
>>>>>>       trace_igbvf_write_config(addr, val, len);
>>>>>>       pci_default_write_config(dev, addr, val, len);
>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>   }
>>>>>>
>>>>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, 
>>>>>> unsigned size)
>>>>>> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, 
>>>>>> Error
>>>>>> **errp)
>>>>>>           hw_error("Failed to initialize PCIe capability");
>>>>>>       }
>>>>>>
>>>>>> +    pcie_cap_flr_init(dev);
>>>>>
>>>>> Sorry for my naive question, and perhaps not related to your patch, 
>>>>> IGBVF device class doesn't seem to have any reset functions 
>>>>> registered via igbvf_class_init(). So, I am guessing an FLR will 
>>>>> not trigger igb_vf_reset(), which is probably what we want.
>>>
>>> It does through the VTCTRL registers.
>>>
>>>> You're right. Advertising FLR capability without implementing it can 
>>>> confuse the guest though such probability is quite a low in 
>>>> practice. The reset should be implemented first.
>>>
>>>
>>> I was looking at an issue from a VFIO perspective which does a FLR
>>> on a device when pass through. Software and FLR are equivalent for
>>> a VF.
>>
>> They should be equivalent according to the datasheet, but 
>> unfortunately current igbvf implementation does nothing when reset. 
>> What Sriram proposes is to add code to actually write VTCTRL when FLR 
>> occurred and make FLR and software reset equivalent. And I think that 
>> should be done before this change; you should advertise FLR capability 
>> after the reset is actually implemented.
> 
> 
> AFAICT, the VFs are reset correctly by the OS when created or probed and
> by QEMU when they are passthrough in a nested guest OS (with this patch).
> igb_vf_reset() is clearly called in QEMU, see routine e1000_reset_hw_vf()
> in Linux.

I don't think this patch makes difference for e1000_reset_hw_vf() as it 
does not rely on FLR.

> 
> I don't think a reset op is necessary because VFs are software constructs
> but I don't mind really. If so, then, I wouldn't mimic what the OS does
> by writing the RST bit in the CTRL register of the VF, I would simply
> install igb_vf_reset() as a reset handler.

Thinking about the reason why VFIO performs FLR, probably VFIO expects 
the FLR clears all of states the kernel set to prevent the VF from 
leaking kernel addresses or addresses of other user space which the VF 
was assigned to in the past, for example.

Implementing the reset operation is not necessary to make it function 
but to make it secure, particularly we promise the guest that we clear 
the VF state by advertising FLR.

Regards,
Akihiko Odaki

> 
> Thanks,
> 
> C.
> 
> 
> 
>>
>> Regards,
>> Akihiko Odaki
>>
>>>
>>> I am not sure a VF needs more really, since it is all controlled
>>> by the PF. >
>>> C.
>>>
>>>>
>>>>>
>>>>>> +
>>>>>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>           hw_error("Failed to initialize AER capability");
>>>>>>       }
>>>>>> -- 
>>>>>> 2.40.1
>>>>>
>>>>
>>>
>>
>
Sriram Yagnaraman May 30, 2023, 8:30 a.m. UTC | #8
> -----Original Message-----
> From: Akihiko Odaki <akihiko.odaki@daynix.com>
> Sent: Tuesday, 30 May 2023 04:02
> To: Cédric Le Goater <clg@redhat.com>; Sriram Yagnaraman
> <sriram.yagnaraman@est.tech>; qemu-devel@nongnu.org
> Cc: Jason Wang <jasowang@redhat.com>
> Subject: Re: [PATCH] igb: Add Function Level Reset to PF and VF
> 
> On 2023/05/30 0:07, Cédric Le Goater wrote:
> > On 5/29/23 09:45, Akihiko Odaki wrote:
> >> On 2023/05/29 16:01, Cédric Le Goater wrote:
> >>> On 5/29/23 04:45, Akihiko Odaki wrote:
> >>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Cédric Le Goater <clg@redhat.com>
> >>>>>> Sent: Friday, 26 May 2023 19:31
> >>>>>> To: qemu-devel@nongnu.org
> >>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
> >>>>>> <sriram.yagnaraman@est.tech>; Jason Wang
> <jasowang@redhat.com>;
> >>>>>> Cédric Le Goater <clg@redhat.com>
> >>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
> >>>>>>
> >>>>>> The Intel 82576EB GbE Controller say that the Physical and
> >>>>>> Virtual Functions support Function Level Reset. Add the
> >>>>>> capability to each device model.
> >>>>>>
> >>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
> >>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> >>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> >>>>>> ---
> >>>>>>   hw/net/igb.c   | 3 +++
> >>>>>>   hw/net/igbvf.c | 3 +++
> >>>>>>   2 files changed, 6 insertions(+)
> >>>>>>
> >>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index
> >>>>>> 1c989d767725..08e389338dca
> >>>>>> 100644
> >>>>>> --- a/hw/net/igb.c
> >>>>>> +++ b/hw/net/igb.c
> >>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev,
> >>>>>> uint32_t addr,
> >>>>>>
> >>>>>>       trace_igb_write_config(addr, val, len);
> >>>>>>       pci_default_write_config(dev, addr, val, len);
> >>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
> >>>>>>
> >>>>>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
> >>>>>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> @@
> >>>>>> -427,6
> >>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error
> >>>>>> **errp)
> >>>>>>       }
> >>>>>>
> >>>>>>       /* PCIe extended capabilities (in order) */
> >>>>>> +    pcie_cap_flr_init(pci_dev);
> >>>>>> +
> >>>>>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
> >>>>>>           hw_error("Failed to initialize AER capability");
> >>>>>>       }
> >>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
> >>>>>> 284ea611848b..0a58dad06802 100644
> >>>>>> --- a/hw/net/igbvf.c
> >>>>>> +++ b/hw/net/igbvf.c
> >>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice
> >>>>>> *dev, uint32_t addr, uint32_t val,  {
> >>>>>>       trace_igbvf_write_config(addr, val, len);
> >>>>>>       pci_default_write_config(dev, addr, val, len);
> >>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
> >>>>>>   }
> >>>>>>
> >>>>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr,
> >>>>>> unsigned size) @@ -266,6 +267,8 @@ static void
> >>>>>> igbvf_pci_realize(PCIDevice *dev, Error
> >>>>>> **errp)
> >>>>>>           hw_error("Failed to initialize PCIe capability");
> >>>>>>       }
> >>>>>>
> >>>>>> +    pcie_cap_flr_init(dev);
> >>>>>
> >>>>> Sorry for my naive question, and perhaps not related to your
> >>>>> patch, IGBVF device class doesn't seem to have any reset functions
> >>>>> registered via igbvf_class_init(). So, I am guessing an FLR will
> >>>>> not trigger igb_vf_reset(), which is probably what we want.
> >>>
> >>> It does through the VTCTRL registers.
> >>>
> >>>> You're right. Advertising FLR capability without implementing it
> >>>> can confuse the guest though such probability is quite a low in
> >>>> practice. The reset should be implemented first.
> >>>
> >>>
> >>> I was looking at an issue from a VFIO perspective which does a FLR
> >>> on a device when pass through. Software and FLR are equivalent for a
> >>> VF.
> >>
> >> They should be equivalent according to the datasheet, but
> >> unfortunately current igbvf implementation does nothing when reset.
> >> What Sriram proposes is to add code to actually write VTCTRL when FLR
> >> occurred and make FLR and software reset equivalent. And I think that
> >> should be done before this change; you should advertise FLR
> >> capability after the reset is actually implemented.
> >
> >
> > AFAICT, the VFs are reset correctly by the OS when created or probed
> > and by QEMU when they are passthrough in a nested guest OS (with this
> patch).
> > igb_vf_reset() is clearly called in QEMU, see routine
> > e1000_reset_hw_vf() in Linux.
> 
> I don't think this patch makes difference for e1000_reset_hw_vf() as it does not
> rely on FLR.
> 
> >
> > I don't think a reset op is necessary because VFs are software
> > constructs but I don't mind really. If so, then, I wouldn't mimic what
> > the OS does by writing the RST bit in the CTRL register of the VF, I
> > would simply install igb_vf_reset() as a reset handler.
> 
> Thinking about the reason why VFIO performs FLR, probably VFIO expects the
> FLR clears all of states the kernel set to prevent the VF from leaking kernel
> addresses or addresses of other user space which the VF was assigned to in the
> past, for example.
> 
> Implementing the reset operation is not necessary to make it function but to
> make it secure, particularly we promise the guest that we clear the VF state by
> advertising FLR.
> 
> Regards,
> Akihiko Odaki
> 

I did some digging, and I can see that the linux igbvf device driver registers for FLR and performs a SW reset anyhow.
https://lore.kernel.org/all/20230301105706.547921-1-kamil.maziarz@intel.com/
I have not checked what the other drivers do though, I can send a patch if you think it is worth having a reset operation on the igbvf device. 

> >
> > Thanks,
> >
> > C.
> >
> >
> >
> >>
> >> Regards,
> >> Akihiko Odaki
> >>
> >>>
> >>> I am not sure a VF needs more really, since it is all controlled by
> >>> the PF. > C.
> >>>
> >>>>
> >>>>>
> >>>>>> +
> >>>>>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
> >>>>>>           hw_error("Failed to initialize AER capability");
> >>>>>>       }
> >>>>>> --
> >>>>>> 2.40.1
> >>>>>
> >>>>
> >>>
> >>
> >
Akihiko Odaki May 30, 2023, 12:30 p.m. UTC | #9
On 2023/05/30 17:30, Sriram Yagnaraman wrote:
> 
> 
>> -----Original Message-----
>> From: Akihiko Odaki <akihiko.odaki@daynix.com>
>> Sent: Tuesday, 30 May 2023 04:02
>> To: Cédric Le Goater <clg@redhat.com>; Sriram Yagnaraman
>> <sriram.yagnaraman@est.tech>; qemu-devel@nongnu.org
>> Cc: Jason Wang <jasowang@redhat.com>
>> Subject: Re: [PATCH] igb: Add Function Level Reset to PF and VF
>>
>> On 2023/05/30 0:07, Cédric Le Goater wrote:
>>> On 5/29/23 09:45, Akihiko Odaki wrote:
>>>> On 2023/05/29 16:01, Cédric Le Goater wrote:
>>>>> On 5/29/23 04:45, Akihiko Odaki wrote:
>>>>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: Cédric Le Goater <clg@redhat.com>
>>>>>>>> Sent: Friday, 26 May 2023 19:31
>>>>>>>> To: qemu-devel@nongnu.org
>>>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>>>>>>> <sriram.yagnaraman@est.tech>; Jason Wang
>> <jasowang@redhat.com>;
>>>>>>>> Cédric Le Goater <clg@redhat.com>
>>>>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>>>>>>
>>>>>>>> The Intel 82576EB GbE Controller say that the Physical and
>>>>>>>> Virtual Functions support Function Level Reset. Add the
>>>>>>>> capability to each device model.
>>>>>>>>
>>>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>>>> ---
>>>>>>>>    hw/net/igb.c   | 3 +++
>>>>>>>>    hw/net/igbvf.c | 3 +++
>>>>>>>>    2 files changed, 6 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index
>>>>>>>> 1c989d767725..08e389338dca
>>>>>>>> 100644
>>>>>>>> --- a/hw/net/igb.c
>>>>>>>> +++ b/hw/net/igb.c
>>>>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev,
>>>>>>>> uint32_t addr,
>>>>>>>>
>>>>>>>>        trace_igb_write_config(addr, val, len);
>>>>>>>>        pci_default_write_config(dev, addr, val, len);
>>>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>>>
>>>>>>>>        if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>>>>>>            (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
>> @@
>>>>>>>> -427,6
>>>>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error
>>>>>>>> **errp)
>>>>>>>>        }
>>>>>>>>
>>>>>>>>        /* PCIe extended capabilities (in order) */
>>>>>>>> +    pcie_cap_flr_init(pci_dev);
>>>>>>>> +
>>>>>>>>        if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>>>            hw_error("Failed to initialize AER capability");
>>>>>>>>        }
>>>>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>>>>>>> 284ea611848b..0a58dad06802 100644
>>>>>>>> --- a/hw/net/igbvf.c
>>>>>>>> +++ b/hw/net/igbvf.c
>>>>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice
>>>>>>>> *dev, uint32_t addr, uint32_t val,  {
>>>>>>>>        trace_igbvf_write_config(addr, val, len);
>>>>>>>>        pci_default_write_config(dev, addr, val, len);
>>>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>>>    }
>>>>>>>>
>>>>>>>>    static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr,
>>>>>>>> unsigned size) @@ -266,6 +267,8 @@ static void
>>>>>>>> igbvf_pci_realize(PCIDevice *dev, Error
>>>>>>>> **errp)
>>>>>>>>            hw_error("Failed to initialize PCIe capability");
>>>>>>>>        }
>>>>>>>>
>>>>>>>> +    pcie_cap_flr_init(dev);
>>>>>>>
>>>>>>> Sorry for my naive question, and perhaps not related to your
>>>>>>> patch, IGBVF device class doesn't seem to have any reset functions
>>>>>>> registered via igbvf_class_init(). So, I am guessing an FLR will
>>>>>>> not trigger igb_vf_reset(), which is probably what we want.
>>>>>
>>>>> It does through the VTCTRL registers.
>>>>>
>>>>>> You're right. Advertising FLR capability without implementing it
>>>>>> can confuse the guest though such probability is quite a low in
>>>>>> practice. The reset should be implemented first.
>>>>>
>>>>>
>>>>> I was looking at an issue from a VFIO perspective which does a FLR
>>>>> on a device when pass through. Software and FLR are equivalent for a
>>>>> VF.
>>>>
>>>> They should be equivalent according to the datasheet, but
>>>> unfortunately current igbvf implementation does nothing when reset.
>>>> What Sriram proposes is to add code to actually write VTCTRL when FLR
>>>> occurred and make FLR and software reset equivalent. And I think that
>>>> should be done before this change; you should advertise FLR
>>>> capability after the reset is actually implemented.
>>>
>>>
>>> AFAICT, the VFs are reset correctly by the OS when created or probed
>>> and by QEMU when they are passthrough in a nested guest OS (with this
>> patch).
>>> igb_vf_reset() is clearly called in QEMU, see routine
>>> e1000_reset_hw_vf() in Linux.
>>
>> I don't think this patch makes difference for e1000_reset_hw_vf() as it does not
>> rely on FLR.
>>
>>>
>>> I don't think a reset op is necessary because VFs are software
>>> constructs but I don't mind really. If so, then, I wouldn't mimic what
>>> the OS does by writing the RST bit in the CTRL register of the VF, I
>>> would simply install igb_vf_reset() as a reset handler.
>>
>> Thinking about the reason why VFIO performs FLR, probably VFIO expects the
>> FLR clears all of states the kernel set to prevent the VF from leaking kernel
>> addresses or addresses of other user space which the VF was assigned to in the
>> past, for example.
>>
>> Implementing the reset operation is not necessary to make it function but to
>> make it secure, particularly we promise the guest that we clear the VF state by
>> advertising FLR.
>>
>> Regards,
>> Akihiko Odaki
>>
> 
> I did some digging, and I can see that the linux igbvf device driver registers for FLR and performs a SW reset anyhow.
> https://lore.kernel.org/all/20230301105706.547921-1-kamil.maziarz@intel.com/

The register function in the Linux driver should be considered as 
something different from FLR. FLR we have discussed is a PCIe capability 
that the hardware advertises.

> I have not checked what the other drivers do though, I can send a patch if you think it is worth having a reset operation on the igbvf device.

I think it's better if Cédric writes such a patch and place it before 
the patch to advertise FLR in a series. It will be easier to make the 
patches in order this way.

Regards,
Akihiko Odaki

> 
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>
>>>>
>>>> Regards,
>>>> Akihiko Odaki
>>>>
>>>>>
>>>>> I am not sure a VF needs more really, since it is all controlled by
>>>>> the PF. > C.
>>>>>
>>>>>>
>>>>>>>
>>>>>>>> +
>>>>>>>>        if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>>>            hw_error("Failed to initialize AER capability");
>>>>>>>>        }
>>>>>>>> --
>>>>>>>> 2.40.1
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
Cédric Le Goater May 30, 2023, 2:56 p.m. UTC | #10
On 5/30/23 04:02, Akihiko Odaki wrote:
> On 2023/05/30 0:07, Cédric Le Goater wrote:
>> On 5/29/23 09:45, Akihiko Odaki wrote:
>>> On 2023/05/29 16:01, Cédric Le Goater wrote:
>>>> On 5/29/23 04:45, Akihiko Odaki wrote:
>>>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: Cédric Le Goater <clg@redhat.com>
>>>>>>> Sent: Friday, 26 May 2023 19:31
>>>>>>> To: qemu-devel@nongnu.org
>>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>>>>>> <sriram.yagnaraman@est.tech>; Jason Wang <jasowang@redhat.com>; Cédric
>>>>>>> Le Goater <clg@redhat.com>
>>>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>>>>>
>>>>>>> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
>>>>>>> support Function Level Reset. Add the capability to each device model.
>>>>>>>
>>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>>> ---
>>>>>>>   hw/net/igb.c   | 3 +++
>>>>>>>   hw/net/igbvf.c | 3 +++
>>>>>>>   2 files changed, 6 insertions(+)
>>>>>>>
>>>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d767725..08e389338dca
>>>>>>> 100644
>>>>>>> --- a/hw/net/igb.c
>>>>>>> +++ b/hw/net/igb.c
>>>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t
>>>>>>> addr,
>>>>>>>
>>>>>>>       trace_igb_write_config(addr, val, len);
>>>>>>>       pci_default_write_config(dev, addr, val, len);
>>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>>
>>>>>>>       if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>>>>>           (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
>>>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>>>>>>>       }
>>>>>>>
>>>>>>>       /* PCIe extended capabilities (in order) */
>>>>>>> +    pcie_cap_flr_init(pci_dev);
>>>>>>> +
>>>>>>>       if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>>           hw_error("Failed to initialize AER capability");
>>>>>>>       }
>>>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>>>>>> 284ea611848b..0a58dad06802 100644
>>>>>>> --- a/hw/net/igbvf.c
>>>>>>> +++ b/hw/net/igbvf.c
>>>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
>>>>>>> uint32_t addr, uint32_t val,  {
>>>>>>>       trace_igbvf_write_config(addr, val, len);
>>>>>>>       pci_default_write_config(dev, addr, val, len);
>>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>>   }
>>>>>>>
>>>>>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
>>>>>>> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
>>>>>>> **errp)
>>>>>>>           hw_error("Failed to initialize PCIe capability");
>>>>>>>       }
>>>>>>>
>>>>>>> +    pcie_cap_flr_init(dev);
>>>>>>
>>>>>> Sorry for my naive question, and perhaps not related to your patch, IGBVF device class doesn't seem to have any reset functions registered via igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), which is probably what we want.
>>>>
>>>> It does through the VTCTRL registers.
>>>>
>>>>> You're right. Advertising FLR capability without implementing it can confuse the guest though such probability is quite a low in practice. The reset should be implemented first.
>>>>
>>>>
>>>> I was looking at an issue from a VFIO perspective which does a FLR
>>>> on a device when pass through. Software and FLR are equivalent for
>>>> a VF.
>>>
>>> They should be equivalent according to the datasheet, but unfortunately current igbvf implementation does nothing when reset. What Sriram proposes is to add code to actually write VTCTRL when FLR occurred and make FLR and software reset equivalent. And I think that should be done before this change; you should advertise FLR capability after the reset is actually implemented.
>>
>>
>> AFAICT, the VFs are reset correctly by the OS when created or probed and
>> by QEMU when they are passthrough in a nested guest OS (with this patch).
>> igb_vf_reset() is clearly called in QEMU, see routine e1000_reset_hw_vf()
>> in Linux.
> 
> I don't think this patch makes difference for e1000_reset_hw_vf() as it does not rely on FLR.
> 
>>
>> I don't think a reset op is necessary because VFs are software constructs
>> but I don't mind really. If so, then, I wouldn't mimic what the OS does
>> by writing the RST bit in the CTRL register of the VF, I would simply
>> install igb_vf_reset() as a reset handler.
> 
> Thinking about the reason why VFIO performs FLR, probably VFIO expects the FLR clears all of states the kernel set to prevent the VF from leaking kernel addresses or addresses of other user space which the VF was assigned to in the past, for example.

yes and if the FLR cap is not present, VFIO tries to do a custom bus reset
which fails in an ugly way since the PF can not be reset.

Thanks

C.


> 
> Implementing the reset operation is not necessary to make it function but to make it secure, particularly we promise the guest that we clear the VF state by advertising FLR.
> 
> Regards,
> Akihiko Odaki
> 
>>
>> Thanks,
>>
>> C.
>>
>>
>>
>>>
>>> Regards,
>>> Akihiko Odaki
>>>
>>>>
>>>> I am not sure a VF needs more really, since it is all controlled
>>>> by the PF. >
>>>> C.
>>>>
>>>>>
>>>>>>
>>>>>>> +
>>>>>>>       if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>>           hw_error("Failed to initialize AER capability");
>>>>>>>       }
>>>>>>> -- 
>>>>>>> 2.40.1
>>>>>>
>>>>>
>>>>
>>>
>>
>
Cédric Le Goater May 30, 2023, 3:05 p.m. UTC | #11
On 5/30/23 14:30, Akihiko Odaki wrote:
> On 2023/05/30 17:30, Sriram Yagnaraman wrote:
>>
>>
>>> -----Original Message-----
>>> From: Akihiko Odaki <akihiko.odaki@daynix.com>
>>> Sent: Tuesday, 30 May 2023 04:02
>>> To: Cédric Le Goater <clg@redhat.com>; Sriram Yagnaraman
>>> <sriram.yagnaraman@est.tech>; qemu-devel@nongnu.org
>>> Cc: Jason Wang <jasowang@redhat.com>
>>> Subject: Re: [PATCH] igb: Add Function Level Reset to PF and VF
>>>
>>> On 2023/05/30 0:07, Cédric Le Goater wrote:
>>>> On 5/29/23 09:45, Akihiko Odaki wrote:
>>>>> On 2023/05/29 16:01, Cédric Le Goater wrote:
>>>>>> On 5/29/23 04:45, Akihiko Odaki wrote:
>>>>>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: Cédric Le Goater <clg@redhat.com>
>>>>>>>>> Sent: Friday, 26 May 2023 19:31
>>>>>>>>> To: qemu-devel@nongnu.org
>>>>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>; Sriram Yagnaraman
>>>>>>>>> <sriram.yagnaraman@est.tech>; Jason Wang
>>> <jasowang@redhat.com>;
>>>>>>>>> Cédric Le Goater <clg@redhat.com>
>>>>>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
>>>>>>>>>
>>>>>>>>> The Intel 82576EB GbE Controller say that the Physical and
>>>>>>>>> Virtual Functions support Function Level Reset. Add the
>>>>>>>>> capability to each device model.
>>>>>>>>>
>>>>>>>>> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>>>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
>>>>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>>>>> ---
>>>>>>>>>    hw/net/igb.c   | 3 +++
>>>>>>>>>    hw/net/igbvf.c | 3 +++
>>>>>>>>>    2 files changed, 6 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index
>>>>>>>>> 1c989d767725..08e389338dca
>>>>>>>>> 100644
>>>>>>>>> --- a/hw/net/igb.c
>>>>>>>>> +++ b/hw/net/igb.c
>>>>>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev,
>>>>>>>>> uint32_t addr,
>>>>>>>>>
>>>>>>>>>        trace_igb_write_config(addr, val, len);
>>>>>>>>>        pci_default_write_config(dev, addr, val, len);
>>>>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>>>>
>>>>>>>>>        if (range_covers_byte(addr, len, PCI_COMMAND) &&
>>>>>>>>>            (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
>>> @@
>>>>>>>>> -427,6
>>>>>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error
>>>>>>>>> **errp)
>>>>>>>>>        }
>>>>>>>>>
>>>>>>>>>        /* PCIe extended capabilities (in order) */
>>>>>>>>> +    pcie_cap_flr_init(pci_dev);
>>>>>>>>> +
>>>>>>>>>        if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>>>>>>>>>            hw_error("Failed to initialize AER capability");
>>>>>>>>>        }
>>>>>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
>>>>>>>>> 284ea611848b..0a58dad06802 100644
>>>>>>>>> --- a/hw/net/igbvf.c
>>>>>>>>> +++ b/hw/net/igbvf.c
>>>>>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice
>>>>>>>>> *dev, uint32_t addr, uint32_t val,  {
>>>>>>>>>        trace_igbvf_write_config(addr, val, len);
>>>>>>>>>        pci_default_write_config(dev, addr, val, len);
>>>>>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
>>>>>>>>>    }
>>>>>>>>>
>>>>>>>>>    static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr,
>>>>>>>>> unsigned size) @@ -266,6 +267,8 @@ static void
>>>>>>>>> igbvf_pci_realize(PCIDevice *dev, Error
>>>>>>>>> **errp)
>>>>>>>>>            hw_error("Failed to initialize PCIe capability");
>>>>>>>>>        }
>>>>>>>>>
>>>>>>>>> +    pcie_cap_flr_init(dev);
>>>>>>>>
>>>>>>>> Sorry for my naive question, and perhaps not related to your
>>>>>>>> patch, IGBVF device class doesn't seem to have any reset functions
>>>>>>>> registered via igbvf_class_init(). So, I am guessing an FLR will
>>>>>>>> not trigger igb_vf_reset(), which is probably what we want.
>>>>>>
>>>>>> It does through the VTCTRL registers.
>>>>>>
>>>>>>> You're right. Advertising FLR capability without implementing it
>>>>>>> can confuse the guest though such probability is quite a low in
>>>>>>> practice. The reset should be implemented first.
>>>>>>
>>>>>>
>>>>>> I was looking at an issue from a VFIO perspective which does a FLR
>>>>>> on a device when pass through. Software and FLR are equivalent for a
>>>>>> VF.
>>>>>
>>>>> They should be equivalent according to the datasheet, but
>>>>> unfortunately current igbvf implementation does nothing when reset.
>>>>> What Sriram proposes is to add code to actually write VTCTRL when FLR
>>>>> occurred and make FLR and software reset equivalent. And I think that
>>>>> should be done before this change; you should advertise FLR
>>>>> capability after the reset is actually implemented.
>>>>
>>>>
>>>> AFAICT, the VFs are reset correctly by the OS when created or probed
>>>> and by QEMU when they are passthrough in a nested guest OS (with this
>>> patch).
>>>> igb_vf_reset() is clearly called in QEMU, see routine
>>>> e1000_reset_hw_vf() in Linux.
>>>
>>> I don't think this patch makes difference for e1000_reset_hw_vf() as it does not
>>> rely on FLR.
>>>
>>>>
>>>> I don't think a reset op is necessary because VFs are software
>>>> constructs but I don't mind really. If so, then, I wouldn't mimic what
>>>> the OS does by writing the RST bit in the CTRL register of the VF, I
>>>> would simply install igb_vf_reset() as a reset handler.
>>>
>>> Thinking about the reason why VFIO performs FLR, probably VFIO expects the
>>> FLR clears all of states the kernel set to prevent the VF from leaking kernel
>>> addresses or addresses of other user space which the VF was assigned to in the
>>> past, for example.
>>>
>>> Implementing the reset operation is not necessary to make it function but to
>>> make it secure, particularly we promise the guest that we clear the VF state by
>>> advertising FLR.
>>>
>>> Regards,
>>> Akihiko Odaki
>>>
>>
>> I did some digging, and I can see that the linux igbvf device driver registers for FLR and performs a SW reset anyhow.
>> https://lore.kernel.org/all/20230301105706.547921-1-kamil.maziarz@intel.com/
> 
> The register function in the Linux driver should be considered as something different from FLR. FLR we have discussed is a PCIe capability that the hardware advertises.
> 
>> I have not checked what the other drivers do though, I can send a patch if you think it is worth having a reset operation on the igbvf device.
> 
> I think it's better if Cédric writes such a patch and place it before the patch to advertise FLR in a series. It will be easier to make the patches in order this way.

ok. Will do.

Thanks,

C.
diff mbox series

Patch

diff --git a/hw/net/igb.c b/hw/net/igb.c
index 1c989d767725..08e389338dca 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -101,6 +101,7 @@  static void igb_write_config(PCIDevice *dev, uint32_t addr,
 
     trace_igb_write_config(addr, val, len);
     pci_default_write_config(dev, addr, val, len);
+    pcie_cap_flr_write_config(dev, addr, val, len);
 
     if (range_covers_byte(addr, len, PCI_COMMAND) &&
         (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
@@ -427,6 +428,8 @@  static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
     }
 
     /* PCIe extended capabilities (in order) */
+    pcie_cap_flr_init(pci_dev);
+
     if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
         hw_error("Failed to initialize AER capability");
     }
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea611848b..0a58dad06802 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -204,6 +204,7 @@  static void igbvf_write_config(PCIDevice *dev, uint32_t addr, uint32_t val,
 {
     trace_igbvf_write_config(addr, val, len);
     pci_default_write_config(dev, addr, val, len);
+    pcie_cap_flr_write_config(dev, addr, val, len);
 }
 
 static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
@@ -266,6 +267,8 @@  static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
         hw_error("Failed to initialize PCIe capability");
     }
 
+    pcie_cap_flr_init(dev);
+
     if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
         hw_error("Failed to initialize AER capability");
     }