diff mbox

[RFC,05/12] IXGBE: Add new sysfs interface of "notify_vf"

Message ID 1445445464-5056-6-git-send-email-tianyu.lan@intel.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

lan,Tianyu Oct. 21, 2015, 4:37 p.m. UTC
This patch adds a new sysfs interface, "notify_vf", under the sysfs
directory of the VF PCI device so that Qemu can notify the VF when the
migration status changes.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 30 ++++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h  |  4 ++++
 2 files changed, 34 insertions(+)

Comments

Alexander Duyck Oct. 21, 2015, 8:52 p.m. UTC | #1
On 10/21/2015 09:37 AM, Lan Tianyu wrote:
> This patch is to add new sysfs interface of "notify_vf" under sysfs
> directory of VF PCI device for Qemu to notify VF when migration status
> is changed.
>
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>   drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 30 ++++++++++++++++++++++++++
>   drivers/net/ethernet/intel/ixgbe/ixgbe_type.h  |  4 ++++
>   2 files changed, 34 insertions(+)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> index e247d67..5cc7817 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> @@ -217,10 +217,37 @@ static ssize_t ixgbe_store_state_in_pf(struct device *dev,
>   	return count;
>   }
>   
> +static ssize_t ixgbe_store_notify_vf(struct device *dev,
> +				       struct device_attribute *attr,
> +				       const char *buf, size_t count)
> +{
> +	struct ixgbe_adapter *adapter = to_adapter(dev);
> +	struct ixgbe_hw *hw = &adapter->hw;
> +	struct pci_dev *vf_pdev = to_pci_dev(dev);
> +	int vfn = vf_pdev->virtfn_index;
> +	u32 ivar;
> +
> +	/* Enable VF mailbox irq first */
> +	IXGBE_WRITE_REG(hw, IXGBE_PVTEIMS(vfn), 0x4);
> +	IXGBE_WRITE_REG(hw, IXGBE_PVTEIAM(vfn), 0x4);
> +	IXGBE_WRITE_REG(hw, IXGBE_PVTEIAC(vfn), 0x4);
> +
> +	ivar = IXGBE_READ_REG(hw, IXGBE_PVTIVAR_MISC(vfn));
> +	ivar &= ~0xFF;
> +	ivar |= 0x2 | IXGBE_IVAR_ALLOC_VAL;
> +	IXGBE_WRITE_REG(hw, IXGBE_PVTIVAR_MISC(vfn), ivar);
> +
> +	ixgbe_ping_vf(adapter, vfn);
> +	return count;
> +}
> +

NAK, this won't fly.  You can't just go in from the PF and enable 
interrupts on the VF hoping they are configured well enough to handle an 
interrupt you decide to trigger from them.

Also have you even considered the MSI-X configuration on the VF?  I 
haven't seen anything anywhere that would have migrated the VF's MSI-X 
configuration from BAR 3 on one system to the new system.

>   static struct device_attribute ixgbe_per_state_in_pf_attribute =
>   	__ATTR(state_in_pf, S_IRUGO | S_IWUSR,
>   		ixgbe_show_state_in_pf, ixgbe_store_state_in_pf);
>   
> +static struct device_attribute ixgbe_per_notify_vf_attribute =
> +	__ATTR(notify_vf, S_IWUSR, NULL, ixgbe_store_notify_vf);
> +
>   void ixgbe_add_vf_attrib(struct ixgbe_adapter *adapter)
>   {
>   	struct pci_dev *pdev = adapter->pdev;
> @@ -241,6 +268,8 @@ void ixgbe_add_vf_attrib(struct ixgbe_adapter *adapter)
>   		if (vfdev->is_virtfn) {
>   			ret = device_create_file(&vfdev->dev,
>   					&ixgbe_per_state_in_pf_attribute);
> +			ret |= device_create_file(&vfdev->dev,
> +					&ixgbe_per_notify_vf_attribute);
>   			if (ret)
>   				pr_warn("Unable to add VF attribute for dev %s,\n",
>   					dev_name(&vfdev->dev));
> @@ -269,6 +298,7 @@ void ixgbe_remove_vf_attrib(struct ixgbe_adapter *adapter)
>   	while (vfdev) {
>   		if (vfdev->is_virtfn) {
>   			device_remove_file(&vfdev->dev, &ixgbe_per_state_in_pf_attribute);
> +			device_remove_file(&vfdev->dev, &ixgbe_per_notify_vf_attribute);
>   		}
>   
>   		vfdev = pci_get_device(pdev->vendor, vf_id, vfdev);

More driver specific sysfs.  This needs to be moved out of the driver if 
this is to be considered anything more than a proof of concept.

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> index dd6ba59..c6ddb66 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> @@ -2302,6 +2302,10 @@ enum {
>   #define IXGBE_PVFTDT(P)		(0x06018 + (0x40 * (P)))
>   #define IXGBE_PVFTDWBAL(P)	(0x06038 + (0x40 * (P)))
>   #define IXGBE_PVFTDWBAH(P)	(0x0603C + (0x40 * (P)))
> +#define IXGBE_PVTEIMS(P)	(0x00D00 + (4 * (P)))
> +#define IXGBE_PVTIVAR_MISC(P)	(0x04E00 + (4 * (P)))
> +#define IXGBE_PVTEIAC(P)       (0x00F00 + (4 * P))
> +#define IXGBE_PVTEIAM(P)       (0x04D00 + (4 * P))
>   
>   #define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \
>   		(IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index)))

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Oct. 22, 2015, 12:51 p.m. UTC | #2
On Wed, Oct 21, 2015 at 01:52:48PM -0700, Alexander Duyck wrote:
> Also have you even considered the MSI-X configuration on the VF?  I haven't
> seen anything anywhere that would have migrated the VF's MSI-X configuration
> from BAR 3 on one system to the new system.

Hypervisors do this for virtual devices so they can do this
for physical devices too.
lan,Tianyu Oct. 24, 2015, 3:43 p.m. UTC | #3
On 10/22/2015 4:52 AM, Alexander Duyck wrote:
> Also have you even considered the MSI-X configuration on the VF?  I
> haven't seen anything anywhere that would have migrated the VF's MSI-X
> configuration from BAR 3 on one system to the new system.

MSI-X migration is done by the hypervisor (Qemu).
The following link is my Qemu patch that does this.
http://marc.info/?l=kvm&m=144544706530484&w=2
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Duyck Oct. 25, 2015, 6:03 a.m. UTC | #4
On 10/24/2015 08:43 AM, Lan, Tianyu wrote:
>
> On 10/22/2015 4:52 AM, Alexander Duyck wrote:
>> Also have you even considered the MSI-X configuration on the VF?  I
>> haven't seen anything anywhere that would have migrated the VF's MSI-X
>> configuration from BAR 3 on one system to the new system.
>
> MSI-X migration is done by Hypervisor(Qemu).
> Following link is my Qemu patch to do that.
> http://marc.info/?l=kvm&m=144544706530484&w=2

I really don't like the idea of trying to migrate the MSI-X across from 
host to host while it is still active.  I really think Qemu shouldn't be 
moving this kind of data over in a migration.

I think that having the VF do a suspend/resume is the best way to go.  
Then it simplifies things as all you have to deal with is the dirty page 
tracking for the Rx DMA and you should be able to do this without making 
things too difficult.

- Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
lan,Tianyu Oct. 25, 2015, 6:45 a.m. UTC | #5
On 10/25/2015 2:03 PM, Alexander Duyck wrote:
> On 10/24/2015 08:43 AM, Lan, Tianyu wrote:
>>
>> On 10/22/2015 4:52 AM, Alexander Duyck wrote:
>>> Also have you even considered the MSI-X configuration on the VF?  I
>>> haven't seen anything anywhere that would have migrated the VF's MSI-X
>>> configuration from BAR 3 on one system to the new system.
>>
>> MSI-X migration is done by Hypervisor(Qemu).
>> Following link is my Qemu patch to do that.
>> http://marc.info/?l=kvm&m=144544706530484&w=2
>
> I really don't like the idea of trying to migrate the MSI-X across from
> host to host while it is still active.  I really think Qemu shouldn't be
> moving this kind of data over in a migration.

Hi Alex:

The VF's MSI-X registers in the VM are emulated (faked) by Qemu, and Qemu
maps the VF's host vectors to the guest's vectors. The MSI-X data that is
migrated belongs to the emulated registers rather than the ones on the
host. After migration, Qemu will remap the guest vectors to host vectors
on the new machine. Moreover, the VM is stopped while the MSI-X data is
being migrated.


>
> I think that having the VF do a suspend/resume is the best way to go.
> Then it simplifies things as all you have to deal with is the dirty page
> tracking for the Rx DMA and you should be able to do this without making
> things too difficult.
>

Yes, that would be simpler; the main concern is service downtime. I will
test it later.


> - Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index e247d67..5cc7817 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -217,10 +217,37 @@  static ssize_t ixgbe_store_state_in_pf(struct device *dev,
 	return count;
 }
 
+static ssize_t ixgbe_store_notify_vf(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct ixgbe_adapter *adapter = to_adapter(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *vf_pdev = to_pci_dev(dev);
+	int vfn = vf_pdev->virtfn_index;
+	u32 ivar;
+
+	/* Enable VF mailbox irq first */
+	IXGBE_WRITE_REG(hw, IXGBE_PVTEIMS(vfn), 0x4);
+	IXGBE_WRITE_REG(hw, IXGBE_PVTEIAM(vfn), 0x4);
+	IXGBE_WRITE_REG(hw, IXGBE_PVTEIAC(vfn), 0x4);
+
+	ivar = IXGBE_READ_REG(hw, IXGBE_PVTIVAR_MISC(vfn));
+	ivar &= ~0xFF;
+	ivar |= 0x2 | IXGBE_IVAR_ALLOC_VAL;
+	IXGBE_WRITE_REG(hw, IXGBE_PVTIVAR_MISC(vfn), ivar);
+
+	ixgbe_ping_vf(adapter, vfn);
+	return count;
+}
+
 static struct device_attribute ixgbe_per_state_in_pf_attribute =
 	__ATTR(state_in_pf, S_IRUGO | S_IWUSR,
 		ixgbe_show_state_in_pf, ixgbe_store_state_in_pf);
 
+static struct device_attribute ixgbe_per_notify_vf_attribute =
+	__ATTR(notify_vf, S_IWUSR, NULL, ixgbe_store_notify_vf);
+
 void ixgbe_add_vf_attrib(struct ixgbe_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
@@ -241,6 +268,8 @@  void ixgbe_add_vf_attrib(struct ixgbe_adapter *adapter)
 		if (vfdev->is_virtfn) {
 			ret = device_create_file(&vfdev->dev,
 					&ixgbe_per_state_in_pf_attribute);
+			ret |= device_create_file(&vfdev->dev,
+					&ixgbe_per_notify_vf_attribute);
 			if (ret)
 				pr_warn("Unable to add VF attribute for dev %s,\n",
 					dev_name(&vfdev->dev));
@@ -269,6 +298,7 @@  void ixgbe_remove_vf_attrib(struct ixgbe_adapter *adapter)
 	while (vfdev) {
 		if (vfdev->is_virtfn) {
 			device_remove_file(&vfdev->dev, &ixgbe_per_state_in_pf_attribute);
+			device_remove_file(&vfdev->dev, &ixgbe_per_notify_vf_attribute);
 		}
 
 		vfdev = pci_get_device(pdev->vendor, vf_id, vfdev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index dd6ba59..c6ddb66 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2302,6 +2302,10 @@  enum {
 #define IXGBE_PVFTDT(P)		(0x06018 + (0x40 * (P)))
 #define IXGBE_PVFTDWBAL(P)	(0x06038 + (0x40 * (P)))
 #define IXGBE_PVFTDWBAH(P)	(0x0603C + (0x40 * (P)))
+#define IXGBE_PVTEIMS(P)	(0x00D00 + (4 * (P)))
+#define IXGBE_PVTIVAR_MISC(P)	(0x04E00 + (4 * (P)))
+#define IXGBE_PVTEIAC(P)       (0x00F00 + (4 * P))
+#define IXGBE_PVTEIAM(P)       (0x04D00 + (4 * P))
 
 #define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \
 		(IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index)))