diff mbox

PCI: Refresh offset/stride after NumVFs is written

Message ID 1416624759-13543-1-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Wei Yang Nov. 22, 2014, 2:52 a.m. UTC
According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
affect the offset and stride. Current implementation doesn't refresh the
offset/stride cached in pci_sriov structure.

This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
value after NumVFs is written.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 drivers/pci/iov.c |   17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

Comments

Gavin Shan Nov. 24, 2014, 11:01 p.m. UTC | #1
On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>affect the offset and stride. Current implementation doesn't refresh the
>offset/stride cached in pci_sriov structure.
>
>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>value after NumVFs is written.
>
>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>---
> drivers/pci/iov.c |   17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>index 4d109c0..c7010c5 100644
>--- a/drivers/pci/iov.c
>+++ b/drivers/pci/iov.c
>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
> 		dev->sriov->stride * id) & 0xff;
> }
>
>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>+{
>+	struct pci_sriov *iov = dev->sriov;
>+
>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);

I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
effect.

>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>+}
>+
> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
> {
> 	struct pci_bus *child;
>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
> 			return rc;
> 	}
>
>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>+	pci_iov_set_numvfs(dev, nr_virtfn);
> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
> 	pci_cfg_access_lock(dev);
> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>@@ -272,7 +281,7 @@ failed:
> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
> 	pci_cfg_access_lock(dev);
> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>+	pci_iov_set_numvfs(dev, 0);
> 	ssleep(1);

The 1 second delay here might be for waiting VFs to be ready.

> 	pci_cfg_access_unlock(dev);
>
>@@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev)
> 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
>
> 	iov->num_VFs = 0;
>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>+	pci_iov_set_numvfs(dev, 0);
> }
>
> static int sriov_init(struct pci_dev *dev, int pos)
>@@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev)
> 		pci_update_resource(dev, i);
>
> 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs);
>+	pci_iov_set_numvfs(dev, iov->num_VFs);
> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
> 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
> 		msleep(100);

Here's another delay if PCI_SRIOV_CTRL_VFE isn't set previously.

Thanks,
Gavin

>-- 
>1.7.9.5
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Yang Nov. 25, 2014, 3:14 a.m. UTC | #2
On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>affect the offset and stride. Current implementation doesn't refresh the
>>offset/stride cached in pci_sriov structure.
>>
>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>value after NumVFs is written.
>>
>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>---
>> drivers/pci/iov.c |   17 +++++++++++++----
>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>
>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>index 4d109c0..c7010c5 100644
>>--- a/drivers/pci/iov.c
>>+++ b/drivers/pci/iov.c
>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>> 		dev->sriov->stride * id) & 0xff;
>> }
>>
>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>+{
>>+	struct pci_sriov *iov = dev->sriov;
>>+
>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>
>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>effect.
>
>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>+}
>>+
>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>> {
>> 	struct pci_bus *child;
>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>> 			return rc;
>> 	}
>>
>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>> 	pci_cfg_access_lock(dev);
>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>@@ -272,7 +281,7 @@ failed:
>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>> 	pci_cfg_access_lock(dev);
>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>+	pci_iov_set_numvfs(dev, 0);
>> 	ssleep(1);
>
>The 1 second delay here might be for waiting VFs to be ready.
>

Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?

>> 	pci_cfg_access_unlock(dev);
>>
>>@@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev)
>> 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
>>
>> 	iov->num_VFs = 0;
>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>+	pci_iov_set_numvfs(dev, 0);
>> }
>>
>> static int sriov_init(struct pci_dev *dev, int pos)
>>@@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev)
>> 		pci_update_resource(dev, i);
>>
>> 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs);
>>+	pci_iov_set_numvfs(dev, iov->num_VFs);
>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>> 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
>> 		msleep(100);
>
>Here's another delay if PCI_SRIOV_CTRL_VFE isn't set previously.

The logic here will not be changed.

>
>Thanks,
>Gavin
>
>>-- 
>>1.7.9.5
>>
>>--
>>To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>>the body of a message to majordomo@vger.kernel.org
>>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
Gavin Shan Nov. 25, 2014, 3:46 a.m. UTC | #3
On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>affect the offset and stride. Current implementation doesn't refresh the
>>>offset/stride cached in pci_sriov structure.
>>>
>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>value after NumVFs is written.
>>>
>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>---
>>> drivers/pci/iov.c |   17 +++++++++++++----
>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>
>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>index 4d109c0..c7010c5 100644
>>>--- a/drivers/pci/iov.c
>>>+++ b/drivers/pci/iov.c
>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>> 		dev->sriov->stride * id) & 0xff;
>>> }
>>>
>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>+{
>>>+	struct pci_sriov *iov = dev->sriov;
>>>+
>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>
>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>effect.
>>
>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>+}
>>>+
>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>> {
>>> 	struct pci_bus *child;
>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>> 			return rc;
>>> 	}
>>>
>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>> 	pci_cfg_access_lock(dev);
>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>@@ -272,7 +281,7 @@ failed:
>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>> 	pci_cfg_access_lock(dev);
>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>+	pci_iov_set_numvfs(dev, 0);
>>> 	ssleep(1);
>>
>>The 1 second delay here might be for waiting VFs to be ready.
>>
>
>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>

I was not suggesting to do that. I just raised the concern for you
to look into.

>>> 	pci_cfg_access_unlock(dev);
>>>
>>>@@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev)
>>> 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
>>>
>>> 	iov->num_VFs = 0;
>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>+	pci_iov_set_numvfs(dev, 0);
>>> }
>>>
>>> static int sriov_init(struct pci_dev *dev, int pos)
>>>@@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev)
>>> 		pci_update_resource(dev, i);
>>>
>>> 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs);
>>>+	pci_iov_set_numvfs(dev, iov->num_VFs);
>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>> 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
>>> 		msleep(100);
>>
>>Here's another delay if PCI_SRIOV_CTRL_VFE isn't set previously.
>
>The logic here will not be changed.
>
>>
>>Thanks,
>>Gavin
>>
>>>-- 
>>>1.7.9.5
>>>
>>>--
>>>To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>>>the body of a message to majordomo@vger.kernel.org
>>>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>
>-- 
>Richard Yang
>Help you, Help me

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Yang Nov. 25, 2014, 9:11 a.m. UTC | #4
On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote:
>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>>affect the offset and stride. Current implementation doesn't refresh the
>>>>offset/stride cached in pci_sriov structure.
>>>>
>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>>value after NumVFs is written.
>>>>
>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>---
>>>> drivers/pci/iov.c |   17 +++++++++++++----
>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>
>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>>index 4d109c0..c7010c5 100644
>>>>--- a/drivers/pci/iov.c
>>>>+++ b/drivers/pci/iov.c
>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>>> 		dev->sriov->stride * id) & 0xff;
>>>> }
>>>>
>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>>+{
>>>>+	struct pci_sriov *iov = dev->sriov;
>>>>+
>>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>
>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>>effect.
>>>
>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>>+}
>>>>+
>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>>> {
>>>> 	struct pci_bus *child;
>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>>> 			return rc;
>>>> 	}
>>>>
>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>>> 	pci_cfg_access_lock(dev);
>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>@@ -272,7 +281,7 @@ failed:
>>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>>> 	pci_cfg_access_lock(dev);
>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>>+	pci_iov_set_numvfs(dev, 0);
>>>> 	ssleep(1);
>>>
>>>The 1 second delay here might be for waiting VFs to be ready.
>>>
>>
>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>
>
>I was not suggesting to do that. I just raised the concern for you
>to look into.
>

I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:

To allow components to perform internal initialization, system software must wait for at least
100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
return a Successful Completion status for a valid Configuration Request is broken. After Setting the
VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
having previously returned a Successful Completion without an intervening VF disable or other valid
reset condition.

As my understanding, it will take 1ms or 1s after VF Enable bit is set.
Actually I am confused with the two different time, in which case we needs to
wait for different time? And some place we add a lock, but no lock in others.

While from the SPEC, I don't see some description the NumVFs field will take
some time to be effective.
Gavin Shan Nov. 25, 2014, 11:03 p.m. UTC | #5
On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote:
>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote:
>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>>>affect the offset and stride. Current implementation doesn't refresh the
>>>>>offset/stride cached in pci_sriov structure.
>>>>>
>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>>>value after NumVFs is written.
>>>>>
>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>---
>>>>> drivers/pci/iov.c |   17 +++++++++++++----
>>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>>
>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>>>index 4d109c0..c7010c5 100644
>>>>>--- a/drivers/pci/iov.c
>>>>>+++ b/drivers/pci/iov.c
>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>>>> 		dev->sriov->stride * id) & 0xff;
>>>>> }
>>>>>
>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>>>+{
>>>>>+	struct pci_sriov *iov = dev->sriov;
>>>>>+
>>>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>
>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>>>effect.
>>>>
>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>>>+}
>>>>>+
>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>>>> {
>>>>> 	struct pci_bus *child;
>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>>>> 			return rc;
>>>>> 	}
>>>>>
>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>>>> 	pci_cfg_access_lock(dev);
>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>@@ -272,7 +281,7 @@ failed:
>>>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>>>> 	pci_cfg_access_lock(dev);
>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>>>+	pci_iov_set_numvfs(dev, 0);
>>>>> 	ssleep(1);
>>>>
>>>>The 1 second delay here might be for waiting VFs to be ready.
>>>>
>>>
>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>>
>>
>>I was not suggesting to do that. I just raised the concern for you
>>to look into.
>>
>
>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:
>
>To allow components to perform internal initialization, system software must wait for at least
>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
>return a Successful Completion status for a valid Configuration Request is broken. After Setting the
>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
>having previously returned a Successful Completion without an intervening VF disable or other valid
>reset condition.
>
>As my understanding, it will take 1ms or 1s after VF Enable bit is set.
>Actually I am confused with the two different time, in which case we needs to
>wait for different time? And some place we add a lock, but no lock in others.
>

Are you talking about 1ms or 100ms? I assume it's 100ms. If I
understand things correctly, 100ms delay before issuing config
read request, CRS can be returned before it reaches the timeout
(1 second).

>While from the SPEC, I don't see some description the NumVFs field will take
>some time to be effective.
>

If I'm correct, we can't change NumVFs without disabling/reanbling VFs.

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Yang Nov. 26, 2014, 3:50 a.m. UTC | #6
On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote:
>On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote:
>>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote:
>>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>>>>affect the offset and stride. Current implementation doesn't refresh the
>>>>>>offset/stride cached in pci_sriov structure.
>>>>>>
>>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>>>>value after NumVFs is written.
>>>>>>
>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>---
>>>>>> drivers/pci/iov.c |   17 +++++++++++++----
>>>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>>>
>>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>>>>index 4d109c0..c7010c5 100644
>>>>>>--- a/drivers/pci/iov.c
>>>>>>+++ b/drivers/pci/iov.c
>>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>>>>> 		dev->sriov->stride * id) & 0xff;
>>>>>> }
>>>>>>
>>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>>>>+{
>>>>>>+	struct pci_sriov *iov = dev->sriov;
>>>>>>+
>>>>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>
>>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>>>>effect.
>>>>>
>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>>>>+}
>>>>>>+
>>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>>>>> {
>>>>>> 	struct pci_bus *child;
>>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>>>>> 			return rc;
>>>>>> 	}
>>>>>>
>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>>>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>>>>> 	pci_cfg_access_lock(dev);
>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>@@ -272,7 +281,7 @@ failed:
>>>>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>>>>> 	pci_cfg_access_lock(dev);
>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>>>>+	pci_iov_set_numvfs(dev, 0);
>>>>>> 	ssleep(1);
>>>>>
>>>>>The 1 second delay here might be for waiting VFs to be ready.
>>>>>
>>>>
>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>>>
>>>
>>>I was not suggesting to do that. I just raised the concern for you
>>>to look into.
>>>
>>
>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:
>>
>>To allow components to perform internal initialization, system software must wait for at least
>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the
>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
>>having previously returned a Successful Completion without an intervening VF disable or other valid
>>reset condition.
>>
>>As my understanding, it will take 1ms or 1s after VF Enable bit is set.
>>Actually I am confused with the two different time, in which case we needs to
>>wait for different time? And some place we add a lock, but no lock in others.
>>
>
>Are you talking about 1ms or 100ms? I assume it's 100ms. If I
>understand things correctly, 100ms delay before issuing config
>read request, CRS can be returned before it reaches the timeout
>(1 second).

Thanks for your explanation. I took a look at the CRS definition in SPEC and
get more understanding on this concept.

If my understanding is correct, we need to wait 1s in all case.

100ms is the minimum requirement before any Request to the VFs after VF Enable
is set to 1. But the VF may not ready until 1s passed. For example in
sriov_enable(), after msleep(100) it will call virtfn_add() in which will
send Request to the VF. It is not for sure at this moment, 1s has passed.

If we want to use the CRS mechanism, we need to call
pci_bus_read_dev_vendor_id() on the VF to determine VF is ready. But this
would be complecated on powernv platform now.

>
>>While from the SPEC, I don't see some description the NumVFs field will take
>>some time to be effective.
>>
>
>If I'm correct, we can't change NumVFs without disabling/reanbling VFs.
>

I found this in SRIOV SPEC sec 3.3.7

NumVFs may only be written while VF Enable is Clear. If NumVFs is written when VF Enable is
Set, the results are undefined.

Will take a close look in the current implementation to see whether they
comply with the SPEC.


>Thanks,
>Gavin
Wei Yang Nov. 26, 2014, 8:14 a.m. UTC | #7
On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote:
>On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote:
>>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote:
>>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>>>>affect the offset and stride. Current implementation doesn't refresh the
>>>>>>offset/stride cached in pci_sriov structure.
>>>>>>
>>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>>>>value after NumVFs is written.
>>>>>>
>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>---
>>>>>> drivers/pci/iov.c |   17 +++++++++++++----
>>>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>>>
>>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>>>>index 4d109c0..c7010c5 100644
>>>>>>--- a/drivers/pci/iov.c
>>>>>>+++ b/drivers/pci/iov.c
>>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>>>>> 		dev->sriov->stride * id) & 0xff;
>>>>>> }
>>>>>>
>>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>>>>+{
>>>>>>+	struct pci_sriov *iov = dev->sriov;
>>>>>>+
>>>>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>
>>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>>>>effect.
>>>>>
>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>>>>+}
>>>>>>+
>>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>>>>> {
>>>>>> 	struct pci_bus *child;
>>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>>>>> 			return rc;
>>>>>> 	}
>>>>>>
>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>>>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>>>>> 	pci_cfg_access_lock(dev);
>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>@@ -272,7 +281,7 @@ failed:
>>>>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>>>>> 	pci_cfg_access_lock(dev);
>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>>>>+	pci_iov_set_numvfs(dev, 0);
>>>>>> 	ssleep(1);
>>>>>
>>>>>The 1 second delay here might be for waiting VFs to be ready.
>>>>>
>>>>
>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>>>
>>>
>>>I was not suggesting to do that. I just raised the concern for you
>>>to look into.
>>>
>>
>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:
>>
>>To allow components to perform internal initialization, system software must wait for at least
>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the
>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
>>having previously returned a Successful Completion without an intervening VF disable or other valid
>>reset condition.
>>
>>As my understanding, it will take 1ms or 1s after VF Enable bit is set.
>>Actually I am confused with the two different time, in which case we needs to
>>wait for different time? And some place we add a lock, but no lock in others.
>>
>
>Are you talking about 1ms or 100ms? I assume it's 100ms. If I
>understand things correctly, 100ms delay before issuing config
>read request, CRS can be returned before it reaches the timeout
>(1 second).
>
>>While from the SPEC, I don't see some description the NumVFs field will take
>>some time to be effective.
>>
>
>If I'm correct, we can't change NumVFs without disabling/reanbling VFs.
>

In current implementation, NumVFs is written when VF Enable is not set in all
cases. While I guess what you mean is whether offset/stride will change
immediately after NumVFs is changed when VF Enable is not set.

From the SRIOV SPEC, I don't see some statement that the offset/stride will be
effective after VF Enable is set. And in current implementation, offset/stride
is retrieved when VF Enable is not set, both in sriov_init() and
sriov_enable().

So my conclusion is the offset/stride will take effect immediately after
NumVFs is written.

>Thanks,
>Gavin
Wei Yang Nov. 26, 2014, 8:40 a.m. UTC | #8
On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote:
>On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote:
>>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote:
>>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>>>>affect the offset and stride. Current implementation doesn't refresh the
>>>>>>offset/stride cached in pci_sriov structure.
>>>>>>
>>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>>>>value after NumVFs is written.
>>>>>>
>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>---
>>>>>> drivers/pci/iov.c |   17 +++++++++++++----
>>>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>>>
>>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>>>>index 4d109c0..c7010c5 100644
>>>>>>--- a/drivers/pci/iov.c
>>>>>>+++ b/drivers/pci/iov.c
>>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>>>>> 		dev->sriov->stride * id) & 0xff;
>>>>>> }
>>>>>>
>>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>>>>+{
>>>>>>+	struct pci_sriov *iov = dev->sriov;
>>>>>>+
>>>>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>
>>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>>>>effect.
>>>>>
>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>>>>+}
>>>>>>+
>>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>>>>> {
>>>>>> 	struct pci_bus *child;
>>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>>>>> 			return rc;
>>>>>> 	}
>>>>>>
>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>>>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>>>>> 	pci_cfg_access_lock(dev);
>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>@@ -272,7 +281,7 @@ failed:
>>>>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>>>>> 	pci_cfg_access_lock(dev);
>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>>>>+	pci_iov_set_numvfs(dev, 0);
>>>>>> 	ssleep(1);
>>>>>
>>>>>The 1 second delay here might be for waiting VFs to be ready.
>>>>>
>>>>
>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>>>
>>>
>>>I was not suggesting to do that. I just raised the concern for you
>>>to look into.
>>>
>>
>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:
>>
>>To allow components to perform internal initialization, system software must wait for at least
>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the
>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
>>having previously returned a Successful Completion without an intervening VF disable or other valid
>>reset condition.
>>
>>As my understanding, it will take 1ms or 1s after VF Enable bit is set.
>>Actually I am confused with the two different time, in which case we needs to
>>wait for different time? And some place we add a lock, but no lock in others.
>>
>
>Are you talking about 1ms or 100ms? I assume it's 100ms. If I
>understand things correctly, 100ms delay before issuing config
>read request, CRS can be returned before it reaches the timeout
>(1 second).

Took a look at the sleep again. There are five sleep in iov.c, two are 100ms
sleep and three are 1s sleep. 100ms sleep are both after VF Enable is set. 1s
sleep are all after VF Enable is cleared. These are stated in SRIOV SPEC
3.3.3.1.

While I still suggest to wait 1s after VF Enable is set. Otherwise we need to
add some logic like pci_bus_read_dev_vendor_id() on VF to check whether VF is
ready, when CRS Software Visibility is enabled on the Bridge. Like in
sriov_enable(), the msleep() needs to be replaced by a call on a VF, while
the VF is not created in the system yet.

If my understanding is not correct, please let me know.

>
>Thanks,
>Gavin
Wei Yang Dec. 4, 2014, 1:26 a.m. UTC | #9
On Wed, Nov 26, 2014 at 04:40:51PM +0800, Wei Yang wrote:
>On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote:
>>On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote:
>>>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote:
>>>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote:
>>>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote:
>>>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote:
>>>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will
>>>>>>>affect the offset and stride. Current implementation doesn't refresh the
>>>>>>>offset/stride cached in pci_sriov structure.
>>>>>>>
>>>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two
>>>>>>>value after NumVFs is written.
>>>>>>>
>>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>>---
>>>>>>> drivers/pci/iov.c |   17 +++++++++++++----
>>>>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>>>>
>>>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>>>>>>index 4d109c0..c7010c5 100644
>>>>>>>--- a/drivers/pci/iov.c
>>>>>>>+++ b/drivers/pci/iov.c
>>>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
>>>>>>> 		dev->sriov->stride * id) & 0xff;
>>>>>>> }
>>>>>>>
>>>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
>>>>>>>+{
>>>>>>>+	struct pci_sriov *iov = dev->sriov;
>>>>>>>+
>>>>>>>+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>>
>>>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take
>>>>>>effect.
>>>>>>
>>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
>>>>>>>+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
>>>>>>>+}
>>>>>>>+
>>>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
>>>>>>> {
>>>>>>> 	struct pci_bus *child;
>>>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
>>>>>>> 			return rc;
>>>>>>> 	}
>>>>>>>
>>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
>>>>>>>+	pci_iov_set_numvfs(dev, nr_virtfn);
>>>>>>> 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
>>>>>>> 	pci_cfg_access_lock(dev);
>>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>>@@ -272,7 +281,7 @@ failed:
>>>>>>> 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
>>>>>>> 	pci_cfg_access_lock(dev);
>>>>>>> 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
>>>>>>>-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
>>>>>>>+	pci_iov_set_numvfs(dev, 0);
>>>>>>> 	ssleep(1);
>>>>>>
>>>>>>The 1 second delay here might be for waiting VFs to be ready.
>>>>>>
>>>>>
>>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>>>>
>>>>
>>>>I was not suggesting to do that. I just raised the concern for you
>>>>to look into.
>>>>
>>>
>>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:
>>>
>>>To allow components to perform internal initialization, system software must wait for at least
>>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
>>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
>>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
>>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the
>>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
>>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
>>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
>>>having previously returned a Successful Completion without an intervening VF disable or other valid
>>>reset condition.
>>>
>>>As my understanding, it will take 1ms or 1s after VF Enable bit is set.
>>>Actually I am confused with the two different time, in which case we needs to
>>>wait for different time? And some place we add a lock, but no lock in others.
>>>
>>
>>Are you talking about 1ms or 100ms? I assume it's 100ms. If I
>>understand things correctly, 100ms delay before issuing config
>>read request, CRS can be returned before it reaches the timeout
>>(1 second).
>
>Took a look at the sleep again. There are five sleep in iov.c, two are 100ms
>sleep and three are 1s sleep. 100ms sleep are both after VF Enable is set. 1s
>sleep are all after VF Enable is cleared. These are stated in SRIOV SPEC
>3.3.3.1.
>
>While I still suggest to wait 1s after VF Enable is set. Otherwise we need to
>add some logic like pci_bus_read_dev_vendor_id() on VF to check whether VF is
>ready, when CRS Software Visibility is enabled on the Bridge. Like in
>sriov_enable(), the msleep() needs to be replaced by a call on a VF, while
>the VF is not created in the system yet.
>
>If my understanding is not correct, please let me know.
>

Hi, Gavin,

Do you agree with my opinion in this mail? or it is still not correct?

>>
>>Thanks,
>>Gavin
>
>-- 
>Richard Yang
>Help you, Help me
Wei Yang Dec. 9, 2014, 10:35 a.m. UTC | #10
On Wed, Nov 26, 2014 at 04:40:51PM +0800, Wei Yang wrote:
>On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote:
>>>>>>
>>>>>>The 1 second delay here might be for waiting VFs to be ready.
>>>>>>
>>>>>
>>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better?
>>>>>
>>>>
>>>>I was not suggesting to do that. I just raised the concern for you
>>>>to look into.
>>>>
>>>
>>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this:
>>>
>>>To allow components to perform internal initialization, system software must wait for at least
>>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to
>>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must
>>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to
>>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the
>>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to
>>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion
>>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after
>>>having previously returned a Successful Completion without an intervening VF disable or other valid
>>>reset condition.
>>>
>>>As my understanding, it will take 1ms or 1s after VF Enable bit is set.
>>>Actually I am confused with the two different time, in which case we needs to
>>>wait for different time? And some place we add a lock, but no lock in others.
>>>
>>
>>Are you talking about 1ms or 100ms? I assume it's 100ms. If I
>>understand things correctly, 100ms delay before issuing config
>>read request, CRS can be returned before it reaches the timeout
>>(1 second).
>
>Took a look at the sleep again. There are five sleep in iov.c, two are 100ms
>sleep and three are 1s sleep. 100ms sleep are both after VF Enable is set. 1s
>sleep are all after VF Enable is cleared. These are stated in SRIOV SPEC
>3.3.3.1.
>
>While I still suggest to wait 1s after VF Enable is set. Otherwise we need to
>add some logic like pci_bus_read_dev_vendor_id() on VF to check whether VF is
>ready, when CRS Software Visibility is enabled on the Bridge. Like in
>sriov_enable(), the msleep() needs to be replaced by a call on a VF, while
>the VF is not created in the system yet.
>
>If my understanding is not correct, please let me know.

Bjorn,

After discussion with Gavin, I think it is ok to refresh the offset/stride
after NumVFs is written.

1. NumVFs are written all with VFE cleared (comply with SPEC)
2. The time to wait, like 100ms and 1s are the time to wait before accessing
   VF not the PF. So to me, it is not necessary to wait before retrieve the
   offset/stride after writing NumVFs.

If my understanding is not correct, please let me know.

>
>>
>>Thanks,
>>Gavin
>
>-- 
>Richard Yang
>Help you, Help me
diff mbox

Patch

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 4d109c0..c7010c5 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -31,6 +31,15 @@  static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
 		dev->sriov->stride * id) & 0xff;
 }
 
+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
+}
+
 static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
 {
 	struct pci_bus *child;
@@ -243,7 +252,7 @@  static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 			return rc;
 	}
 
-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+	pci_iov_set_numvfs(dev, nr_virtfn);
 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
 	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
@@ -272,7 +281,7 @@  failed:
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
 	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
+	pci_iov_set_numvfs(dev, 0);
 	ssleep(1);
 	pci_cfg_access_unlock(dev);
 
@@ -303,7 +312,7 @@  static void sriov_disable(struct pci_dev *dev)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
 
 	iov->num_VFs = 0;
-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0);
+	pci_iov_set_numvfs(dev, 0);
 }
 
 static int sriov_init(struct pci_dev *dev, int pos)
@@ -439,7 +448,7 @@  static void sriov_restore_state(struct pci_dev *dev)
 		pci_update_resource(dev, i);
 
 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
-	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs);
+	pci_iov_set_numvfs(dev, iov->num_VFs);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
 		msleep(100);