diff mbox series

[v4,04/17] PCI: epf-test: Fix DMA transfer completion detection

Message ID 20230330085357.2653599-5-damien.lemoal@opensource.wdc.com (mailing list archive)
State Superseded
Headers show
Series PCI endpoint fixes and improvements | expand

Commit Message

Damien Le Moal March 30, 2023, 8:53 a.m. UTC
pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not
handling DMA transfer completion correctly, leading to completion
notifications to the RC side that are too early. This problem can be
detected when the RC side is running an IOMMU with messages such as:

pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
domain=0x001c address=0xfff00000 flags=0x0000]

When running the pcitest.sh tests: the address used for a previous
test transfer generates the above error while the next test transfer is
running.

Fix this by testing the dma transfer status in
pci_epf_test_dma_callback() and notifying the completion only when the
transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in
pci_epf_test_data_transfer(), be paranoid and check again the transfer
status and always call dmaengine_terminate_sync() before returning.

Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 drivers/pci/endpoint/functions/pci-epf-test.c | 38 +++++++++++++------
 1 file changed, 27 insertions(+), 11 deletions(-)

Comments

Manivannan Sadhasivam March 31, 2023, 5:25 a.m. UTC | #1
On Thu, Mar 30, 2023 at 05:53:44PM +0900, Damien Le Moal wrote:
> pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not
> handling DMA transfer completion correctly, leading to completion
> notifications to the RC side that are too early. This problem can be
> detected when the RC side is running an IOMMU with messages such as:
> 
> pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> domain=0x001c address=0xfff00000 flags=0x0000]
> 
> When running the pcitest.sh tests: the address used for a previous
> test transfer generates the above error while the next test transfer is
> running.
> 
> Fix this by testing the dma transfer status in
> pci_epf_test_dma_callback() and notifying the completion only when the
> transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in
> pci_epf_test_data_transfer(), be paranoid and check again the transfer
> status and always call dmaengine_terminate_sync() before returning.
> 
> Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities")
> Cc: stable@vger.kernel.org
> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>

Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>

- Mani

> ---
>  drivers/pci/endpoint/functions/pci-epf-test.c | 38 +++++++++++++------
>  1 file changed, 27 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
> index d65419735d2e..dbea6eb0dee7 100644
> --- a/drivers/pci/endpoint/functions/pci-epf-test.c
> +++ b/drivers/pci/endpoint/functions/pci-epf-test.c
> @@ -54,6 +54,9 @@ struct pci_epf_test {
>  	struct delayed_work	cmd_handler;
>  	struct dma_chan		*dma_chan_tx;
>  	struct dma_chan		*dma_chan_rx;
> +	struct dma_chan		*transfer_chan;
> +	dma_cookie_t		transfer_cookie;
> +	enum dma_status		transfer_status;
>  	struct completion	transfer_complete;
>  	bool			dma_supported;
>  	bool			dma_private;
> @@ -85,8 +88,14 @@ static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 };
>  static void pci_epf_test_dma_callback(void *param)
>  {
>  	struct pci_epf_test *epf_test = param;
> -
> -	complete(&epf_test->transfer_complete);
> +	struct dma_tx_state state;
> +
> +	epf_test->transfer_status =
> +		dmaengine_tx_status(epf_test->transfer_chan,
> +				    epf_test->transfer_cookie, &state);
> +	if (epf_test->transfer_status == DMA_COMPLETE ||
> +	    epf_test->transfer_status == DMA_ERROR)
> +		complete(&epf_test->transfer_complete);
>  }
>  
>  /**
> @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>  	struct dma_async_tx_descriptor *tx;
>  	struct dma_slave_config sconf = {};
>  	struct device *dev = &epf->dev;
> -	dma_cookie_t cookie;
>  	int ret;
>  
>  	if (IS_ERR_OR_NULL(chan)) {
> @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>  	}
>  
>  	reinit_completion(&epf_test->transfer_complete);
> +	epf_test->transfer_chan = chan;
>  	tx->callback = pci_epf_test_dma_callback;
>  	tx->callback_param = epf_test;
> -	cookie = tx->tx_submit(tx);
> +	epf_test->transfer_cookie = tx->tx_submit(tx);
>  
> -	ret = dma_submit_error(cookie);
> +	ret = dma_submit_error(epf_test->transfer_cookie);
>  	if (ret) {
> -		dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie);
> -		return -EIO;
> +		dev_err(dev, "Failed to do DMA tx_submit %d\n", ret);
> +		goto terminate;
>  	}
>  
>  	dma_async_issue_pending(chan);
>  	ret = wait_for_completion_interruptible(&epf_test->transfer_complete);
>  	if (ret < 0) {
> -		dmaengine_terminate_sync(chan);
> -		dev_err(dev, "DMA wait_for_completion_timeout\n");
> -		return -ETIMEDOUT;
> +		dev_err(dev, "DMA wait_for_completion interrupted\n");
> +		goto terminate;
>  	}
>  
> -	return 0;
> +	if (epf_test->transfer_status == DMA_ERROR) {
> +		dev_err(dev, "DMA transfer failed\n");
> +		ret = -EIO;
> +	}
> +
> +terminate:
> +	dmaengine_terminate_sync(chan);
> +
> +	return ret;
>  }
>  
>  struct epf_dma_filter {
> -- 
> 2.39.2
>
Shunsuke Mie April 4, 2023, 9:47 a.m. UTC | #2
On 2023/03/30 17:53, Damien Le Moal wrote:
> pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not
> handling DMA transfer completion correctly, leading to completion
> notifications to the RC side that are too early. This problem can be
> detected when the RC side is running an IOMMU with messages such as:
>
> pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> domain=0x001c address=0xfff00000 flags=0x0000]
>
> When running the pcitest.sh tests: the address used for a previous
> test transfer generates the above error while the next test transfer is
> running.
>
> Fix this by testing the dma transfer status in
> pci_epf_test_dma_callback() and notifying the completion only when the
> transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in
> pci_epf_test_data_transfer(), be paranoid and check again the transfer
> status and always call dmaengine_terminate_sync() before returning.
>
> Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities")
> Cc: stable@vger.kernel.org
> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> ---
>   drivers/pci/endpoint/functions/pci-epf-test.c | 38 +++++++++++++------
>   1 file changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
> index d65419735d2e..dbea6eb0dee7 100644
> --- a/drivers/pci/endpoint/functions/pci-epf-test.c
> +++ b/drivers/pci/endpoint/functions/pci-epf-test.c
> @@ -54,6 +54,9 @@ struct pci_epf_test {
>   	struct delayed_work	cmd_handler;
>   	struct dma_chan		*dma_chan_tx;
>   	struct dma_chan		*dma_chan_rx;
> +	struct dma_chan		*transfer_chan;
> +	dma_cookie_t		transfer_cookie;
> +	enum dma_status		transfer_status;
>   	struct completion	transfer_complete;
>   	bool			dma_supported;
>   	bool			dma_private;
> @@ -85,8 +88,14 @@ static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 };
>   static void pci_epf_test_dma_callback(void *param)
>   {
>   	struct pci_epf_test *epf_test = param;
> -
> -	complete(&epf_test->transfer_complete);
> +	struct dma_tx_state state;
> +
> +	epf_test->transfer_status =
> +		dmaengine_tx_status(epf_test->transfer_chan,
> +				    epf_test->transfer_cookie, &state);
> +	if (epf_test->transfer_status == DMA_COMPLETE ||
> +	    epf_test->transfer_status == DMA_ERROR)
> +		complete(&epf_test->transfer_complete);
>   }
>   
>   /**
> @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>   	struct dma_async_tx_descriptor *tx;
>   	struct dma_slave_config sconf = {};
>   	struct device *dev = &epf->dev;
> -	dma_cookie_t cookie;
>   	int ret;
>   
>   	if (IS_ERR_OR_NULL(chan)) {
> @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>   	}
>   
>   	reinit_completion(&epf_test->transfer_complete);
> +	epf_test->transfer_chan = chan;
>   	tx->callback = pci_epf_test_dma_callback;
>   	tx->callback_param = epf_test;
> -	cookie = tx->tx_submit(tx);
> +	epf_test->transfer_cookie = tx->tx_submit(tx);

How about changing the code to use dmaengine_submit() API instead of 
calling a raw function pointer?

>   
> -	ret = dma_submit_error(cookie);
> +	ret = dma_submit_error(epf_test->transfer_cookie);
>   	if (ret) {
> -		dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie);
> -		return -EIO;
> +		dev_err(dev, "Failed to do DMA tx_submit %d\n", ret);
> +		goto terminate;
>   	}
>   
>   	dma_async_issue_pending(chan);
>   	ret = wait_for_completion_interruptible(&epf_test->transfer_complete);
>   	if (ret < 0) {
> -		dmaengine_terminate_sync(chan);
> -		dev_err(dev, "DMA wait_for_completion_timeout\n");
> -		return -ETIMEDOUT;
> +		dev_err(dev, "DMA wait_for_completion interrupted\n");
> +		goto terminate;
>   	}
>   
> -	return 0;
> +	if (epf_test->transfer_status == DMA_ERROR) {
> +		dev_err(dev, "DMA transfer failed\n");
> +		ret = -EIO;
> +	}
> +
> +terminate:
> +	dmaengine_terminate_sync(chan);
> +
> +	return ret;
>   }
>   
>   struct epf_dma_filter {

Best,

Shunsuke.
Damien Le Moal April 4, 2023, 10:16 a.m. UTC | #3
On 4/4/23 18:47, Shunsuke Mie wrote:
>> @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>>   	struct dma_async_tx_descriptor *tx;
>>   	struct dma_slave_config sconf = {};
>>   	struct device *dev = &epf->dev;
>> -	dma_cookie_t cookie;
>>   	int ret;
>>   
>>   	if (IS_ERR_OR_NULL(chan)) {
>> @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>>   	}
>>   
>>   	reinit_completion(&epf_test->transfer_complete);
>> +	epf_test->transfer_chan = chan;
>>   	tx->callback = pci_epf_test_dma_callback;
>>   	tx->callback_param = epf_test;
>> -	cookie = tx->tx_submit(tx);
>> +	epf_test->transfer_cookie = tx->tx_submit(tx);
> 
> How about changing the code to use dmaengine_submit() API instead of 
> calling a raw function pointer?

This is done in patch 5 of the series.
Shunsuke Mie April 4, 2023, 10:30 a.m. UTC | #4
2023年4月4日(火) 19:17 Damien Le Moal <dlemoal@fastmail.com>:
>
> On 4/4/23 18:47, Shunsuke Mie wrote:
> >> @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
> >>      struct dma_async_tx_descriptor *tx;
> >>      struct dma_slave_config sconf = {};
> >>      struct device *dev = &epf->dev;
> >> -    dma_cookie_t cookie;
> >>      int ret;
> >>
> >>      if (IS_ERR_OR_NULL(chan)) {
> >> @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
> >>      }
> >>
> >>      reinit_completion(&epf_test->transfer_complete);
> >> +    epf_test->transfer_chan = chan;
> >>      tx->callback = pci_epf_test_dma_callback;
> >>      tx->callback_param = epf_test;
> >> -    cookie = tx->tx_submit(tx);
> >> +    epf_test->transfer_cookie = tx->tx_submit(tx);
> >
> > How about changing the code to use dmaengine_submit() API instead of
> > calling a raw function pointer?
>
> This is done in patch 5 of the series.
Sorry, I missed it.
>
Damien Le Moal April 13, 2023, 1:50 a.m. UTC | #5
On 4/4/23 19:16, Damien Le Moal wrote:
> On 4/4/23 18:47, Shunsuke Mie wrote:
>>> @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>>>   	struct dma_async_tx_descriptor *tx;
>>>   	struct dma_slave_config sconf = {};
>>>   	struct device *dev = &epf->dev;
>>> -	dma_cookie_t cookie;
>>>   	int ret;
>>>   
>>>   	if (IS_ERR_OR_NULL(chan)) {
>>> @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
>>>   	}
>>>   
>>>   	reinit_completion(&epf_test->transfer_complete);
>>> +	epf_test->transfer_chan = chan;
>>>   	tx->callback = pci_epf_test_dma_callback;
>>>   	tx->callback_param = epf_test;
>>> -	cookie = tx->tx_submit(tx);
>>> +	epf_test->transfer_cookie = tx->tx_submit(tx);
>>
>> How about changing the code to use dmaengine_submit() API instead of 
>> calling a raw function pointer?
> 
> This is done in patch 5 of the series.

Bjorn,

My apologies for not replying to the series cover letter as I do not have it
locally (due to WD on-going network issues).

This is a ping about this series. Can we get it queued ?
(I do not see it in PCI next tree)
Bjorn Helgaas April 14, 2023, 3:41 p.m. UTC | #6
On Thu, Apr 13, 2023 at 10:50:03AM +0900, Damien Le Moal wrote:
> On 4/4/23 19:16, Damien Le Moal wrote:
> > On 4/4/23 18:47, Shunsuke Mie wrote:
> >>> @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
> >>>   	struct dma_async_tx_descriptor *tx;
> >>>   	struct dma_slave_config sconf = {};
> >>>   	struct device *dev = &epf->dev;
> >>> -	dma_cookie_t cookie;
> >>>   	int ret;
> >>>   
> >>>   	if (IS_ERR_OR_NULL(chan)) {
> >>> @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
> >>>   	}
> >>>   
> >>>   	reinit_completion(&epf_test->transfer_complete);
> >>> +	epf_test->transfer_chan = chan;
> >>>   	tx->callback = pci_epf_test_dma_callback;
> >>>   	tx->callback_param = epf_test;
> >>> -	cookie = tx->tx_submit(tx);
> >>> +	epf_test->transfer_cookie = tx->tx_submit(tx);
> >>
> >> How about changing the code to use dmaengine_submit() API instead of 
> >> calling a raw function pointer?
> > 
> > This is done in patch 5 of the series.
> 
> Bjorn,
> 
> My apologies for not replying to the series cover letter as I do not have it
> locally (due to WD on-going network issues).
> 
> This is a ping about this series. Can we get it queued ?
> (I do not see it in PCI next tree)

Lorenzo and Krzysztof take care of the endpoint subsystem, so I'll
wait for them.  In the meantime I'll send a couple typo fixes and a
cursory review for trivial issue.s

Bjorn
Bjorn Helgaas April 14, 2023, 3:45 p.m. UTC | #7
On Thu, Apr 13, 2023 at 10:50:03AM +0900, Damien Le Moal wrote:
> My apologies for not replying to the series cover letter as I do not have it
> locally (due to WD on-going network issues).

Oh, I forgot to mention that in this situation where a message is lost
for some reason (or if I want to reply to a thread that I didn't
receive directly), lore is a great resource.

For example, here's the cover letter on lore:

  https://lore.kernel.org/r/20230330085357.2653599-1-damien.lemoal@opensource.wdcom

and there's a link to download the entire thread as an mbox file.
Then it's easy to uncompress it, point mutt at it, and respond.
There's also a sample "git send-email" line to respond, but I've never
actually used that.

Might not fit your workflow, but just FYI in case it's ever useful.

Bjorn
Bjorn Helgaas April 14, 2023, 4 p.m. UTC | #8
On Thu, Mar 30, 2023 at 05:53:44PM +0900, Damien Le Moal wrote:
> pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not
> handling DMA transfer completion correctly, leading to completion
> notifications to the RC side that are too early. This problem can be
> detected when the RC side is running an IOMMU with messages such as:
> 
> pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> domain=0x001c address=0xfff00000 flags=0x0000]

Indent quoted material two spaces.  This looks like the original
message was a single line.  I'd keep it that way in the commit log to
make it easier to grep rot.

> When running the pcitest.sh tests: the address used for a previous
> test transfer generates the above error while the next test transfer is
> running.
> 
> Fix this by testing the dma transfer status in
> pci_epf_test_dma_callback() and notifying the completion only when the
> transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in
> pci_epf_test_data_transfer(), be paranoid and check again the transfer
> status and always call dmaengine_terminate_sync() before returning.

s/the dma transfer/the DMA transfer/
diff mbox series

Patch

diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index d65419735d2e..dbea6eb0dee7 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -54,6 +54,9 @@  struct pci_epf_test {
 	struct delayed_work	cmd_handler;
 	struct dma_chan		*dma_chan_tx;
 	struct dma_chan		*dma_chan_rx;
+	struct dma_chan		*transfer_chan;
+	dma_cookie_t		transfer_cookie;
+	enum dma_status		transfer_status;
 	struct completion	transfer_complete;
 	bool			dma_supported;
 	bool			dma_private;
@@ -85,8 +88,14 @@  static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 };
 static void pci_epf_test_dma_callback(void *param)
 {
 	struct pci_epf_test *epf_test = param;
-
-	complete(&epf_test->transfer_complete);
+	struct dma_tx_state state;
+
+	epf_test->transfer_status =
+		dmaengine_tx_status(epf_test->transfer_chan,
+				    epf_test->transfer_cookie, &state);
+	if (epf_test->transfer_status == DMA_COMPLETE ||
+	    epf_test->transfer_status == DMA_ERROR)
+		complete(&epf_test->transfer_complete);
 }
 
 /**
@@ -120,7 +129,6 @@  static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
 	struct dma_async_tx_descriptor *tx;
 	struct dma_slave_config sconf = {};
 	struct device *dev = &epf->dev;
-	dma_cookie_t cookie;
 	int ret;
 
 	if (IS_ERR_OR_NULL(chan)) {
@@ -152,25 +160,33 @@  static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
 	}
 
 	reinit_completion(&epf_test->transfer_complete);
+	epf_test->transfer_chan = chan;
 	tx->callback = pci_epf_test_dma_callback;
 	tx->callback_param = epf_test;
-	cookie = tx->tx_submit(tx);
+	epf_test->transfer_cookie = tx->tx_submit(tx);
 
-	ret = dma_submit_error(cookie);
+	ret = dma_submit_error(epf_test->transfer_cookie);
 	if (ret) {
-		dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie);
-		return -EIO;
+		dev_err(dev, "Failed to do DMA tx_submit %d\n", ret);
+		goto terminate;
 	}
 
 	dma_async_issue_pending(chan);
 	ret = wait_for_completion_interruptible(&epf_test->transfer_complete);
 	if (ret < 0) {
-		dmaengine_terminate_sync(chan);
-		dev_err(dev, "DMA wait_for_completion_timeout\n");
-		return -ETIMEDOUT;
+		dev_err(dev, "DMA wait_for_completion interrupted\n");
+		goto terminate;
 	}
 
-	return 0;
+	if (epf_test->transfer_status == DMA_ERROR) {
+		dev_err(dev, "DMA transfer failed\n");
+		ret = -EIO;
+	}
+
+terminate:
+	dmaengine_terminate_sync(chan);
+
+	return ret;
 }
 
 struct epf_dma_filter {