diff mbox

PCI: hv: Do not wait forever on a device that has disappeared

Message ID KL1P15301MB0006DF4209AEE3809ABD303FBF6B0@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Dexuan Cui May 23, 2018, 9:12 p.m. UTC
Before the guest finishes the device initialization, the device can be
removed anytime by the host, and after that the host won't respond to
the guest's request, so the guest should be prepared to handle this
case.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 12 deletions(-)

Comments

Lorenzo Pieralisi May 24, 2018, 12:41 p.m. UTC | #1
On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ad6a64d..248765f 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
>  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  
> +/*
> + * There is no good way to get notified from vmbus_onoffer_rescind(),
> + * so let's use polling here, since this is not a hot path.
> + */
> +static int wait_for_response(struct hv_device *hdev,
> +			     struct completion *comp)
> +{
> +	while (true) {
> +		if (hdev->channel->rescind) {
> +			dev_warn_once(&hdev->device, "The device is gone.\n");
> +			return -ENODEV;
> +		}
> +
> +		if (wait_for_completion_timeout(comp, HZ / 10))
> +			break;
> +	}
> +
> +	return 0;

This is pretty racy, isn't it ? Also, I reckon you should consider the
timeout return value as an error condition unless I am completely
missing the point of what you are doing.

Lorenzo

> +}
> +
>  /**
>   * devfn_to_wslot() - Convert from Linux PCI slot to Windows
>   * @devfn:	The Linux representation of PCI slot
> @@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
>  	if (ret)
>  		goto error;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> +	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
> +		goto error;
>  
>  	hpdev->desc = *desc;
>  	refcount_set(&hpdev->refs, 1);
> @@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
>  				sizeof(struct pci_version_request),
>  				(unsigned long)pkt, VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  		if (ret) {
>  			dev_err(&hdev->device,
> -				"PCI Pass-through VSP failed sending version reqquest: %#x",
> +				"PCI Pass-through VSP failed to request version: %d",
>  				ret);
>  			goto exit;
>  		}
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status >= 0) {
>  			pci_protocol_version = pci_protocol_versions[i];
>  			dev_info(&hdev->device,
> @@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>  	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
>  			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
>  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  	if (ret)
>  		goto exit;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> -
>  	if (comp_pkt.completion_status < 0) {
>  		dev_err(&hdev->device,
>  			"PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
>  
>  	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
>  			       0, VM_PKT_DATA_INBAND, 0);
> -	if (ret)
> -		return ret;
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp);
>  
> -	wait_for_completion(&comp);
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				size_res, (unsigned long)pkt,
>  				VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
>  		if (ret)
>  			break;
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status < 0) {
>  			ret = -EPROTO;
>  			dev_err(&hdev->device,
> -- 
> 2.7.4
>
Dexuan Cui May 24, 2018, 11:55 p.m. UTC | #2
> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Thursday, May 24, 2018 05:41
> On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> >
> > Before the guest finishes the device initialization, the device can be
> > removed anytime by the host, and after that the host won't respond to
> > the guest's request, so the guest should be prepared to handle this
> > case.
> >
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> *hv_pcidev,
> >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >
> > +/*
> > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > + * so let's use polling here, since this is not a hot path.
> > + */
> > +static int wait_for_response(struct hv_device *hdev,
> > +			     struct completion *comp)
> > +{
> > +	while (true) {
> > +		if (hdev->channel->rescind) {
> > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > +			return -ENODEV;
> > +		}
> > +
> > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > +			break;
> > +	}
> > +
> > +	return 0;
> 
> This is pretty racy, isn't it ? Also, I reckon you should consider the
> timeout return value as an error condition unless I am completely
> missing the point of what you are doing.
> 
> Lorenzo

Actually, this is not racy: we only exit the loop when 
1) the channel is rescinded 
or
2) the channel is not rescinded, and the event is completed.

wait_for_completion_timeout() returns 0 if timed out: in this case,
we keep spinning in the loop every 0.1 second, testing the 2 conditions.

If the chanel is not rescinded, here we should wait for the event 
forever, as the host is supposed to respond to us quickly, and the
event will be completed accordingly. This is what the current code
does. But, in case the channel is rescinded, we need to exit the loop 
immediately with an error return value: this is the only change
made by the patch.

Ideally, we should not use this ugly "polling" method, and the
rescind-handler, i.e. vmbus_onoffer_rescind(), should notify 
wait_for_response(), but as I mentioned, there is no good way
to get notified from vmbus_onoffer_rescind(), so I'm proposing
this "polling" method: it's simple and it can work correctly,
and this is not a hot path.

Thanks,
-- Dexuan
Lorenzo Pieralisi May 25, 2018, 10:29 a.m. UTC | #3
On Thu, May 24, 2018 at 11:55:35PM +0000, Dexuan Cui wrote:
> > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Sent: Thursday, May 24, 2018 05:41
> > On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> > >
> > > Before the guest finishes the device initialization, the device can be
> > > removed anytime by the host, and after that the host won't respond to
> > > the guest's request, so the guest should be prepared to handle this
> > > case.
> > >
> > > --- a/drivers/pci/host/pci-hyperv.c
> > > +++ b/drivers/pci/host/pci-hyperv.c
> > > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> > *hv_pcidev,
> > >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> > >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> > >
> > > +/*
> > > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > > + * so let's use polling here, since this is not a hot path.
> > > + */
> > > +static int wait_for_response(struct hv_device *hdev,
> > > +			     struct completion *comp)
> > > +{
> > > +	while (true) {
> > > +		if (hdev->channel->rescind) {
> > > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > > +			return -ENODEV;
> > > +		}
> > > +
> > > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > > +			break;
> > > +	}
> > > +
> > > +	return 0;
> > 
> > This is pretty racy, isn't it ? Also, I reckon you should consider the
> > timeout return value as an error condition unless I am completely
> > missing the point of what you are doing.
> > 
> > Lorenzo
> 
> Actually, this is not racy: we only exit the loop when 
> 1) the channel is rescinded 
> or
> 2) the channel is not rescinded, and the event is completed.
> 
> wait_for_completion_timeout() returns 0 if timed out: in this case,
> we keep spinning in the loop every 0.1 second, testing the 2 conditions.

Yes sorry, you are right, the exit condition is correct, I am waiting for
maintainers ACK to merge it, I need it as soon as possible if you want
this to make it for v4.18.

Thanks,
Lorenzo

> If the chanel is not rescinded, here we should wait for the event 
> forever, as the host is supposed to respond to us quickly, and the
> event will be completed accordingly. This is what the current code
> does. But, in case the channel is rescinded, we need to exit the loop 
> immediately with an error return value: this is the only change
> made by the patch.
> 
> Ideally, we should not use this ugly "polling" method, and the
> rescind-handler, i.e. vmbus_onoffer_rescind(), should notify 
> wait_for_response(), but as I mentioned, there is no good way
> to get notified from vmbus_onoffer_rescind(), so I'm proposing
> this "polling" method: it's simple and it can work correctly,
> and this is not a hot path.
> 
> Thanks,
> -- Dexuan
Haiyang Zhang May 25, 2018, 11:43 a.m. UTC | #4
> -----Original Message-----
> From: Dexuan Cui
> Sent: Wednesday, May 23, 2018 5:12 PM
> To: 'Lorenzo Pieralisi' <lorenzo.pieralisi@arm.com>; 'Bjorn Helgaas'
> <bhelgaas@google.com>; 'linux-pci@vger.kernel.org' <linux-
> pci@vger.kernel.org>; KY Srinivasan <kys@microsoft.com>; Stephen
> Hemminger <sthemmin@microsoft.com>; 'olaf@aepfle.de' <olaf@aepfle.de>;
> 'apw@canonical.com' <apw@canonical.com>; 'jasowang@redhat.com'
> <jasowang@redhat.com>
> Cc: 'linux-kernel@vger.kernel.org' <linux-kernel@vger.kernel.org>; 'driverdev-
> devel@linuxdriverproject.org' <driverdev-devel@linuxdriverproject.org>;
> Haiyang Zhang <haiyangz@microsoft.com>; 'vkuznets@redhat.com'
> <vkuznets@redhat.com>; 'marcelo.cerri@canonical.com'
> <marcelo.cerri@canonical.com>
> Subject: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
> 
> 
> Before the guest finishes the device initialization, the device can be removed
> anytime by the host, and after that the host won't respond to the guest's
> request, so the guest should be prepared to handle this case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-------
> ----

Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>

Thank you!
Lorenzo Pieralisi May 25, 2018, 1:56 p.m. UTC | #5
On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)

Applied with a minor tweak to the commit log to pci/hv for v4.18.

Thanks,
Lorenzo

> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ad6a64d..248765f 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
>  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  
> +/*
> + * There is no good way to get notified from vmbus_onoffer_rescind(),
> + * so let's use polling here, since this is not a hot path.
> + */
> +static int wait_for_response(struct hv_device *hdev,
> +			     struct completion *comp)
> +{
> +	while (true) {
> +		if (hdev->channel->rescind) {
> +			dev_warn_once(&hdev->device, "The device is gone.\n");
> +			return -ENODEV;
> +		}
> +
> +		if (wait_for_completion_timeout(comp, HZ / 10))
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
>  /**
>   * devfn_to_wslot() - Convert from Linux PCI slot to Windows
>   * @devfn:	The Linux representation of PCI slot
> @@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
>  	if (ret)
>  		goto error;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> +	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
> +		goto error;
>  
>  	hpdev->desc = *desc;
>  	refcount_set(&hpdev->refs, 1);
> @@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
>  				sizeof(struct pci_version_request),
>  				(unsigned long)pkt, VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  		if (ret) {
>  			dev_err(&hdev->device,
> -				"PCI Pass-through VSP failed sending version reqquest: %#x",
> +				"PCI Pass-through VSP failed to request version: %d",
>  				ret);
>  			goto exit;
>  		}
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status >= 0) {
>  			pci_protocol_version = pci_protocol_versions[i];
>  			dev_info(&hdev->device,
> @@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>  	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
>  			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
>  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  	if (ret)
>  		goto exit;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> -
>  	if (comp_pkt.completion_status < 0) {
>  		dev_err(&hdev->device,
>  			"PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
>  
>  	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
>  			       0, VM_PKT_DATA_INBAND, 0);
> -	if (ret)
> -		return ret;
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp);
>  
> -	wait_for_completion(&comp);
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				size_res, (unsigned long)pkt,
>  				VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
>  		if (ret)
>  			break;
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status < 0) {
>  			ret = -EPROTO;
>  			dev_err(&hdev->device,
> -- 
> 2.7.4
>
Michael Kelley (EOSG) May 29, 2018, 12:19 a.m. UTC | #6
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 

While this patch solves the immediate problem of getting hung waiting
for a response from Hyper-V that will never come, there's another scenario
to look at that I think introduces a race.  Suppose the guest VM issues a
vmbus_sendpacket() request in one of the cases covered by this patch,
and suppose that Hyper-V queues a response to the request, and then
immediately follows with a rescind request.   Processing the response will
get queued to a tasklet associated with the channel, while processing the
rescind will get queued to a tasklet associated with the top-level vmbus
connection.   From what I can see, the code doesn't impose any ordering
on processing the two.  If the rescind is processed first, the new
wait_for_response() function may wake up, notice the rescind flag, and
return an error.  Its caller will return an error, and in doing so pop the
completion packet off the stack.   When the response is processed later,
it will try to signal completion via a completion packet that no longer
exists, and memory corruption will likely occur.

Am I missing anything that would prevent this scenario from happening?
It is admittedly low probability, and a solution seems non-trivial.  I haven't
looked specifically, but a similar scenario is probably possible with the
drivers for other VMbus devices.  We should work on a generic solution.

Michael
Andy Shevchenko May 29, 2018, 9:20 p.m. UTC | #7
On Thu, May 24, 2018 at 12:12 AM, Dexuan Cui <decui@microsoft.com> wrote:
>
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.

> +       while (true) {
> +               if (hdev->channel->rescind) {
> +                       dev_warn_once(&hdev->device, "The device is gone.\n");
> +                       return -ENODEV;
> +               }
> +
> +               if (wait_for_completion_timeout(comp, HZ / 10))
> +                       break;
> +       }

Infinite loops are usually a red flags.

What's wrong with simple:

do {
  ...
} while (wait_...(...) == 0);

?

> +       if (!ret)
> +               ret = wait_for_response(hdev, &comp);

Better to use well established patterns, i.e.

if (ret)
 return ret;

> +               if (!ret)
> +                       ret = wait_for_response(hdev, &comp_pkt.host_event);

Here it looks okay on the first glance, but better to think about it
again and refactor.
Dexuan Cui May 29, 2018, 9:28 p.m. UTC | #8
> From: Andy Shevchenko <andy.shevchenko@gmail.com>

> Sent: Tuesday, May 29, 2018 14:21

> On Thu, May 24, 2018 at 12:12 AM, Dexuan Cui <decui@microsoft.com>

> wrote:

> >

> > Before the guest finishes the device initialization, the device can be

> > removed anytime by the host, and after that the host won't respond to

> > the guest's request, so the guest should be prepared to handle this

> > case.

> 

> > +       while (true) {

> > +               if (hdev->channel->rescind) {

> > +                       dev_warn_once(&hdev->device, "The device is

> gone.\n");

> > +                       return -ENODEV;

> > +               }

> > +

> > +               if (wait_for_completion_timeout(comp, HZ / 10))

> > +                       break;

> > +       }

> 

> Infinite loops are usually a red flags.

> 

> What's wrong with simple:

> 

> do {

>   ...

> } while (wait_...(...) == 0);

> 

> ?

Thanks for the suggestion, Andy!
The coding style you suggested looks better to me. :-)

> > +       if (!ret)

> > +               ret = wait_for_response(hdev, &comp);

> 

> Better to use well established patterns, i.e.

> 

> if (ret)

>  return ret;

Agreed.

> 

> > +               if (!ret)

> > +                       ret = wait_for_response(hdev,

> &comp_pkt.host_event);

> 

> Here it looks okay on the first glance, but better to think about it

> again and refactor.


> With Best Regards,

> Andy Shevchenko


I'll try to send out a patch to improve the coding style, after I address
Michael Kelley's concern of a race.

Thanks,
-- Dexuan
diff mbox

Patch

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index ad6a64d..248765f 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -556,6 +556,26 @@  static void put_pcichild(struct hv_pci_dev *hv_pcidev,
 static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
 static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
 
+/*
+ * There is no good way to get notified from vmbus_onoffer_rescind(),
+ * so let's use polling here, since this is not a hot path.
+ */
+static int wait_for_response(struct hv_device *hdev,
+			     struct completion *comp)
+{
+	while (true) {
+		if (hdev->channel->rescind) {
+			dev_warn_once(&hdev->device, "The device is gone.\n");
+			return -ENODEV;
+		}
+
+		if (wait_for_completion_timeout(comp, HZ / 10))
+			break;
+	}
+
+	return 0;
+}
+
 /**
  * devfn_to_wslot() - Convert from Linux PCI slot to Windows
  * @devfn:	The Linux representation of PCI slot
@@ -1569,7 +1589,8 @@  static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
 	if (ret)
 		goto error;
 
-	wait_for_completion(&comp_pkt.host_event);
+	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
+		goto error;
 
 	hpdev->desc = *desc;
 	refcount_set(&hpdev->refs, 1);
@@ -2070,15 +2091,16 @@  static int hv_pci_protocol_negotiation(struct hv_device *hdev)
 				sizeof(struct pci_version_request),
 				(unsigned long)pkt, VM_PKT_DATA_INBAND,
 				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret = wait_for_response(hdev, &comp_pkt.host_event);
+
 		if (ret) {
 			dev_err(&hdev->device,
-				"PCI Pass-through VSP failed sending version reqquest: %#x",
+				"PCI Pass-through VSP failed to request version: %d",
 				ret);
 			goto exit;
 		}
 
-		wait_for_completion(&comp_pkt.host_event);
-
 		if (comp_pkt.completion_status >= 0) {
 			pci_protocol_version = pci_protocol_versions[i];
 			dev_info(&hdev->device,
@@ -2287,11 +2309,12 @@  static int hv_pci_enter_d0(struct hv_device *hdev)
 	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
 			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (!ret)
+		ret = wait_for_response(hdev, &comp_pkt.host_event);
+
 	if (ret)
 		goto exit;
 
-	wait_for_completion(&comp_pkt.host_event);
-
 	if (comp_pkt.completion_status < 0) {
 		dev_err(&hdev->device,
 			"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -2331,11 +2354,10 @@  static int hv_pci_query_relations(struct hv_device *hdev)
 
 	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
 			       0, VM_PKT_DATA_INBAND, 0);
-	if (ret)
-		return ret;
+	if (!ret)
+		ret = wait_for_response(hdev, &comp);
 
-	wait_for_completion(&comp);
-	return 0;
+	return ret;
 }
 
 /**
@@ -2405,11 +2427,11 @@  static int hv_send_resources_allocated(struct hv_device *hdev)
 				size_res, (unsigned long)pkt,
 				VM_PKT_DATA_INBAND,
 				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret = wait_for_response(hdev, &comp_pkt.host_event);
 		if (ret)
 			break;
 
-		wait_for_completion(&comp_pkt.host_event);
-
 		if (comp_pkt.completion_status < 0) {
 			ret = -EPROTO;
 			dev_err(&hdev->device,