genirq/msi: Make sure early activation of all PCI MSIs

Message ID 20210121110247.20320-1-shameerali.kolothum.thodi@huawei.com (mailing list archive)
State New, archived
Series genirq/msi: Make sure early activation of all PCI MSIs

Commit Message

Shameerali Kolothum Thodi Jan. 21, 2021, 11:02 a.m. UTC
We currently do early activation of MSI irqs for PCI/MSI based on
the MSI_FLAG_ACTIVATE_EARLY flag. Though this activates all the
allocated MSIs in the MSI-X case, it only activates the base irq
in the MSI case. This is because, for MSI, a single msi_desc entry
covers all 32 irqs the device can support, and the current
implementation iterates over the msi entries, so it ends up
activating only the base irq.

The above creates an issue on platforms where the msi controller
supports direct injection of vLPIs (eg: ARM GICv4 ITS). On these
platforms, upon irq activation, the ITS driver maps the event to an
ITT entry. For Guest pass-through to work, early mapping of all the
device MSI vectors is required; otherwise, the vfio irq bypass
manager registration will fail. eg, on a HiSilicon D06 platform
with GICv4 enabled, Guest boot with zip device pass-through
reports,

"vfio-pci 0000:75:00.1: irq bypass producer (token 0000000006e5176a)
registration fails: 66311"

and Guest boot fails.

This is traced to:
   kvm_arch_irq_bypass_add_producer
     kvm_vgic_v4_set_forwarding
       vgic_its_resolve_lpi --> returns E_ITS_INT_UNMAPPED_INTERRUPT
                                (0x010307, i.e. the 66311 above)

Hence make sure we activate all the irqs for both the MSI and MSI-X cases.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
It is unclear to me whether not performing the early activation of all
MSI irqs was deliberate, and whether it has consequences on any other
platforms. Please let me know.

Thanks,
Shameer 
---
 kernel/irq/msi.c | 114 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 90 insertions(+), 24 deletions(-)
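
To make the bug concrete, the sketch below paraphrases the pre-patch
early-activation loop in __msi_domain_alloc_irqs() (simplified, not
verbatim kernel code; the can_reserve handling is omitted):

	for_each_msi_entry(desc, dev) {
		struct irq_data *irq_data;

		/*
		 * Set by the PCI layer: entries must be activated before
		 * MSI is enabled in the device.
		 */
		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
			continue;

		/*
		 * MSI-X: one msi_desc per vector, so this activates all
		 * of them. Multi-MSI: one msi_desc for up to 32 vectors,
		 * so only the base vector desc->irq is activated, and
		 * desc->irq + 1 .. desc->irq + desc->nvec_used - 1 never are.
		 */
		irq_data = irq_domain_get_irq_data(domain, desc->irq);
		ret = irq_domain_activate_irq(irq_data, can_reserve);
		if (ret)
			goto cleanup;
	}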

Comments

Marc Zyngier Jan. 21, 2021, 9:25 p.m. UTC | #1
Hi Shameer,

On Thu, 21 Jan 2021 11:02:47 +0000,
Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:

[...]

> It is unclear to me whether not performing the early activation of all
> MSI irqs was deliberate, and whether it has consequences on any other
> platforms. Please let me know.

Probably just an oversight.

[...]

I find this pretty complicated, and I'd like to avoid injecting
the PCI MSI-vs-MSI-X concept into something that is supposed to be
bus-agnostic.

What's wrong with the following (untested) patch, which looks much
simpler?

Thanks,

	M.

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2c0c4d6d0f83..a930d03c2c67 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -451,15 +451,17 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
 			continue;
 
-		irq_data = irq_domain_get_irq_data(domain, desc->irq);
-		if (!can_reserve) {
-			irqd_clr_can_reserve(irq_data);
-			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
-				irqd_set_msi_nomask_quirk(irq_data);
+		for (i = 0; i < desc->nvec_used; i++) {
+			irq_data = irq_domain_get_irq_data(domain, desc->irq + i);
+			if (!can_reserve) {
+				irqd_clr_can_reserve(irq_data);
+				if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+					irqd_set_msi_nomask_quirk(irq_data);
+			}
+			ret = irq_domain_activate_irq(irq_data, can_reserve);
+			if (ret)
+				goto cleanup;
 		}
-		ret = irq_domain_activate_irq(irq_data, can_reserve);
-		if (ret)
-			goto cleanup;
 	}
 
 	/*
@@ -478,12 +480,14 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	for_each_msi_entry(desc, dev) {
 		struct irq_data *irqd;
 
+		for (i = 0; i < desc->nvec_used; i++) {
+			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
+			if (irqd_is_activated(irqd))
+				irq_domain_deactivate_irq(irqd);
+		}
+
 		if (desc->irq == virq)
 			break;
-
-		irqd = irq_domain_get_irq_data(domain, desc->irq);
-		if (irqd_is_activated(irqd))
-			irq_domain_deactivate_irq(irqd);
 	}
 	msi_domain_free_irqs(domain, dev);
 	return ret;
Shameerali Kolothum Thodi Jan. 22, 2021, 9:21 a.m. UTC | #2
Hi Marc,

> -----Original Message-----
> From: Marc Zyngier [mailto:maz@kernel.org]
> Sent: 21 January 2021 21:25
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: linux-kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> eric.auger@redhat.com; tglx@linutronix.de; linuxarm@openeuler.org;
> Zengtao (B) <prime.zeng@hisilicon.com>; Wangzhou (B)
> <wangzhou1@hisilicon.com>
> Subject: Re: [PATCH] genirq/msi: Make sure early activation of all PCI MSIs
> 
> Hi Shameer,
> 
> On Thu, 21 Jan 2021 11:02:47 +0000,
> Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:
> > [...]
> 
> Probably just an oversight.

Ok. That's good news :)

> > [...]
> 
> I find this pretty complicated, and I'd like to avoid injecting the PCI
> MSI-vs-MSI-X concept into something that is supposed to be bus-agnostic.

Agree. That's mainly because I was very skeptical (TBH, my knowledge of
this msi core code is very limited) about changing the MSI-X activation
logic here, and just thought of limiting the impact to the MSI case as a
first attempt.

> What's wrong with the following (untested) patch, which looks much simpler?

Yes, I had tried this as one of the early fixes, but as said above I was
not very sure of the impact on other platforms. Tested it again and it
works.

Please send it.

Thanks,
Shameer

> [...]
Marc Zyngier Jan. 23, 2021, 12:32 p.m. UTC | #3
Hi Shameer,

On 2021-01-22 09:21, Shameerali Kolothum Thodi wrote:
> Hi Marc,
> 

[...]

>> I find this pretty complicated, and I'd like to avoid injecting the PCI
>> MSI-vs-MSI-X concept into something that is supposed to be bus-agnostic.
> 
> Agree. That's mainly because I was very skeptical (TBH, my knowledge of
> this msi core code is very limited) about changing the MSI-X activation
> logic here, and just thought of limiting the impact to the MSI case as a
> first attempt.
> 
>> What's wrong with the following (untested) patch, which looks much 
>> simpler?
> 
> Yes, I had tried this as one of the early fixes, but as said above I was
> not very sure of the impact on other platforms. Tested it again and it
> works.
> 
> Please send it.

Actually, there is a better way, which is to have a proper iterator on
vectors instead of descriptors. With that and a bit of cleanup, the
patch [1] looks pretty neat.
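
For reference, such a vector-level iterator could look roughly like the
sketch below (an assumed shape inferred from the description above; see
[1] for the actual patch). It walks every Linux irq number covered by a
descriptor instead of the descriptors themselves:

	#define for_each_msi_vector(desc, __irq, dev)				\
		for_each_msi_entry((desc), (dev))				\
			if ((desc)->irq)					\
				for (__irq = (desc)->irq;			\
				     __irq < ((desc)->irq + (desc)->nvec_used);	\
				     __irq++)

With an iterator of this shape, both the early-activation loop and the
cleanup path in __msi_domain_alloc_irqs() reduce to a single walk over
all allocated vectors, with no MSI-vs-MSI-X special-casing.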

Please give it a go.

Thanks,

         M.

[1] https://lore.kernel.org/r/20210123122759.1781359-1-maz@kernel.org

Patch

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2c0c4d6d0f83..eec187fc32a9 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -395,6 +395,78 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
 	return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
 }
 
+static void msi_domain_deactivate_irq(struct irq_domain *domain, int irq)
+{
+	struct irq_data *irqd;
+
+	irqd = irq_domain_get_irq_data(domain, irq);
+	if (irqd_is_activated(irqd))
+		irq_domain_deactivate_irq(irqd);
+}
+
+static int msi_domain_activate_irq(struct irq_domain *domain,
+				   int irq, bool can_reserve)
+{
+	struct irq_data *irqd;
+
+	irqd = irq_domain_get_irq_data(domain, irq);
+	if (!can_reserve) {
+		irqd_clr_can_reserve(irqd);
+		if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+			irqd_set_msi_nomask_quirk(irqd);
+	}
+	return irq_domain_activate_irq(irqd, can_reserve);
+}
+
+static int msi_domain_activate_msix_irqs(struct irq_domain *domain,
+					 struct device *dev, bool can_reserve)
+{
+	struct msi_desc *desc;
+	int ret, irq;
+
+	for_each_msi_entry(desc, dev) {
+		irq = desc->irq;
+		ret = msi_domain_activate_irq(domain, irq, can_reserve);
+		if (ret)
+			goto out;
+	}
+	return 0;
+
+out:
+	for_each_msi_entry(desc, dev) {
+		if (irq == desc->irq)
+			break;
+		msi_domain_deactivate_irq(domain, desc->irq);
+	}
+	return ret;
+}
+
+static int msi_domain_activate_msi_irqs(struct irq_domain *domain,
+					struct device *dev, bool can_reserve)
+{
+	struct msi_desc *desc;
+	int i, ret, base, irq;
+
+	desc = first_msi_entry(dev);
+	base = desc->irq;
+
+	for (i = 0; i < desc->nvec_used; i++) {
+		irq = base + i;
+		ret = msi_domain_activate_irq(domain, irq, can_reserve);
+		if (ret)
+			goto out;
+	}
+	return 0;
+
+out:
+	for (i = 0; i < desc->nvec_used; i++) {
+		if (irq == base + i)
+			break;
+		msi_domain_deactivate_irq(domain, base + i);
+	}
+	return ret;
+}
+
 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 			    int nvec)
 {
@@ -443,21 +515,25 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 		else
 			dev_dbg(dev, "irq [%d-%d] for MSI\n",
 				virq, virq + desc->nvec_used - 1);
-		/*
-		 * This flag is set by the PCI layer as we need to activate
-		 * the MSI entries before the PCI layer enables MSI in the
-		 * card. Otherwise the card latches a random msi message.
-		 */
-		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
-			continue;
+	}
 
-		irq_data = irq_domain_get_irq_data(domain, desc->irq);
-		if (!can_reserve) {
-			irqd_clr_can_reserve(irq_data);
-			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
-				irqd_set_msi_nomask_quirk(irq_data);
-		}
-		ret = irq_domain_activate_irq(irq_data, can_reserve);
+	/*
+	 * This flag is set by the PCI layer as we need to activate
+	 * the MSI entries before the PCI layer enables MSI in the
+	 * card. Otherwise the card latches a random msi message.
+	 * Early activation is also required when the msi controller
+	 * supports direct injection of virtual LPIs(eg. ARM GICv4 ITS).
+	 * Otherwise, the DevID/EventID -> LPI translation for pass-through
+	 * devices will fail. Make sure we do activate all the allocated
+	 * irqs for MSI and MSI-X cases.
+	 */
+	if ((info->flags & MSI_FLAG_ACTIVATE_EARLY)) {
+		desc = first_msi_entry(dev);
+
+		if (desc->msi_attrib.is_msix)
+			ret = msi_domain_activate_msix_irqs(domain, dev, can_reserve);
+		else
+			ret = msi_domain_activate_msi_irqs(domain, dev, can_reserve);
 		if (ret)
 			goto cleanup;
 	}
@@ -475,16 +551,6 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	return 0;
 
 cleanup:
-	for_each_msi_entry(desc, dev) {
-		struct irq_data *irqd;
-
-		if (desc->irq == virq)
-			break;
-
-		irqd = irq_domain_get_irq_data(domain, desc->irq);
-		if (irqd_is_activated(irqd))
-			irq_domain_deactivate_irq(irqd);
-	}
 	msi_domain_free_irqs(domain, dev);
 	return ret;
 }