diff mbox series

[RFC,KERNEL,v6,3/3] xen/privcmd: Add new syscall to get gsi from irq

Message ID 20240419033616.607889-4-Jiqian.Chen@amd.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Support device passthrough when dom0 is PVH on Xen | expand

Commit Message

Chen, Jiqian April 19, 2024, 3:36 a.m. UTC
PVH dom0 uses the Linux local interrupt mechanism, so the
irq for a gsi is allocated dynamically, on a first-come,
first-served basis. The irq numbers are allocated from small
to large, but the gsis are not requested in ascending order
(e.g. gsi 38 may be requested before gsi 28), so the irq
number is not equal to the gsi number.
When passing through a device, QEMU uses the device's gsi
number to do the pirq mapping, but it reads that number from
/sys/bus/pci/devices/<sbdf>/irq, which holds the irq. Since
irq != gsi, the mapping fails.
The current Linux code provides no way for userspace to
translate an irq to a gsi.

To solve this, record the relationship between gsi and irq
when PVH dom0 calls acpi_register_gsi_ioapic for a device,
and add a new syscall (a privcmd ioctl) so that userspace
can get that translation when it needs it.

Co-developed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
 arch/x86/include/asm/apic.h      |  8 +++++++
 arch/x86/include/asm/xen/pci.h   |  5 ++++
 arch/x86/kernel/acpi/boot.c      |  2 +-
 arch/x86/pci/xen.c               | 21 +++++++++++++++++
 drivers/xen/events/events_base.c | 39 ++++++++++++++++++++++++++++++++
 drivers/xen/privcmd.c            | 19 ++++++++++++++++
 include/uapi/xen/privcmd.h       |  7 ++++++
 include/xen/events.h             |  5 ++++
 8 files changed, 105 insertions(+), 1 deletion(-)

Comments

Jürgen Groß May 10, 2024, 6:46 a.m. UTC | #1
On 19.04.24 05:36, Jiqian Chen wrote:
> In PVH dom0, it uses the linux local interrupt mechanism,
> when it allocs irq for a gsi, it is dynamic, and follow
> the principle of applying first, distributing first. And
> the irq number is alloced from small to large, but the
> applying gsi number is not, may gsi 38 comes before gsi 28,
> it causes the irq number is not equal with the gsi number.
> And when passthrough a device, QEMU will use device's gsi
> number to do pirq mapping, but the gsi number is got from
> file /sys/bus/pci/devices/<sbdf>/irq, irq!= gsi, so it will
> fail when mapping.
> And in current linux codes, there is no method to translate
> irq to gsi for userspace.
> 
> For above purpose, record the relationship of gsi and irq
> when PVH dom0 do acpi_register_gsi_ioapic for devices and
> adds a new syscall into privcmd to let userspace can get
> that translation when they have a need.
> 
> Co-developed-by: Huang Rui <ray.huang@amd.com>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
>   arch/x86/include/asm/apic.h      |  8 +++++++
>   arch/x86/include/asm/xen/pci.h   |  5 ++++
>   arch/x86/kernel/acpi/boot.c      |  2 +-
>   arch/x86/pci/xen.c               | 21 +++++++++++++++++
>   drivers/xen/events/events_base.c | 39 ++++++++++++++++++++++++++++++++
>   drivers/xen/privcmd.c            | 19 ++++++++++++++++
>   include/uapi/xen/privcmd.h       |  7 ++++++
>   include/xen/events.h             |  5 ++++
>   8 files changed, 105 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> index 9d159b771dc8..dd4139250895 100644
> --- a/arch/x86/include/asm/apic.h
> +++ b/arch/x86/include/asm/apic.h
> @@ -169,6 +169,9 @@ extern bool apic_needs_pit(void);
>   
>   extern void apic_send_IPI_allbutself(unsigned int vector);
>   
> +extern int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
> +				    int trigger, int polarity);
> +
>   #else /* !CONFIG_X86_LOCAL_APIC */
>   static inline void lapic_shutdown(void) { }
>   #define local_apic_timer_c2_ok		1
> @@ -183,6 +186,11 @@ static inline void apic_intr_mode_init(void) { }
>   static inline void lapic_assign_system_vectors(void) { }
>   static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
>   static inline bool apic_needs_pit(void) { return true; }
> +static inline int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
> +				    int trigger, int polarity)
> +{
> +	return (int)gsi;
> +}
>   #endif /* !CONFIG_X86_LOCAL_APIC */
>   
>   #ifdef CONFIG_X86_X2APIC
> diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
> index 9015b888edd6..aa8ded61fc2d 100644
> --- a/arch/x86/include/asm/xen/pci.h
> +++ b/arch/x86/include/asm/xen/pci.h
> @@ -5,6 +5,7 @@
>   #if defined(CONFIG_PCI_XEN)
>   extern int __init pci_xen_init(void);
>   extern int __init pci_xen_hvm_init(void);
> +extern int __init pci_xen_pvh_init(void);
>   #define pci_xen 1
>   #else
>   #define pci_xen 0
> @@ -13,6 +14,10 @@ static inline int pci_xen_hvm_init(void)
>   {
>   	return -1;
>   }
> +static inline int pci_xen_pvh_init(void)
> +{
> +	return -1;
> +}
>   #endif
>   #ifdef CONFIG_XEN_PV_DOM0
>   int __init pci_xen_initial_domain(void);
> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
> index 85a3ce2a3666..72c73458c083 100644
> --- a/arch/x86/kernel/acpi/boot.c
> +++ b/arch/x86/kernel/acpi/boot.c
> @@ -749,7 +749,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
>   }
>   
>   #ifdef CONFIG_X86_LOCAL_APIC
> -static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
> +int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>   				    int trigger, int polarity)
>   {
>   	int irq = gsi;
> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
> index 652cd53e77f6..f056ab5c0a06 100644
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -114,6 +114,21 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
>   				 false /* no mapping of GSI to PIRQ */);
>   }
>   
> +static int acpi_register_gsi_xen_pvh(struct device *dev, u32 gsi,
> +				    int trigger, int polarity)
> +{
> +	int irq;
> +
> +	irq = acpi_register_gsi_ioapic(dev, gsi, trigger, polarity);
> +	if (irq < 0)
> +		return irq;
> +
> +	if (xen_pvh_add_gsi_irq_map(gsi, irq) == -EEXIST)
> +		printk(KERN_INFO "Already map the GSI :%u and IRQ: %d\n", gsi, irq);
> +
> +	return irq;
> +}
> +
>   #ifdef CONFIG_XEN_PV_DOM0
>   static int xen_register_gsi(u32 gsi, int triggering, int polarity)
>   {
> @@ -558,6 +573,12 @@ int __init pci_xen_hvm_init(void)
>   	return 0;
>   }
>   
> +int __init pci_xen_pvh_init(void)
> +{
> +	__acpi_register_gsi = acpi_register_gsi_xen_pvh;

No support for unregistering the gsi again?

> +	return 0;
> +}
> +
>   #ifdef CONFIG_XEN_PV_DOM0
>   int __init pci_xen_initial_domain(void)
>   {
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index 27553673e46b..80d4f7faac64 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -953,6 +953,43 @@ int xen_irq_from_gsi(unsigned gsi)
>   }
>   EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
>   
> +int xen_gsi_from_irq(unsigned irq)
> +{
> +	struct irq_info *info;
> +
> +	list_for_each_entry(info, &xen_irq_list_head, list) {
> +		if (info->type != IRQT_PIRQ)
> +			continue;
> +
> +		if (info->irq == irq)
> +			return info->u.pirq.gsi;
> +	}
> +
> +	return -1;
> +}
> +EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
> +
> +int xen_pvh_add_gsi_irq_map(unsigned gsi, unsigned irq)
> +{
> +	int tmp_irq;
> +	struct irq_info *info;
> +
> +	tmp_irq = xen_irq_from_gsi(gsi);
> +	if (tmp_irq != -1)
> +		return -EEXIST;
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (info == NULL)
> +		panic("Unable to allocate metadata for GSI%d\n", gsi);

Please don't kill the system here, just return -ENOMEM.

> +
> +	info->type = IRQT_PIRQ;
> +	info->irq = irq;
> +	info->u.pirq.gsi = gsi;
> +	list_add_tail(&info->list, &xen_irq_list_head);

I think you need some kind of locking to protect changing of the list against
concurrent accesses.

> +
> +	return 0;
> +}
> +
>   static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
>   {
>   	evtchn_port_t evtchn;
> @@ -2295,6 +2332,8 @@ void __init xen_init_IRQ(void)
>   	xen_init_setup_upcall_vector();
>   	xen_alloc_callback_vector();
>   
> +	if (xen_pvh_domain())
> +		pci_xen_pvh_init();
>   
>   	if (xen_hvm_domain()) {
>   		native_init_IRQ();
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 67dfa4778864..11feed529e1d 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -842,6 +842,21 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
>   	return rc;
>   }
>   
> +static long privcmd_ioctl_gsi_from_irq(struct file *file, void __user *udata)
> +{
> +	struct privcmd_gsi_from_irq kdata;
> +
> +	if (copy_from_user(&kdata, udata, sizeof(kdata)))
> +		return -EFAULT;
> +
> +	kdata.gsi = xen_gsi_from_irq(kdata.irq);
> +
> +	if (copy_to_user(udata, &kdata, sizeof(kdata)))
> +		return -EFAULT;
> +
> +	return 0;

Shouldn't you return an error if xen_gsi_from_irq() returned -1?


Juergen
Chen, Jiqian May 10, 2024, 9:06 a.m. UTC | #2
Hi,

On 2024/5/10 14:46, Jürgen Groß wrote:
> On 19.04.24 05:36, Jiqian Chen wrote:
>> In PVH dom0, it uses the linux local interrupt mechanism,
>> when it allocs irq for a gsi, it is dynamic, and follow
>> the principle of applying first, distributing first. And
>> the irq number is alloced from small to large, but the
>> applying gsi number is not, may gsi 38 comes before gsi 28,
>> it causes the irq number is not equal with the gsi number.
>> And when passthrough a device, QEMU will use device's gsi
>> number to do pirq mapping, but the gsi number is got from
>> file /sys/bus/pci/devices/<sbdf>/irq, irq!= gsi, so it will
>> fail when mapping.
>> And in current linux codes, there is no method to translate
>> irq to gsi for userspace.
>>
>> For above purpose, record the relationship of gsi and irq
>> when PVH dom0 do acpi_register_gsi_ioapic for devices and
>> adds a new syscall into privcmd to let userspace can get
>> that translation when they have a need.
>>
>> Co-developed-by: Huang Rui <ray.huang@amd.com>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>>   arch/x86/include/asm/apic.h      |  8 +++++++
>>   arch/x86/include/asm/xen/pci.h   |  5 ++++
>>   arch/x86/kernel/acpi/boot.c      |  2 +-
>>   arch/x86/pci/xen.c               | 21 +++++++++++++++++
>>   drivers/xen/events/events_base.c | 39 ++++++++++++++++++++++++++++++++
>>   drivers/xen/privcmd.c            | 19 ++++++++++++++++
>>   include/uapi/xen/privcmd.h       |  7 ++++++
>>   include/xen/events.h             |  5 ++++
>>   8 files changed, 105 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
>> index 9d159b771dc8..dd4139250895 100644
>> --- a/arch/x86/include/asm/apic.h
>> +++ b/arch/x86/include/asm/apic.h
>> @@ -169,6 +169,9 @@ extern bool apic_needs_pit(void);
>>     extern void apic_send_IPI_allbutself(unsigned int vector);
>>   +extern int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>> +                    int trigger, int polarity);
>> +
>>   #else /* !CONFIG_X86_LOCAL_APIC */
>>   static inline void lapic_shutdown(void) { }
>>   #define local_apic_timer_c2_ok        1
>> @@ -183,6 +186,11 @@ static inline void apic_intr_mode_init(void) { }
>>   static inline void lapic_assign_system_vectors(void) { }
>>   static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
>>   static inline bool apic_needs_pit(void) { return true; }
>> +static inline int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>> +                    int trigger, int polarity)
>> +{
>> +    return (int)gsi;
>> +}
>>   #endif /* !CONFIG_X86_LOCAL_APIC */
>>     #ifdef CONFIG_X86_X2APIC
>> diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
>> index 9015b888edd6..aa8ded61fc2d 100644
>> --- a/arch/x86/include/asm/xen/pci.h
>> +++ b/arch/x86/include/asm/xen/pci.h
>> @@ -5,6 +5,7 @@
>>   #if defined(CONFIG_PCI_XEN)
>>   extern int __init pci_xen_init(void);
>>   extern int __init pci_xen_hvm_init(void);
>> +extern int __init pci_xen_pvh_init(void);
>>   #define pci_xen 1
>>   #else
>>   #define pci_xen 0
>> @@ -13,6 +14,10 @@ static inline int pci_xen_hvm_init(void)
>>   {
>>       return -1;
>>   }
>> +static inline int pci_xen_pvh_init(void)
>> +{
>> +    return -1;
>> +}
>>   #endif
>>   #ifdef CONFIG_XEN_PV_DOM0
>>   int __init pci_xen_initial_domain(void);
>> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
>> index 85a3ce2a3666..72c73458c083 100644
>> --- a/arch/x86/kernel/acpi/boot.c
>> +++ b/arch/x86/kernel/acpi/boot.c
>> @@ -749,7 +749,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
>>   }
>>     #ifdef CONFIG_X86_LOCAL_APIC
>> -static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>> +int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>                       int trigger, int polarity)
>>   {
>>       int irq = gsi;
>> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
>> index 652cd53e77f6..f056ab5c0a06 100644
>> --- a/arch/x86/pci/xen.c
>> +++ b/arch/x86/pci/xen.c
>> @@ -114,6 +114,21 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
>>                    false /* no mapping of GSI to PIRQ */);
>>   }
>>   +static int acpi_register_gsi_xen_pvh(struct device *dev, u32 gsi,
>> +                    int trigger, int polarity)
>> +{
>> +    int irq;
>> +
>> +    irq = acpi_register_gsi_ioapic(dev, gsi, trigger, polarity);
>> +    if (irq < 0)
>> +        return irq;
>> +
>> +    if (xen_pvh_add_gsi_irq_map(gsi, irq) == -EEXIST)
>> +        printk(KERN_INFO "Already map the GSI :%u and IRQ: %d\n", gsi, irq);
>> +
>> +    return irq;
>> +}
>> +
>>   #ifdef CONFIG_XEN_PV_DOM0
>>   static int xen_register_gsi(u32 gsi, int triggering, int polarity)
>>   {
>> @@ -558,6 +573,12 @@ int __init pci_xen_hvm_init(void)
>>       return 0;
>>   }
>>   +int __init pci_xen_pvh_init(void)
>> +{
>> +    __acpi_register_gsi = acpi_register_gsi_xen_pvh;
> 
> No support for unregistering the gsi again?
__acpi_unregister_gsi is set in function acpi_set_irq_model_ioapic.
Maybe I need to add a new function that calls acpi_unregister_gsi_ioapic and removes the irq/gsi mapping from xen_irq_list_head?

> 
>> +    return 0;
>> +}
>> +
>>   #ifdef CONFIG_XEN_PV_DOM0
>>   int __init pci_xen_initial_domain(void)
>>   {
>> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
>> index 27553673e46b..80d4f7faac64 100644
>> --- a/drivers/xen/events/events_base.c
>> +++ b/drivers/xen/events/events_base.c
>> @@ -953,6 +953,43 @@ int xen_irq_from_gsi(unsigned gsi)
>>   }
>>   EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
>>   +int xen_gsi_from_irq(unsigned irq)
>> +{
>> +    struct irq_info *info;
>> +
>> +    list_for_each_entry(info, &xen_irq_list_head, list) {
>> +        if (info->type != IRQT_PIRQ)
>> +            continue;
>> +
>> +        if (info->irq == irq)
>> +            return info->u.pirq.gsi;
>> +    }
>> +
>> +    return -1;
>> +}
>> +EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
>> +
>> +int xen_pvh_add_gsi_irq_map(unsigned gsi, unsigned irq)
>> +{
>> +    int tmp_irq;
>> +    struct irq_info *info;
>> +
>> +    tmp_irq = xen_irq_from_gsi(gsi);
>> +    if (tmp_irq != -1)
>> +        return -EEXIST;
>> +
>> +    info = kzalloc(sizeof(*info), GFP_KERNEL);
>> +    if (info == NULL)
>> +        panic("Unable to allocate metadata for GSI%d\n", gsi);
> 
> Please don't kill the system here, just return -ENOMEM.
Will change in next version.

> 
>> +
>> +    info->type = IRQT_PIRQ;
I am considering whether I need to use a new type (like IRQT_GSI) here to distinguish it from IRQT_PIRQ, because the function restore_pirqs processes all IRQT_PIRQ entries.

>> +    info->irq = irq;
>> +    info->u.pirq.gsi = gsi;
>> +    list_add_tail(&info->list, &xen_irq_list_head);
> 
> I think you need some kind of locking to protect changing of the list against
> concurrent accesses.
OK, will add a lock in next version.

> 
>> +
>> +    return 0;
>> +}
>> +
>>   static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
>>   {
>>       evtchn_port_t evtchn;
>> @@ -2295,6 +2332,8 @@ void __init xen_init_IRQ(void)
>>       xen_init_setup_upcall_vector();
>>       xen_alloc_callback_vector();
>>   +    if (xen_pvh_domain())
>> +        pci_xen_pvh_init();
>>         if (xen_hvm_domain()) {
>>           native_init_IRQ();
>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>> index 67dfa4778864..11feed529e1d 100644
>> --- a/drivers/xen/privcmd.c
>> +++ b/drivers/xen/privcmd.c
>> @@ -842,6 +842,21 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
>>       return rc;
>>   }
>>   +static long privcmd_ioctl_gsi_from_irq(struct file *file, void __user *udata)
>> +{
>> +    struct privcmd_gsi_from_irq kdata;
>> +
>> +    if (copy_from_user(&kdata, udata, sizeof(kdata)))
>> +        return -EFAULT;
>> +
>> +    kdata.gsi = xen_gsi_from_irq(kdata.irq);
>> +
>> +    if (copy_to_user(udata, &kdata, sizeof(kdata)))
>> +        return -EFAULT;
>> +
>> +    return 0;
> 
> Shouldn't you return an error if xen_gsi_from_irq() returned -1?
Oh, will change in next version.

> 
> 
> Juergen
Jürgen Groß May 10, 2024, 9:53 a.m. UTC | #3
On 10.05.24 11:06, Chen, Jiqian wrote:
> Hi,
> 
> On 2024/5/10 14:46, Jürgen Groß wrote:
>> On 19.04.24 05:36, Jiqian Chen wrote:
>>> +
>>> +    info->type = IRQT_PIRQ;
> I am considering whether I need to use a new type(like IRQT_GSI) here to distinguish with IRQT_PIRQ, because function restore_pirqs will process all IRQT_PIRQ.

restore_pirqs() already considers gsi == 0 to be not GSI related. Isn't this
enough?


Juergen
Chen, Jiqian May 10, 2024, 10:13 a.m. UTC | #4
On 2024/5/10 17:53, Jürgen Groß wrote:
> On 10.05.24 11:06, Chen, Jiqian wrote:
>> Hi,
>>
>> On 2024/5/10 14:46, Jürgen Groß wrote:
>>> On 19.04.24 05:36, Jiqian Chen wrote:
>>>> +
>>>> +    info->type = IRQT_PIRQ;
>> I am considering whether I need to use a new type(like IRQT_GSI) here to distinguish with IRQT_PIRQ, because function restore_pirqs will process all IRQT_PIRQ.
> 
> restore_pirqs() already considers gsi == 0 to be not GSI related. Isn't this
> enough?
No, it is not enough.
xen_pvh_add_gsi_irq_map adds the mapping of gsi and irq, and the value of gsi is not 0,
so once restore_pirqs is called, it will do PHYSDEVOP_map_pirq for that gsi. But in PVH dom0, we shouldn't do PHYSDEVOP_map_pirq.

> 
> 
> Juergen
Jürgen Groß May 10, 2024, 10:21 a.m. UTC | #5
On 10.05.24 12:13, Chen, Jiqian wrote:
> On 2024/5/10 17:53, Jürgen Groß wrote:
>> On 10.05.24 11:06, Chen, Jiqian wrote:
>>> Hi,
>>>
>>> On 2024/5/10 14:46, Jürgen Groß wrote:
>>>> On 19.04.24 05:36, Jiqian Chen wrote:
>>>>> +
>>>>> +    info->type = IRQT_PIRQ;
>>> I am considering whether I need to use a new type(like IRQT_GSI) here to distinguish with IRQT_PIRQ, because function restore_pirqs will process all IRQT_PIRQ.
>>
>> restore_pirqs() already considers gsi == 0 to be not GSI related. Isn't this
>> enough?
> No, it is not enough.
> xen_pvh_add_gsi_irq_map adds the mapping of gsi and irq, but the value of gsi is not 0,
> once restore_pirqs is called, it will do PHYSDEVOP_map_pirq for that gsi, but in pvh dom0, we shouldn't do PHYSDEVOP_map_pirq.

Okay, then add a new flag to info->u.pirq.flags for that purpose?


Juergen
Chen, Jiqian May 10, 2024, 10:32 a.m. UTC | #6
On 2024/5/10 18:21, Jürgen Groß wrote:
> On 10.05.24 12:13, Chen, Jiqian wrote:
>> On 2024/5/10 17:53, Jürgen Groß wrote:
>>> On 10.05.24 11:06, Chen, Jiqian wrote:
>>>> Hi,
>>>>
>>>> On 2024/5/10 14:46, Jürgen Groß wrote:
>>>>> On 19.04.24 05:36, Jiqian Chen wrote:
>>>>>> +
>>>>>> +    info->type = IRQT_PIRQ;
>>>> I am considering whether I need to use a new type(like IRQT_GSI) here to distinguish with IRQT_PIRQ, because function restore_pirqs will process all IRQT_PIRQ.
>>>
>>> restore_pirqs() already considers gsi == 0 to be not GSI related. Isn't this
>>> enough?
>> No, it is not enough.
>> xen_pvh_add_gsi_irq_map adds the mapping of gsi and irq, but the value of gsi is not 0,
>> once restore_pirqs is called, it will do PHYSDEVOP_map_pirq for that gsi, but in pvh dom0, we shouldn't do PHYSDEVOP_map_pirq.
> 
> Okay, then add a new flag to info->u.pirq.flags for that purpose?
I feel that adding a new flag to info->u.pirq.flags is not as good as adding a new type to info->type.
restore_pirqs already checks "info->type != IRQT_PIRQ"; if we add a new flag to info->u.pirq.flags instead, we need to add a new condition in restore_pirqs.
Also, this mapping (gsi and irq of PVH) doesn't actually have a pirq, so it is not suitable to add it to u.pirq.flags.

> 
> 
> Juergen
>
Jürgen Groß May 10, 2024, 11:27 a.m. UTC | #7
On 10.05.24 12:32, Chen, Jiqian wrote:
> On 2024/5/10 18:21, Jürgen Groß wrote:
>> On 10.05.24 12:13, Chen, Jiqian wrote:
>>> On 2024/5/10 17:53, Jürgen Groß wrote:
>>>> On 10.05.24 11:06, Chen, Jiqian wrote:
>>>>> Hi,
>>>>>
>>>>> On 2024/5/10 14:46, Jürgen Groß wrote:
>>>>>> On 19.04.24 05:36, Jiqian Chen wrote:
>>>>>>> +
>>>>>>> +    info->type = IRQT_PIRQ;
>>>>> I am considering whether I need to use a new type(like IRQT_GSI) here to distinguish with IRQT_PIRQ, because function restore_pirqs will process all IRQT_PIRQ.
>>>>
>>>> restore_pirqs() already considers gsi == 0 to be not GSI related. Isn't this
>>>> enough?
>>> No, it is not enough.
>>> xen_pvh_add_gsi_irq_map adds the mapping of gsi and irq, but the value of gsi is not 0,
>>> once restore_pirqs is called, it will do PHYSDEVOP_map_pirq for that gsi, but in pvh dom0, we shouldn't do PHYSDEVOP_map_pirq.
>>
>> Okay, then add a new flag to info->u.pirq.flags for that purpose?
> I feel like adding "new flag to info->u.pirq.flags" is not as good as adding " new type to info->type".
> Because in restore_pirqs, it considers " info->type != IRQT_PIRQ", if adding " new flag to info->u.pirq.flags", we need to add a new condition in restore_pirqs.
> And actually this mapping(gsi and irq of pvh) doesn't have pirq, so it is not suitable to add to u.pirq.flags.

Does this mean there is no other IRQT_PIRQ related activity relevant for those
GSIs/IRQs? In that case I agree to add IRQT_GSI.


Juergen
Chen, Jiqian May 11, 2024, 2:16 a.m. UTC | #8
On 2024/5/10 19:27, Jürgen Groß wrote:
> On 10.05.24 12:32, Chen, Jiqian wrote:
>> On 2024/5/10 18:21, Jürgen Groß wrote:
>>> On 10.05.24 12:13, Chen, Jiqian wrote:
>>>> On 2024/5/10 17:53, Jürgen Groß wrote:
>>>>> On 10.05.24 11:06, Chen, Jiqian wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2024/5/10 14:46, Jürgen Groß wrote:
>>>>>>> On 19.04.24 05:36, Jiqian Chen wrote:
>>>>>>>> +
>>>>>>>> +    info->type = IRQT_PIRQ;
>>>>>> I am considering whether I need to use a new type(like IRQT_GSI) here to distinguish with IRQT_PIRQ, because function restore_pirqs will process all IRQT_PIRQ.
>>>>>
>>>>> restore_pirqs() already considers gsi == 0 to be not GSI related. Isn't this
>>>>> enough?
>>>> No, it is not enough.
>>>> xen_pvh_add_gsi_irq_map adds the mapping of gsi and irq, but the value of gsi is not 0,
>>>> once restore_pirqs is called, it will do PHYSDEVOP_map_pirq for that gsi, but in pvh dom0, we shouldn't do PHYSDEVOP_map_pirq.
>>>
>>> Okay, then add a new flag to info->u.pirq.flags for that purpose?
>> I feel like adding "new flag to info->u.pirq.flags" is not as good as adding " new type to info->type".
>> Because in restore_pirqs, it considers " info->type != IRQT_PIRQ", if adding " new flag to info->u.pirq.flags", we need to add a new condition in restore_pirqs.
>> And actually this mapping(gsi and irq of pvh) doesn't have pirq, so it is not suitable to add to u.pirq.flags.
> 
> Does this mean there is no other IRQT_PIRQ related activity relevant for those GSIs/IRQs?
Yes, I think so.
> In that case I agree to add IRQT_GSI.
Thank you!
> 
> 
> Juergen
Chen, Jiqian May 13, 2024, 7:47 a.m. UTC | #9
Hi,
On 2024/5/10 17:06, Chen, Jiqian wrote:
> Hi,
> 
> On 2024/5/10 14:46, Jürgen Groß wrote:
>> On 19.04.24 05:36, Jiqian Chen wrote:
>>> In PVH dom0, it uses the linux local interrupt mechanism,
>>> when it allocs irq for a gsi, it is dynamic, and follow
>>> the principle of applying first, distributing first. And
>>> the irq number is alloced from small to large, but the
>>> applying gsi number is not, may gsi 38 comes before gsi 28,
>>> it causes the irq number is not equal with the gsi number.
>>> And when passthrough a device, QEMU will use device's gsi
>>> number to do pirq mapping, but the gsi number is got from
>>> file /sys/bus/pci/devices/<sbdf>/irq, irq!= gsi, so it will
>>> fail when mapping.
>>> And in current linux codes, there is no method to translate
>>> irq to gsi for userspace.
>>>
>>> For above purpose, record the relationship of gsi and irq
>>> when PVH dom0 do acpi_register_gsi_ioapic for devices and
>>> adds a new syscall into privcmd to let userspace can get
>>> that translation when they have a need.
>>>
>>> Co-developed-by: Huang Rui <ray.huang@amd.com>
>>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>>> ---
>>>   arch/x86/include/asm/apic.h      |  8 +++++++
>>>   arch/x86/include/asm/xen/pci.h   |  5 ++++
>>>   arch/x86/kernel/acpi/boot.c      |  2 +-
>>>   arch/x86/pci/xen.c               | 21 +++++++++++++++++
>>>   drivers/xen/events/events_base.c | 39 ++++++++++++++++++++++++++++++++
>>>   drivers/xen/privcmd.c            | 19 ++++++++++++++++
>>>   include/uapi/xen/privcmd.h       |  7 ++++++
>>>   include/xen/events.h             |  5 ++++
>>>   8 files changed, 105 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
>>> index 9d159b771dc8..dd4139250895 100644
>>> --- a/arch/x86/include/asm/apic.h
>>> +++ b/arch/x86/include/asm/apic.h
>>> @@ -169,6 +169,9 @@ extern bool apic_needs_pit(void);
>>>     extern void apic_send_IPI_allbutself(unsigned int vector);
>>>   +extern int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>> +                    int trigger, int polarity);
>>> +
>>>   #else /* !CONFIG_X86_LOCAL_APIC */
>>>   static inline void lapic_shutdown(void) { }
>>>   #define local_apic_timer_c2_ok        1
>>> @@ -183,6 +186,11 @@ static inline void apic_intr_mode_init(void) { }
>>>   static inline void lapic_assign_system_vectors(void) { }
>>>   static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
>>>   static inline bool apic_needs_pit(void) { return true; }
>>> +static inline int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>> +                    int trigger, int polarity)
>>> +{
>>> +    return (int)gsi;
>>> +}
>>>   #endif /* !CONFIG_X86_LOCAL_APIC */
>>>     #ifdef CONFIG_X86_X2APIC
>>> diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
>>> index 9015b888edd6..aa8ded61fc2d 100644
>>> --- a/arch/x86/include/asm/xen/pci.h
>>> +++ b/arch/x86/include/asm/xen/pci.h
>>> @@ -5,6 +5,7 @@
>>>   #if defined(CONFIG_PCI_XEN)
>>>   extern int __init pci_xen_init(void);
>>>   extern int __init pci_xen_hvm_init(void);
>>> +extern int __init pci_xen_pvh_init(void);
>>>   #define pci_xen 1
>>>   #else
>>>   #define pci_xen 0
>>> @@ -13,6 +14,10 @@ static inline int pci_xen_hvm_init(void)
>>>   {
>>>       return -1;
>>>   }
>>> +static inline int pci_xen_pvh_init(void)
>>> +{
>>> +    return -1;
>>> +}
>>>   #endif
>>>   #ifdef CONFIG_XEN_PV_DOM0
>>>   int __init pci_xen_initial_domain(void);
>>> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
>>> index 85a3ce2a3666..72c73458c083 100644
>>> --- a/arch/x86/kernel/acpi/boot.c
>>> +++ b/arch/x86/kernel/acpi/boot.c
>>> @@ -749,7 +749,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
>>>   }
>>>     #ifdef CONFIG_X86_LOCAL_APIC
>>> -static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>> +int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>>                       int trigger, int polarity)
>>>   {
>>>       int irq = gsi;
>>> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
>>> index 652cd53e77f6..f056ab5c0a06 100644
>>> --- a/arch/x86/pci/xen.c
>>> +++ b/arch/x86/pci/xen.c
>>> @@ -114,6 +114,21 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
>>>                    false /* no mapping of GSI to PIRQ */);
>>>   }
>>>   +static int acpi_register_gsi_xen_pvh(struct device *dev, u32 gsi,
>>> +                    int trigger, int polarity)
>>> +{
>>> +    int irq;
>>> +
>>> +    irq = acpi_register_gsi_ioapic(dev, gsi, trigger, polarity);
>>> +    if (irq < 0)
>>> +        return irq;
>>> +
>>> +    if (xen_pvh_add_gsi_irq_map(gsi, irq) == -EEXIST)
>>> +        printk(KERN_INFO "Already map the GSI :%u and IRQ: %d\n", gsi, irq);
>>> +
>>> +    return irq;
>>> +}
>>> +
>>>   #ifdef CONFIG_XEN_PV_DOM0
>>>   static int xen_register_gsi(u32 gsi, int triggering, int polarity)
>>>   {
>>> @@ -558,6 +573,12 @@ int __init pci_xen_hvm_init(void)
>>>       return 0;
>>>   }
>>>   +int __init pci_xen_pvh_init(void)
>>> +{
>>> +    __acpi_register_gsi = acpi_register_gsi_xen_pvh;
>>
>> No support for unregistering the gsi again?
> __acpi_unregister_gsi is set in function acpi_set_irq_model_ioapic.
> Maybe I need to use a new function to call acpi_unregister_gsi_ioapic and remove the mapping of irq and gsi from xen_irq_list_head ?
When I tried to support unregistering the gsi and removing the mapping when a device is disabled,
I found that after running "xl pci-assignable-add 03:00.0", the call chain pcistub_init_device->xen_pcibk_reset_device->pci_disable_device->pcibios_disable_device->acpi_pci_irq_disable->__acpi_unregister_gsi
removed the mapping, so when userspace later called xen_gsi_from_irq to get the gsi, it failed.

To cover the above case, I want to change the implementation of xen_gsi_from_irq to take the sbdf instead of the irq when looking up the gsi,
because the sbdf and gsi of a device are unique and will not change even if the device is disabled or re-enabled.

Do you think this kind of change is acceptable?

> 
>>
>>> +    return 0;
>>> +}
>>> +
>>
>> Juergen
>
Jürgen Groß May 13, 2024, 7:59 a.m. UTC | #10
On 13.05.24 09:47, Chen, Jiqian wrote:
> Hi,
> On 2024/5/10 17:06, Chen, Jiqian wrote:
>> Hi,
>>
>> On 2024/5/10 14:46, Jürgen Groß wrote:
>>> On 19.04.24 05:36, Jiqian Chen wrote:
>>>> In PVH dom0, it uses the linux local interrupt mechanism,
>>>> when it allocs irq for a gsi, it is dynamic, and follow
>>>> the principle of applying first, distributing first. And
>>>> the irq number is alloced from small to large, but the
>>>> applying gsi number is not, may gsi 38 comes before gsi 28,
>>>> it causes the irq number is not equal with the gsi number.
>>>> And when passthrough a device, QEMU will use device's gsi
>>>> number to do pirq mapping, but the gsi number is got from
>>>> file /sys/bus/pci/devices/<sbdf>/irq, irq!= gsi, so it will
>>>> fail when mapping.
>>>> And in current linux codes, there is no method to translate
>>>> irq to gsi for userspace.
>>>>
>>>> For above purpose, record the relationship of gsi and irq
>>>> when PVH dom0 do acpi_register_gsi_ioapic for devices and
>>>> adds a new syscall into privcmd to let userspace can get
>>>> that translation when they have a need.
>>>>
>>>> Co-developed-by: Huang Rui <ray.huang@amd.com>
>>>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>>>> ---
>>>>    arch/x86/include/asm/apic.h      |  8 +++++++
>>>>    arch/x86/include/asm/xen/pci.h   |  5 ++++
>>>>    arch/x86/kernel/acpi/boot.c      |  2 +-
>>>>    arch/x86/pci/xen.c               | 21 +++++++++++++++++
>>>>    drivers/xen/events/events_base.c | 39 ++++++++++++++++++++++++++++++++
>>>>    drivers/xen/privcmd.c            | 19 ++++++++++++++++
>>>>    include/uapi/xen/privcmd.h       |  7 ++++++
>>>>    include/xen/events.h             |  5 ++++
>>>>    8 files changed, 105 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
>>>> index 9d159b771dc8..dd4139250895 100644
>>>> --- a/arch/x86/include/asm/apic.h
>>>> +++ b/arch/x86/include/asm/apic.h
>>>> @@ -169,6 +169,9 @@ extern bool apic_needs_pit(void);
>>>>      extern void apic_send_IPI_allbutself(unsigned int vector);
>>>>    +extern int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>>> +                    int trigger, int polarity);
>>>> +
>>>>    #else /* !CONFIG_X86_LOCAL_APIC */
>>>>    static inline void lapic_shutdown(void) { }
>>>>    #define local_apic_timer_c2_ok        1
>>>> @@ -183,6 +186,11 @@ static inline void apic_intr_mode_init(void) { }
>>>>    static inline void lapic_assign_system_vectors(void) { }
>>>>    static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
>>>>    static inline bool apic_needs_pit(void) { return true; }
>>>> +static inline int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>>> +                    int trigger, int polarity)
>>>> +{
>>>> +    return (int)gsi;
>>>> +}
>>>>    #endif /* !CONFIG_X86_LOCAL_APIC */
>>>>      #ifdef CONFIG_X86_X2APIC
>>>> diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
>>>> index 9015b888edd6..aa8ded61fc2d 100644
>>>> --- a/arch/x86/include/asm/xen/pci.h
>>>> +++ b/arch/x86/include/asm/xen/pci.h
>>>> @@ -5,6 +5,7 @@
>>>>    #if defined(CONFIG_PCI_XEN)
>>>>    extern int __init pci_xen_init(void);
>>>>    extern int __init pci_xen_hvm_init(void);
>>>> +extern int __init pci_xen_pvh_init(void);
>>>>    #define pci_xen 1
>>>>    #else
>>>>    #define pci_xen 0
>>>> @@ -13,6 +14,10 @@ static inline int pci_xen_hvm_init(void)
>>>>    {
>>>>        return -1;
>>>>    }
>>>> +static inline int pci_xen_pvh_init(void)
>>>> +{
>>>> +    return -1;
>>>> +}
>>>>    #endif
>>>>    #ifdef CONFIG_XEN_PV_DOM0
>>>>    int __init pci_xen_initial_domain(void);
>>>> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
>>>> index 85a3ce2a3666..72c73458c083 100644
>>>> --- a/arch/x86/kernel/acpi/boot.c
>>>> +++ b/arch/x86/kernel/acpi/boot.c
>>>> @@ -749,7 +749,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
>>>>    }
>>>>      #ifdef CONFIG_X86_LOCAL_APIC
>>>> -static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>>> +int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
>>>>                        int trigger, int polarity)
>>>>    {
>>>>        int irq = gsi;
>>>> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
>>>> index 652cd53e77f6..f056ab5c0a06 100644
>>>> --- a/arch/x86/pci/xen.c
>>>> +++ b/arch/x86/pci/xen.c
>>>> @@ -114,6 +114,21 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
>>>>                     false /* no mapping of GSI to PIRQ */);
>>>>    }
>>>>    +static int acpi_register_gsi_xen_pvh(struct device *dev, u32 gsi,
>>>> +                    int trigger, int polarity)
>>>> +{
>>>> +    int irq;
>>>> +
>>>> +    irq = acpi_register_gsi_ioapic(dev, gsi, trigger, polarity);
>>>> +    if (irq < 0)
>>>> +        return irq;
>>>> +
>>>> +    if (xen_pvh_add_gsi_irq_map(gsi, irq) == -EEXIST)
>>>> +        printk(KERN_INFO "Already map the GSI :%u and IRQ: %d\n", gsi, irq);
>>>> +
>>>> +    return irq;
>>>> +}
>>>> +
>>>>    #ifdef CONFIG_XEN_PV_DOM0
>>>>    static int xen_register_gsi(u32 gsi, int triggering, int polarity)
>>>>    {
>>>> @@ -558,6 +573,12 @@ int __init pci_xen_hvm_init(void)
>>>>        return 0;
>>>>    }
>>>>    +int __init pci_xen_pvh_init(void)
>>>> +{
>>>> +    __acpi_register_gsi = acpi_register_gsi_xen_pvh;
>>>
>>> No support for unregistering the gsi again?
>> __acpi_unregister_gsi is set in function acpi_set_irq_model_ioapic.
>> Maybe I need to use a new function to call acpi_unregister_gsi_ioapic and remove the mapping of irq and gsi from xen_irq_list_head?
> When I tried to support unregistering the gsi and removing the mapping when the device is disabled,
> I found that after running "xl pci-assignable-add 03:00.0", the call stack pcistub_init_device->xen_pcibk_reset_device->pci_disable_device->pcibios_disable_device->acpi_pci_irq_disable->__acpi_unregister_gsi
> removed the mapping, so when user space later called xen_gsi_from_irq to get the gsi, it failed.
> 
> To cover the above case, I want to change the implementation of xen_gsi_from_irq to take the sbdf instead of the irq when looking up the gsi,
> because the sbdf and gsi of a device are unique and will not change even if the device is disabled or re-enabled.
> 
> Do you think this kind of change is acceptable?

Yes, I think so.


Juergen
diff mbox series

Patch

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9d159b771dc8..dd4139250895 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -169,6 +169,9 @@  extern bool apic_needs_pit(void);
 
 extern void apic_send_IPI_allbutself(unsigned int vector);
 
+extern int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+				    int trigger, int polarity);
+
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
@@ -183,6 +186,11 @@  static inline void apic_intr_mode_init(void) { }
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
 static inline bool apic_needs_pit(void) { return true; }
+static inline int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+				    int trigger, int polarity)
+{
+	return (int)gsi;
+}
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 9015b888edd6..aa8ded61fc2d 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -5,6 +5,7 @@ 
 #if defined(CONFIG_PCI_XEN)
 extern int __init pci_xen_init(void);
 extern int __init pci_xen_hvm_init(void);
+extern int __init pci_xen_pvh_init(void);
 #define pci_xen 1
 #else
 #define pci_xen 0
@@ -13,6 +14,10 @@  static inline int pci_xen_hvm_init(void)
 {
 	return -1;
 }
+static inline int pci_xen_pvh_init(void)
+{
+	return -1;
+}
 #endif
 #ifdef CONFIG_XEN_PV_DOM0
 int __init pci_xen_initial_domain(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 85a3ce2a3666..72c73458c083 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -749,7 +749,7 @@  static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 				    int trigger, int polarity)
 {
 	int irq = gsi;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 652cd53e77f6..f056ab5c0a06 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -114,6 +114,21 @@  static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
 				 false /* no mapping of GSI to PIRQ */);
 }
 
+static int acpi_register_gsi_xen_pvh(struct device *dev, u32 gsi,
+				    int trigger, int polarity)
+{
+	int irq;
+
+	irq = acpi_register_gsi_ioapic(dev, gsi, trigger, polarity);
+	if (irq < 0)
+		return irq;
+
+	if (xen_pvh_add_gsi_irq_map(gsi, irq) == -EEXIST)
+		printk(KERN_INFO "Already map the GSI :%u and IRQ: %d\n", gsi, irq);
+
+	return irq;
+}
+
 #ifdef CONFIG_XEN_PV_DOM0
 static int xen_register_gsi(u32 gsi, int triggering, int polarity)
 {
@@ -558,6 +573,12 @@  int __init pci_xen_hvm_init(void)
 	return 0;
 }
 
+int __init pci_xen_pvh_init(void)
+{
+	__acpi_register_gsi = acpi_register_gsi_xen_pvh;
+	return 0;
+}
+
 #ifdef CONFIG_XEN_PV_DOM0
 int __init pci_xen_initial_domain(void)
 {
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 27553673e46b..80d4f7faac64 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -953,6 +953,43 @@  int xen_irq_from_gsi(unsigned gsi)
 }
 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
 
+int xen_gsi_from_irq(unsigned irq)
+{
+	struct irq_info *info;
+
+	list_for_each_entry(info, &xen_irq_list_head, list) {
+		if (info->type != IRQT_PIRQ)
+			continue;
+
+		if (info->irq == irq)
+			return info->u.pirq.gsi;
+	}
+
+	return -1;
+}
+EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
+
+int xen_pvh_add_gsi_irq_map(unsigned gsi, unsigned irq)
+{
+	int tmp_irq;
+	struct irq_info *info;
+
+	tmp_irq = xen_irq_from_gsi(gsi);
+	if (tmp_irq != -1)
+		return -EEXIST;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		panic("Unable to allocate metadata for GSI%d\n", gsi);
+
+	info->type = IRQT_PIRQ;
+	info->irq = irq;
+	info->u.pirq.gsi = gsi;
+	list_add_tail(&info->list, &xen_irq_list_head);
+
+	return 0;
+}
+
 static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
 {
 	evtchn_port_t evtchn;
@@ -2295,6 +2332,8 @@  void __init xen_init_IRQ(void)
 	xen_init_setup_upcall_vector();
 	xen_alloc_callback_vector();
 
+	if (xen_pvh_domain())
+		pci_xen_pvh_init();
 
 	if (xen_hvm_domain()) {
 		native_init_IRQ();
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 67dfa4778864..11feed529e1d 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -842,6 +842,21 @@  static long privcmd_ioctl_mmap_resource(struct file *file,
 	return rc;
 }
 
+static long privcmd_ioctl_gsi_from_irq(struct file *file, void __user *udata)
+{
+	struct privcmd_gsi_from_irq kdata;
+
+	if (copy_from_user(&kdata, udata, sizeof(kdata)))
+		return -EFAULT;
+
+	kdata.gsi = xen_gsi_from_irq(kdata.irq);
+
+	if (copy_to_user(udata, &kdata, sizeof(kdata)))
+		return -EFAULT;
+
+	return 0;
+}
+
 #ifdef CONFIG_XEN_PRIVCMD_EVENTFD
 /* Irqfd support */
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -1529,6 +1544,10 @@  static long privcmd_ioctl(struct file *file,
 		ret = privcmd_ioctl_ioeventfd(file, udata);
 		break;
 
+	case IOCTL_PRIVCMD_GSI_FROM_IRQ:
+		ret = privcmd_ioctl_gsi_from_irq(file, udata);
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h
index 8b8c5d1420fe..61f0ffbec077 100644
--- a/include/uapi/xen/privcmd.h
+++ b/include/uapi/xen/privcmd.h
@@ -126,6 +126,11 @@  struct privcmd_ioeventfd {
 	__u8 pad[2];
 };
 
+struct privcmd_gsi_from_irq {
+	__u32 irq;
+	__u32 gsi;
+};
+
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
@@ -157,5 +162,7 @@  struct privcmd_ioeventfd {
 	_IOW('P', 8, struct privcmd_irqfd)
 #define IOCTL_PRIVCMD_IOEVENTFD					\
 	_IOW('P', 9, struct privcmd_ioeventfd)
+#define IOCTL_PRIVCMD_GSI_FROM_IRQ				\
+	_IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_gsi_from_irq))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff --git a/include/xen/events.h b/include/xen/events.h
index 3b07409f8032..411298ae7fb0 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -127,6 +127,11 @@  int xen_pirq_from_irq(unsigned irq);
 /* Return the irq allocated to the gsi */
 int xen_irq_from_gsi(unsigned gsi);
 
+/* Return the gsi from irq */
+int xen_gsi_from_irq(unsigned irq);
+
+int xen_pvh_add_gsi_irq_map(unsigned gsi, unsigned irq);
+
 /* Determine whether to ignore this IRQ if it is passed to a guest. */
 int xen_test_irq_shared(int irq);