diff mbox series

[RFC,v1,4/4] arm/libxl: Emulated PCI device tree node in libxl

Message ID 23346b24762467bd246b91b05f7b0fc1719282f6.1595511416.git.rahul.singh@arm.com (mailing list archive)
State New, archived
Headers show
Series PCI devices passthrough on Arm | expand

Commit Message

Rahul Singh July 23, 2020, 3:40 p.m. UTC
libxl will create an emulated PCI device tree node in the
device tree to enable the guest OS to discover the virtual
PCI during guest boot.

We introduced the new config option [vpci="ecam"] for guests.
When this config option is enabled in a guest configuration,
a PCI device tree node will be created in the guest device tree.

A new area has been reserved in the arm guest physical map at
which the VPCI bus is declared in the device tree (reg and ranges
parameters of the node).

Change-Id: I47d39cbe8184de2226f174644df9790ecc610ccd
Signed-off-by: Rahul Singh <rahul.singh@arm.com>
---
 tools/libxl/libxl_arm.c       | 200 ++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_types.idl   |   6 +
 tools/xl/xl_parse.c           |   7 ++
 xen/include/public/arch-arm.h |  28 +++++
 4 files changed, 241 insertions(+)

Comments

Stefano Stabellini July 23, 2020, 11:39 p.m. UTC | #1
On Thu, 23 Jul 2020, Rahul Singh wrote:
> libxl will create an emulated PCI device tree node in the
> device tree to enable the guest OS to discover the virtual
> PCI during guest boot.
> 
> We introduced the new config option [vpci="ecam"] for guests.
> When this config option is enabled in a guest configuration,
> a PCI device tree node will be created in the guest device tree.
> 
> A new area has been reserved in the arm guest physical map at
> which the VPCI bus is declared in the device tree (reg and ranges
> parameters of the node).
> 
> Change-Id: I47d39cbe8184de2226f174644df9790ecc610ccd

Same question


> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
> ---
>  tools/libxl/libxl_arm.c       | 200 ++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_types.idl   |   6 +
>  tools/xl/xl_parse.c           |   7 ++
>  xen/include/public/arch-arm.h |  28 +++++
>  4 files changed, 241 insertions(+)
> 
> diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
> index 34f8a29056..84568e9dc9 100644
> --- a/tools/libxl/libxl_arm.c
> +++ b/tools/libxl/libxl_arm.c
> @@ -268,6 +268,130 @@ static int fdt_property_regs(libxl__gc *gc, void *fdt,
>      return fdt_property(fdt, "reg", regs, sizeof(regs));
>  }
>  
> +static int fdt_property_vpci_bus_range(libxl__gc *gc, void *fdt,
> +        unsigned num_cells, ...)
> +{
> +    uint32_t bus_range[num_cells];
> +    be32 *cells = &bus_range[0];
> +    int i;
> +    va_list ap;
> +    uint32_t arg;
> +
> +    va_start(ap, num_cells);
> +    for (i = 0 ; i < num_cells; i++) {
> +        arg = va_arg(ap, uint32_t);
> +        set_cell(&cells, 1, arg);
> +    }
> +    va_end(ap);
> +
> +    return fdt_property(fdt, "bus-range", bus_range, sizeof(bus_range));
> +}
> +
> +static int fdt_property_vpci_interrupt_map_mask(libxl__gc *gc, void *fdt,
> +        unsigned num_cells, ...)
> +{
> +    uint32_t interrupt_map_mask[num_cells];
> +    be32 *cells = &interrupt_map_mask[0];
> +    int i;
> +    va_list ap;
> +    uint32_t arg;
> +
> +    va_start(ap, num_cells);
> +    for (i = 0 ; i < num_cells; i++) {
> +        arg = va_arg(ap, uint32_t);
> +        set_cell(&cells, 1, arg);
> +    }
> +    va_end(ap);
> +
> +    return fdt_property(fdt, "interrupt-map-mask", interrupt_map_mask,
> +                                sizeof(interrupt_map_mask));
> +}
> +
> +static int fdt_property_vpci_ranges(libxl__gc *gc, void *fdt,
> +        unsigned vpci_addr_cells,
> +        unsigned cpu_addr_cells,
> +        unsigned vpci_size_cells,
> +        unsigned num_regs, ...)
> +{
> +    uint32_t regs[num_regs*(vpci_addr_cells+cpu_addr_cells+vpci_size_cells)];
> +    be32 *cells = &regs[0];
> +    int i;
> +    va_list ap;
> +    uint64_t arg;
> +
> +    va_start(ap, num_regs);
> +    for (i = 0 ; i < num_regs; i++) {
> +        /* Set the memory bit field */
> +        arg = va_arg(ap, uint64_t);
> +        set_cell(&cells, 1, arg);
> +
> +        /* Set the vpci bus address */
> +        arg = vpci_addr_cells ? va_arg(ap, uint64_t) : 0;
> +        set_cell(&cells, 2 , arg);
> +
> +        /* Set the cpu bus address where vpci address is mapped */
> +        arg = cpu_addr_cells ? va_arg(ap, uint64_t) : 0;
> +        set_cell(&cells, cpu_addr_cells, arg);
> +
> +        /* Set the vpci size requested */
> +        arg = vpci_size_cells ? va_arg(ap, uint64_t) : 0;
> +        set_cell(&cells, vpci_size_cells,arg);
> +    }
> +    va_end(ap);
> +
> +    return fdt_property(fdt, "ranges", regs, sizeof(regs));
> +}
> +
> +static int fdt_property_vpci_interrupt_map(libxl__gc *gc, void *fdt,
> +        unsigned child_unit_addr_cells,
> +        unsigned child_interrupt_specifier_cells,
> +        unsigned parent_unit_addr_cells,
> +        unsigned parent_interrupt_specifier_cells,
> +        unsigned num_regs, ...)
> +{
> +    uint32_t interrupt_map[num_regs * (child_unit_addr_cells +
> +            child_interrupt_specifier_cells + parent_unit_addr_cells
> +            + parent_interrupt_specifier_cells + 1)];
> +    be32 *cells = &interrupt_map[0];
> +    int i,j;
> +    va_list ap;
> +    uint64_t arg;
> +
> +    va_start(ap, num_regs);
> +    for (i = 0 ; i < num_regs; i++) {
> +        /* Set the child unit address*/
> +        for (j = 0 ; j < child_unit_addr_cells; j++) {
> +            arg = va_arg(ap, uint32_t);
> +            set_cell(&cells, 1 , arg);
> +        }
> +
> +        /* Set the child interrupt specifier*/
> +        for (j = 0 ; j < child_interrupt_specifier_cells ; j++) {
> +            arg = va_arg(ap, uint32_t);
> +            set_cell(&cells, 1 , arg);
> +        }
> +
> +        /* Set the interrupt-parent*/
> +        set_cell(&cells, 1 , GUEST_PHANDLE_GIC);
> +
> +        /* Set the parent unit address*/
> +        for (j = 0 ; j < parent_unit_addr_cells; j++) {
> +            arg = va_arg(ap, uint32_t);
> +            set_cell(&cells, 1 , arg);
> +        }
> +
> +        /* Set the parent interrupt specifier*/
> +        for (j = 0 ; j < parent_interrupt_specifier_cells; j++) {
> +            arg = va_arg(ap, uint32_t);
> +            set_cell(&cells, 1 , arg);
> +        }
> +    }
> +    va_end(ap);
> +
> +    return fdt_property(fdt, "interrupt-map", interrupt_map,
> +                                sizeof(interrupt_map));
> +}
> +
>  static int make_root_properties(libxl__gc *gc,
>                                  const libxl_version_info *vers,
>                                  void *fdt)
> @@ -659,6 +783,79 @@ static int make_vpl011_uart_node(libxl__gc *gc, void *fdt,
>      return 0;
>  }
>  
> +static int make_vpci_node(libxl__gc *gc, void *fdt,
> +        const struct arch_info *ainfo,
> +        struct xc_dom_image *dom)
> +{
> +    int res;
> +    const uint64_t vpci_ecam_base = GUEST_VPCI_ECAM_BASE;
> +    const uint64_t vpci_ecam_size = GUEST_VPCI_ECAM_SIZE;
> +    const char *name = GCSPRINTF("pcie@%"PRIx64, vpci_ecam_base);
> +
> +    res = fdt_begin_node(fdt, name);
> +    if (res) return res;
> +
> +    res = fdt_property_compat(gc, fdt, 1, "pci-host-ecam-generic");
> +    if (res) return res;
> +
> +    res = fdt_property_string(fdt, "device_type", "pci");
> +    if (res) return res;
> +
> +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> +            GUEST_ROOT_SIZE_CELLS, 1, vpci_ecam_base, vpci_ecam_size);
> +    if (res) return res;
> +
> +    res = fdt_property_vpci_bus_range(gc, fdt, 2, 0,255);
> +    if (res) return res;
> +
> +    res = fdt_property_cell(fdt, "linux,pci-domain", 0);
> +    if (res) return res;
> +
> +    res = fdt_property_cell(fdt, "#address-cells", 3);
> +    if (res) return res;
> +
> +    res = fdt_property_cell(fdt, "#size-cells", 2);
> +    if (res) return res;
> +
> +    res = fdt_property_cell(fdt, "#interrupt-cells", 1);
> +    if (res) return res;
> +
> +    res = fdt_property_string(fdt, "status", "okay");
> +    if (res) return res;
> +
> +    res = fdt_property_vpci_ranges(gc, fdt, GUEST_PCI_ADDRESS_CELLS,
> +        GUEST_ROOT_ADDRESS_CELLS, GUEST_PCI_SIZE_CELLS,
> +        3,
> +        GUEST_VPCI_ADDR_TYPE_MEM, GUEST_VPCI_MEM_PCI_ADDR,
> +        GUEST_VPCI_MEM_CPU_ADDR, GUEST_VPCI_MEM_SIZE,
> +        GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM, GUEST_VPCI_PREFETCH_MEM_PCI_ADDR,
> +        GUEST_VPCI_PREFETCH_MEM_CPU_ADDR, GUEST_VPCI_PREFETCH_MEM_SIZE,
> +        GUEST_VPCI_ADDR_TYPE_IO, GUEST_VPCI_IO_PCI_ADDR,
> +        GUEST_VPCI_IO_CPU_ADDR, GUEST_VPCI_IO_SIZE);
> +    if (res) return res;
> +
> +    res = fdt_property_vpci_interrupt_map_mask(gc, fdt, 4, 0, 0, 0, 7);

it would make sense to separate out child_unit_addr_cells and
child_interrupt_specifier_cells here like we do below with
fdt_property_vpci_interrupt_map


> +    if (res) return res;
> +
> +    /*
> +     * Legacy interrupt is forced and assigned to the guest.
> +     * This will be removed once we have implementation for MSI support.
> +     *
> +     */
> +    res = fdt_property_vpci_interrupt_map(gc, fdt, 3, 1, 0, 3,
> +            4,
> +            0, 0, 0, 1, 0, 136, DT_IRQ_TYPE_LEVEL_HIGH,
> +            0, 0, 0, 2, 0, 137, DT_IRQ_TYPE_LEVEL_HIGH,
> +            0, 0, 0, 3, 0, 138, DT_IRQ_TYPE_LEVEL_HIGH,
> +            0, 0, 0, 4, 0, 139, DT_IRQ_TYPE_LEVEL_HIGH);

The 4 interrupt allocated for this need to be defined in
xen/include/public/arch-arm.h as well. Also, why would we want to get
rid of the legacy interrupts completely? It would be possible to still
find device or software that rely on them.


> +    if (res) return res;
> +
> +    res = fdt_end_node(fdt);
> +    if (res) return res;
> +
> +    return 0;
> +}
> +
>  static const struct arch_info *get_arch_info(libxl__gc *gc,
>                                               const struct xc_dom_image *dom)
>  {

[...]


> diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
> index 7364a07362..4e19c62948 100644
> --- a/xen/include/public/arch-arm.h
> +++ b/xen/include/public/arch-arm.h
> @@ -426,6 +426,34 @@ typedef uint64_t xen_callback_t;
>  #define GUEST_VPCI_ECAM_BASE    xen_mk_ullong(0x10000000)
>  #define GUEST_VPCI_ECAM_SIZE    xen_mk_ullong(0x10000000)
>  
> +#define GUEST_PCI_ADDRESS_CELLS 3
> +#define GUEST_PCI_SIZE_CELLS 2
> +
> +/* PCI-PCIe memory space types */
> +#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM xen_mk_ullong(0x42000000)
> +#define GUEST_VPCI_ADDR_TYPE_MEM          xen_mk_ullong(0x02000000)
> +#define GUEST_VPCI_ADDR_TYPE_IO           xen_mk_ullong(0x01000000)
> +
> +/* Guest PCI-PCIe memory space where config space and BAR will be available.*/
> +#define GUEST_VPCI_PREFETCH_MEM_CPU_ADDR  xen_mk_ullong(0x4000000000)

It looks like it could conflict with GUEST_RAM1_BASE+GUEST_RAM1_SIZE?


> +#define GUEST_VPCI_MEM_CPU_ADDR           xen_mk_ullong(0x04020000)
> +#define GUEST_VPCI_IO_CPU_ADDR            xen_mk_ullong(0xC0200800)

0xC0200800 looks like it could conflict with
GUEST_RAM0_BASE+GUEST_RAM0_SIZE?


> +/*
> + * This is hardcoded values for the real PCI physical addresses.
> + * This will be removed once we read the real PCI-PCIe physical
> + * addresses form the config space and map to the guest memory map
> + * when assigning the device to guest via VPCI.
> + *
> + */
> +#define GUEST_VPCI_PREFETCH_MEM_PCI_ADDR  xen_mk_ullong(0x4000000000)
> +#define GUEST_VPCI_MEM_PCI_ADDR           xen_mk_ullong(0x50000000)
> +#define GUEST_VPCI_IO_PCI_ADDR            xen_mk_ullong(0x00000000)
> +
> +#define GUEST_VPCI_PREFETCH_MEM_SIZE      xen_mk_ullong(0x100000000)
> +#define GUEST_VPCI_MEM_SIZE               xen_mk_ullong(0x08000000)

How did you chose these sizes? GUEST_VPCI_MEM_SIZE and/or
GUEST_VPCI_PREFETCH_MEM_SIZE are supposed to potentially cover all the
PCI BARs, including potential future hotplug devices, right?

If so, maybe we need to increase GUEST_VPCI_MEM_SIZE to a couple of GB
and GUEST_VPCI_PREFETCH_MEM_SIZE to even more?




> +#define GUEST_VPCI_IO_SIZE                xen_mk_ullong(0x00800000)
> +
>  /*
>   * 16MB == 4096 pages reserved for guest to use as a region to map its
>   * grant table in.
Oleksandr Andrushchenko July 24, 2020, 7:55 a.m. UTC | #2
On 7/24/20 2:39 AM, Stefano Stabellini wrote:
> On Thu, 23 Jul 2020, Rahul Singh wrote:
>> libxl will create an emulated PCI device tree node in the
>> device tree to enable the guest OS to discover the virtual
>> PCI during guest boot.
>>
>> We introduced the new config option [vpci="ecam"] for guests.
>> When this config option is enabled in a guest configuration,
>> a PCI device tree node will be created in the guest device tree.
>>
>> A new area has been reserved in the arm guest physical map at
>> which the VPCI bus is declared in the device tree (reg and ranges
>> parameters of the node).
>>
>> Change-Id: I47d39cbe8184de2226f174644df9790ecc610ccd
> Same question
>
>
>> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
>> ---
>>   tools/libxl/libxl_arm.c       | 200 ++++++++++++++++++++++++++++++++++
>>   tools/libxl/libxl_types.idl   |   6 +
>>   tools/xl/xl_parse.c           |   7 ++
>>   xen/include/public/arch-arm.h |  28 +++++
>>   4 files changed, 241 insertions(+)
>>
>> diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
>> index 34f8a29056..84568e9dc9 100644
>> --- a/tools/libxl/libxl_arm.c
>> +++ b/tools/libxl/libxl_arm.c
>> @@ -268,6 +268,130 @@ static int fdt_property_regs(libxl__gc *gc, void *fdt,
>>       return fdt_property(fdt, "reg", regs, sizeof(regs));
>>   }
>>   
>> +static int fdt_property_vpci_bus_range(libxl__gc *gc, void *fdt,
>> +        unsigned num_cells, ...)
>> +{
>> +    uint32_t bus_range[num_cells];
>> +    be32 *cells = &bus_range[0];
>> +    int i;
>> +    va_list ap;
>> +    uint32_t arg;
>> +
>> +    va_start(ap, num_cells);
>> +    for (i = 0 ; i < num_cells; i++) {
>> +        arg = va_arg(ap, uint32_t);
>> +        set_cell(&cells, 1, arg);
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "bus-range", bus_range, sizeof(bus_range));
>> +}
>> +
>> +static int fdt_property_vpci_interrupt_map_mask(libxl__gc *gc, void *fdt,
>> +        unsigned num_cells, ...)
>> +{
>> +    uint32_t interrupt_map_mask[num_cells];
>> +    be32 *cells = &interrupt_map_mask[0];
>> +    int i;
>> +    va_list ap;
>> +    uint32_t arg;
>> +
>> +    va_start(ap, num_cells);
>> +    for (i = 0 ; i < num_cells; i++) {
>> +        arg = va_arg(ap, uint32_t);
>> +        set_cell(&cells, 1, arg);
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "interrupt-map-mask", interrupt_map_mask,
>> +                                sizeof(interrupt_map_mask));
>> +}
>> +
>> +static int fdt_property_vpci_ranges(libxl__gc *gc, void *fdt,
>> +        unsigned vpci_addr_cells,
>> +        unsigned cpu_addr_cells,
>> +        unsigned vpci_size_cells,
>> +        unsigned num_regs, ...)
>> +{
>> +    uint32_t regs[num_regs*(vpci_addr_cells+cpu_addr_cells+vpci_size_cells)];
>> +    be32 *cells = &regs[0];
>> +    int i;
>> +    va_list ap;
>> +    uint64_t arg;
>> +
>> +    va_start(ap, num_regs);
>> +    for (i = 0 ; i < num_regs; i++) {
>> +        /* Set the memory bit field */
>> +        arg = va_arg(ap, uint64_t);
>> +        set_cell(&cells, 1, arg);
>> +
>> +        /* Set the vpci bus address */
>> +        arg = vpci_addr_cells ? va_arg(ap, uint64_t) : 0;
>> +        set_cell(&cells, 2 , arg);
>> +
>> +        /* Set the cpu bus address where vpci address is mapped */
>> +        arg = cpu_addr_cells ? va_arg(ap, uint64_t) : 0;
>> +        set_cell(&cells, cpu_addr_cells, arg);
>> +
>> +        /* Set the vpci size requested */
>> +        arg = vpci_size_cells ? va_arg(ap, uint64_t) : 0;
>> +        set_cell(&cells, vpci_size_cells,arg);
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "ranges", regs, sizeof(regs));
>> +}
>> +
>> +static int fdt_property_vpci_interrupt_map(libxl__gc *gc, void *fdt,
>> +        unsigned child_unit_addr_cells,
>> +        unsigned child_interrupt_specifier_cells,
>> +        unsigned parent_unit_addr_cells,
>> +        unsigned parent_interrupt_specifier_cells,
>> +        unsigned num_regs, ...)
>> +{
>> +    uint32_t interrupt_map[num_regs * (child_unit_addr_cells +
>> +            child_interrupt_specifier_cells + parent_unit_addr_cells
>> +            + parent_interrupt_specifier_cells + 1)];
>> +    be32 *cells = &interrupt_map[0];
>> +    int i,j;
>> +    va_list ap;
>> +    uint64_t arg;
>> +
>> +    va_start(ap, num_regs);
>> +    for (i = 0 ; i < num_regs; i++) {
>> +        /* Set the child unit address*/
>> +        for (j = 0 ; j < child_unit_addr_cells; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +
>> +        /* Set the child interrupt specifier*/
>> +        for (j = 0 ; j < child_interrupt_specifier_cells ; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +
>> +        /* Set the interrupt-parent*/
>> +        set_cell(&cells, 1 , GUEST_PHANDLE_GIC);
>> +
>> +        /* Set the parent unit address*/
>> +        for (j = 0 ; j < parent_unit_addr_cells; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +
>> +        /* Set the parent interrupt specifier*/
>> +        for (j = 0 ; j < parent_interrupt_specifier_cells; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "interrupt-map", interrupt_map,
>> +                                sizeof(interrupt_map));
>> +}
>> +
>>   static int make_root_properties(libxl__gc *gc,
>>                                   const libxl_version_info *vers,
>>                                   void *fdt)
>> @@ -659,6 +783,79 @@ static int make_vpl011_uart_node(libxl__gc *gc, void *fdt,
>>       return 0;
>>   }
>>   
>> +static int make_vpci_node(libxl__gc *gc, void *fdt,
>> +        const struct arch_info *ainfo,
>> +        struct xc_dom_image *dom)
>> +{
>> +    int res;
>> +    const uint64_t vpci_ecam_base = GUEST_VPCI_ECAM_BASE;
>> +    const uint64_t vpci_ecam_size = GUEST_VPCI_ECAM_SIZE;
>> +    const char *name = GCSPRINTF("pcie@%"PRIx64, vpci_ecam_base);
>> +
>> +    res = fdt_begin_node(fdt, name);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_compat(gc, fdt, 1, "pci-host-ecam-generic");
>> +    if (res) return res;
>> +
>> +    res = fdt_property_string(fdt, "device_type", "pci");
>> +    if (res) return res;
>> +
>> +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
>> +            GUEST_ROOT_SIZE_CELLS, 1, vpci_ecam_base, vpci_ecam_size);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_vpci_bus_range(gc, fdt, 2, 0,255);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "linux,pci-domain", 0);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "#address-cells", 3);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "#size-cells", 2);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "#interrupt-cells", 1);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_string(fdt, "status", "okay");
>> +    if (res) return res;
>> +
>> +    res = fdt_property_vpci_ranges(gc, fdt, GUEST_PCI_ADDRESS_CELLS,
>> +        GUEST_ROOT_ADDRESS_CELLS, GUEST_PCI_SIZE_CELLS,
>> +        3,
>> +        GUEST_VPCI_ADDR_TYPE_MEM, GUEST_VPCI_MEM_PCI_ADDR,
>> +        GUEST_VPCI_MEM_CPU_ADDR, GUEST_VPCI_MEM_SIZE,
>> +        GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM, GUEST_VPCI_PREFETCH_MEM_PCI_ADDR,
>> +        GUEST_VPCI_PREFETCH_MEM_CPU_ADDR, GUEST_VPCI_PREFETCH_MEM_SIZE,
>> +        GUEST_VPCI_ADDR_TYPE_IO, GUEST_VPCI_IO_PCI_ADDR,
>> +        GUEST_VPCI_IO_CPU_ADDR, GUEST_VPCI_IO_SIZE);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_vpci_interrupt_map_mask(gc, fdt, 4, 0, 0, 0, 7);
> it would make sense to separate out child_unit_addr_cells and
> child_interrupt_specifier_cells here like we do below with
> fdt_property_vpci_interrupt_map
>
>
>> +    if (res) return res;
>> +
>> +    /*
>> +     * Legacy interrupt is forced and assigned to the guest.
>> +     * This will be removed once we have implementation for MSI support.
>> +     *
>> +     */
>> +    res = fdt_property_vpci_interrupt_map(gc, fdt, 3, 1, 0, 3,
>> +            4,
>> +            0, 0, 0, 1, 0, 136, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 2, 0, 137, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 3, 0, 138, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 4, 0, 139, DT_IRQ_TYPE_LEVEL_HIGH);
> The 4 interrupt allocated for this need to be defined in
> xen/include/public/arch-arm.h as well. Also, why would we want to get
> rid of the legacy interrupts completely? It would be possible to still
> find device or software that rely on them.
I think this is more about shared interrupts support complexity
>
>
>> +    if (res) return res;
>> +
>> +    res = fdt_end_node(fdt);
>> +    if (res) return res;
>> +
>> +    return 0;
>> +}
>> +
>>   static const struct arch_info *get_arch_info(libxl__gc *gc,
>>                                                const struct xc_dom_image *dom)
>>   {
> [...]
>
>
>> diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
>> index 7364a07362..4e19c62948 100644
>> --- a/xen/include/public/arch-arm.h
>> +++ b/xen/include/public/arch-arm.h
>> @@ -426,6 +426,34 @@ typedef uint64_t xen_callback_t;
>>   #define GUEST_VPCI_ECAM_BASE    xen_mk_ullong(0x10000000)
>>   #define GUEST_VPCI_ECAM_SIZE    xen_mk_ullong(0x10000000)
>>   
>> +#define GUEST_PCI_ADDRESS_CELLS 3
>> +#define GUEST_PCI_SIZE_CELLS 2
>> +
>> +/* PCI-PCIe memory space types */
>> +#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM xen_mk_ullong(0x42000000)
>> +#define GUEST_VPCI_ADDR_TYPE_MEM          xen_mk_ullong(0x02000000)
>> +#define GUEST_VPCI_ADDR_TYPE_IO           xen_mk_ullong(0x01000000)
>> +
>> +/* Guest PCI-PCIe memory space where config space and BAR will be available.*/
>> +#define GUEST_VPCI_PREFETCH_MEM_CPU_ADDR  xen_mk_ullong(0x4000000000)
> It looks like it could conflict with GUEST_RAM1_BASE+GUEST_RAM1_SIZE?
>
>
>> +#define GUEST_VPCI_MEM_CPU_ADDR           xen_mk_ullong(0x04020000)
>> +#define GUEST_VPCI_IO_CPU_ADDR            xen_mk_ullong(0xC0200800)
> 0xC0200800 looks like it could conflict with
> GUEST_RAM0_BASE+GUEST_RAM0_SIZE?
>
>
>> +/*
>> + * This is hardcoded values for the real PCI physical addresses.
>> + * This will be removed once we read the real PCI-PCIe physical
>> + * addresses form the config space and map to the guest memory map
>> + * when assigning the device to guest via VPCI.
>> + *
>> + */
>> +#define GUEST_VPCI_PREFETCH_MEM_PCI_ADDR  xen_mk_ullong(0x4000000000)
>> +#define GUEST_VPCI_MEM_PCI_ADDR           xen_mk_ullong(0x50000000)
>> +#define GUEST_VPCI_IO_PCI_ADDR            xen_mk_ullong(0x00000000)
>> +
>> +#define GUEST_VPCI_PREFETCH_MEM_SIZE      xen_mk_ullong(0x100000000)
>> +#define GUEST_VPCI_MEM_SIZE               xen_mk_ullong(0x08000000)
> How did you chose these sizes? GUEST_VPCI_MEM_SIZE and/or
> GUEST_VPCI_PREFETCH_MEM_SIZE are supposed to potentially cover all the
> PCI BARs, including potential future hotplug devices, right?
>
> If so, maybe we need to increase GUEST_VPCI_MEM_SIZE to a couple of GB
> and GUEST_VPCI_PREFETCH_MEM_SIZE to even more?
>
>
>
>
>> +#define GUEST_VPCI_IO_SIZE                xen_mk_ullong(0x00800000)
>> +
>>   /*
>>    * 16MB == 4096 pages reserved for guest to use as a region to map its
>>    * grant table in.
Julien Grall July 24, 2020, 9:11 a.m. UTC | #3
Hi,

On 24/07/2020 00:39, Stefano Stabellini wrote:
> On Thu, 23 Jul 2020, Rahul Singh wrote:
>> +    if (res) return res;
>> +
>> +    /*
>> +     * Legacy interrupt is forced and assigned to the guest.
>> +     * This will be removed once we have implementation for MSI support.
>> +     *
>> +     */
>> +    res = fdt_property_vpci_interrupt_map(gc, fdt, 3, 1, 0, 3,
>> +            4,
>> +            0, 0, 0, 1, 0, 136, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 2, 0, 137, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 3, 0, 138, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 4, 0, 139, DT_IRQ_TYPE_LEVEL_HIGH);
> 
> The 4 interrupt allocated for this need to be defined in
> xen/include/public/arch-arm.h as well. Also, why would we want to get
> rid of the legacy interrupts completely? 

With legacy interrupts, there are a few cases to take into account:
    1) Two PCI devices from the same hostbridge are assigned to 
different cases. As SPIs (used for legacy interrupts) can only be routed 
to one guest, we would need to now be able to share them. This raises 
the question when to EOI the physical interrupts. AFAICT, Linux has some 
code to deal with it using a timer if it takes too long.

    2) Two PCI devices from two distinct hostbridge are assigned to the 
same virtual hostbridge. Legacy interrupts would need to be virtual and 
we would possibly need to merge multiple physical interrupts into one 
virtual.

    3) A mix of virtual and physical PCI devices are assigned to the 
same host bridges. Same as 2) legacy interrupts would need to be virtual.

Given the complexity of handling interrupts legacy and the fact they 
will largely be slower compare to MSIs, I would rather focus on MSIs at 
first.

We can then decide to implement legacy interrupts if there is a real need.

> It would be possible to still
> find device or software that rely on them.

PCI devices without MSI support is getting extremely rare. There are 
actually Arm HW that doesn't support legacy interrupts at all (such as 
Thunder-X). I can't tell for the software.
Rahul Singh July 27, 2020, 1:40 p.m. UTC | #4
> On 24 Jul 2020, at 12:39 am, Stefano Stabellini <sstabellini@kernel.org> wrote:
> 
> On Thu, 23 Jul 2020, Rahul Singh wrote:
>> libxl will create an emulated PCI device tree node in the
>> device tree to enable the guest OS to discover the virtual
>> PCI during guest boot.
>> 
>> We introduced the new config option [vpci="ecam"] for guests.
>> When this config option is enabled in a guest configuration,
>> a PCI device tree node will be created in the guest device tree.
>> 
>> A new area has been reserved in the arm guest physical map at
>> which the VPCI bus is declared in the device tree (reg and ranges
>> parameters of the node).
>> 
>> Change-Id: I47d39cbe8184de2226f174644df9790ecc610ccd
> 
> Same question

I will remove the change-id in the next version.

> 
> 
>> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
>> ---
>> tools/libxl/libxl_arm.c       | 200 ++++++++++++++++++++++++++++++++++
>> tools/libxl/libxl_types.idl   |   6 +
>> tools/xl/xl_parse.c           |   7 ++
>> xen/include/public/arch-arm.h |  28 +++++
>> 4 files changed, 241 insertions(+)
>> 
>> diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
>> index 34f8a29056..84568e9dc9 100644
>> --- a/tools/libxl/libxl_arm.c
>> +++ b/tools/libxl/libxl_arm.c
>> @@ -268,6 +268,130 @@ static int fdt_property_regs(libxl__gc *gc, void *fdt,
>>     return fdt_property(fdt, "reg", regs, sizeof(regs));
>> }
>> 
>> +static int fdt_property_vpci_bus_range(libxl__gc *gc, void *fdt,
>> +        unsigned num_cells, ...)
>> +{
>> +    uint32_t bus_range[num_cells];
>> +    be32 *cells = &bus_range[0];
>> +    int i;
>> +    va_list ap;
>> +    uint32_t arg;
>> +
>> +    va_start(ap, num_cells);
>> +    for (i = 0 ; i < num_cells; i++) {
>> +        arg = va_arg(ap, uint32_t);
>> +        set_cell(&cells, 1, arg);
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "bus-range", bus_range, sizeof(bus_range));
>> +}
>> +
>> +static int fdt_property_vpci_interrupt_map_mask(libxl__gc *gc, void *fdt,
>> +        unsigned num_cells, ...)
>> +{
>> +    uint32_t interrupt_map_mask[num_cells];
>> +    be32 *cells = &interrupt_map_mask[0];
>> +    int i;
>> +    va_list ap;
>> +    uint32_t arg;
>> +
>> +    va_start(ap, num_cells);
>> +    for (i = 0 ; i < num_cells; i++) {
>> +        arg = va_arg(ap, uint32_t);
>> +        set_cell(&cells, 1, arg);
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "interrupt-map-mask", interrupt_map_mask,
>> +                                sizeof(interrupt_map_mask));
>> +}
>> +
>> +static int fdt_property_vpci_ranges(libxl__gc *gc, void *fdt,
>> +        unsigned vpci_addr_cells,
>> +        unsigned cpu_addr_cells,
>> +        unsigned vpci_size_cells,
>> +        unsigned num_regs, ...)
>> +{
>> +    uint32_t regs[num_regs*(vpci_addr_cells+cpu_addr_cells+vpci_size_cells)];
>> +    be32 *cells = &regs[0];
>> +    int i;
>> +    va_list ap;
>> +    uint64_t arg;
>> +
>> +    va_start(ap, num_regs);
>> +    for (i = 0 ; i < num_regs; i++) {
>> +        /* Set the memory bit field */
>> +        arg = va_arg(ap, uint64_t);
>> +        set_cell(&cells, 1, arg);
>> +
>> +        /* Set the vpci bus address */
>> +        arg = vpci_addr_cells ? va_arg(ap, uint64_t) : 0;
>> +        set_cell(&cells, 2 , arg);
>> +
>> +        /* Set the cpu bus address where vpci address is mapped */
>> +        arg = cpu_addr_cells ? va_arg(ap, uint64_t) : 0;
>> +        set_cell(&cells, cpu_addr_cells, arg);
>> +
>> +        /* Set the vpci size requested */
>> +        arg = vpci_size_cells ? va_arg(ap, uint64_t) : 0;
>> +        set_cell(&cells, vpci_size_cells,arg);
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "ranges", regs, sizeof(regs));
>> +}
>> +
>> +static int fdt_property_vpci_interrupt_map(libxl__gc *gc, void *fdt,
>> +        unsigned child_unit_addr_cells,
>> +        unsigned child_interrupt_specifier_cells,
>> +        unsigned parent_unit_addr_cells,
>> +        unsigned parent_interrupt_specifier_cells,
>> +        unsigned num_regs, ...)
>> +{
>> +    uint32_t interrupt_map[num_regs * (child_unit_addr_cells +
>> +            child_interrupt_specifier_cells + parent_unit_addr_cells
>> +            + parent_interrupt_specifier_cells + 1)];
>> +    be32 *cells = &interrupt_map[0];
>> +    int i,j;
>> +    va_list ap;
>> +    uint64_t arg;
>> +
>> +    va_start(ap, num_regs);
>> +    for (i = 0 ; i < num_regs; i++) {
>> +        /* Set the child unit address*/
>> +        for (j = 0 ; j < child_unit_addr_cells; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +
>> +        /* Set the child interrupt specifier*/
>> +        for (j = 0 ; j < child_interrupt_specifier_cells ; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +
>> +        /* Set the interrupt-parent*/
>> +        set_cell(&cells, 1 , GUEST_PHANDLE_GIC);
>> +
>> +        /* Set the parent unit address*/
>> +        for (j = 0 ; j < parent_unit_addr_cells; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +
>> +        /* Set the parent interrupt specifier*/
>> +        for (j = 0 ; j < parent_interrupt_specifier_cells; j++) {
>> +            arg = va_arg(ap, uint32_t);
>> +            set_cell(&cells, 1 , arg);
>> +        }
>> +    }
>> +    va_end(ap);
>> +
>> +    return fdt_property(fdt, "interrupt-map", interrupt_map,
>> +                                sizeof(interrupt_map));
>> +}
>> +
>> static int make_root_properties(libxl__gc *gc,
>>                                 const libxl_version_info *vers,
>>                                 void *fdt)
>> @@ -659,6 +783,79 @@ static int make_vpl011_uart_node(libxl__gc *gc, void *fdt,
>>     return 0;
>> }
>> 
>> +static int make_vpci_node(libxl__gc *gc, void *fdt,
>> +        const struct arch_info *ainfo,
>> +        struct xc_dom_image *dom)
>> +{
>> +    int res;
>> +    const uint64_t vpci_ecam_base = GUEST_VPCI_ECAM_BASE;
>> +    const uint64_t vpci_ecam_size = GUEST_VPCI_ECAM_SIZE;
>> +    const char *name = GCSPRINTF("pcie@%"PRIx64, vpci_ecam_base);
>> +
>> +    res = fdt_begin_node(fdt, name);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_compat(gc, fdt, 1, "pci-host-ecam-generic");
>> +    if (res) return res;
>> +
>> +    res = fdt_property_string(fdt, "device_type", "pci");
>> +    if (res) return res;
>> +
>> +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
>> +            GUEST_ROOT_SIZE_CELLS, 1, vpci_ecam_base, vpci_ecam_size);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_vpci_bus_range(gc, fdt, 2, 0,255);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "linux,pci-domain", 0);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "#address-cells", 3);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "#size-cells", 2);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_cell(fdt, "#interrupt-cells", 1);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_string(fdt, "status", "okay");
>> +    if (res) return res;
>> +
>> +    res = fdt_property_vpci_ranges(gc, fdt, GUEST_PCI_ADDRESS_CELLS,
>> +        GUEST_ROOT_ADDRESS_CELLS, GUEST_PCI_SIZE_CELLS,
>> +        3,
>> +        GUEST_VPCI_ADDR_TYPE_MEM, GUEST_VPCI_MEM_PCI_ADDR,
>> +        GUEST_VPCI_MEM_CPU_ADDR, GUEST_VPCI_MEM_SIZE,
>> +        GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM, GUEST_VPCI_PREFETCH_MEM_PCI_ADDR,
>> +        GUEST_VPCI_PREFETCH_MEM_CPU_ADDR, GUEST_VPCI_PREFETCH_MEM_SIZE,
>> +        GUEST_VPCI_ADDR_TYPE_IO, GUEST_VPCI_IO_PCI_ADDR,
>> +        GUEST_VPCI_IO_CPU_ADDR, GUEST_VPCI_IO_SIZE);
>> +    if (res) return res;
>> +
>> +    res = fdt_property_vpci_interrupt_map_mask(gc, fdt, 4, 0, 0, 0, 7);
> 
> it would make sense to separate out child_unit_addr_cells and
> child_interrupt_specifier_cells here like we do below with
> fdt_property_vpci_interrupt_map

Ok will fix.

> 
> 
>> +    if (res) return res;
>> +
>> +    /*
>> +     * Legacy interrupt is forced and assigned to the guest.
>> +     * This will be removed once we have implementation for MSI support.
>> +     *
>> +     */
>> +    res = fdt_property_vpci_interrupt_map(gc, fdt, 3, 1, 0, 3,
>> +            4,
>> +            0, 0, 0, 1, 0, 136, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 2, 0, 137, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 3, 0, 138, DT_IRQ_TYPE_LEVEL_HIGH,
>> +            0, 0, 0, 4, 0, 139, DT_IRQ_TYPE_LEVEL_HIGH);
> 
> The 4 interrupt allocated for this need to be defined in
> xen/include/public/arch-arm.h as well. Also, why would we want to get
> rid of the legacy interrupts completely? It would be possible to still
> find device or software that rely on them.
> 

Ok will fix that. 
Regarding legacy interrupt we have just tested on one of the board don’t know how it will work on other boards.
We will mostly support MSI and will see if we have to support the legacy interrupt going forward.

> 
>> +    if (res) return res;
>> +
>> +    res = fdt_end_node(fdt);
>> +    if (res) return res;
>> +
>> +    return 0;
>> +}
>> +
>> static const struct arch_info *get_arch_info(libxl__gc *gc,
>>                                              const struct xc_dom_image *dom)
>> {
> 
> [...]
> 
> 
>> diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
>> index 7364a07362..4e19c62948 100644
>> --- a/xen/include/public/arch-arm.h
>> +++ b/xen/include/public/arch-arm.h
>> @@ -426,6 +426,34 @@ typedef uint64_t xen_callback_t;
>> #define GUEST_VPCI_ECAM_BASE    xen_mk_ullong(0x10000000)
>> #define GUEST_VPCI_ECAM_SIZE    xen_mk_ullong(0x10000000)
>> 
>> +#define GUEST_PCI_ADDRESS_CELLS 3
>> +#define GUEST_PCI_SIZE_CELLS 2
>> +
>> +/* PCI-PCIe memory space types */
>> +#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM xen_mk_ullong(0x42000000)
>> +#define GUEST_VPCI_ADDR_TYPE_MEM          xen_mk_ullong(0x02000000)
>> +#define GUEST_VPCI_ADDR_TYPE_IO           xen_mk_ullong(0x01000000)
>> +
>> +/* Guest PCI-PCIe memory space where config space and BAR will be available.*/
>> +#define GUEST_VPCI_PREFETCH_MEM_CPU_ADDR  xen_mk_ullong(0x4000000000)
> 
> It looks like it could conflict with GUEST_RAM1_BASE+GUEST_RAM1_SIZE?

Ok yes will fix that and will define the address ranges for guest once we finalised  the VPCI topology for the guest. We are currently investigating if we want to follow the hardware topology for the guest or we will create a different virtual topology for the guest independent of the hw topology.
> 
> 
>> +#define GUEST_VPCI_MEM_CPU_ADDR           xen_mk_ullong(0x04020000)
>> +#define GUEST_VPCI_IO_CPU_ADDR            xen_mk_ullong(0xC0200800)
> 
> 0xC0200800 looks like it could conflict with
> GUEST_RAM0_BASE+GUEST_RAM0_SIZE?
> 

Same comment above.
> 
>> +/*
>> + * This is hardcoded values for the real PCI physical addresses.
>> + * This will be removed once we read the real PCI-PCIe physical
>> + * addresses form the config space and map to the guest memory map
>> + * when assigning the device to guest via VPCI.
>> + *
>> + */
>> +#define GUEST_VPCI_PREFETCH_MEM_PCI_ADDR  xen_mk_ullong(0x4000000000)
>> +#define GUEST_VPCI_MEM_PCI_ADDR           xen_mk_ullong(0x50000000)
>> +#define GUEST_VPCI_IO_PCI_ADDR            xen_mk_ullong(0x00000000)
>> +
>> +#define GUEST_VPCI_PREFETCH_MEM_SIZE      xen_mk_ullong(0x100000000)
>> +#define GUEST_VPCI_MEM_SIZE               xen_mk_ullong(0x08000000)
> 
> How did you chose these sizes? GUEST_VPCI_MEM_SIZE and/or
> GUEST_VPCI_PREFETCH_MEM_SIZE are supposed to potentially cover all the
> PCI BARs, including potential future hotplug devices, right?
> 
> If so, maybe we need to increase GUEST_VPCI_MEM_SIZE to a couple of GB
> and GUEST_VPCI_PREFETCH_MEM_SIZE to even more?

Same comments above.
> 
> 
> 
> 
>> +#define GUEST_VPCI_IO_SIZE                xen_mk_ullong(0x00800000)
>> +
>> /*
>>  * 16MB == 4096 pages reserved for guest to use as a region to map its
>>  * grant table in.
diff mbox series

Patch

diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
index 34f8a29056..84568e9dc9 100644
--- a/tools/libxl/libxl_arm.c
+++ b/tools/libxl/libxl_arm.c
@@ -268,6 +268,130 @@  static int fdt_property_regs(libxl__gc *gc, void *fdt,
     return fdt_property(fdt, "reg", regs, sizeof(regs));
 }
 
+static int fdt_property_vpci_bus_range(libxl__gc *gc, void *fdt,
+        unsigned num_cells, ...)
+{
+    uint32_t bus_range[num_cells];
+    be32 *cells = &bus_range[0];
+    int i;
+    va_list ap;
+    uint32_t arg;
+
+    va_start(ap, num_cells);
+    for (i = 0 ; i < num_cells; i++) {
+        arg = va_arg(ap, uint32_t);
+        set_cell(&cells, 1, arg);
+    }
+    va_end(ap);
+
+    return fdt_property(fdt, "bus-range", bus_range, sizeof(bus_range));
+}
+
+static int fdt_property_vpci_interrupt_map_mask(libxl__gc *gc, void *fdt,
+        unsigned num_cells, ...)
+{
+    uint32_t interrupt_map_mask[num_cells];
+    be32 *cells = &interrupt_map_mask[0];
+    int i;
+    va_list ap;
+    uint32_t arg;
+
+    va_start(ap, num_cells);
+    for (i = 0 ; i < num_cells; i++) {
+        arg = va_arg(ap, uint32_t);
+        set_cell(&cells, 1, arg);
+    }
+    va_end(ap);
+
+    return fdt_property(fdt, "interrupt-map-mask", interrupt_map_mask,
+                                sizeof(interrupt_map_mask));
+}
+
+static int fdt_property_vpci_ranges(libxl__gc *gc, void *fdt,
+        unsigned vpci_addr_cells,
+        unsigned cpu_addr_cells,
+        unsigned vpci_size_cells,
+        unsigned num_regs, ...)
+{
+    uint32_t regs[num_regs*(vpci_addr_cells+cpu_addr_cells+vpci_size_cells)];
+    be32 *cells = &regs[0];
+    int i;
+    va_list ap;
+    uint64_t arg;
+
+    va_start(ap, num_regs);
+    for (i = 0 ; i < num_regs; i++) {
+        /* Set the memory bit field */
+        arg = va_arg(ap, uint64_t);
+        set_cell(&cells, 1, arg);
+
+        /* Set the vpci bus address */
+        arg = vpci_addr_cells ? va_arg(ap, uint64_t) : 0;
+        set_cell(&cells, 2 , arg);
+
+        /* Set the cpu bus address where vpci address is mapped */
+        arg = cpu_addr_cells ? va_arg(ap, uint64_t) : 0;
+        set_cell(&cells, cpu_addr_cells, arg);
+
+        /* Set the vpci size requested */
+        arg = vpci_size_cells ? va_arg(ap, uint64_t) : 0;
+        set_cell(&cells, vpci_size_cells,arg);
+    }
+    va_end(ap);
+
+    return fdt_property(fdt, "ranges", regs, sizeof(regs));
+}
+
+static int fdt_property_vpci_interrupt_map(libxl__gc *gc, void *fdt,
+        unsigned child_unit_addr_cells,
+        unsigned child_interrupt_specifier_cells,
+        unsigned parent_unit_addr_cells,
+        unsigned parent_interrupt_specifier_cells,
+        unsigned num_regs, ...)
+{
+    uint32_t interrupt_map[num_regs * (child_unit_addr_cells +
+            child_interrupt_specifier_cells + parent_unit_addr_cells
+            + parent_interrupt_specifier_cells + 1)];
+    be32 *cells = &interrupt_map[0];
+    int i,j;
+    va_list ap;
+    uint64_t arg;
+
+    va_start(ap, num_regs);
+    for (i = 0 ; i < num_regs; i++) {
+        /* Set the child unit address*/
+        for (j = 0 ; j < child_unit_addr_cells; j++) {
+            arg = va_arg(ap, uint32_t);
+            set_cell(&cells, 1 , arg);
+        }
+
+        /* Set the child interrupt specifier*/
+        for (j = 0 ; j < child_interrupt_specifier_cells ; j++) {
+            arg = va_arg(ap, uint32_t);
+            set_cell(&cells, 1 , arg);
+        }
+
+        /* Set the interrupt-parent*/
+        set_cell(&cells, 1 , GUEST_PHANDLE_GIC);
+
+        /* Set the parent unit address*/
+        for (j = 0 ; j < parent_unit_addr_cells; j++) {
+            arg = va_arg(ap, uint32_t);
+            set_cell(&cells, 1 , arg);
+        }
+
+        /* Set the parent interrupt specifier*/
+        for (j = 0 ; j < parent_interrupt_specifier_cells; j++) {
+            arg = va_arg(ap, uint32_t);
+            set_cell(&cells, 1 , arg);
+        }
+    }
+    va_end(ap);
+
+    return fdt_property(fdt, "interrupt-map", interrupt_map,
+                                sizeof(interrupt_map));
+}
+
 static int make_root_properties(libxl__gc *gc,
                                 const libxl_version_info *vers,
                                 void *fdt)
@@ -659,6 +783,79 @@  static int make_vpl011_uart_node(libxl__gc *gc, void *fdt,
     return 0;
 }
 
+static int make_vpci_node(libxl__gc *gc, void *fdt,
+        const struct arch_info *ainfo,
+        struct xc_dom_image *dom)
+{
+    int res;
+    const uint64_t vpci_ecam_base = GUEST_VPCI_ECAM_BASE;
+    const uint64_t vpci_ecam_size = GUEST_VPCI_ECAM_SIZE;
+    const char *name = GCSPRINTF("pcie@%"PRIx64, vpci_ecam_base);
+
+    res = fdt_begin_node(fdt, name);
+    if (res) return res;
+
+    res = fdt_property_compat(gc, fdt, 1, "pci-host-ecam-generic");
+    if (res) return res;
+
+    res = fdt_property_string(fdt, "device_type", "pci");
+    if (res) return res;
+
+    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
+            GUEST_ROOT_SIZE_CELLS, 1, vpci_ecam_base, vpci_ecam_size);
+    if (res) return res;
+
+    res = fdt_property_vpci_bus_range(gc, fdt, 2, 0,255);
+    if (res) return res;
+
+    res = fdt_property_cell(fdt, "linux,pci-domain", 0);
+    if (res) return res;
+
+    res = fdt_property_cell(fdt, "#address-cells", 3);
+    if (res) return res;
+
+    res = fdt_property_cell(fdt, "#size-cells", 2);
+    if (res) return res;
+
+    res = fdt_property_cell(fdt, "#interrupt-cells", 1);
+    if (res) return res;
+
+    res = fdt_property_string(fdt, "status", "okay");
+    if (res) return res;
+
+    res = fdt_property_vpci_ranges(gc, fdt, GUEST_PCI_ADDRESS_CELLS,
+        GUEST_ROOT_ADDRESS_CELLS, GUEST_PCI_SIZE_CELLS,
+        3,
+        GUEST_VPCI_ADDR_TYPE_MEM, GUEST_VPCI_MEM_PCI_ADDR,
+        GUEST_VPCI_MEM_CPU_ADDR, GUEST_VPCI_MEM_SIZE,
+        GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM, GUEST_VPCI_PREFETCH_MEM_PCI_ADDR,
+        GUEST_VPCI_PREFETCH_MEM_CPU_ADDR, GUEST_VPCI_PREFETCH_MEM_SIZE,
+        GUEST_VPCI_ADDR_TYPE_IO, GUEST_VPCI_IO_PCI_ADDR,
+        GUEST_VPCI_IO_CPU_ADDR, GUEST_VPCI_IO_SIZE);
+    if (res) return res;
+
+    res = fdt_property_vpci_interrupt_map_mask(gc, fdt, 4, 0, 0, 0, 7);
+    if (res) return res;
+
+    /*
+     * Legacy interrupt is forced and assigned to the guest.
+     * This will be removed once we have implementation for MSI support.
+     *
+     */
+    res = fdt_property_vpci_interrupt_map(gc, fdt, 3, 1, 0, 3,
+            4,
+            0, 0, 0, 1, 0, 136, DT_IRQ_TYPE_LEVEL_HIGH,
+            0, 0, 0, 2, 0, 137, DT_IRQ_TYPE_LEVEL_HIGH,
+            0, 0, 0, 3, 0, 138, DT_IRQ_TYPE_LEVEL_HIGH,
+            0, 0, 0, 4, 0, 139, DT_IRQ_TYPE_LEVEL_HIGH);
+    if (res) return res;
+
+    res = fdt_end_node(fdt);
+    if (res) return res;
+
+    return 0;
+}
+
 static const struct arch_info *get_arch_info(libxl__gc *gc,
                                              const struct xc_dom_image *dom)
 {
@@ -962,6 +1159,9 @@  next_resize:
         if (info->tee == LIBXL_TEE_TYPE_OPTEE)
             FDT( make_optee_node(gc, fdt) );
 
+        if (info->arch_arm.vpci == LIBXL_VPCI_TYPE_ECAM)
+            FDT( make_vpci_node(gc, fdt, ainfo, dom) );
+
         if (pfdt)
             FDT( copy_partial_fdt(gc, fdt, pfdt) );
 
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 9d3f05f399..d493637705 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -257,6 +257,11 @@  libxl_vuart_type = Enumeration("vuart_type", [
     (1, "sbsa_uart"),
     ])
 
+libxl_vpci_type = Enumeration("vpci_type", [
+    (0, "unknown"),
+    (1, "ecam"),
+    ])
+
 libxl_vkb_backend = Enumeration("vkb_backend", [
     (0, "UNKNOWN"),
     (1, "QEMU"),
@@ -640,6 +645,7 @@  libxl_domain_build_info = Struct("domain_build_info",[
 
     ("arch_arm", Struct(None, [("gic_version", libxl_gic_version),
                                ("vuart", libxl_vuart_type),
+                               ("vpci", libxl_vpci_type),
                               ])),
     # Alternate p2m is not bound to any architecture or guest type, as it is
     # supported by x86 HVM and ARM support is planned.
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 61b4ef7b7e..58b7e6f56a 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -1386,6 +1386,13 @@  void parse_config_data(const char *config_source,
         }
     }
 
+    if (!xlu_cfg_get_string(config, "vpci", &buf, 0)) {
+        if (libxl_vpci_type_from_string(buf, &b_info->arch_arm.vpci)) {
+            fprintf(stderr, "ERROR: invalid value \"%s\" for \"vpci\"\n",
+                    buf);
+            exit(1);
+        }
+    }
     parse_vnuma_config(config, b_info);
 
     /* Set max_memkb to target_memkb and max_vcpus to avail_vcpus if
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index 7364a07362..4e19c62948 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -426,6 +426,34 @@  typedef uint64_t xen_callback_t;
 #define GUEST_VPCI_ECAM_BASE    xen_mk_ullong(0x10000000)
 #define GUEST_VPCI_ECAM_SIZE    xen_mk_ullong(0x10000000)
 
+#define GUEST_PCI_ADDRESS_CELLS 3
+#define GUEST_PCI_SIZE_CELLS 2
+
+/* PCI-PCIe memory space types */
+#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM xen_mk_ullong(0x42000000)
+#define GUEST_VPCI_ADDR_TYPE_MEM          xen_mk_ullong(0x02000000)
+#define GUEST_VPCI_ADDR_TYPE_IO           xen_mk_ullong(0x01000000)
+
+/* Guest PCI-PCIe memory space where config space and BAR will be available.*/
+#define GUEST_VPCI_PREFETCH_MEM_CPU_ADDR  xen_mk_ullong(0x4000000000)
+#define GUEST_VPCI_MEM_CPU_ADDR           xen_mk_ullong(0x04020000)
+#define GUEST_VPCI_IO_CPU_ADDR            xen_mk_ullong(0xC0200800)
+
+/*
+ * This is hardcoded values for the real PCI physical addresses.
+ * This will be removed once we read the real PCI-PCIe physical
+ * addresses form the config space and map to the guest memory map
+ * when assigning the device to guest via VPCI.
+ *
+ */
+#define GUEST_VPCI_PREFETCH_MEM_PCI_ADDR  xen_mk_ullong(0x4000000000)
+#define GUEST_VPCI_MEM_PCI_ADDR           xen_mk_ullong(0x50000000)
+#define GUEST_VPCI_IO_PCI_ADDR            xen_mk_ullong(0x00000000)
+
+#define GUEST_VPCI_PREFETCH_MEM_SIZE      xen_mk_ullong(0x100000000)
+#define GUEST_VPCI_MEM_SIZE               xen_mk_ullong(0x08000000)
+#define GUEST_VPCI_IO_SIZE                xen_mk_ullong(0x00800000)
+
 /*
  * 16MB == 4096 pages reserved for guest to use as a region to map its
  * grant table in.