@@ -21,6 +21,8 @@
#include <xen/lib.h>
#include <xen/delay.h>
#include <xen/mm.h>
+#include <xen/rbtree.h>
+#include <xen/sched.h>
#include <xen/sizes.h>
#include <asm/gic.h>
#include <asm/gic_v3_defs.h>
@@ -36,6 +38,26 @@
*/
LIST_HEAD(host_its_list);
+/*
+ * Describes a device which is using the ITS and is used by a guest.
+ * Since device IDs are per ITS (in contrast to vLPIs, which are per
+ * guest), we have to differentiate between different virtual ITSes.
+ * We use the doorbell address here, since this is a nice architectural
+ * property of MSIs in general and we can easily get to the base address
+ * of the ITS and look that up.
+ */
+struct its_device {
+ struct rb_node rbnode;
+ struct host_its *hw_its;
+ void *itt_addr;
+ paddr_t guest_doorbell; /* Identifies the virtual ITS */
+ uint32_t host_devid;
+ uint32_t guest_devid;
+ uint32_t eventids; /* Number of event IDs (MSIs) */
+ uint32_t *host_lpi_blocks; /* Which LPIs are used on the host */
+ struct pending_irq *pend_irqs; /* One struct per event */
+};
+
bool gicv3_its_host_has_its(void)
{
return !list_empty(&host_its_list);
@@ -157,6 +179,20 @@ static int its_send_cmd_sync(struct host_its *its, unsigned int cpu)
return its_send_command(its, cmd);
}
+static int its_send_cmd_mapti(struct host_its *its,
+ uint32_t deviceid, uint32_t eventid,
+ uint32_t pintid, uint16_t icid)
+{
+ uint64_t cmd[4];
+
+ cmd[0] = GITS_CMD_MAPTI | ((uint64_t)deviceid << 32);
+ cmd[1] = eventid | ((uint64_t)pintid << 32);
+ cmd[2] = icid;
+ cmd[3] = 0x00;
+
+ return its_send_command(its, cmd);
+}
+
static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
unsigned int cpu)
{
@@ -171,6 +207,43 @@ static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
return its_send_command(its, cmd);
}
+static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
+ uint8_t size_bits, paddr_t itt_addr, bool valid)
+{
+ uint64_t cmd[4];
+
+ if ( valid )
+ {
+ ASSERT(size_bits <= its->evid_bits);
+ ASSERT(size_bits > 0);
+ ASSERT(!(itt_addr & ~GENMASK(51, 8)));
+
+ /* The number of events is encoded as "number of bits minus one". */
+ size_bits--;
+ }
+ cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
+ cmd[1] = size_bits;
+ cmd[2] = itt_addr;
+ if ( valid )
+ cmd[2] |= GITS_VALID_BIT;
+ cmd[3] = 0x00;
+
+ return its_send_command(its, cmd);
+}
+
+static int its_send_cmd_inv(struct host_its *its,
+ uint32_t deviceid, uint32_t eventid)
+{
+ uint64_t cmd[4];
+
+ cmd[0] = GITS_CMD_INV | ((uint64_t)deviceid << 32);
+ cmd[1] = eventid;
+ cmd[2] = 0x00;
+ cmd[3] = 0x00;
+
+ return its_send_command(its, cmd);
+}
+
/* Set up the (1:1) collection mapping for the given host CPU. */
int gicv3_its_setup_collection(unsigned int cpu)
{
@@ -450,6 +523,279 @@ int gicv3_its_init(void)
return 0;
}
+/*
+ * TODO: Investigate the interaction when a guest removes a device while
+ * some LPIs are still in flight.
+ */
+static int remove_mapped_guest_device(struct its_device *dev)
+{
+ int ret = 0;
+ unsigned int i;
+
+ if ( dev->hw_its )
+ /* MAPD also discards all events with this device ID. */
+ ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);
+
+ for ( i = 0; i < dev->eventids / LPI_BLOCK; i++ )
+ gicv3_free_host_lpi_block(dev->host_lpi_blocks[i]);
+
+ /* Make sure the MAPD command above is really executed. */
+ if ( !ret )
+ ret = gicv3_its_wait_commands(dev->hw_its);
+
+ /* This should never happen, but just in case ... */
+ if ( ret && printk_ratelimit() )
+ printk(XENLOG_WARNING "Can't unmap host ITS device 0x%x\n",
+ dev->host_devid);
+
+ xfree(dev->itt_addr);
+ xfree(dev->pend_irqs);
+ xfree(dev->host_lpi_blocks);
+ xfree(dev);
+
+ return 0;
+}
+
+static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
+{
+ struct host_its *hw_its;
+
+ list_for_each_entry(hw_its, &host_its_list, entry)
+ {
+ if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
+ return hw_its;
+ }
+
+ return NULL;
+}
+
+static int compare_its_guest_devices(struct its_device *dev,
+ paddr_t vdoorbell, uint32_t vdevid)
+{
+ if ( dev->guest_doorbell < vdoorbell )
+ return -1;
+
+ if ( dev->guest_doorbell > vdoorbell )
+ return 1;
+
+ if ( dev->guest_devid < vdevid )
+ return -1;
+
+ if ( dev->guest_devid > vdevid )
+ return 1;
+
+ return 0;
+}
+
+/*
+ * On the host ITS @its, map @nr_events consecutive LPIs.
+ * The mapping connects a device @devid and event @eventid pair to LPI @lpi,
+ * increasing both @eventid and @lpi to cover the number of requested LPIs.
+ */
+static int gicv3_its_map_host_events(struct host_its *its,
+ uint32_t devid, uint32_t eventid,
+ uint32_t lpi, uint32_t nr_events)
+{
+ uint32_t i;
+ int ret;
+
+ for ( i = 0; i < nr_events; i++ )
+ {
+ /* For now we map every host LPI to host CPU 0 */
+ ret = its_send_cmd_mapti(its, devid, eventid + i, lpi + i, 0);
+ if ( ret )
+ return ret;
+
+ ret = its_send_cmd_inv(its, devid, eventid + i);
+ if ( ret )
+ return ret;
+ }
+
+ /* TODO: Consider using INVALL here. Didn't work on the model, though. */
+
+ ret = its_send_cmd_sync(its, 0);
+ if ( ret )
+ return ret;
+
+ return gicv3_its_wait_commands(its);
+}
+
+/*
+ * Map a hardware device, identified by a certain host ITS and its device ID
+ * to domain d, a guest ITS (identified by its doorbell address) and device ID.
+ * Also provide the number of events (MSIs) needed for that device.
+ * This does not check if this particular hardware device is already mapped
+ * at another domain, it is expected that this would be done by the caller.
+ */
+int gicv3_its_map_guest_device(struct domain *d,
+ paddr_t host_doorbell, uint32_t host_devid,
+ paddr_t guest_doorbell, uint32_t guest_devid,
+ uint64_t nr_events, bool valid)
+{
+ void *itt_addr = NULL;
+ struct host_its *hw_its;
+ struct its_device *dev = NULL;
+ struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
+ int i, ret = -ENOENT; /* "i" must be signed to check for >= 0 below. */
+
+ hw_its = gicv3_its_find_by_doorbell(host_doorbell);
+ if ( !hw_its )
+ return ret;
+
+ /* Sanitise the provided hardware values against the host ITS. */
+ if ( host_devid >= BIT(hw_its->devid_bits) )
+ return -EINVAL;
+
+ /*
+ * The ITS requires the number of events to be a power of 2. We allocate
+ * events and LPIs in chunks of LPI_BLOCK (=32), so make sure we
+ * allocate at least that many.
+ * TODO: Investigate if the number of events can be limited to smaller
+ * values if the guest does not require that many.
+ */
+ nr_events = BIT(fls(nr_events - 1));
+ if ( nr_events < LPI_BLOCK )
+ nr_events = LPI_BLOCK;
+ if ( nr_events >= BIT(hw_its->evid_bits) )
+ return -EINVAL;
+
+ /* check for already existing mappings */
+ spin_lock(&d->arch.vgic.its_devices_lock);
+ while ( *new )
+ {
+ struct its_device *temp;
+ int cmp;
+
+ temp = rb_entry(*new, struct its_device, rbnode);
+
+ parent = *new;
+ cmp = compare_its_guest_devices(temp, guest_doorbell, guest_devid);
+ if ( !cmp )
+ {
+ if ( !valid )
+ rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
+
+ spin_unlock(&d->arch.vgic.its_devices_lock);
+
+ if ( valid )
+ {
+ printk(XENLOG_G_WARNING "d%d tried to remap guest ITS device 0x%x to host device 0x%x\n",
+ d->domain_id, guest_devid, host_devid);
+ return -EBUSY;
+ }
+
+ return remove_mapped_guest_device(temp);
+ }
+
+ if ( cmp > 0 )
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ if ( !valid )
+ goto out_unlock;
+
+ ret = -ENOMEM;
+
+ /* An Interrupt Translation Table needs to be 256-byte aligned. */
+ itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
+ if ( !itt_addr )
+ goto out_unlock;
+
+ dev = xzalloc(struct its_device);
+ if ( !dev )
+ goto out_unlock;
+
+ /*
+ * Allocate the pending_irqs for each virtual LPI. They will be put
+ * into the domain's radix tree upon the guest's MAPTI command.
+ * Pre-allocating memory for each *possible* LPI would be using way
+ * too much memory (they can be sparsely used by the guest), also
+ * allocating them on demand requires memory allocation in the interrupt
+ * injection code path, which is not really desired.
+ * So we compromise here by pre-allocating memory for each possible event
+ * up to the max specified by MAPD.
+ * See the mailing list discussion for some background:
+ * https://lists.xen.org/archives/html/xen-devel/2017-03/msg03645.html
+ */
+ dev->pend_irqs = xzalloc_array(struct pending_irq, nr_events);
+ if ( !dev->pend_irqs )
+ goto out_unlock;
+
+ dev->host_lpi_blocks = xzalloc_array(uint32_t, nr_events);
+ if ( !dev->host_lpi_blocks )
+ goto out_unlock;
+
+ ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1),
+ virt_to_maddr(itt_addr), true);
+ if ( ret )
+ goto out_unlock;
+
+ dev->itt_addr = itt_addr;
+ dev->hw_its = hw_its;
+ dev->guest_doorbell = guest_doorbell;
+ dev->guest_devid = guest_devid;
+ dev->host_devid = host_devid;
+ dev->eventids = nr_events;
+
+ rb_link_node(&dev->rbnode, parent, new);
+ rb_insert_color(&dev->rbnode, &d->arch.vgic.its_devices);
+
+ spin_unlock(&d->arch.vgic.its_devices_lock);
+
+ /*
+ * Map all host LPIs within this device already. We can't afford to queue
+ * any host ITS commands later on during the guest's runtime.
+ */
+ for ( i = 0; i < nr_events / LPI_BLOCK; i++ )
+ {
+ ret = gicv3_allocate_host_lpi_block(d, &dev->host_lpi_blocks[i]);
+ if ( ret < 0 )
+ break;
+
+ ret = gicv3_its_map_host_events(hw_its, host_devid, i * LPI_BLOCK,
+ dev->host_lpi_blocks[i], LPI_BLOCK);
+ if ( ret < 0 )
+ break;
+ }
+
+ if ( ret )
+ {
+ /* Clean up all allocated host LPI blocks. */
+ for ( ; i >= 0; i-- )
+ {
+ if ( dev->host_lpi_blocks[i] )
+ gicv3_free_host_lpi_block(dev->host_lpi_blocks[i]);
+ }
+
+ /*
+ * Unmapping the device will discard all LPIs mapped so far.
+ * We are already on the failing path, so no error checking to
+ * not mask the original error value. This should never fail anyway.
+ */
+ its_send_cmd_mapd(hw_its, host_devid, 0, 0, false);
+
+ goto out;
+ }
+
+ return 0;
+
+out_unlock:
+ spin_unlock(&d->arch.vgic.its_devices_lock);
+
+out:
+ if ( dev )
+ {
+ xfree(dev->pend_irqs);
+ xfree(dev->host_lpi_blocks);
+ }
+ xfree(itt_addr);
+ xfree(dev);
+
+ return ret;
+}
+
/* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
void gicv3_its_dt_init(const struct dt_device_node *node)
{
@@ -69,11 +69,15 @@ struct vits_itte
int vgic_v3_its_init_domain(struct domain *d)
{
+ spin_lock_init(&d->arch.vgic.its_devices_lock);
+ d->arch.vgic.its_devices = RB_ROOT;
+
return 0;
}
void vgic_v3_its_free_domain(struct domain *d)
{
+ ASSERT(RB_EMPTY_ROOT(&d->arch.vgic.its_devices));
}
/*
@@ -10,6 +10,7 @@
#include <asm/gic.h>
#include <public/hvm/params.h>
#include <xen/serial.h>
+#include <xen/rbtree.h>
struct hvm_domain
{
@@ -108,6 +109,8 @@ struct arch_domain
} *rdist_regions;
int nr_regions; /* Number of rdist regions */
uint32_t rdist_stride; /* Re-Distributor stride */
+ struct rb_root its_devices; /* Devices mapped to an ITS */
+ spinlock_t its_devices_lock; /* Protects the its_devices tree */
#endif
} vgic;
@@ -98,7 +98,10 @@
#define GITS_CMD_MOVALL 0x0e
#define GITS_CMD_DISCARD 0x0f
+#define ITS_DOORBELL_OFFSET 0x10040
+
#include <xen/device_tree.h>
+#include <xen/rbtree.h>
#define HOST_ITS_FLUSH_CMD_QUEUE (1U << 0)
#define HOST_ITS_USES_PTA (1U << 1)
@@ -148,6 +151,16 @@ int gicv3_its_setup_collection(unsigned int cpu);
int vgic_v3_its_init_domain(struct domain *d);
void vgic_v3_its_free_domain(struct domain *d);
+/*
+ * Map a device on the host by allocating an ITT on the host (ITS).
+ * "nr_event" specifies how many events (interrupts) this device will need.
+ * Setting "valid" to false deallocates the device.
+ */
+int gicv3_its_map_guest_device(struct domain *d,
+ paddr_t host_doorbell, uint32_t host_devid,
+ paddr_t guest_doorbell, uint32_t guest_devid,
+ uint64_t nr_events, bool valid);
+
int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi);
void gicv3_free_host_lpi_block(uint32_t first_lpi);