diff mbox series

[v4,08/20] irqchip/gic-v4.1: Plumb get/set_irqchip_state SGI callbacks

Message ID 20200214145736.18550-9-maz@kernel.org
State New, archived
Headers show
Series irqchip/gic-v4: GICv4.1 architecture support | expand

Commit Message

Marc Zyngier Feb. 14, 2020, 2:57 p.m. UTC
To implement the get/set_irqchip_state callbacks (limited to the
PENDING state), we have to use a particular set of hacks:

- Reading the pending state is done by using a pair of new redistributor
  registers (GICR_VSGIR, GICR_VSGIPENDR), which allow the 16 interrupts
  state to be retrieved.
- Setting the pending state is done by generating it as we'd otherwise do
  for a guest (writing to GITS_SGIR)
- Clearing the pending state is done by emiting a VSGI command with the
  "clear" bit set.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-gic-v3-its.c   | 56 ++++++++++++++++++++++++++++++
 include/linux/irqchip/arm-gic-v3.h | 14 ++++++++
 2 files changed, 70 insertions(+)

Comments

Zenghui Yu Feb. 18, 2020, 7 a.m. UTC | #1
Hi Marc,

On 2020/2/14 22:57, Marc Zyngier wrote:
> To implement the get/set_irqchip_state callbacks (limited to the
> PENDING state), we have to use a particular set of hacks:
> 
> - Reading the pending state is done by using a pair of new redistributor
>    registers (GICR_VSGIR, GICR_VSGIPENDR), which allow the 16 interrupts
>    state to be retrieved.
> - Setting the pending state is done by generating it as we'd otherwise do
>    for a guest (writing to GITS_SGIR)
> - Clearing the pending state is done by emiting a VSGI command with the
>    "clear" bit set.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>   drivers/irqchip/irq-gic-v3-its.c   | 56 ++++++++++++++++++++++++++++++
>   include/linux/irqchip/arm-gic-v3.h | 14 ++++++++
>   2 files changed, 70 insertions(+)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 1e448d9a16ea..a9753435c4ff 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -3915,11 +3915,67 @@ static int its_sgi_set_affinity(struct irq_data *d,
>   	return -EINVAL;
>   }
>   
> +static int its_sgi_set_irqchip_state(struct irq_data *d,
> +				     enum irqchip_irq_state which,
> +				     bool state)
> +{
> +	if (which != IRQCHIP_STATE_PENDING)
> +		return -EINVAL;
> +
> +	if (state) {
> +		struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
> +		struct its_node *its = find_4_1_its();
> +		u64 val;
> +
> +		val  = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id);
> +		val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq);
> +		writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K);
> +	} else {
> +		its_configure_sgi(d, true);
> +	}
> +
> +	return 0;
> +}
> +
> +static int its_sgi_get_irqchip_state(struct irq_data *d,
> +				     enum irqchip_irq_state which, bool *val)
> +{
> +	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
> +	void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + SZ_128K;

There might be a race on reading the 'vpe->col_idx' against a concurrent
vPE schedule (col_idx will be modified in its_vpe_set_affinity)? Will we
end up accessing the GICR_VSGI* registers of the old redistributor,
while the vPE is now resident on the new one? Or is it harmful?

The same question for direct_lpi_inv(), where 'vpe->col_idx' will be
used in irq_to_cpuid().

> +	u32 count = 1000000;	/* 1s! */
> +	u32 status;
> +
> +	if (which != IRQCHIP_STATE_PENDING)
> +		return -EINVAL;
> +
> +	writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
> +	do {
> +		status = readl_relaxed(base + GICR_VSGIPENDR);
> +		if (!(status & GICR_VSGIPENDR_BUSY))
> +			goto out;
> +
> +		count--;
> +		if (!count) {
> +			pr_err_ratelimited("Unable to get SGI status\n");
> +			goto out;
> +		}
> +		cpu_relax();
> +		udelay(1);
> +	} while(count);
> +
> +out:
> +	*val = !!(status & (1 << d->hwirq));
> +
> +	return 0;
> +}
> +
>   static struct irq_chip its_sgi_irq_chip = {
>   	.name			= "GICv4.1-sgi",
>   	.irq_mask		= its_sgi_mask_irq,
>   	.irq_unmask		= its_sgi_unmask_irq,
>   	.irq_set_affinity	= its_sgi_set_affinity,
> +	.irq_set_irqchip_state	= its_sgi_set_irqchip_state,
> +	.irq_get_irqchip_state	= its_sgi_get_irqchip_state,
>   };
>   
>   static int its_sgi_irq_domain_alloc(struct irq_domain *domain,
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index a89578884263..64da945486ac 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -345,6 +345,15 @@
>   #define GICR_VPENDBASER_4_1_VGRP1EN	(1ULL << 58)
>   #define GICR_VPENDBASER_4_1_VPEID	GENMASK_ULL(15, 0)
>   
> +#define GICR_VSGIR			0x0080
> +
> +#define GICR_VSGIR_VPEID		GENMASK(15, 0)
> +
> +#define GICR_VSGIPENDR			0x0088
> +
> +#define GICR_VSGIPENDR_BUSY		(1U << 31)
> +#define GICR_VSGIPENDR_PENDING		GENMASK(15, 0)
> +
>   /*
>    * ITS registers, offsets from ITS_base
>    */
> @@ -368,6 +377,11 @@
>   
>   #define GITS_TRANSLATER			0x10040
>   
> +#define GITS_SGIR			0x20020
> +
> +#define GITS_SGIR_VPEID			GENMASK_ULL(47, 32)
> +#define GITS_SGIR_VINTID		GENMASK_ULL(7, 0)

The spec says vINTID field is [3:0] of the GITS_SGIR.


Thanks,
Zenghui

> +
>   #define GITS_CTLR_ENABLE		(1U << 0)
>   #define GITS_CTLR_ImDe			(1U << 1)
>   #define	GITS_CTLR_ITS_NUMBER_SHIFT	4
>
Marc Zyngier Feb. 18, 2020, 9:27 a.m. UTC | #2
Hi Zenghui,

On 2020-02-18 07:00, Zenghui Yu wrote:
> Hi Marc,
> 
> On 2020/2/14 22:57, Marc Zyngier wrote:
>> To implement the get/set_irqchip_state callbacks (limited to the
>> PENDING state), we have to use a particular set of hacks:
>> 
>> - Reading the pending state is done by using a pair of new 
>> redistributor
>>    registers (GICR_VSGIR, GICR_VSGIPENDR), which allow the 16 
>> interrupts
>>    state to be retrieved.
>> - Setting the pending state is done by generating it as we'd otherwise 
>> do
>>    for a guest (writing to GITS_SGIR)
>> - Clearing the pending state is done by emiting a VSGI command with 
>> the
>>    "clear" bit set.
>> 
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> ---
>>   drivers/irqchip/irq-gic-v3-its.c   | 56 
>> ++++++++++++++++++++++++++++++
>>   include/linux/irqchip/arm-gic-v3.h | 14 ++++++++
>>   2 files changed, 70 insertions(+)
>> 
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
>> b/drivers/irqchip/irq-gic-v3-its.c
>> index 1e448d9a16ea..a9753435c4ff 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -3915,11 +3915,67 @@ static int its_sgi_set_affinity(struct 
>> irq_data *d,
>>   	return -EINVAL;
>>   }
>>   +static int its_sgi_set_irqchip_state(struct irq_data *d,
>> +				     enum irqchip_irq_state which,
>> +				     bool state)
>> +{
>> +	if (which != IRQCHIP_STATE_PENDING)
>> +		return -EINVAL;
>> +
>> +	if (state) {
>> +		struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
>> +		struct its_node *its = find_4_1_its();
>> +		u64 val;
>> +
>> +		val  = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id);
>> +		val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq);
>> +		writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K);
>> +	} else {
>> +		its_configure_sgi(d, true);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int its_sgi_get_irqchip_state(struct irq_data *d,
>> +				     enum irqchip_irq_state which, bool *val)
>> +{
>> +	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
>> +	void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + 
>> SZ_128K;
> 
> There might be a race on reading the 'vpe->col_idx' against a 
> concurrent
> vPE schedule (col_idx will be modified in its_vpe_set_affinity)? Will 
> we
> end up accessing the GICR_VSGI* registers of the old redistributor,
> while the vPE is now resident on the new one? Or is it harmful?

Very well spotted. There is a potential problem if old and new RDs are 
not part
of the same CommonLPIAff group.

> The same question for direct_lpi_inv(), where 'vpe->col_idx' will be
> used in irq_to_cpuid().

Same problem indeed. We need to ensure that no VMOVP operation can occur 
whilst
we use col_idx to access a redistributor. This means a vPE lock of some 
sort
that will protect the affinity.

But I think there is a slightly more general problem here, which we 
failed to
see initially: the same issue exists for physical LPIs, as col_map[] can 
be
updated (its_set_affinity()) in parallel with a direct invalidate.

The good old invalidation through the ITS does guarantee that the two 
operation
don't overlap, but direct invalidation breaks it.

Let me have a think about it.

> 
>> +	u32 count = 1000000;	/* 1s! */
>> +	u32 status;
>> +
>> +	if (which != IRQCHIP_STATE_PENDING)
>> +		return -EINVAL;
>> +
>> +	writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
>> +	do {
>> +		status = readl_relaxed(base + GICR_VSGIPENDR);
>> +		if (!(status & GICR_VSGIPENDR_BUSY))
>> +			goto out;
>> +
>> +		count--;
>> +		if (!count) {
>> +			pr_err_ratelimited("Unable to get SGI status\n");
>> +			goto out;
>> +		}
>> +		cpu_relax();
>> +		udelay(1);
>> +	} while(count);
>> +
>> +out:
>> +	*val = !!(status & (1 << d->hwirq));
>> +
>> +	return 0;
>> +}
>> +
>>   static struct irq_chip its_sgi_irq_chip = {
>>   	.name			= "GICv4.1-sgi",
>>   	.irq_mask		= its_sgi_mask_irq,
>>   	.irq_unmask		= its_sgi_unmask_irq,
>>   	.irq_set_affinity	= its_sgi_set_affinity,
>> +	.irq_set_irqchip_state	= its_sgi_set_irqchip_state,
>> +	.irq_get_irqchip_state	= its_sgi_get_irqchip_state,
>>   };
>>     static int its_sgi_irq_domain_alloc(struct irq_domain *domain,
>> diff --git a/include/linux/irqchip/arm-gic-v3.h 
>> b/include/linux/irqchip/arm-gic-v3.h
>> index a89578884263..64da945486ac 100644
>> --- a/include/linux/irqchip/arm-gic-v3.h
>> +++ b/include/linux/irqchip/arm-gic-v3.h
>> @@ -345,6 +345,15 @@
>>   #define GICR_VPENDBASER_4_1_VGRP1EN	(1ULL << 58)
>>   #define GICR_VPENDBASER_4_1_VPEID	GENMASK_ULL(15, 0)
>>   +#define GICR_VSGIR			0x0080
>> +
>> +#define GICR_VSGIR_VPEID		GENMASK(15, 0)
>> +
>> +#define GICR_VSGIPENDR			0x0088
>> +
>> +#define GICR_VSGIPENDR_BUSY		(1U << 31)
>> +#define GICR_VSGIPENDR_PENDING		GENMASK(15, 0)
>> +
>>   /*
>>    * ITS registers, offsets from ITS_base
>>    */
>> @@ -368,6 +377,11 @@
>>     #define GITS_TRANSLATER			0x10040
>>   +#define GITS_SGIR			0x20020
>> +
>> +#define GITS_SGIR_VPEID			GENMASK_ULL(47, 32)
>> +#define GITS_SGIR_VINTID		GENMASK_ULL(7, 0)
> 
> The spec says vINTID field is [3:0] of the GITS_SGIR.

Indeed, well spotted again!

Thanks,

          M.
Marc Zyngier Feb. 18, 2020, 3:31 p.m. UTC | #3
Hi Zenghui,

On 2020-02-18 09:27, Marc Zyngier wrote:
> Hi Zenghui,
> 
> On 2020-02-18 07:00, Zenghui Yu wrote:
>> Hi Marc,

[...]

>> There might be a race on reading the 'vpe->col_idx' against a 
>> concurrent
>> vPE schedule (col_idx will be modified in its_vpe_set_affinity)? Will 
>> we
>> end up accessing the GICR_VSGI* registers of the old redistributor,
>> while the vPE is now resident on the new one? Or is it harmful?
> 
> Very well spotted. There is a potential problem if old and new RDs are 
> not part
> of the same CommonLPIAff group.
> 
>> The same question for direct_lpi_inv(), where 'vpe->col_idx' will be
>> used in irq_to_cpuid().
> 
> Same problem indeed. We need to ensure that no VMOVP operation can 
> occur whilst
> we use col_idx to access a redistributor. This means a vPE lock of some 
> sort
> that will protect the affinity.
> 
> But I think there is a slightly more general problem here, which we 
> failed to
> see initially: the same issue exists for physical LPIs, as col_map[] 
> can be
> updated (its_set_affinity()) in parallel with a direct invalidate.
> 
> The good old invalidation through the ITS does guarantee that the two 
> operation
> don't overlap, but direct invalidation breaks it.
> 
> Let me have a think about it.

So I've thought about it, wrote a patch, and I don't really like the 
look of it.
This is pretty invasive, and we end-up serializing a lot more than we 
used to
(the repurposing of vlpi_lock to a general "lpi mapping lock" is 
probably too
coarse).

It of course needs splitting over at least three patches, but it'd be 
good if
you could have a look (applies on top of the whole series).

Thanks,

         M.

diff --git a/drivers/irqchip/irq-gic-v3-its.c 
b/drivers/irqchip/irq-gic-v3-its.c
index 7656b353a95f..0ed286dba827 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -144,7 +144,7 @@ struct event_lpi_map {
  	u16			*col_map;
  	irq_hw_number_t		lpi_base;
  	int			nr_lpis;
-	raw_spinlock_t		vlpi_lock;
+	raw_spinlock_t		map_lock;
  	struct its_vm		*vm;
  	struct its_vlpi_map	*vlpi_maps;
  	int			nr_vlpis;
@@ -240,15 +240,33 @@ static struct its_vlpi_map *get_vlpi_map(struct 
irq_data *d)
  	return NULL;
  }

-static int irq_to_cpuid(struct irq_data *d)
+static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
  {
-	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
  	struct its_vlpi_map *map = get_vlpi_map(d);
+	int cpu;

-	if (map)
-		return map->vpe->col_idx;
+	if (map) {
+		raw_spin_lock_irqsave(&map->vpe->vpe_lock, *flags);
+		cpu = map->vpe->col_idx;
+	} else {
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+		raw_spin_lock_irqsave(&its_dev->event_map.map_lock, *flags);
+		cpu = its_dev->event_map.col_map[its_get_event_id(d)];
+	}

-	return its_dev->event_map.col_map[its_get_event_id(d)];
+	return cpu;
+}
+
+static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long 
flags)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+
+	if (map) {
+		raw_spin_unlock_irqrestore(&map->vpe->vpe_lock, flags);
+	} else {
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+		raw_spin_unlock_irqrestore(&its_dev->event_map.map_lock, flags);
+	}
  }

  static struct its_collection *valid_col(struct its_collection *col)
@@ -1384,6 +1402,8 @@ static void direct_lpi_inv(struct irq_data *d)
  {
  	struct its_vlpi_map *map = get_vlpi_map(d);
  	void __iomem *rdbase;
+	unsigned long flags;
+	int cpu;
  	u64 val;

  	if (map) {
@@ -1399,10 +1419,12 @@ static void direct_lpi_inv(struct irq_data *d)
  	}

  	/* Target the redistributor this LPI is currently routed to */
-	rdbase = per_cpu_ptr(gic_rdists->rdist, irq_to_cpuid(d))->rd_base;
+	cpu = irq_to_cpuid_lock(d, &flags);
+	rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
  	gic_write_lpir(val, rdbase + GICR_INVLPIR);

  	wait_for_syncr(rdbase);
+	irq_to_cpuid_unlock(d, flags);
  }

  static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
@@ -1471,11 +1493,11 @@ static void its_unmask_irq(struct irq_data *d)
  static int its_set_affinity(struct irq_data *d, const struct cpumask 
*mask_val,
  			    bool force)
  {
-	unsigned int cpu;
  	const struct cpumask *cpu_mask = cpu_online_mask;
  	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
  	struct its_collection *target_col;
-	u32 id = its_get_event_id(d);
+	unsigned int from, cpu;
+	unsigned long flags;

  	/* A forwarded interrupt should use irq_set_vcpu_affinity */
  	if (irqd_is_forwarded_to_vcpu(d))
@@ -1496,12 +1518,16 @@ static int its_set_affinity(struct irq_data *d, 
const struct cpumask *mask_val,
  		return -EINVAL;

  	/* don't set the affinity when the target cpu is same as current one 
*/
-	if (cpu != its_dev->event_map.col_map[id]) {
+	from = irq_to_cpuid_lock(d, &flags);
+	if (cpu != from) {
+		u32 id = its_get_event_id(d);
+
  		target_col = &its_dev->its->collections[cpu];
  		its_send_movi(its_dev, target_col, id);
  		its_dev->event_map.col_map[id] = cpu;
  		irq_data_update_effective_affinity(d, cpumask_of(cpu));
  	}
+	irq_to_cpuid_unlock(d, flags);

  	return IRQ_SET_MASK_OK_DONE;
  }
@@ -1636,7 +1662,7 @@ static int its_vlpi_map(struct irq_data *d, struct 
its_cmd_info *info)
  	if (!info->map)
  		return -EINVAL;

-	raw_spin_lock(&its_dev->event_map.vlpi_lock);
+	raw_spin_lock(&its_dev->event_map.map_lock);

  	if (!its_dev->event_map.vm) {
  		struct its_vlpi_map *maps;
@@ -1685,7 +1711,7 @@ static int its_vlpi_map(struct irq_data *d, struct 
its_cmd_info *info)
  	}

  out:
-	raw_spin_unlock(&its_dev->event_map.vlpi_lock);
+	raw_spin_unlock(&its_dev->event_map.map_lock);
  	return ret;
  }

@@ -1695,7 +1721,7 @@ static int its_vlpi_get(struct irq_data *d, struct 
its_cmd_info *info)
  	struct its_vlpi_map *map;
  	int ret = 0;

-	raw_spin_lock(&its_dev->event_map.vlpi_lock);
+	raw_spin_lock(&its_dev->event_map.map_lock);

  	map = get_vlpi_map(d);

@@ -1708,7 +1734,7 @@ static int its_vlpi_get(struct irq_data *d, struct 
its_cmd_info *info)
  	*info->map = *map;

  out:
-	raw_spin_unlock(&its_dev->event_map.vlpi_lock);
+	raw_spin_unlock(&its_dev->event_map.map_lock);
  	return ret;
  }

@@ -1718,7 +1744,7 @@ static int its_vlpi_unmap(struct irq_data *d)
  	u32 event = its_get_event_id(d);
  	int ret = 0;

-	raw_spin_lock(&its_dev->event_map.vlpi_lock);
+	raw_spin_lock(&its_dev->event_map.map_lock);

  	if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
  		ret = -EINVAL;
@@ -1748,7 +1774,7 @@ static int its_vlpi_unmap(struct irq_data *d)
  	}

  out:
-	raw_spin_unlock(&its_dev->event_map.vlpi_lock);
+	raw_spin_unlock(&its_dev->event_map.map_lock);
  	return ret;
  }

@@ -3193,7 +3219,7 @@ static struct its_device *its_create_device(struct 
its_node *its, u32 dev_id,
  	dev->event_map.col_map = col_map;
  	dev->event_map.lpi_base = lpi_base;
  	dev->event_map.nr_lpis = nr_lpis;
-	raw_spin_lock_init(&dev->event_map.vlpi_lock);
+	raw_spin_lock_init(&dev->event_map.map_lock);
  	dev->device_id = dev_id;
  	INIT_LIST_HEAD(&dev->entry);

@@ -3560,6 +3586,7 @@ static int its_vpe_set_affinity(struct irq_data 
*d,
  {
  	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
  	int from, cpu = cpumask_first(mask_val);
+	unsigned long flags;

  	/*
  	 * Changing affinity is mega expensive, so let's be as lazy as
@@ -3567,6 +3594,7 @@ static int its_vpe_set_affinity(struct irq_data 
*d,
  	 * into the proxy device, we need to move the doorbell
  	 * interrupt to its new location.
  	 */
+	raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
  	if (vpe->col_idx == cpu)
  		goto out;

@@ -3586,6 +3614,7 @@ static int its_vpe_set_affinity(struct irq_data 
*d,

  out:
  	irq_data_update_effective_affinity(d, cpumask_of(cpu));
+	raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);

  	return IRQ_SET_MASK_OK_DONE;
  }
@@ -3695,11 +3724,15 @@ static void its_vpe_send_inv(struct irq_data *d)

  	if (gic_rdists->has_direct_lpi) {
  		void __iomem *rdbase;
+		unsigned long flags;
+		int cpu;

  		/* Target the redistributor this VPE is currently known on */
-		rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+		cpu = irq_to_cpuid_lock(d, &flags);
+		rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
  		gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
  		wait_for_syncr(rdbase);
+		irq_to_cpuid_unlock(d, flags);
  	} else {
  		its_vpe_send_cmd(vpe, its_send_inv);
  	}
@@ -3735,14 +3768,18 @@ static int its_vpe_set_irqchip_state(struct 
irq_data *d,

  	if (gic_rdists->has_direct_lpi) {
  		void __iomem *rdbase;
+		unsigned long flags;
+		int cpu;

-		rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+		cpu = irq_to_cpuid_lock(d, &flags);
+		rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
  		if (state) {
  			gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR);
  		} else {
  			gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
  			wait_for_syncr(rdbase);
  		}
+		irq_to_cpuid_unlock(d, flags);
  	} else {
  		if (state)
  			its_vpe_send_cmd(vpe, its_send_int);
@@ -3854,14 +3891,17 @@ static void its_vpe_4_1_deschedule(struct 
its_vpe *vpe,
  static void its_vpe_4_1_invall(struct its_vpe *vpe)
  {
  	void __iomem *rdbase;
+	unsigned long flags;
  	u64 val;

  	val  = GICR_INVALLR_V;
  	val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id);

  	/* Target the redistributor this vPE is currently known on */
+	raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
  	rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
  	gic_write_lpir(val, rdbase + GICR_INVALLR);
+	raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
  }

  static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void 
*vcpu_info)
@@ -3960,13 +4000,17 @@ static int its_sgi_get_irqchip_state(struct 
irq_data *d,
  				     enum irqchip_irq_state which, bool *val)
  {
  	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
-	void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + 
SZ_128K;
+	void __iomem *base;
+	unsigned long flags;
  	u32 count = 1000000;	/* 1s! */
  	u32 status;
+	int cpu;

  	if (which != IRQCHIP_STATE_PENDING)
  		return -EINVAL;

+	cpu = irq_to_cpuid_lock(d, &flags);
+	base = gic_data_rdist_cpu(cpu)->rd_base + SZ_128K;
  	writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
  	do {
  		status = readl_relaxed(base + GICR_VSGIPENDR);
@@ -3983,6 +4027,7 @@ static int its_sgi_get_irqchip_state(struct 
irq_data *d,
  	} while(count);

  out:
+	irq_to_cpuid_unlock(d, flags);
  	*val = !!(status & (1 << d->hwirq));

  	return 0;
@@ -4102,6 +4147,7 @@ static int its_vpe_init(struct its_vpe *vpe)
  		return -ENOMEM;
  	}

+	raw_spin_lock_init(&vpe->vpe_lock);
  	vpe->vpe_id = vpe_id;
  	vpe->vpt_page = vpt_page;
  	if (gic_rdists->has_rvpeid)
diff --git a/include/linux/irqchip/arm-gic-v4.h 
b/include/linux/irqchip/arm-gic-v4.h
index 46c167a6349f..fc43a63875a3 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -60,6 +60,7 @@ struct its_vpe {
  		};
  	};

+	raw_spinlock_t		vpe_lock;
  	/*
  	 * This collection ID is used to indirect the target
  	 * redistributor for this VPE. The ID itself isn't involved in
Zenghui Yu Feb. 19, 2020, 11:50 a.m. UTC | #4
Hi Marc,

On 2020/2/18 23:31, Marc Zyngier wrote:
> Hi Zenghui,
> 
> On 2020-02-18 09:27, Marc Zyngier wrote:
>> Hi Zenghui,
>>
>> On 2020-02-18 07:00, Zenghui Yu wrote:
>>> Hi Marc,
> 
> [...]
> 
>>> There might be a race on reading the 'vpe->col_idx' against a concurrent
>>> vPE schedule (col_idx will be modified in its_vpe_set_affinity)? Will we
>>> end up accessing the GICR_VSGI* registers of the old redistributor,
>>> while the vPE is now resident on the new one? Or is it harmful?
>>
>> Very well spotted. There is a potential problem if old and new RDs are 
>> not part
>> of the same CommonLPIAff group.
>>
>>> The same question for direct_lpi_inv(), where 'vpe->col_idx' will be
>>> used in irq_to_cpuid().
>>
>> Same problem indeed. We need to ensure that no VMOVP operation can 
>> occur whilst
>> we use col_idx to access a redistributor. This means a vPE lock of 
>> some sort
>> that will protect the affinity.

Yeah, I had the same view here, a vPE level lock might help.

>> But I think there is a slightly more general problem here, which we 
>> failed to
>> see initially: the same issue exists for physical LPIs, as col_map[] 
>> can be
>> updated (its_set_affinity()) in parallel with a direct invalidate.
>>
>> The good old invalidation through the ITS does guarantee that the two 
>> operation
>> don't overlap, but direct invalidation breaks it.

Agreed!

>> Let me have a think about it.
> 
> So I've thought about it, wrote a patch, and I don't really like the 
> look of it.
> This is pretty invasive, and we end-up serializing a lot more than we 
> used to
> (the repurposing of vlpi_lock to a general "lpi mapping lock" is 
> probably too
> coarse).
> 
> It of course needs splitting over at least three patches, but it'd be 
> good if
> you could have a look (applies on top of the whole series).

So the first thing is that

1. there're races on choosing the RD against a concurrent LPI/vPE
affinity changing.

And sure, I will have a look on the following patch! But I'd first
talk about some other issues I've seen today...

2. Another potential race is on accessing the same RD by different
CPUs, which gets more obvious after introducing the GICv4.1.
We can as least take two registers for example:

  - GICR_VSGIR:
    Let's assume that vPE0 is just descheduled from CPU0 and then vPE1
    is scheduled on. CPU0 is writing its GICR_VSGIR with vpeid1 to serve
    vPE1's GICR_ISPENDR0 read trap, whilst userspace is getting vSGI's
    pending state of vPE0 (i.e., by a debugfs read) thus another CPU
    will try to write the same GICR_VSGIR with vpeid0... without waiting
    GICR_VSGIPENDR.Busy reads as 0.
    This is a CONSTRAINED UNPREDICTABLE behavior from the spec and at
    least one of the queries will fail.
  - GICR_INV{LPI,ALL}R:
    Multiple LPIs can be targeted to the same RD, thus multiple writes to
    the same GICR_INVLPIR (with different INITID, even with different V)
    can happen concurrently...

Above comes from the fact that the same redistributor can be accessed
(concurrently) by multiple CPUs but we don't have a mechanism to ensure
some extent of serialization. I also had a look at how KVM will handle
this kind of access, but

3. it looks like KVM makes the assumption that the per-RD MMIO region
will only be accessed by the associated VCPU?  But I think this's not
restricted by the architecture, we can do it better.  Or I've just
missed some important points here.


I will look at the following patch asap but may need some time to
think about all above, and do some fix if possible :-)

> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index 7656b353a95f..0ed286dba827 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c

[...]


Thanks,
Zenghui
Zenghui Yu Feb. 19, 2020, 3:18 p.m. UTC | #5
On 2020/2/19 19:50, Zenghui Yu wrote:
> 3. it looks like KVM makes the assumption that the per-RD MMIO region
> will only be accessed by the associated VCPU?  But I think this's not
> restricted by the architecture, we can do it better.  Or I've just
> missed some important points here.

(After some basic tests, KVM actually does the right thing!)
So I must have some mis-understanding on this point, please
ignore it.  I'll dig it further myself, sorry for the noisy.


Thanks,
Zenghui
Zenghui Yu Feb. 20, 2020, 3:11 a.m. UTC | #6
Hi Marc,

On 2020/2/18 23:31, Marc Zyngier wrote:
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index 7656b353a95f..0ed286dba827 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -144,7 +144,7 @@ struct event_lpi_map {
>       u16            *col_map;
>       irq_hw_number_t        lpi_base;
>       int            nr_lpis;
> -    raw_spinlock_t        vlpi_lock;
> +    raw_spinlock_t        map_lock;

So we use map_lock to protect both LPI's and VLPI's mapping affinity of
a device, and use vpe_lock to protect vPE's affinity, OK.

>       struct its_vm        *vm;
>       struct its_vlpi_map    *vlpi_maps;
>       int            nr_vlpis;
> @@ -240,15 +240,33 @@ static struct its_vlpi_map *get_vlpi_map(struct 
> irq_data *d)
>       return NULL;
>   }
> 
> -static int irq_to_cpuid(struct irq_data *d)
> +static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
>   {
> -    struct its_device *its_dev = irq_data_get_irq_chip_data(d);
>       struct its_vlpi_map *map = get_vlpi_map(d);
> +    int cpu;
> 
> -    if (map)
> -        return map->vpe->col_idx;
> +    if (map) {
> +        raw_spin_lock_irqsave(&map->vpe->vpe_lock, *flags);
> +        cpu = map->vpe->col_idx;
> +    } else {
> +        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
> +        raw_spin_lock_irqsave(&its_dev->event_map.map_lock, *flags);
> +        cpu = its_dev->event_map.col_map[its_get_event_id(d)];
> +    }
> 
> -    return its_dev->event_map.col_map[its_get_event_id(d)];
> +    return cpu;
> +}

This helper is correct for normal LPIs and VLPIs, but wrong for per-vPE
IRQ (doorbell) and vSGIs. irq_data_get_irq_chip_data() gets confused by
both of them.

> +
> +static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
> +{
> +    struct its_vlpi_map *map = get_vlpi_map(d);
> +
> +    if (map) {
> +        raw_spin_unlock_irqrestore(&map->vpe->vpe_lock, flags);
> +    } else {
> +        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
> +        raw_spin_unlock_irqrestore(&its_dev->event_map.map_lock, flags);
> +    }
>   }

The same problem for this helper.

> 
>   static struct its_collection *valid_col(struct its_collection *col)
> @@ -1384,6 +1402,8 @@ static void direct_lpi_inv(struct irq_data *d)
>   {
>       struct its_vlpi_map *map = get_vlpi_map(d);
>       void __iomem *rdbase;
> +    unsigned long flags;
> +    int cpu;
>       u64 val;
> 
>       if (map) {
> @@ -1399,10 +1419,12 @@ static void direct_lpi_inv(struct irq_data *d)
>       }
> 
>       /* Target the redistributor this LPI is currently routed to */
> -    rdbase = per_cpu_ptr(gic_rdists->rdist, irq_to_cpuid(d))->rd_base;
> +    cpu = irq_to_cpuid_lock(d, &flags);
> +    rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
>       gic_write_lpir(val, rdbase + GICR_INVLPIR);
> 
>       wait_for_syncr(rdbase);
> +    irq_to_cpuid_unlock(d, flags);
>   }
> 
>   static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
> @@ -1471,11 +1493,11 @@ static void its_unmask_irq(struct irq_data *d)
>   static int its_set_affinity(struct irq_data *d, const struct cpumask 
> *mask_val,
>                   bool force)
>   {
> -    unsigned int cpu;
>       const struct cpumask *cpu_mask = cpu_online_mask;
>       struct its_device *its_dev = irq_data_get_irq_chip_data(d);
>       struct its_collection *target_col;
> -    u32 id = its_get_event_id(d);
> +    unsigned int from, cpu;
> +    unsigned long flags;
> 
>       /* A forwarded interrupt should use irq_set_vcpu_affinity */
>       if (irqd_is_forwarded_to_vcpu(d))
> @@ -1496,12 +1518,16 @@ static int its_set_affinity(struct irq_data *d, 
> const struct cpumask *mask_val,
>           return -EINVAL;
> 
>       /* don't set the affinity when the target cpu is same as current 
> one */
> -    if (cpu != its_dev->event_map.col_map[id]) {
> +    from = irq_to_cpuid_lock(d, &flags);
> +    if (cpu != from) {
> +        u32 id = its_get_event_id(d);
> +
>           target_col = &its_dev->its->collections[cpu];
>           its_send_movi(its_dev, target_col, id);
>           its_dev->event_map.col_map[id] = cpu;
>           irq_data_update_effective_affinity(d, cpumask_of(cpu));
>       }
> +    irq_to_cpuid_unlock(d, flags);
> 
>       return IRQ_SET_MASK_OK_DONE;
>   }
> @@ -1636,7 +1662,7 @@ static int its_vlpi_map(struct irq_data *d, struct 
> its_cmd_info *info)
>       if (!info->map)
>           return -EINVAL;
> 
> -    raw_spin_lock(&its_dev->event_map.vlpi_lock);
> +    raw_spin_lock(&its_dev->event_map.map_lock);
> 
>       if (!its_dev->event_map.vm) {
>           struct its_vlpi_map *maps;
> @@ -1685,7 +1711,7 @@ static int its_vlpi_map(struct irq_data *d, struct 
> its_cmd_info *info)
>       }
> 
>   out:
> -    raw_spin_unlock(&its_dev->event_map.vlpi_lock);
> +    raw_spin_unlock(&its_dev->event_map.map_lock);
>       return ret;
>   }
> 
> @@ -1695,7 +1721,7 @@ static int its_vlpi_get(struct irq_data *d, struct 
> its_cmd_info *info)
>       struct its_vlpi_map *map;
>       int ret = 0;
> 
> -    raw_spin_lock(&its_dev->event_map.vlpi_lock);
> +    raw_spin_lock(&its_dev->event_map.map_lock);
> 
>       map = get_vlpi_map(d);
> 
> @@ -1708,7 +1734,7 @@ static int its_vlpi_get(struct irq_data *d, struct 
> its_cmd_info *info)
>       *info->map = *map;
> 
>   out:
> -    raw_spin_unlock(&its_dev->event_map.vlpi_lock);
> +    raw_spin_unlock(&its_dev->event_map.map_lock);
>       return ret;
>   }
> 
> @@ -1718,7 +1744,7 @@ static int its_vlpi_unmap(struct irq_data *d)
>       u32 event = its_get_event_id(d);
>       int ret = 0;
> 
> -    raw_spin_lock(&its_dev->event_map.vlpi_lock);
> +    raw_spin_lock(&its_dev->event_map.map_lock);
> 
>       if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
>           ret = -EINVAL;
> @@ -1748,7 +1774,7 @@ static int its_vlpi_unmap(struct irq_data *d)
>       }
> 
>   out:
> -    raw_spin_unlock(&its_dev->event_map.vlpi_lock);
> +    raw_spin_unlock(&its_dev->event_map.map_lock);
>       return ret;
>   }
> 
> @@ -3193,7 +3219,7 @@ static struct its_device *its_create_device(struct 
> its_node *its, u32 dev_id,
>       dev->event_map.col_map = col_map;
>       dev->event_map.lpi_base = lpi_base;
>       dev->event_map.nr_lpis = nr_lpis;
> -    raw_spin_lock_init(&dev->event_map.vlpi_lock);
> +    raw_spin_lock_init(&dev->event_map.map_lock);
>       dev->device_id = dev_id;
>       INIT_LIST_HEAD(&dev->entry);
> 
> @@ -3560,6 +3586,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
>   {
>       struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
>       int from, cpu = cpumask_first(mask_val);
> +    unsigned long flags;
> 
>       /*
>        * Changing affinity is mega expensive, so let's be as lazy as
> @@ -3567,6 +3594,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
>        * into the proxy device, we need to move the doorbell
>        * interrupt to its new location.
>        */
> +    raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
>       if (vpe->col_idx == cpu)
>           goto out;
> 
> @@ -3586,6 +3614,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
> 
>   out:
>       irq_data_update_effective_affinity(d, cpumask_of(cpu));
> +    raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
> 
>       return IRQ_SET_MASK_OK_DONE;
>   }
> @@ -3695,11 +3724,15 @@ static void its_vpe_send_inv(struct irq_data *d)
> 
>       if (gic_rdists->has_direct_lpi) {
>           void __iomem *rdbase;
> +        unsigned long flags;
> +        int cpu;
> 
>           /* Target the redistributor this VPE is currently known on */
> -        rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
> +        cpu = irq_to_cpuid_lock(d, &flags);
> +        rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
>           gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
>           wait_for_syncr(rdbase);
> +        irq_to_cpuid_unlock(d, flags);
>       } else {
>           its_vpe_send_cmd(vpe, its_send_inv);
>       }

Do we really need to grab the vpe_lock for those which are belong to
the same irqchip with its_vpe_set_affinity()? The IRQ core code should
already ensure the mutual exclusion among them, wrong?

> @@ -3735,14 +3768,18 @@ static int its_vpe_set_irqchip_state(struct 
> irq_data *d,
> 
>       if (gic_rdists->has_direct_lpi) {
>           void __iomem *rdbase;
> +        unsigned long flags;
> +        int cpu;
> 
> -        rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
> +        cpu = irq_to_cpuid_lock(d, &flags);
> +        rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
>           if (state) {
>               gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR);
>           } else {
>               gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
>               wait_for_syncr(rdbase);
>           }
> +        irq_to_cpuid_unlock(d, flags);
>       } else {
>           if (state)
>               its_vpe_send_cmd(vpe, its_send_int);
> @@ -3854,14 +3891,17 @@ static void its_vpe_4_1_deschedule(struct 
> its_vpe *vpe,
>   static void its_vpe_4_1_invall(struct its_vpe *vpe)
>   {
>       void __iomem *rdbase;
> +    unsigned long flags;
>       u64 val;
> 
>       val  = GICR_INVALLR_V;
>       val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id);
> 
>       /* Target the redistributor this vPE is currently known on */
> +    raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
>       rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
>       gic_write_lpir(val, rdbase + GICR_INVALLR);
> +    raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
>   }
> 
>   static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void 
> *vcpu_info)
> @@ -3960,13 +4000,17 @@ static int its_sgi_get_irqchip_state(struct 
> irq_data *d,
>                        enum irqchip_irq_state which, bool *val)
>   {
>       struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
> -    void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + 
> SZ_128K;
> +    void __iomem *base;
> +    unsigned long flags;
>       u32 count = 1000000;    /* 1s! */
>       u32 status;
> +    int cpu;
> 
>       if (which != IRQCHIP_STATE_PENDING)
>           return -EINVAL;
> 
> +    cpu = irq_to_cpuid_lock(d, &flags);
> +    base = gic_data_rdist_cpu(cpu)->rd_base + SZ_128K;
>       writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
>       do {
>           status = readl_relaxed(base + GICR_VSGIPENDR);
> @@ -3983,6 +4027,7 @@ static int its_sgi_get_irqchip_state(struct 
> irq_data *d,
>       } while(count);
> 
>   out:
> +    irq_to_cpuid_unlock(d, flags);
>       *val = !!(status & (1 << d->hwirq));
> 
>       return 0;
> @@ -4102,6 +4147,7 @@ static int its_vpe_init(struct its_vpe *vpe)
>           return -ENOMEM;
>       }
> 
> +    raw_spin_lock_init(&vpe->vpe_lock);
>       vpe->vpe_id = vpe_id;
>       vpe->vpt_page = vpt_page;
>       if (gic_rdists->has_rvpeid)
> diff --git a/include/linux/irqchip/arm-gic-v4.h 
> b/include/linux/irqchip/arm-gic-v4.h
> index 46c167a6349f..fc43a63875a3 100644
> --- a/include/linux/irqchip/arm-gic-v4.h
> +++ b/include/linux/irqchip/arm-gic-v4.h
> @@ -60,6 +60,7 @@ struct its_vpe {
>           };
>       };
> 
> +    raw_spinlock_t        vpe_lock;
>       /*
>        * This collection ID is used to indirect the target
>        * redistributor for this VPE. The ID itself isn't involved in

I'm not sure if it's good enough, it may gets much clearer after
splitting.


Thanks,
Zenghui
Marc Zyngier Feb. 28, 2020, 7:37 p.m. UTC | #7
On 2020-02-20 03:11, Zenghui Yu wrote:
> Hi Marc,
> 
> On 2020/2/18 23:31, Marc Zyngier wrote:
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
>> b/drivers/irqchip/irq-gic-v3-its.c
>> index 7656b353a95f..0ed286dba827 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -144,7 +144,7 @@ struct event_lpi_map {
>>       u16            *col_map;
>>       irq_hw_number_t        lpi_base;
>>       int            nr_lpis;
>> -    raw_spinlock_t        vlpi_lock;
>> +    raw_spinlock_t        map_lock;
> 
> So we use map_lock to protect both LPI's and VLPI's mapping affinity of
> a device, and use vpe_lock to protect vPE's affinity, OK.
> 
>>       struct its_vm        *vm;
>>       struct its_vlpi_map    *vlpi_maps;
>>       int            nr_vlpis;
>> @@ -240,15 +240,33 @@ static struct its_vlpi_map *get_vlpi_map(struct 
>> irq_data *d)
>>       return NULL;
>>   }
>> 
>> -static int irq_to_cpuid(struct irq_data *d)
>> +static int irq_to_cpuid_lock(struct irq_data *d, unsigned long 
>> *flags)
>>   {
>> -    struct its_device *its_dev = irq_data_get_irq_chip_data(d);
>>       struct its_vlpi_map *map = get_vlpi_map(d);
>> +    int cpu;
>> 
>> -    if (map)
>> -        return map->vpe->col_idx;
>> +    if (map) {
>> +        raw_spin_lock_irqsave(&map->vpe->vpe_lock, *flags);
>> +        cpu = map->vpe->col_idx;
>> +    } else {
>> +        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
>> +        raw_spin_lock_irqsave(&its_dev->event_map.map_lock, *flags);
>> +        cpu = its_dev->event_map.col_map[its_get_event_id(d)];
>> +    }
>> 
>> -    return its_dev->event_map.col_map[its_get_event_id(d)];
>> +    return cpu;
>> +}
> 
> This helper is correct for normal LPIs and VLPIs, but wrong for per-vPE
> IRQ (doorbell) and vSGIs. irq_data_get_irq_chip_data() gets confused by
> both of them.

Yes, I've fixed that in the current state of the tree last week. Do have 
a
look if you can, but it seems to survive on both the model with v4.1 and
my D05.

[...]

>> -        rdbase = per_cpu_ptr(gic_rdists->rdist, 
>> vpe->col_idx)->rd_base;
>> +        cpu = irq_to_cpuid_lock(d, &flags);
>> +        rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
>>           gic_write_lpir(d->parent_data->hwirq, rdbase + 
>> GICR_INVLPIR);
>>           wait_for_syncr(rdbase);
>> +        irq_to_cpuid_unlock(d, flags);
>>       } else {
>>           its_vpe_send_cmd(vpe, its_send_inv);
>>       }
> 
> Do we really need to grab the vpe_lock for those which are belong to
> the same irqchip with its_vpe_set_affinity()? The IRQ core code should
> already ensure the mutual exclusion among them, wrong?

I've been trying to think about that, but jet-lag keeps getting in the 
way.
I empirically think that you are right, but I need to go and check the 
various
code paths to be sure. Hopefully I'll have a bit more brain space next 
week.

For sure this patch tries to do too many things at once...

         M.
Marc Zyngier March 1, 2020, 7 p.m. UTC | #8
On 2020-02-28 19:37, Marc Zyngier wrote:
> On 2020-02-20 03:11, Zenghui Yu wrote:

>> Do we really need to grab the vpe_lock for those which are belong to
>> the same irqchip with its_vpe_set_affinity()? The IRQ core code should
>> already ensure the mutual exclusion among them, wrong?
> 
> I've been trying to think about that, but jet-lag keeps getting in the 
> way.
> I empirically think that you are right, but I need to go and check the 
> various
> code paths to be sure. Hopefully I'll have a bit more brain space next 
> week.

So I slept on it and came back to my senses. The only case we actually 
need
to deal with is when an affinity change impacts *another* interrupt.

There is only two instances of this issue:

- vLPIs have their *physical* affinity impacted by the affinity of the
   vPE. Their virtual affinity is of course unchanged, but the physical
   one becomes important with direct invalidation. Taking a per-VPE lock
   in such context should address the issue.

- vSGIs have the exact same issue, plus the matter of requiring some
   *extra* one when reading the pending state, which requires a RMW
   on two different registers. This requires an extra per-RD lock.

My original patch was stupidly complex, and the irq_desc lock is
perfectly enough to deal with anything that only affects the interrupt
state itself.

GICv4 + direct invalidation for vLPIs breaks this by bypassing the
serialization initially provided by the ITS, as the RD is completely
out of band. The per-vPE lock brings back this serialization.

I've updated the branch, which seems to run OK on D05. I still need
to run the usual tests on the FVP model though.

Thanks,

         M.
Zenghui Yu March 2, 2020, 8:18 a.m. UTC | #9
On 2020/3/2 3:00, Marc Zyngier wrote:
> On 2020-02-28 19:37, Marc Zyngier wrote:
>> On 2020-02-20 03:11, Zenghui Yu wrote:
> 
>>> Do we really need to grab the vpe_lock for those which are belong to
>>> the same irqchip with its_vpe_set_affinity()? The IRQ core code should
>>> already ensure the mutual exclusion among them, wrong?
>>
>> I've been trying to think about that, but jet-lag keeps getting in the 
>> way.
>> I empirically think that you are right, but I need to go and check the 
>> various
>> code paths to be sure. Hopefully I'll have a bit more brain space next 
>> week.
> 
> So I slept on it and came back to my senses. The only case we actually need
> to deal with is when an affinity change impacts *another* interrupt.
> 
> There is only two instances of this issue:
> 
> - vLPIs have their *physical* affinity impacted by the affinity of the
>    vPE. Their virtual affinity is of course unchanged, but the physical
>    one becomes important with direct invalidation. Taking a per-VPE lock
>    in such context should address the issue.
> 
> - vSGIs have the exact same issue, plus the matter of requiring some
>    *extra* one when reading the pending state, which requires a RMW
>    on two different registers. This requires an extra per-RD lock.

Agreed with both!

> 
> My original patch was stupidly complex, and the irq_desc lock is
> perfectly enough to deal with anything that only affects the interrupt
> state itself.
> 
> GICv4 + direct invalidation for vLPIs breaks this by bypassing the
> serialization initially provided by the ITS, as the RD is completely
> out of band. The per-vPE lock brings back this serialization.
> 
> I've updated the branch, which seems to run OK on D05. I still need
> to run the usual tests on the FVP model though.

I have pulled the latest branch and it looks good to me, except for
one remaining concern:

GICR_INV{LPI, ALL}R + GICR_SYNCR can also be accessed concurrently
by multiple direct invalidation, should we also use the per-RD lock
to ensure mutual exclusion?  It looks not so harmful though, as this
will only increase one's polling time against the Busy bit (in my view).

But I point it out again for confirmation.


Thanks,
Zenghui
Marc Zyngier March 2, 2020, 12:09 p.m. UTC | #10
Hi Zenghui,

On 2020-03-02 08:18, Zenghui Yu wrote:
> On 2020/3/2 3:00, Marc Zyngier wrote:
>> On 2020-02-28 19:37, Marc Zyngier wrote:
>>> On 2020-02-20 03:11, Zenghui Yu wrote:
>> 
>>>> Do we really need to grab the vpe_lock for those which are belong to
>>>> the same irqchip with its_vpe_set_affinity()? The IRQ core code 
>>>> should
>>>> already ensure the mutual exclusion among them, wrong?
>>> 
>>> I've been trying to think about that, but jet-lag keeps getting in 
>>> the way.
>>> I empirically think that you are right, but I need to go and check 
>>> the various
>>> code paths to be sure. Hopefully I'll have a bit more brain space 
>>> next week.
>> 
>> So I slept on it and came back to my senses. The only case we actually 
>> need
>> to deal with is when an affinity change impacts *another* interrupt.
>> 
>> There is only two instances of this issue:
>> 
>> - vLPIs have their *physical* affinity impacted by the affinity of the
>>    vPE. Their virtual affinity is of course unchanged, but the 
>> physical
>>    one becomes important with direct invalidation. Taking a per-VPE 
>> lock
>>    in such context should address the issue.
>> 
>> - vSGIs have the exact same issue, plus the matter of requiring some
>>    *extra* one when reading the pending state, which requires a RMW
>>    on two different registers. This requires an extra per-RD lock.
> 
> Agreed with both!
> 
>> 
>> My original patch was stupidly complex, and the irq_desc lock is
>> perfectly enough to deal with anything that only affects the interrupt
>> state itself.
>> 
>> GICv4 + direct invalidation for vLPIs breaks this by bypassing the
>> serialization initially provided by the ITS, as the RD is completely
>> out of band. The per-vPE lock brings back this serialization.
>> 
>> I've updated the branch, which seems to run OK on D05. I still need
>> to run the usual tests on the FVP model though.
> 
> I have pulled the latest branch and it looks good to me, except for
> one remaining concern:
> 
> GICR_INV{LPI, ALL}R + GICR_SYNCR can also be accessed concurrently
> by multiple direct invalidation, should we also use the per-RD lock
> to ensure mutual exclusion?  It looks not so harmful though, as this
> will only increase one's polling time against the Busy bit (in my 
> view).
> 
> But I point it out again for confirmation.

I was about to say that it doesn't really matter because it is only a
performance optimisation (and we're noty quite there yet), until I 
spotted
this great nugget in the spec:

<quote>
Writing GICR_INVLPIR or GICR_INVALLR when GICR_SYNCR.Busy==1 is 
CONSTRAINED
UNPREDICTABLE:
- The write is IGNORED .
- The invalidate specified by the write is performed.
</quote>

So we really need some form of mutual exclusion on a per-RD basis to 
ensure
that no two invalidations occur at the same time, ensuring that Busy 
clears
between the two.

Thanks for the heads up,

         M.
diff mbox series

Patch

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1e448d9a16ea..a9753435c4ff 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3915,11 +3915,67 @@  static int its_sgi_set_affinity(struct irq_data *d,
 	return -EINVAL;
 }
 
+static int its_sgi_set_irqchip_state(struct irq_data *d,
+				     enum irqchip_irq_state which,
+				     bool state)
+{
+	if (which != IRQCHIP_STATE_PENDING)
+		return -EINVAL;
+
+	if (state) {
+		struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+		struct its_node *its = find_4_1_its();
+		u64 val;
+
+		val  = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id);
+		val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq);
+		writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K);
+	} else {
+		its_configure_sgi(d, true);
+	}
+
+	return 0;
+}
+
+static int its_sgi_get_irqchip_state(struct irq_data *d,
+				     enum irqchip_irq_state which, bool *val)
+{
+	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+	void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + SZ_128K;
+	u32 count = 1000000;	/* 1s! */
+	u32 status;
+
+	if (which != IRQCHIP_STATE_PENDING)
+		return -EINVAL;
+
+	writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
+	do {
+		status = readl_relaxed(base + GICR_VSGIPENDR);
+		if (!(status & GICR_VSGIPENDR_BUSY))
+			goto out;
+
+		count--;
+		if (!count) {
+			pr_err_ratelimited("Unable to get SGI status\n");
+			goto out;
+		}
+		cpu_relax();
+		udelay(1);
+	} while(count);
+
+out:
+	*val = !!(status & (1 << d->hwirq));
+
+	return 0;
+}
+
 static struct irq_chip its_sgi_irq_chip = {
 	.name			= "GICv4.1-sgi",
 	.irq_mask		= its_sgi_mask_irq,
 	.irq_unmask		= its_sgi_unmask_irq,
 	.irq_set_affinity	= its_sgi_set_affinity,
+	.irq_set_irqchip_state	= its_sgi_set_irqchip_state,
+	.irq_get_irqchip_state	= its_sgi_get_irqchip_state,
 };
 
 static int its_sgi_irq_domain_alloc(struct irq_domain *domain,
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index a89578884263..64da945486ac 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -345,6 +345,15 @@ 
 #define GICR_VPENDBASER_4_1_VGRP1EN	(1ULL << 58)
 #define GICR_VPENDBASER_4_1_VPEID	GENMASK_ULL(15, 0)
 
+#define GICR_VSGIR			0x0080
+
+#define GICR_VSGIR_VPEID		GENMASK(15, 0)
+
+#define GICR_VSGIPENDR			0x0088
+
+#define GICR_VSGIPENDR_BUSY		(1U << 31)
+#define GICR_VSGIPENDR_PENDING		GENMASK(15, 0)
+
 /*
  * ITS registers, offsets from ITS_base
  */
@@ -368,6 +377,11 @@ 
 
 #define GITS_TRANSLATER			0x10040
 
+#define GITS_SGIR			0x20020
+
+#define GITS_SGIR_VPEID			GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID		GENMASK_ULL(7, 0)
+
 #define GITS_CTLR_ENABLE		(1U << 0)
 #define GITS_CTLR_ImDe			(1U << 1)
 #define	GITS_CTLR_ITS_NUMBER_SHIFT	4