[4/5] KVM: arm64: vgic-v3: Refactor GICv3 SGI generation

Message ID: 20230907100931.1186690-5-maz@kernel.org
State: New, archived
Series: [1/5] KVM: arm64: Simplify kvm_vcpu_get_mpidr_aff()

Commit Message

Marc Zyngier Sept. 7, 2023, 10:09 a.m. UTC
As we're about to change the way SGIs are sent, start by splitting
out some of the basic functionnality: instead of intermingling
the broadcast and non-broadcast cases with the actual SGI generation,
perform the following cleanups:

- move the SGI queuing into its own helper
- split the broadcast code from the affinity-driven code
- replace the mask/shift combinations with FIELD_GET()

The result is much more readable, and paves the way for further
optimisations.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-mmio-v3.c | 110 ++++++++++++++++-------------
 1 file changed, 59 insertions(+), 51 deletions(-)
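
For reference, the FIELD_GET() conversion named in the last bullet is a
mechanical substitution. A minimal before/after sketch (with the
ICC_SGI1R_* definitions coming from include/linux/irqchip/arm-gic-v3.h
and FIELD_GET() from include/linux/bitfield.h):

#include <linux/bitfield.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/types.h>

static u32 sgi_id_before(u64 reg)
{
	/* open-coded mask and shift */
	return (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
}

static u32 sgi_id_after(u64 reg)
{
	/* FIELD_GET() derives the shift from the mask at compile time */
	return FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg);
}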

Comments

Zenghui Yu Sept. 10, 2023, 4:25 p.m. UTC | #1
Hi Marc,

On 2023/9/7 18:09, Marc Zyngier wrote:
> As we're about to change the way SGIs are sent, start by splitting
> out some of the basic functionnality: instead of intermingling

functionality

> the broadcast and non-broadcast cases with the actual SGI generation,
> perform the following cleanups:
> 
> - move the SGI queuing into its own helper
> - split the broadcast code from the affinity-driven code
> - replace the mask/shift combinations with FIELD_GET()
> 
> The result is much more readable, and paves the way for further
> optimisations.

Indeed!

> @@ -1070,19 +1102,30 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
>  {
>  	struct kvm *kvm = vcpu->kvm;
>  	struct kvm_vcpu *c_vcpu;
> -	u16 target_cpus;
> +	unsigned long target_cpus;
>  	u64 mpidr;
> -	int sgi;
> -	int vcpu_id = vcpu->vcpu_id;
> -	bool broadcast;
> -	unsigned long c, flags;
> -
> -	sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
> -	broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
> -	target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
> +	u32 sgi;
> +	unsigned long c;
> +
> +	sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg);
> +
> +	/* Broadcast */
> +	if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) {
> +		kvm_for_each_vcpu(c, c_vcpu, kvm) {
> +			/* Don't signal the calling VCPU */
> +			if (c_vcpu == vcpu)
> +				continue;
> +
> +			vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
> +		}
> +
> +		return;
> +	}
> +
>  	mpidr = SGI_AFFINITY_LEVEL(reg, 3);
>  	mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
>  	mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
> +	target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg);
>  
>  	/*
>  	 * We iterate over all VCPUs to find the MPIDRs matching the request.
> @@ -1091,54 +1134,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
>  	 * VCPUs when most of the times we just signal a single VCPU.
>  	 */
>  	kvm_for_each_vcpu(c, c_vcpu, kvm) {
> -		struct vgic_irq *irq;
> +		int level0;
>  
>  		/* Exit early if we have dealt with all requested CPUs */
> -		if (!broadcast && target_cpus == 0)
> +		if (target_cpus == 0)
>  			break;
> -
> -		/* Don't signal the calling VCPU */
> -		if (broadcast && c == vcpu_id)

Unrelated to this patch, but it looks like we were comparing the value
of *vcpu_idx* against vcpu_id to skip the calling VCPU. Is there a rule
in KVM that userspace should invoke KVM_CREATE_VCPU with sequential
"vcpu id"s, starting at 0, so that the user-provided vcpu_id always
equals the KVM-internal vcpu_idx for a given VCPU?

I asked because it seems that in kvm/arm64 we always use
kvm_get_vcpu(kvm, i) to obtain the kvm_vcpu pointer, even if *i* is
sometimes essentially provided by userspace...
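
For context, the two lookups differ roughly as follows (a minimal
sketch paraphrasing the include/linux/kvm_host.h helpers, not the
exact kernel code):

struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	/* indexed by vcpu_idx: dense, assigned in creation order */
	return xa_load(&kvm->vcpu_array, i);
}

struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	/* matched against the userspace-chosen KVM_CREATE_VCPU argument */
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (vcpu->vcpu_id == id)
			return vcpu;
	return NULL;
}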

Besides, the refactor itself looks good to me.

Thanks,
Zenghui
Marc Zyngier Sept. 10, 2023, 6:18 p.m. UTC | #2
On Sun, 10 Sep 2023 17:25:36 +0100,
Zenghui Yu <zenghui.yu@linux.dev> wrote:
> 
> Hi Marc,
> 
> On 2023/9/7 18:09, Marc Zyngier wrote:
> > As we're about to change the way SGIs are sent, start by splitting
> > out some of the basic functionnality: instead of intermingling
> 
> functionality
> 
> > the broadcast and non-broadcast cases with the actual SGI generation,
> > perform the following cleanups:
> > 
> > - move the SGI queuing into its own helper
> > - split the broadcast code from the affinity-driven code
> > - replace the mask/shift combinations with FIELD_GET()
> > 
> > The result is much more readable, and paves the way for further
> > optimisations.
> 
> Indeed!
> 
> > @@ -1070,19 +1102,30 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
> >  {
> >  	struct kvm *kvm = vcpu->kvm;
> >  	struct kvm_vcpu *c_vcpu;
> > -	u16 target_cpus;
> > +	unsigned long target_cpus;
> >  	u64 mpidr;
> > -	int sgi;
> > -	int vcpu_id = vcpu->vcpu_id;
> > -	bool broadcast;
> > -	unsigned long c, flags;
> > -
> > -	sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
> > -	broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
> > -	target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
> > +	u32 sgi;
> > +	unsigned long c;
> > +
> > +	sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg);
> > +
> > +	/* Broadcast */
> > +	if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) {
> > +		kvm_for_each_vcpu(c, c_vcpu, kvm) {
> > +			/* Don't signal the calling VCPU */
> > +			if (c_vcpu == vcpu)
> > +				continue;
> > +
> > +			vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
> > +		}
> > +
> > +		return;
> > +	}
> > +
> >  	mpidr = SGI_AFFINITY_LEVEL(reg, 3);
> >  	mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
> >  	mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
> > +	target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg);
> > 
> >  	/*
> >  	 * We iterate over all VCPUs to find the MPIDRs matching the request.
> > @@ -1091,54 +1134,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
> >  	 * VCPUs when most of the times we just signal a single VCPU.
> >  	 */
> >  	kvm_for_each_vcpu(c, c_vcpu, kvm) {
> > -		struct vgic_irq *irq;
> > +		int level0;
> > 
> >  		/* Exit early if we have dealt with all requested CPUs */
> > -		if (!broadcast && target_cpus == 0)
> > +		if (target_cpus == 0)
> >  			break;
> > -
> > -		/* Don't signal the calling VCPU */
> > -		if (broadcast && c == vcpu_id)
> 
> Unrelated to this patch, but it looks like we were comparing the value
> of *vcpu_idx* against vcpu_id to skip the calling VCPU.

Huh, well caught. That was definitely a bug that has been there
forever, and only you spotted it. Guess I should flag it as a stable
candidate.
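
For the record, the pre-existing check mixed the two namespaces: c
comes from kvm_for_each_vcpu() and is therefore a vcpu_idx, while
vcpu_id is the userspace-chosen identifier. From the patch:

	/* old: only correct while vcpu_idx == vcpu_id for every vCPU */
	if (broadcast && c == vcpu_id)
		continue;

	/* new: compare the vcpu pointers, immune to the numbering */
	if (c_vcpu == vcpu)
		continue;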

> Is there a rule in KVM that userspace should invoke KVM_CREATE_VCPU
> with sequential "vcpu id"s, starting at 0, so that the user-provided
> vcpu_id always equals the KVM-internal vcpu_idx for a given VCPU?

I don't think there is any such rule. As far as I can tell, any number
will do as long as it is within the range [0, max_vcpu_id). Of course,
max_vcpu_id doesn't even exist on arm64. From what I can tell, this is
just some random number between 0 and 511 for us (GICv2
notwithstanding).
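
To illustrate, something like this is perfectly legal from userspace
(a sketch; the value 400 is made up, and vm_fd is assumed to come from
KVM_CREATE_VM):

#include <sys/ioctl.h>
#include <linux/kvm.h>

int create_sparse_vcpu(int vm_fd)
{
	/* first vCPU ever created: vcpu_id == 400, but vcpu_idx == 0 */
	return ioctl(vm_fd, KVM_CREATE_VCPU, 400);
}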

> I asked because it seems that in kvm/arm64 we always use
> kvm_get_vcpu(kvm, i) to obtain the kvm_vcpu pointer, even if *i* is
> sometimes essentially provided by userspace...

Huh, this is incredibly dodgy. I had a go at a few occurrences (see
below), but this is hardly a complete list.

> Besides, the refactor itself looks good to me.

Cool, thanks!

	M.

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4d38cb..e32c867e7b48 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -458,7 +458,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 				   timer_ctx->irq.level);
 
 	if (!userspace_irqchip(vcpu->kvm)) {
-		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_idx,
 					  timer_irq(timer_ctx),
 					  timer_ctx->irq.level,
 					  timer_ctx);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a3b13281d38a..1f7b074b81df 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -439,9 +439,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 * We might get preempted before the vCPU actually runs, but
 	 * over-invalidation doesn't affect correctness.
 	 */
-	if (*last_ran != vcpu->vcpu_id) {
+	if (*last_ran != vcpu->vcpu_idx) {
 		kvm_call_hyp(__kvm_flush_cpu_context, mmu);
-		*last_ran = vcpu->vcpu_id;
+		*last_ran = vcpu->vcpu_idx;
 	}
 
 	vcpu->cpu = cpu;
@@ -1207,7 +1207,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 		if (vcpu_idx >= nrcpus)
 			return -EINVAL;
 
-		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		vcpu = kvm_get_vcpu_by_id(kvm, vcpu_idx);
 		if (!vcpu)
 			return -EINVAL;
 
@@ -1222,14 +1222,14 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 		if (vcpu_idx >= nrcpus)
 			return -EINVAL;
 
-		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		vcpu = kvm_get_vcpu_by_id(kvm, vcpu_idx);
 		if (!vcpu)
 			return -EINVAL;
 
 		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
 			return -EINVAL;
 
-		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
+		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_idx, irq_num, level, NULL);
 	case KVM_ARM_IRQ_TYPE_SPI:
 		if (!irqchip_in_kernel(kvm))
 			return -ENXIO;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 6b066e04dc5d..4448940b6d79 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -348,7 +348,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
 	pmu->irq_level = overflow;
 
 	if (likely(irqchip_in_kernel(vcpu->kvm))) {
-		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_idx,
 					      pmu->irq_num, overflow, pmu);
 		WARN_ON(ret);
 	}
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 07aa0437125a..85606a531dc3 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -166,7 +166,7 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
 
 	if (vcpu) {
 		hdr = "VCPU";
-		id = vcpu->vcpu_id;
+		id = vcpu->vcpu_idx;
 	}
 
 	seq_printf(s, "\n");
@@ -212,7 +212,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 		      "     %2d "
 		      "\n",
 			type, irq->intid,
-			(irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
+			(irq->target_vcpu) ? irq->target_vcpu->vcpu_idx : -1,
 			pending,
 			irq->line_level,
 			irq->active,
@@ -224,7 +224,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 			irq->mpidr,
 			irq->source,
 			irq->priority,
-			(irq->vcpu) ? irq->vcpu->vcpu_id : -1);
+			(irq->vcpu) ? irq->vcpu->vcpu_idx : -1);
 }
 
 static int vgic_debug_show(struct seq_file *s, void *v)
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 212b73a715c1..82b264ad68c4 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -345,7 +345,7 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
 	if (cpuid >= atomic_read(&dev->kvm->online_vcpus))
 		return -EINVAL;
 
-	reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+	reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid);
 	reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 
 	return 0;
Zenghui Yu Sept. 11, 2023, 3:57 p.m. UTC | #3
On 2023/9/11 02:18, Marc Zyngier wrote:
> On Sun, 10 Sep 2023 17:25:36 +0100,
> Zenghui Yu <zenghui.yu@linux.dev> wrote:
>>
>> Hi Marc,
>>
>> I asked because it seems that in kvm/arm64 we always use
>> kvm_get_vcpu(kvm, i) to obtain the kvm_vcpu pointer, even if *i* is
>> sometimes essentially provided by userspace...
> 
> Huh, this is incredibly dodgy. I had a go at a few occurrences (see
> below), but this is hardly a complete list.

Another case is all the kvm_get_vcpu(kvm, target_addr) calls in the
vgic-its emulation code. We expose GITS_TYPER.PTA=0 to the guest,
which indicates that the target address corresponds to the PE number
specified by GICR_TYPER.Processor_Number, which KVM currently encodes
as vcpu->vcpu_id.
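
Presumably a fix would wrap the lookup along these lines (a sketch
only; the helper name is made up, target_addr as stored in struct
its_collection):

static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm,
					   struct its_collection *col)
{
	/*
	 * target_addr holds GICR_TYPER.Processor_Number, i.e. a
	 * vcpu_id, so the index-based kvm_get_vcpu() is only correct
	 * while vcpu_id == vcpu_idx for every vCPU. Look up by ID
	 * instead, matching what the guest was actually told:
	 */
	return kvm_get_vcpu_by_id(kvm, col->target_addr);
}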

Thanks,
Zenghui
Marc Zyngier Sept. 12, 2023, 1:07 p.m. UTC | #4
On Mon, 11 Sep 2023 16:57:39 +0100,
Zenghui Yu <zenghui.yu@linux.dev> wrote:
> 
> On 2023/9/11 02:18, Marc Zyngier wrote:
> > On Sun, 10 Sep 2023 17:25:36 +0100,
> > Zenghui Yu <zenghui.yu@linux.dev> wrote:
> >> 
> >> Hi Marc,
> >> 
> >> I asked because it seems that in kvm/arm64 we always use
> >> kvm_get_vcpu(kvm, i) to obtain the kvm_vcpu pointer, even if *i* is
> >> sometimes essentially provided by userspace...
> > 
> > Huh, this is incredibly dodgy. I had a go at a few occurrences (see
> > below), but this is hardly a complete list.
> 
> Another case is all the kvm_get_vcpu(kvm, target_addr) calls in the
> vgic-its emulation code. We expose GITS_TYPER.PTA=0 to the guest,
> which indicates that the target address corresponds to the PE number
> specified by GICR_TYPER.Processor_Number, which KVM currently encodes
> as vcpu->vcpu_id.

Yup, that's indeed missing. I'm going to hack kvmtool to generate
stupid vcpu_ids and see what explodes...

Thanks,

	M.

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 188d2187eede..88b8d4524854 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -1052,6 +1052,38 @@  static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
 	((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
 	>> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
 
+static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1)
+{
+	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+	/*
+	 * An access targeting Group0 SGIs can only generate
+	 * those, while an access targeting Group1 SGIs can
+	 * generate interrupts of either group.
+	 */
+	if (!irq->group || allow_group1) {
+		if (!irq->hw) {
+			irq->pending_latch = true;
+			vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+		} else {
+			/* HW SGI? Ask the GIC to inject it */
+			int err;
+			err = irq_set_irqchip_state(irq->host_irq,
+						    IRQCHIP_STATE_PENDING,
+						    true);
+			WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+		}
+	} else {
+		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+	}
+
+	vgic_put_irq(vcpu->kvm, irq);
+}
+
 /**
  * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
  * @vcpu: The VCPU requesting a SGI
@@ -1070,19 +1102,30 @@  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *c_vcpu;
-	u16 target_cpus;
+	unsigned long target_cpus;
 	u64 mpidr;
-	int sgi;
-	int vcpu_id = vcpu->vcpu_id;
-	bool broadcast;
-	unsigned long c, flags;
-
-	sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
-	broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
-	target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
+	u32 sgi;
+	unsigned long c;
+
+	sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg);
+
+	/* Broadcast */
+	if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) {
+		kvm_for_each_vcpu(c, c_vcpu, kvm) {
+			/* Don't signal the calling VCPU */
+			if (c_vcpu == vcpu)
+				continue;
+
+			vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
+		}
+
+		return;
+	}
+
 	mpidr = SGI_AFFINITY_LEVEL(reg, 3);
 	mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
 	mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
+	target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg);
 
 	/*
 	 * We iterate over all VCPUs to find the MPIDRs matching the request.
@@ -1091,54 +1134,19 @@  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
 	 * VCPUs when most of the times we just signal a single VCPU.
 	 */
 	kvm_for_each_vcpu(c, c_vcpu, kvm) {
-		struct vgic_irq *irq;
+		int level0;
 
 		/* Exit early if we have dealt with all requested CPUs */
-		if (!broadcast && target_cpus == 0)
+		if (target_cpus == 0)
 			break;
-
-		/* Don't signal the calling VCPU */
-		if (broadcast && c == vcpu_id)
+		level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
+		if (level0 == -1)
 			continue;
 
-		if (!broadcast) {
-			int level0;
-
-			level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
-			if (level0 == -1)
-				continue;
-
-			/* remove this matching VCPU from the mask */
-			target_cpus &= ~BIT(level0);
-		}
+		/* remove this matching VCPU from the mask */
+		target_cpus &= ~BIT(level0);
 
-		irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
-
-		raw_spin_lock_irqsave(&irq->irq_lock, flags);
-
-		/*
-		 * An access targeting Group0 SGIs can only generate
-		 * those, while an access targeting Group1 SGIs can
-		 * generate interrupts of either group.
-		 */
-		if (!irq->group || allow_group1) {
-			if (!irq->hw) {
-				irq->pending_latch = true;
-				vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
-			} else {
-				/* HW SGI? Ask the GIC to inject it */
-				int err;
-				err = irq_set_irqchip_state(irq->host_irq,
-							    IRQCHIP_STATE_PENDING,
-							    true);
-				WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
-				raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-			}
-		} else {
-			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-		}
-
-		vgic_put_irq(vcpu->kvm, irq);
+		vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
 	}
 }