Message ID | 1438962347-17541-8-git-send-email-marc.zyngier@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Salut Marc, I know that this patch is already merged, but .... On 07/08/15 16:45, Marc Zyngier wrote: > diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c > index 51c9900..9d009d2 100644 ... > @@ -1364,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) > return level_pending; > } > > +/* > + * Save the physical active state, and reset it to inactive. > + * > + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. > + */ > +static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) > +{ > + struct irq_phys_map *map; > + int ret; > + > + if (!(vlr.state & LR_HW)) > + return 0; > + > + map = vgic_irq_map_search(vcpu, vlr.irq); > + BUG_ON(!map || !map->active); > + > + ret = irq_get_irqchip_state(map->irq, > + IRQCHIP_STATE_ACTIVE, > + &map->active); > + > + WARN_ON(ret); > + > + if (map->active) { > + ret = irq_set_irqchip_state(map->irq, > + IRQCHIP_STATE_ACTIVE, > + false); > + WARN_ON(ret); > + return 0; > + } > + > + return 1; > +} > + > /* Sync back the VGIC state after a guest run */ > static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > { > @@ -1378,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > elrsr = vgic_get_elrsr(vcpu); > elrsr_ptr = u64_to_bitmask(&elrsr); > > - /* Clear mappings for empty LRs */ > - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { > + /* Deal with HW interrupts, and clear mappings for empty LRs */ > + for (lr = 0; lr < vgic->nr_lr; lr++) { > struct vgic_lr vlr; > > - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) > + if (!test_bit(lr, vgic_cpu->lr_used)) > continue; > > vlr = vgic_get_lr(vcpu, lr); > + if (vgic_sync_hwirq(vcpu, vlr)) { > + /* > + * So this is a HW interrupt that the guest > + * EOI-ed. Clean the LR state and allow the > + * interrupt to be sampled again. > + */ > + vlr.state = 0; > + vlr.hwirq = 0; > + vgic_set_lr(vcpu, lr, vlr); > + vgic_irq_clear_queued(vcpu, vlr.irq); Isn't this line altering common VGIC state without holding the lock? Eric removed the coarse dist->lock around the whole __kvm_vgic_sync_hwstate() function, we take it now in vgic_process_maintenance(), but don't hold it here AFAICT. As long as we are only dealing with private timer IRQs this is probably not a problem, but the IRQ number could be a SPI as well, right? Cheers, Andre. > + set_bit(lr, elrsr_ptr); > + } > + > + if (!test_bit(lr, elrsr_ptr)) > + continue; > + > + clear_bit(lr, vgic_cpu->lr_used); > > BUG_ON(vlr.irq >= dist->nr_irqs); > vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Sep 23, 2015 at 06:55:04PM +0100, Andre Przywara wrote: > Salut Marc, > > I know that this patch is already merged, but .... > > On 07/08/15 16:45, Marc Zyngier wrote: > > diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c > > index 51c9900..9d009d2 100644 > ... > > @@ -1364,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) > > return level_pending; > > } > > > > +/* > > + * Save the physical active state, and reset it to inactive. > > + * > > + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. > > + */ > > +static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) > > +{ > > + struct irq_phys_map *map; > > + int ret; > > + > > + if (!(vlr.state & LR_HW)) > > + return 0; > > + > > + map = vgic_irq_map_search(vcpu, vlr.irq); > > + BUG_ON(!map || !map->active); > > + > > + ret = irq_get_irqchip_state(map->irq, > > + IRQCHIP_STATE_ACTIVE, > > + &map->active); > > + > > + WARN_ON(ret); > > + > > + if (map->active) { > > + ret = irq_set_irqchip_state(map->irq, > > + IRQCHIP_STATE_ACTIVE, > > + false); > > + WARN_ON(ret); > > + return 0; > > + } > > + > > + return 1; > > +} > > + > > /* Sync back the VGIC state after a guest run */ > > static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > > { > > @@ -1378,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > > elrsr = vgic_get_elrsr(vcpu); > > elrsr_ptr = u64_to_bitmask(&elrsr); > > > > - /* Clear mappings for empty LRs */ > > - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { > > + /* Deal with HW interrupts, and clear mappings for empty LRs */ > > + for (lr = 0; lr < vgic->nr_lr; lr++) { > > struct vgic_lr vlr; > > > > - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) > > + if (!test_bit(lr, vgic_cpu->lr_used)) > > continue; > > > > vlr = vgic_get_lr(vcpu, lr); > > + if (vgic_sync_hwirq(vcpu, vlr)) { > > + /* > > + * So this is a HW interrupt that the guest > > + * EOI-ed. Clean the LR state and allow the > > + * interrupt to be sampled again. > > + */ > > + vlr.state = 0; > > + vlr.hwirq = 0; > > + vgic_set_lr(vcpu, lr, vlr); > > + vgic_irq_clear_queued(vcpu, vlr.irq); > > Isn't this line altering common VGIC state without holding the lock? > Eric removed the coarse dist->lock around the whole > __kvm_vgic_sync_hwstate() function, we take it now in > vgic_process_maintenance(), but don't hold it here AFAICT. > As long as we are only dealing with private timer IRQs this is probably > not a problem, but the IRQ number could be a SPI as well, right? > I don't see a problematic race with this though, as all we're doing is to clear a bit in a bitmap, which is always checked atomically, so adding a lock around this really doesn't change anything as far as I can tell. Nevertheless, my rework series also addresses this. -Christoffer -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Christoffer, On 29/09/15 14:44, Christoffer Dall wrote: > On Wed, Sep 23, 2015 at 06:55:04PM +0100, Andre Przywara wrote: >> Salut Marc, >> >> I know that this patch is already merged, but .... >> >> On 07/08/15 16:45, Marc Zyngier wrote: >>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c >>> index 51c9900..9d009d2 100644 >> ... >>> @@ -1364,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) >>> return level_pending; >>> } >>> >>> +/* >>> + * Save the physical active state, and reset it to inactive. >>> + * >>> + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. >>> + */ >>> +static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) >>> +{ >>> + struct irq_phys_map *map; >>> + int ret; >>> + >>> + if (!(vlr.state & LR_HW)) >>> + return 0; >>> + >>> + map = vgic_irq_map_search(vcpu, vlr.irq); >>> + BUG_ON(!map || !map->active); >>> + >>> + ret = irq_get_irqchip_state(map->irq, >>> + IRQCHIP_STATE_ACTIVE, >>> + &map->active); >>> + >>> + WARN_ON(ret); >>> + >>> + if (map->active) { >>> + ret = irq_set_irqchip_state(map->irq, >>> + IRQCHIP_STATE_ACTIVE, >>> + false); >>> + WARN_ON(ret); >>> + return 0; >>> + } >>> + >>> + return 1; >>> +} >>> + >>> /* Sync back the VGIC state after a guest run */ >>> static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) >>> { >>> @@ -1378,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) >>> elrsr = vgic_get_elrsr(vcpu); >>> elrsr_ptr = u64_to_bitmask(&elrsr); >>> >>> - /* Clear mappings for empty LRs */ >>> - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { >>> + /* Deal with HW interrupts, and clear mappings for empty LRs */ >>> + for (lr = 0; lr < vgic->nr_lr; lr++) { >>> struct vgic_lr vlr; >>> >>> - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) >>> + if (!test_bit(lr, vgic_cpu->lr_used)) >>> continue; >>> >>> vlr = vgic_get_lr(vcpu, lr); >>> + if (vgic_sync_hwirq(vcpu, vlr)) { >>> + /* >>> + * So this is a HW interrupt that the guest >>> + * EOI-ed. Clean the LR state and allow the >>> + * interrupt to be sampled again. >>> + */ >>> + vlr.state = 0; >>> + vlr.hwirq = 0; >>> + vgic_set_lr(vcpu, lr, vlr); >>> + vgic_irq_clear_queued(vcpu, vlr.irq); >> >> Isn't this line altering common VGIC state without holding the lock? >> Eric removed the coarse dist->lock around the whole >> __kvm_vgic_sync_hwstate() function, we take it now in >> vgic_process_maintenance(), but don't hold it here AFAICT. >> As long as we are only dealing with private timer IRQs this is probably >> not a problem, but the IRQ number could be a SPI as well, right? >> > I don't see a problematic race with this though, as all we're doing is > to clear a bit in a bitmap, which is always checked atomically, so > adding a lock around this really doesn't change anything as far as I can > tell. Indeed I found a similar comment in some older revisions of the code. But isn't it that other code holding the lock (thinking about kvm_vgic_flush_hwstate() in particular) assumes that no-one else tinkers with the VGIC state while it holds the lock? So couldn't we (potentially) run into inconsistent state because we cleared the queued bit while the flushing code runs over all interrupts? Maybe not in this particular case, but in general? Haven't looked at your new series yet, but will do this ASAP. Cheers, Andre. > > Nevertheless, my rework series also addresses this. > > -Christoffer > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Sep 30, 2015 at 03:06:32PM +0100, Andre Przywara wrote: > Hi Christoffer, > > On 29/09/15 14:44, Christoffer Dall wrote: > > On Wed, Sep 23, 2015 at 06:55:04PM +0100, Andre Przywara wrote: > >> Salut Marc, > >> > >> I know that this patch is already merged, but .... > >> > >> On 07/08/15 16:45, Marc Zyngier wrote: > >>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c > >>> index 51c9900..9d009d2 100644 > >> ... > >>> @@ -1364,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) > >>> return level_pending; > >>> } > >>> > >>> +/* > >>> + * Save the physical active state, and reset it to inactive. > >>> + * > >>> + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. > >>> + */ > >>> +static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) > >>> +{ > >>> + struct irq_phys_map *map; > >>> + int ret; > >>> + > >>> + if (!(vlr.state & LR_HW)) > >>> + return 0; > >>> + > >>> + map = vgic_irq_map_search(vcpu, vlr.irq); > >>> + BUG_ON(!map || !map->active); > >>> + > >>> + ret = irq_get_irqchip_state(map->irq, > >>> + IRQCHIP_STATE_ACTIVE, > >>> + &map->active); > >>> + > >>> + WARN_ON(ret); > >>> + > >>> + if (map->active) { > >>> + ret = irq_set_irqchip_state(map->irq, > >>> + IRQCHIP_STATE_ACTIVE, > >>> + false); > >>> + WARN_ON(ret); > >>> + return 0; > >>> + } > >>> + > >>> + return 1; > >>> +} > >>> + > >>> /* Sync back the VGIC state after a guest run */ > >>> static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > >>> { > >>> @@ -1378,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > >>> elrsr = vgic_get_elrsr(vcpu); > >>> elrsr_ptr = u64_to_bitmask(&elrsr); > >>> > >>> - /* Clear mappings for empty LRs */ > >>> - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { > >>> + /* Deal with HW interrupts, and clear mappings for empty LRs */ > >>> + for (lr = 0; lr < vgic->nr_lr; lr++) { > >>> struct vgic_lr vlr; > >>> > >>> - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) > >>> + if (!test_bit(lr, vgic_cpu->lr_used)) > >>> continue; > >>> > >>> vlr = vgic_get_lr(vcpu, lr); > >>> + if (vgic_sync_hwirq(vcpu, vlr)) { > >>> + /* > >>> + * So this is a HW interrupt that the guest > >>> + * EOI-ed. Clean the LR state and allow the > >>> + * interrupt to be sampled again. > >>> + */ > >>> + vlr.state = 0; > >>> + vlr.hwirq = 0; > >>> + vgic_set_lr(vcpu, lr, vlr); > >>> + vgic_irq_clear_queued(vcpu, vlr.irq); > >> > >> Isn't this line altering common VGIC state without holding the lock? > >> Eric removed the coarse dist->lock around the whole > >> __kvm_vgic_sync_hwstate() function, we take it now in > >> vgic_process_maintenance(), but don't hold it here AFAICT. > >> As long as we are only dealing with private timer IRQs this is probably > >> not a problem, but the IRQ number could be a SPI as well, right? > >> > > I don't see a problematic race with this though, as all we're doing is > > to clear a bit in a bitmap, which is always checked atomically, so > > adding a lock around this really doesn't change anything as far as I can > > tell. > > Indeed I found a similar comment in some older revisions of the code. > > But isn't it that other code holding the lock (thinking about > kvm_vgic_flush_hwstate() in particular) assumes that no-one else tinkers > with the VGIC state while it holds the lock? > So couldn't we (potentially) run into inconsistent state because we > cleared the queued bit while the flushing code runs over all interrupts? > Maybe not in this particular case, but in general? In general, yes, you should lock operations accessing the distributor. It just feels silly to do spin_lock(); vgic_irq_clear_queued(...); spin_unlock(); because vgic_irq_clear_queued just clears a bit and I don't see the race. > > Haven't looked at your new series yet, but will do this ASAP. > Thanks, much appreciated. Based on your comment on the previous version, the whole thing is now wrapped in a spinlock so this point is moot. Unless there's a clear race to be fixed here, I would prefer if we focus our energy on getting the other series merged. -Christoffer -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 51c9900..9d009d2 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1140,6 +1140,39 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, if (!vgic_irq_is_edge(vcpu, irq)) vlr.state |= LR_EOI_INT; + if (vlr.irq >= VGIC_NR_SGIS) { + struct irq_phys_map *map; + map = vgic_irq_map_search(vcpu, irq); + + /* + * If we have a mapping, and the virtual interrupt is + * being injected, then we must set the state to + * active in the physical world. Otherwise the + * physical interrupt will fire and the guest will + * exit before processing the virtual interrupt. + */ + if (map) { + int ret; + + BUG_ON(!map->active); + vlr.hwirq = map->phys_irq; + vlr.state |= LR_HW; + vlr.state &= ~LR_EOI_INT; + + ret = irq_set_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + true); + WARN_ON(ret); + + /* + * Make sure we're not going to sample this + * again, as a HW-backed interrupt cannot be + * in the PENDING_ACTIVE stage. + */ + vgic_irq_set_queued(vcpu, irq); + } + } + vgic_set_lr(vcpu, lr_nr, vlr); vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } @@ -1364,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) return level_pending; } +/* + * Save the physical active state, and reset it to inactive. + * + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. + */ +static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) +{ + struct irq_phys_map *map; + int ret; + + if (!(vlr.state & LR_HW)) + return 0; + + map = vgic_irq_map_search(vcpu, vlr.irq); + BUG_ON(!map || !map->active); + + ret = irq_get_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + &map->active); + + WARN_ON(ret); + + if (map->active) { + ret = irq_set_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + false); + WARN_ON(ret); + return 0; + } + + return 1; +} + /* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { @@ -1378,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr = vgic_get_elrsr(vcpu); elrsr_ptr = u64_to_bitmask(&elrsr); - /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { + /* Deal with HW interrupts, and clear mappings for empty LRs */ + for (lr = 0; lr < vgic->nr_lr; lr++) { struct vgic_lr vlr; - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) + if (!test_bit(lr, vgic_cpu->lr_used)) continue; vlr = vgic_get_lr(vcpu, lr); + if (vgic_sync_hwirq(vcpu, vlr)) { + /* + * So this is a HW interrupt that the guest + * EOI-ed. Clean the LR state and allow the + * interrupt to be sampled again. + */ + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + vgic_irq_clear_queued(vcpu, vlr.irq); + set_bit(lr, elrsr_ptr); + } + + if (!test_bit(lr, elrsr_ptr)) + continue; + + clear_bit(lr, vgic_cpu->lr_used); BUG_ON(vlr.irq >= dist->nr_irqs); vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;