[v3,06/11] x86/hvm: allow registering EOI callbacks for GSIs

Message ID 20210331103303.79705-7-roger.pau@citrix.com (mailing list archive)
State Superseded
Series x86/intr: introduce EOI callbacks and fix vPT

Commit Message

Roger Pau Monne March 31, 2021, 10:32 a.m. UTC
Such callbacks will be executed once an EOI is performed by the guest,
regardless of whether the interrupts are injected from the vIO-APIC or
the vPIC, as ISA IRQs are translated to GSIs and then the
corresponding callback is executed at EOI.

The vIO-APIC infrastructure for handling EOIs is built on top of the
existing vlapic EOI callback functionality, while the vPIC one is
handled when writing to the vPIC EOI register.

Note that such callbacks need to be registered and de-registered, and
that a single GSI can have multiple callbacks associated. That's
because GSIs can be level triggered and shared, as that's the case
with legacy PCI interrupts shared between several devices.

Strictly speaking this is a non-functional change, since this change
introduces no users of the new interface.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
 - Latch hvm_domain_irq in some functions.
 - Make domain parameter of hvm_gsi_has_callbacks const.
 - Add comment about dropping the lock around the
   hvm_gsi_execute_callbacks call.
 - Drop change to ioapic_load.

Changes since v1:
 - New in this version.
---
 xen/arch/x86/hvm/hvm.c        | 15 ++++++++-
 xen/arch/x86/hvm/irq.c        | 63 +++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vioapic.c    | 29 ++++++++++++----
 xen/arch/x86/hvm/vpic.c       |  5 +++
 xen/include/asm-x86/hvm/irq.h | 20 +++++++++++
 5 files changed, 125 insertions(+), 7 deletions(-)
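
For context, a minimal sketch of how a consumer of the new interface
would look (handler and names are hypothetical - the real users, e.g.
the vPT code, only arrive in later patches of the series):

    static void my_eoi_handler(unsigned int gsi, void *data)
    {
        /* React to the guest having EOI'd the GSI, e.g. re-arm a timer. */
    }

    static struct hvm_gsi_eoi_callback my_cb = {
        .callback = my_eoi_handler,
        .data = NULL,       /* opaque pointer handed back to the handler */
    };

    /* In some setup path, before injecting interrupts for this GSI: */
    int rc = hvm_gsi_register_callback(d, gsi, &my_cb);

    if ( rc )
        return rc; /* only fails (-EINVAL) when gsi >= nr_gsis */

    /* ... teardown, before the callback structure's memory goes away: */
    hvm_gsi_unregister_callback(d, gsi, &my_cb);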

Comments

Jan Beulich April 7, 2021, 3:51 p.m. UTC | #1
On 31.03.2021 12:32, Roger Pau Monne wrote:
> --- a/xen/arch/x86/hvm/irq.c
> +++ b/xen/arch/x86/hvm/irq.c
> @@ -595,6 +595,69 @@ int hvm_local_events_need_delivery(struct vcpu *v)
>      return !hvm_interrupt_blocked(v, intack);
>  }
>  
> +int hvm_gsi_register_callback(struct domain *d, unsigned int gsi,
> +                              struct hvm_gsi_eoi_callback *cb)
> +{
> +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
> +
> +    if ( gsi >= hvm_irq->nr_gsis )
> +    {
> +        ASSERT_UNREACHABLE();
> +        return -EINVAL;
> +    }
> +
> +    write_lock(&hvm_irq->gsi_callbacks_lock);
> +    list_add(&cb->list, &hvm_irq->gsi_callbacks[gsi]);
> +    write_unlock(&hvm_irq->gsi_callbacks_lock);
> +
> +    return 0;
> +}
> +
> +void hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
> +                                 struct hvm_gsi_eoi_callback *cb)
> +{
> +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
> +    const struct list_head *tmp;
> +
> +    if ( gsi >= hvm_irq->nr_gsis )
> +    {
> +        ASSERT_UNREACHABLE();
> +        return;
> +    }
> +
> +    write_lock(&hvm_irq->gsi_callbacks_lock);
> +    list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
> +        if ( tmp == &cb->list )
> +        {
> +            list_del(&cb->list);
> +            break;
> +        }
> +    write_unlock(&hvm_irq->gsi_callbacks_lock);
> +}

Perhaps somehow flag, at least in debug builds, if the callback
wasn't found?
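
One possible shape, as a sketch (the found flag and the warning text
are illustrative; gdprintk() only produces output in debug builds):

    bool found = false;

    write_lock(&hvm_irq->gsi_callbacks_lock);
    list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
        if ( tmp == &cb->list )
        {
            list_del(&cb->list);
            found = true;
            break;
        }
    write_unlock(&hvm_irq->gsi_callbacks_lock);

    if ( !found )
        gdprintk(XENLOG_WARNING, "EOI callback not found for GSI %u\n", gsi);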

> +void hvm_gsi_execute_callbacks(unsigned int gsi)
> +{
> +    struct hvm_irq *hvm_irq = hvm_domain_irq(current->domain);
> +    struct hvm_gsi_eoi_callback *cb;
> +
> +    read_lock(&hvm_irq->gsi_callbacks_lock);
> +    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
> +        cb->callback(gsi, cb->data);
> +    read_unlock(&hvm_irq->gsi_callbacks_lock);
> +}

Just as an observation (for now at least) - holding the lock here
means the callbacks cannot re-register themselves.

> +bool hvm_gsi_has_callbacks(const struct domain *d, unsigned int gsi)
> +{
> +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
> +    bool has_callbacks;
> +
> +    read_lock(&hvm_irq->gsi_callbacks_lock);
> +    has_callbacks = !list_empty(&hvm_irq->gsi_callbacks[gsi]);
> +    read_unlock(&hvm_irq->gsi_callbacks_lock);
> +
> +    return has_callbacks;
> +}

What use is this function? Its result is stale by the time the
caller can look at it, as you've dropped the lock.

> @@ -421,13 +423,25 @@ static void eoi_callback(unsigned int vector, void *data)
>              if ( is_iommu_enabled(d) )
>              {
>                  spin_unlock(&d->arch.hvm.irq_lock);
> -                hvm_dpci_eoi(vioapic->base_gsi + pin);
> +                hvm_dpci_eoi(gsi);
>                  spin_lock(&d->arch.hvm.irq_lock);
>              }
>  
> +            /*
> +             * Callbacks don't expect to be executed with any lock held, so
> +             * drop the lock that protects the vIO-APIC fields from changing.
> +             *
> +             * Note that the redirection entry itself cannot go away, so upon
> +             * retaking the lock we only need to avoid making assumptions on
> +             * redirection entry field values (ie: recheck the IRR field).
> +             */
> +            spin_unlock(&d->arch.hvm.irq_lock);
> +            hvm_gsi_execute_callbacks(gsi);
> +            spin_lock(&d->arch.hvm.irq_lock);

The two pairs of unlock / re-lock want folding, I think - there's
no point causing extra contention on the lock here.

> @@ -443,7 +457,8 @@ static void ioapic_inj_irq(
>      struct vlapic *target,
>      uint8_t vector,
>      uint8_t trig_mode,
> -    uint8_t delivery_mode)
> +    uint8_t delivery_mode,
> +    bool callback)
>  {
>      HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
>                  vector, trig_mode, delivery_mode);
> @@ -452,7 +467,7 @@ static void ioapic_inj_irq(
>             (delivery_mode == dest_LowestPrio));
>  
>      vlapic_set_irq_callback(target, vector, trig_mode,
> -                            trig_mode ? eoi_callback : NULL, NULL);
> +                            callback ? eoi_callback : NULL, NULL);

I think you'd better use trig_mode || callback here and ...

> @@ -466,6 +481,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>      struct vlapic *target;
>      struct vcpu *v;
>      unsigned int irq = vioapic->base_gsi + pin;
> +    bool callback = trig_mode || hvm_gsi_has_callbacks(d, irq);
>  
>      ASSERT(spin_is_locked(&d->arch.hvm.irq_lock));
>  
> @@ -492,7 +508,8 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>              target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
>          if ( target != NULL )
>          {
> -            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
> +            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode,
> +                           callback);

... invoke hvm_gsi_has_callbacks() right here and ...

> @@ -507,7 +524,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>          for_each_vcpu ( d, v )
>              if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
>                  ioapic_inj_irq(vioapic, vcpu_vlapic(v), vector, trig_mode,
> -                               delivery_mode);
> +                               delivery_mode, callback);

... here, avoiding calling the function when you don't need the
result.

Jan
Roger Pau Monne April 7, 2021, 5:08 p.m. UTC | #2
On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
> On 31.03.2021 12:32, Roger Pau Monne wrote:
> > --- a/xen/arch/x86/hvm/irq.c
> > +++ b/xen/arch/x86/hvm/irq.c
> > +void hvm_gsi_execute_callbacks(unsigned int gsi)
> > +{
> > +    struct hvm_irq *hvm_irq = hvm_domain_irq(current->domain);
> > +    struct hvm_gsi_eoi_callback *cb;
> > +
> > +    read_lock(&hvm_irq->gsi_callbacks_lock);
> > +    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
> > +        cb->callback(gsi, cb->data);
> > +    read_unlock(&hvm_irq->gsi_callbacks_lock);
> > +}
> 
> Just as an observation (for now at least) - holding the lock here
> means the callbacks cannot re-register themselves.

Well, re-registering would be weird, as the callback is not
unregistered after execution. What is likely more relevant is that the
callback cannot unregister itself. I haven't found a need for this so
far, so I think it's fine.
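
Concretely, self-unregistration would deadlock on the rwlock;
annotating the loop from the patch (illustration only):

    /* hvm_gsi_execute_callbacks() runs the handlers under the read lock: */
    read_lock(&hvm_irq->gsi_callbacks_lock);
    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
        cb->callback(gsi, cb->data);
        /* A handler calling hvm_gsi_unregister_callback() from here would
         * block in write_lock() while this CPU still holds the read lock. */
    read_unlock(&hvm_irq->gsi_callbacks_lock);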

> > +bool hvm_gsi_has_callbacks(const struct domain *d, unsigned int gsi)
> > +{
> > +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
> > +    bool has_callbacks;
> > +
> > +    read_lock(&hvm_irq->gsi_callbacks_lock);
> > +    has_callbacks = !list_empty(&hvm_irq->gsi_callbacks[gsi]);
> > +    read_unlock(&hvm_irq->gsi_callbacks_lock);
> > +
> > +    return has_callbacks;
> > +}
> 
> What use is this function? Its result is stale by the time the
> caller can look at it, as you've dropped the lock.

Right, that function is only used to decide whether the vIOAPIC needs
to register an EOI callback when injecting a vector to the vlapic. The
workflow is to first register a callback with the vIOAPIC and
afterwards inject an interrupt which will trigger the callback
logic.

Playing with the callback registration while interrupts can be
injected will likely result in a malfunction of the device that relies
on those callbacks, but that's to be expected anyway when playing such
games.

That said multiple users sharing a vIOAPIC pin should be fine as long
as they follow the logic above: always register a callback before
attempting to inject an interrupt.
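
The ordering requirement spelled out as a sketch (comments only - the
injection path is whatever ends up calling vioapic_deliver()):

    /* Correct: */
    hvm_gsi_register_callback(d, gsi, &cb); /* callback now on the list */
    /* <interrupt asserted>: vioapic_deliver() observes
     * hvm_gsi_has_callbacks(d, gsi) == true and latches eoi_callback,
     * so the callback runs when the guest EOIs the vector.              */

    /* Racy: asserting the interrupt first and registering afterwards
     * means the vector was injected without eoi_callback, so that EOI
     * is missed.                                                        */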

> > @@ -421,13 +423,25 @@ static void eoi_callback(unsigned int vector, void *data)
> >              if ( is_iommu_enabled(d) )
> >              {
> >                  spin_unlock(&d->arch.hvm.irq_lock);
> > -                hvm_dpci_eoi(vioapic->base_gsi + pin);
> > +                hvm_dpci_eoi(gsi);
> >                  spin_lock(&d->arch.hvm.irq_lock);
> >              }
> >  
> > +            /*
> > +             * Callbacks don't expect to be executed with any lock held, so
> > +             * drop the lock that protects the vIO-APIC fields from changing.
> > +             *
> > +             * Note that the redirection entry itself cannot go away, so upon
> > +             * retaking the lock we only need to avoid making assumptions on
> > +             * redirection entry field values (ie: recheck the IRR field).
> > +             */
> > +            spin_unlock(&d->arch.hvm.irq_lock);
> > +            hvm_gsi_execute_callbacks(gsi);
> > +            spin_lock(&d->arch.hvm.irq_lock);
> 
> The two pairs of unlock / re-lock want folding, I think - there's
> no point causing extra contention on the lock here.

The chunk above will go away in the next patch - there's no need to
fold it, as doing so would make the following patch less clear.

> > @@ -443,7 +457,8 @@ static void ioapic_inj_irq(
> >      struct vlapic *target,
> >      uint8_t vector,
> >      uint8_t trig_mode,
> > -    uint8_t delivery_mode)
> > +    uint8_t delivery_mode,
> > +    bool callback)
> >  {
> >      HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
> >                  vector, trig_mode, delivery_mode);
> > @@ -452,7 +467,7 @@ static void ioapic_inj_irq(
> >             (delivery_mode == dest_LowestPrio));
> >  
> >      vlapic_set_irq_callback(target, vector, trig_mode,
> > -                            trig_mode ? eoi_callback : NULL, NULL);
> > +                            callback ? eoi_callback : NULL, NULL);
> 
> I think you'd better use trig_mode || callback here and ...
> 
> > @@ -466,6 +481,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
> >      struct vlapic *target;
> >      struct vcpu *v;
> >      unsigned int irq = vioapic->base_gsi + pin;
> > +    bool callback = trig_mode || hvm_gsi_has_callbacks(d, irq);
> >  
> >      ASSERT(spin_is_locked(&d->arch.hvm.irq_lock));
> >  
> > @@ -492,7 +508,8 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
> >              target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
> >          if ( target != NULL )
> >          {
> > -            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
> > +            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode,
> > +                           callback);
> 
> ... invoke hvm_gsi_has_callbacks() right here and ...
> 
> > @@ -507,7 +524,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
> >          for_each_vcpu ( d, v )
> >              if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
> >                  ioapic_inj_irq(vioapic, vcpu_vlapic(v), vector, trig_mode,
> > -                               delivery_mode);
> > +                               delivery_mode, callback);
> 
> ... here, avoiding calling the function when you don't need the
> result.

I think there's a slim chance of not needing to use the callback local
variable, and hence didn't consider limiting it. I can do so, but I'm
unsure this will bring any real benefit while making the code more
complex IMO.

Thanks, Roger.
Jan Beulich April 8, 2021, 6:34 a.m. UTC | #3
On 07.04.2021 19:08, Roger Pau Monné wrote:
> On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
>> On 31.03.2021 12:32, Roger Pau Monne wrote:
>>> +bool hvm_gsi_has_callbacks(const struct domain *d, unsigned int gsi)
>>> +{
>>> +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
>>> +    bool has_callbacks;
>>> +
>>> +    read_lock(&hvm_irq->gsi_callbacks_lock);
>>> +    has_callbacks = !list_empty(&hvm_irq->gsi_callbacks[gsi]);
>>> +    read_unlock(&hvm_irq->gsi_callbacks_lock);
>>> +
>>> +    return has_callbacks;
>>> +}
>>
>> What use is this function? Its result is stale by the time the
>> caller can look at it, as you've dropped the lock.
> 
> Right, that function is only used to decide whether the vIOAPIC needs
> to register an EOI callback when injecting a vector to the vlapic. The
> workflow is to first register a callback with the vIOAPIC and
> afterwards inject an interrupt which will trigger the callback
> logic.
> 
> Playing with the callback registration while interrupts can be
> injected will likely result in a malfunction of the device that relies
> on those callbacks, but that's to be expected anyway when playing such
> games.
> 
> That said multiple users sharing a vIOAPIC pin should be fine as long
> as they follow the logic above: always register a callback before
> attempting to inject an interrupt.

May I ask that you add a comment ahead of this function pointing out
the restriction?

>>> @@ -443,7 +457,8 @@ static void ioapic_inj_irq(
>>>      struct vlapic *target,
>>>      uint8_t vector,
>>>      uint8_t trig_mode,
>>> -    uint8_t delivery_mode)
>>> +    uint8_t delivery_mode,
>>> +    bool callback)
>>>  {
>>>      HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
>>>                  vector, trig_mode, delivery_mode);
>>> @@ -452,7 +467,7 @@ static void ioapic_inj_irq(
>>>             (delivery_mode == dest_LowestPrio));
>>>  
>>>      vlapic_set_irq_callback(target, vector, trig_mode,
>>> -                            trig_mode ? eoi_callback : NULL, NULL);
>>> +                            callback ? eoi_callback : NULL, NULL);
>>
>> I think you'd better use trig_mode || callback here and ...
>>
>>> @@ -466,6 +481,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>>      struct vlapic *target;
>>>      struct vcpu *v;
>>>      unsigned int irq = vioapic->base_gsi + pin;
>>> +    bool callback = trig_mode || hvm_gsi_has_callbacks(d, irq);
>>>  
>>>      ASSERT(spin_is_locked(&d->arch.hvm.irq_lock));
>>>  
>>> @@ -492,7 +508,8 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>>              target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
>>>          if ( target != NULL )
>>>          {
>>> -            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
>>> +            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode,
>>> +                           callback);
>>
>> ... invoke hvm_gsi_has_callbacks() right here and ...
>>
>>> @@ -507,7 +524,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>>          for_each_vcpu ( d, v )
>>>              if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
>>>                  ioapic_inj_irq(vioapic, vcpu_vlapic(v), vector, trig_mode,
>>> -                               delivery_mode);
>>> +                               delivery_mode, callback);
>>
>> ... here, avoiding calling the function when you don't need the
>> result.
> 
> I think there's a slim chance of not needing to use the callback local
> variable, and hence didn't consider limiting it. I can do so, but I'm
> unsure this will bring any real benefit while making the code more
> complex IMO.

Really the variable remaining unused in a minor set of cases was only
a secondary observation. What I first stumbled over was the moving of
the decision whether a callback is wanted from ioapic_inj_irq() to its
caller. Since the function clearly is intended as a helper of
vioapic_deliver(), I guess in the end it's fine the way you have it.

Jan
Roger Pau Monne April 8, 2021, 12:52 p.m. UTC | #4
On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
> On 31.03.2021 12:32, Roger Pau Monne wrote:
> > --- a/xen/arch/x86/hvm/irq.c
> > +++ b/xen/arch/x86/hvm/irq.c
> > +void hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
> > +                                 struct hvm_gsi_eoi_callback *cb)
> > +{
> > +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
> > +    const struct list_head *tmp;
> > +
> > +    if ( gsi >= hvm_irq->nr_gsis )
> > +    {
> > +        ASSERT_UNREACHABLE();
> > +        return;
> > +    }
> > +
> > +    write_lock(&hvm_irq->gsi_callbacks_lock);
> > +    list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
> > +        if ( tmp == &cb->list )
> > +        {
> > +            list_del(&cb->list);
> > +            break;
> > +        }
> > +    write_unlock(&hvm_irq->gsi_callbacks_lock);
> > +}
> 
> Perhaps somehow flag, at least in debug builds, if the callback
> wasn't found?

I've added a debug printf here to warn if the callback is not found,
but I see it triggering because hpet_set_timer will call
destroy_periodic_time and create_periodic_time and thus two calls will
be made to hvm_gsi_unregister_callback. This is fine, but adding a
message there gets too verbose, so I will drop it and leave the code
as-is.

I don't see a problem with calling destroy_periodic_time multiple
times even if the timer was not active, and that shouldn't result in a
message being printed.
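
In other words, given the list-walk implementation, the second call is
simply a no-op (pt->eoi_cb being a hypothetical embedded callback):

    hvm_gsi_unregister_callback(d, gsi, &pt->eoi_cb); /* found; list_del() */
    hvm_gsi_unregister_callback(d, gsi, &pt->eoi_cb); /* no match; returns */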

Thanks, Roger.
Jan Beulich April 8, 2021, 2:31 p.m. UTC | #5
On 08.04.2021 14:52, Roger Pau Monné wrote:
> On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
>> On 31.03.2021 12:32, Roger Pau Monne wrote:
>>> --- a/xen/arch/x86/hvm/irq.c
>>> +++ b/xen/arch/x86/hvm/irq.c
>>> +void hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
>>> +                                 struct hvm_gsi_eoi_callback *cb)
>>> +{
>>> +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
>>> +    const struct list_head *tmp;
>>> +
>>> +    if ( gsi >= hvm_irq->nr_gsis )
>>> +    {
>>> +        ASSERT_UNREACHABLE();
>>> +        return;
>>> +    }
>>> +
>>> +    write_lock(&hvm_irq->gsi_callbacks_lock);
>>> +    list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
>>> +        if ( tmp == &cb->list )
>>> +        {
>>> +            list_del(&cb->list);
>>> +            break;
>>> +        }
>>> +    write_unlock(&hvm_irq->gsi_callbacks_lock);
>>> +}
>>
>> Perhaps somehow flag, at least in debug builds, if the callback
>> wasn't found?
> 
> I've added a debug printf here to warn if the callback is not found,
> but I see it triggering because hpet_set_timer will call
> destroy_periodic_time and create_periodic_time and thus two calls will
> be made to hvm_gsi_unregister_callback. This is fine, but adding a
> message there gets too verbose, so I will drop it and leave the code
> as-is.
> 
> I don't see a problem with calling destroy_periodic_time multiple
> times even if the timer was not active, and that shouldn't result in a
> message being printed.

If destroy_periodic_time() is to remain the only caller, I guess I
agree. Other (future) callers may then need this function to gain
a return value indicating whether the callback was actually found.

Jan
Roger Pau Monne April 8, 2021, 3:06 p.m. UTC | #6
On Thu, Apr 08, 2021 at 04:31:59PM +0200, Jan Beulich wrote:
> On 08.04.2021 14:52, Roger Pau Monné wrote:
> > On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
> >> On 31.03.2021 12:32, Roger Pau Monne wrote:
> >>> --- a/xen/arch/x86/hvm/irq.c
> >>> +++ b/xen/arch/x86/hvm/irq.c
> >>> +void hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
> >>> +                                 struct hvm_gsi_eoi_callback *cb)
> >>> +{
> >>> +    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
> >>> +    const struct list_head *tmp;
> >>> +
> >>> +    if ( gsi >= hvm_irq->nr_gsis )
> >>> +    {
> >>> +        ASSERT_UNREACHABLE();
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    write_lock(&hvm_irq->gsi_callbacks_lock);
> >>> +    list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
> >>> +        if ( tmp == &cb->list )
> >>> +        {
> >>> +            list_del(&cb->list);
> >>> +            break;
> >>> +        }
> >>> +    write_unlock(&hvm_irq->gsi_callbacks_lock);
> >>> +}
> >>
> >> Perhaps somehow flag, at least in debug builds, if the callback
> >> wasn't found?
> > 
> > I've added a debug printf here to warn if the callback is not found,
> > but I see it triggering because hpet_set_timer will call
> > destroy_periodic_time and create_periodic_time and thus two calls will
> > be made to hvm_gsi_unregister_callback. This is fine, but adding a
> > message there gets too verbose, so I will drop it and leave the code
> > as-is.
> > 
> > I don't see a problem with calling destroy_periodic_time multiple
> > times even if the timer was not active, and that shouldn't result in a
> > message being printed.
> 
> If destroy_periodic_time() is to remain the only caller, I guess I
> agree. Other (future) callers may then need this function to gain
> a return value indicating whether the callback was actually found.

There's also pt_irq_destroy_bind which likely cares about the return
value, so let's return a value from hvm_gsi_unregister_callback and
check it in pt_irq_destroy_bind.
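
A sketch of the adjusted helper (assumed shape - the actual follow-up
version may differ):

    int hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
                                    struct hvm_gsi_eoi_callback *cb)
    {
        struct hvm_irq *hvm_irq = hvm_domain_irq(d);
        const struct list_head *tmp;
        bool found = false;

        if ( gsi >= hvm_irq->nr_gsis )
        {
            ASSERT_UNREACHABLE();
            return -EINVAL;
        }

        write_lock(&hvm_irq->gsi_callbacks_lock);
        list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
            if ( tmp == &cb->list )
            {
                list_del(&cb->list);
                found = true;
                break;
            }
        write_unlock(&hvm_irq->gsi_callbacks_lock);

        return found ? 0 : -ENOENT;
    }

pt_irq_destroy_bind() could then warn or bail if -ENOENT comes back for
a callback it believes is registered.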

Thanks, Roger.
Roger Pau Monne April 15, 2021, 4:04 p.m. UTC | #7
On Wed, Apr 07, 2021 at 07:08:06PM +0200, Roger Pau Monné wrote:
> On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
> > On 31.03.2021 12:32, Roger Pau Monne wrote:
> > > --- a/xen/arch/x86/hvm/irq.c
> > > +++ b/xen/arch/x86/hvm/irq.c
> > > +void hvm_gsi_execute_callbacks(unsigned int gsi)
> > > +{
> > > +    struct hvm_irq *hvm_irq = hvm_domain_irq(current->domain);
> > > +    struct hvm_gsi_eoi_callback *cb;
> > > +
> > > +    read_lock(&hvm_irq->gsi_callbacks_lock);
> > > +    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
> > > +        cb->callback(gsi, cb->data);
> > > +    read_unlock(&hvm_irq->gsi_callbacks_lock);
> > > +}
> > 
> > Just as an observation (for now at least) - holding the lock here
> > means the callbacks cannot re-register themselves.
> 
> Well, re-registering would be weird, as the callback is not
> unregistered after execution. What is likely more relevant is that the
> callback cannot unregister itself. I haven't found a need for this so
> far, so I think it's fine.

I'm afraid I was wrong here - rtc_pf_callback could attempt to
unregister the timer, and thus end up calling
hvm_gsi_unregister_callback inside of a callback.

I need to figure out a way to solve this. We already run the RTC in no ack
mode (which is correct because of the flag we expose in the WAET ACPI
table), and hence I wonder if we still need to keep the code for the
strict_mode around, since it's not used at all. Would you be OK with
me removing the mode_strict related code?

Thanks, Roger.
Jan Beulich April 16, 2021, 7:29 a.m. UTC | #8
On 15.04.2021 18:04, Roger Pau Monné wrote:
> On Wed, Apr 07, 2021 at 07:08:06PM +0200, Roger Pau Monné wrote:
>> On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
>>> On 31.03.2021 12:32, Roger Pau Monne wrote:
>>>> --- a/xen/arch/x86/hvm/irq.c
>>>> +++ b/xen/arch/x86/hvm/irq.c
>>>> +void hvm_gsi_execute_callbacks(unsigned int gsi)
>>>> +{
>>>> +    struct hvm_irq *hvm_irq = hvm_domain_irq(current->domain);
>>>> +    struct hvm_gsi_eoi_callback *cb;
>>>> +
>>>> +    read_lock(&hvm_irq->gsi_callbacks_lock);
>>>> +    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
>>>> +        cb->callback(gsi, cb->data);
>>>> +    read_unlock(&hvm_irq->gsi_callbacks_lock);
>>>> +}
>>>
>>> Just as an observation (for now at least) - holding the lock here
>>> means the callbacks cannot re-register themselves.
>>
>> Well, re-registering would be weird, as the callback is not
>> unregistered after execution. What is likely more relevant is that the
>> callback cannot unregister itself. I haven't found a need for this so
>> far, so I think it's fine.
> 
> I'm afraid I was wrong here - rtc_pf_callback could attempt to
> unregister the timer, and thus end up calling
> hvm_gsi_unregister_callback inside of a callback.
> 
> I need to figure out a way to solve this. We already run the RTC in no ack
> mode (which is correct because of the flag we expose in the WAET ACPI
> table), and hence I wonder if we still need to keep the code for the
> strict_mode around, since it's not used at all. Would you be OK with
> me removing the mode_strict related code?

Not sure, to be honest. Years ago I did submit a patch correcting this
("x86/HVM: tie RTC emulation mode to enabling of Viridian emulation"),
as we shouldn't assume that all guests even know of WAET. Hence running
uniformly in rtc_mode_no_ack isn't really correct. I'm still carrying
this patch, as Tim (iirc) had asked not to tie the behavior to the
Viridian param, but to give it its own one, which I still haven't gotten to.

Of course, if we decided to drop mode_strict support, I could also
drop that patch ...

Jan
Roger Pau Monne April 19, 2021, 8:31 a.m. UTC | #9
On Fri, Apr 16, 2021 at 09:29:26AM +0200, Jan Beulich wrote:
> On 15.04.2021 18:04, Roger Pau Monné wrote:
> > On Wed, Apr 07, 2021 at 07:08:06PM +0200, Roger Pau Monné wrote:
> >> On Wed, Apr 07, 2021 at 05:51:14PM +0200, Jan Beulich wrote:
> >>> On 31.03.2021 12:32, Roger Pau Monne wrote:
> >>>> --- a/xen/arch/x86/hvm/irq.c
> >>>> +++ b/xen/arch/x86/hvm/irq.c
> >>>> +void hvm_gsi_execute_callbacks(unsigned int gsi)
> >>>> +{
> >>>> +    struct hvm_irq *hvm_irq = hvm_domain_irq(current->domain);
> >>>> +    struct hvm_gsi_eoi_callback *cb;
> >>>> +
> >>>> +    read_lock(&hvm_irq->gsi_callbacks_lock);
> >>>> +    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
> >>>> +        cb->callback(gsi, cb->data);
> >>>> +    read_unlock(&hvm_irq->gsi_callbacks_lock);
> >>>> +}
> >>>
> >>> Just as an observation (for now at least) - holding the lock here
> >>> means the callbacks cannot re-register themselves.
> >>
> >> Well, re-registering would be weird, as the callback is not
> >> unregistered after execution. What is likely more relevant is that the
> >> callback cannot unregister itself. I haven't found a need for this so
> >> far, so I think it's fine.
> > 
> > I'm afraid I was wrong here - rtc_pf_callback could attempt to
> > unregister the timer, and thus end up calling
> > hvm_gsi_unregister_callback inside of a callback.
> > 
> > I need to figure out a way to solve this. We already run the RTC in no ack
> > mode (which is correct because of the flag we expose in the WAET ACPI
> > table), and hence I wonder if we still need to keep the code for the
> > strict_mode around, since it's not used at all. Would you be OK with
> > me removing the mode_strict related code?
> 
> Not sure, to be honest. Years ago I did submit a patch correcting this
> ("x86/HVM: tie RTC emulation mode to enabling of Viridian emulation"),
> as we shouldn't assume that all guests even know of WAET.

It's very likely that guests which don't even know about WAET will
continue working fine even in no_ack mode. In fact the current code for
strict_mode will inject 10 interrupts without REG_C being read, as
there's no check for the value of REG_C before injecting the
interrupt.

> Hence running
> uniformly in rtc_mode_no_ack isn't really correct. I'm still carrying
> this patch, as Tim (iirc) had asked not to tie the behavior to the
> Viridian param, but give it its own one. Which I still didn't get to.
> 
> Of course, if we decided to drop mode_strict support, I could also
> drop that patch ...

AFAICT the no_ack mode has been used since Xen 4.3, and so far we have
had no complaints, so I think it's safe to just remove the code for
strict_mode. It can always be fetched from the repository history if
there's a need to support strict_mode in the future.

Thanks, Roger.

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index e7bcffebc49..0279014e66e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -608,7 +608,7 @@  static int hvm_print_line(
 
 int hvm_domain_initialise(struct domain *d)
 {
-    unsigned int nr_gsis;
+    unsigned int nr_gsis, i;
     int rc;
 
     if ( !hvm_enabled )
@@ -655,6 +655,14 @@  int hvm_domain_initialise(struct domain *d)
     BUILD_BUG_ON(NR_HVM_DOMU_IRQS < NR_ISAIRQS);
     ASSERT(hvm_domain_irq(d)->nr_gsis >= NR_ISAIRQS);
 
+    /* Initialize the EOI callback list. */
+    hvm_domain_irq(d)->gsi_callbacks = xmalloc_array(struct list_head, nr_gsis);
+    if ( !hvm_domain_irq(d)->gsi_callbacks )
+        goto fail1;
+    rwlock_init(&hvm_domain_irq(d)->gsi_callbacks_lock);
+    for ( i = 0; i < nr_gsis; i++ )
+        INIT_LIST_HEAD(&hvm_domain_irq(d)->gsi_callbacks[i]);
+
     /* need link to containing domain */
     d->arch.hvm.pl_time->domain = d;
 
@@ -714,6 +722,8 @@  int hvm_domain_initialise(struct domain *d)
  fail1:
     if ( is_hardware_domain(d) )
         xfree(d->arch.hvm.io_bitmap);
+    if ( hvm_domain_irq(d) )
+        XFREE(hvm_domain_irq(d)->gsi_callbacks);
     XFREE(d->arch.hvm.params);
     XFREE(d->arch.hvm.irq);
  fail0:
@@ -776,6 +786,9 @@  void hvm_domain_destroy(struct domain *d)
     vioapic_deinit(d);
 
     XFREE(d->arch.hvm.pl_time);
+
+    if ( hvm_domain_irq(d) )
+        XFREE(hvm_domain_irq(d)->gsi_callbacks);
     XFREE(d->arch.hvm.irq);
 
     list_for_each_safe ( ioport_list, tmp, &d->arch.hvm.g2m_ioport_list )
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 38ac5fb6c7c..b9fa8409b9e 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -595,6 +595,69 @@  int hvm_local_events_need_delivery(struct vcpu *v)
     return !hvm_interrupt_blocked(v, intack);
 }
 
+int hvm_gsi_register_callback(struct domain *d, unsigned int gsi,
+                              struct hvm_gsi_eoi_callback *cb)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+
+    if ( gsi >= hvm_irq->nr_gsis )
+    {
+        ASSERT_UNREACHABLE();
+        return -EINVAL;
+    }
+
+    write_lock(&hvm_irq->gsi_callbacks_lock);
+    list_add(&cb->list, &hvm_irq->gsi_callbacks[gsi]);
+    write_unlock(&hvm_irq->gsi_callbacks_lock);
+
+    return 0;
+}
+
+void hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
+                                 struct hvm_gsi_eoi_callback *cb)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+    const struct list_head *tmp;
+
+    if ( gsi >= hvm_irq->nr_gsis )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    write_lock(&hvm_irq->gsi_callbacks_lock);
+    list_for_each ( tmp, &hvm_irq->gsi_callbacks[gsi] )
+        if ( tmp == &cb->list )
+        {
+            list_del(&cb->list);
+            break;
+        }
+    write_unlock(&hvm_irq->gsi_callbacks_lock);
+}
+
+void hvm_gsi_execute_callbacks(unsigned int gsi)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(current->domain);
+    struct hvm_gsi_eoi_callback *cb;
+
+    read_lock(&hvm_irq->gsi_callbacks_lock);
+    list_for_each_entry ( cb, &hvm_irq->gsi_callbacks[gsi], list )
+        cb->callback(gsi, cb->data);
+    read_unlock(&hvm_irq->gsi_callbacks_lock);
+}
+
+bool hvm_gsi_has_callbacks(const struct domain *d, unsigned int gsi)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+    bool has_callbacks;
+
+    read_lock(&hvm_irq->gsi_callbacks_lock);
+    has_callbacks = !list_empty(&hvm_irq->gsi_callbacks[gsi]);
+    read_unlock(&hvm_irq->gsi_callbacks_lock);
+
+    return has_callbacks;
+}
+
 static void irq_dump(struct domain *d)
 {
     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index d29b6bfdb7d..099c29466ba 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -285,6 +285,7 @@  static void vioapic_write_redirent(
             ASSERT(prev_level);
             ASSERT(!top_word);
             hvm_dpci_eoi(gsi);
+            hvm_gsi_execute_callbacks(gsi);
     }
 
     if ( is_hardware_domain(d) && unmasked )
@@ -412,6 +413,7 @@  static void eoi_callback(unsigned int vector, void *data)
         for ( pin = 0; pin < vioapic->nr_pins; pin++ )
         {
             union vioapic_redir_entry *ent = &vioapic->redirtbl[pin];
+            unsigned int gsi = vioapic->base_gsi + pin;
 
             if ( ent->fields.vector != vector )
                 continue;
@@ -421,13 +423,25 @@  static void eoi_callback(unsigned int vector, void *data)
             if ( is_iommu_enabled(d) )
             {
                 spin_unlock(&d->arch.hvm.irq_lock);
-                hvm_dpci_eoi(vioapic->base_gsi + pin);
+                hvm_dpci_eoi(gsi);
                 spin_lock(&d->arch.hvm.irq_lock);
             }
 
+            /*
+             * Callbacks don't expect to be executed with any lock held, so
+             * drop the lock that protects the vIO-APIC fields from changing.
+             *
+             * Note that the redirection entry itself cannot go away, so upon
+             * retaking the lock we only need to avoid making assumptions on
+             * redirection entry field values (ie: recheck the IRR field).
+             */
+            spin_unlock(&d->arch.hvm.irq_lock);
+            hvm_gsi_execute_callbacks(gsi);
+            spin_lock(&d->arch.hvm.irq_lock);
+
             if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
                  !ent->fields.mask && !ent->fields.remote_irr &&
-                 hvm_irq->gsi_assert_count[vioapic->base_gsi + pin] )
+                 hvm_irq->gsi_assert_count[gsi] )
             {
                 ent->fields.remote_irr = 1;
                 vioapic_deliver(vioapic, pin);
@@ -443,7 +457,8 @@  static void ioapic_inj_irq(
     struct vlapic *target,
     uint8_t vector,
     uint8_t trig_mode,
-    uint8_t delivery_mode)
+    uint8_t delivery_mode,
+    bool callback)
 {
     HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
                 vector, trig_mode, delivery_mode);
@@ -452,7 +467,7 @@  static void ioapic_inj_irq(
            (delivery_mode == dest_LowestPrio));
 
     vlapic_set_irq_callback(target, vector, trig_mode,
-                            trig_mode ? eoi_callback : NULL, NULL);
+                            callback ? eoi_callback : NULL, NULL);
 }
 
 static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
@@ -466,6 +481,7 @@  static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
     struct vlapic *target;
     struct vcpu *v;
     unsigned int irq = vioapic->base_gsi + pin;
+    bool callback = trig_mode || hvm_gsi_has_callbacks(d, irq);
 
     ASSERT(spin_is_locked(&d->arch.hvm.irq_lock));
 
@@ -492,7 +508,8 @@  static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
             target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
         if ( target != NULL )
         {
-            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
+            ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode,
+                           callback);
         }
         else
         {
@@ -507,7 +524,7 @@  static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
         for_each_vcpu ( d, v )
             if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
                 ioapic_inj_irq(vioapic, vcpu_vlapic(v), vector, trig_mode,
-                               delivery_mode);
+                               delivery_mode, callback);
         break;
 
     case dest_NMI:
diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c
index a69aecad912..ca484c31b6a 100644
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -235,6 +235,8 @@  static void vpic_ioport_write(
                 unsigned int pin = __scanbit(pending, 8);
 
                 ASSERT(pin < 8);
+                hvm_gsi_execute_callbacks(
+                        hvm_isa_irq_to_gsi((addr >> 7) ? (pin | 8) : pin));
                 hvm_dpci_eoi(hvm_isa_irq_to_gsi((addr >> 7) ? (pin | 8) : pin));
                 __clear_bit(pin, &pending);
             }
@@ -284,8 +286,11 @@  static void vpic_ioport_write(
                 /* Release lock and EOI the physical interrupt (if any). */
                 vpic_update_int_output(vpic);
                 vpic_unlock(vpic);
+                hvm_gsi_execute_callbacks(
+                        hvm_isa_irq_to_gsi((addr >> 7) ? (pin | 8) : pin));
                 hvm_dpci_eoi(hvm_isa_irq_to_gsi((addr >> 7) ? (pin | 8) : pin));
                 return; /* bail immediately */
+
             case 6: /* Set Priority                */
                 vpic->priority_add = (val + 1) & 7;
                 break;
diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
index 07b1ab99cd1..0828c01dd18 100644
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -21,6 +21,7 @@ 
 #ifndef __ASM_X86_HVM_IRQ_H__
 #define __ASM_X86_HVM_IRQ_H__
 
+#include <xen/rwlock.h>
 #include <xen/timer.h>
 
 #include <asm/hvm/hvm.h>
@@ -79,6 +80,10 @@  struct hvm_irq {
 
     struct hvm_irq_dpci *dpci;
 
+    /* List of callbacks for GSI EOI events. Protected by gsi_callbacks_lock. */
+    struct list_head  *gsi_callbacks;
+    rwlock_t gsi_callbacks_lock;
+
     /*
      * Number of wires asserting each GSI.
      *
@@ -138,6 +143,13 @@  struct hvm_gmsi_info {
     bool posted; /* directly deliver to guest via VT-d PI? */
 };
 
+typedef void hvm_gsi_eoi_callback_t(unsigned int gsi, void *data);
+struct hvm_gsi_eoi_callback {
+    hvm_gsi_eoi_callback_t *callback;
+    void *data;
+    struct list_head list;
+};
+
 struct hvm_girq_dpci_mapping {
     struct list_head list;
     uint8_t bus;
@@ -225,4 +237,12 @@  void hvm_set_callback_via(struct domain *d, uint64_t via);
 struct pirq;
 bool hvm_domain_use_pirq(const struct domain *, const struct pirq *);
 
+int hvm_gsi_register_callback(struct domain *d, unsigned int gsi,
+                              struct hvm_gsi_eoi_callback *cb);
+void hvm_gsi_unregister_callback(struct domain *d, unsigned int gsi,
+                                 struct hvm_gsi_eoi_callback *cb);
+/* data is an opaque blob to pass to the callback if it has no private data. */
+void hvm_gsi_execute_callbacks(unsigned int gsi);
+bool hvm_gsi_has_callbacks(const struct domain *d, unsigned int gsi);
+
 #endif /* __ASM_X86_HVM_IRQ_H__ */