
[v4,01/10] evtchn: use per-channel lock where possible

Message ID e03cb246-c08b-5977-9137-a38974364445@suse.com
State Superseded
Series evtchn: (not so) recent XSAs follow-on

Commit Message

Jan Beulich Jan. 5, 2021, 1:09 p.m. UTC
Neither evtchn_status() nor domain_dump_evtchn_info() nor
flask_get_peer_sid() need to hold the per-domain lock - they all only
read a single channel's state (at a time, in the dump case).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.
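
[For context: the per-channel lock referred to throughout this thread
is the rwlock that struct evtchn gained in the rework following
XSA-343. A rough sketch of the read-side helpers the patch relies on -
not verbatim Xen source:]

    /* Per-channel read-side lock helpers, roughly as in xen/include/xen/event.h. */
    static inline void evtchn_read_lock(struct evtchn *evtchn)
    {
        read_lock(&evtchn->lock);
    }

    static inline bool evtchn_read_trylock(struct evtchn *evtchn)
    {
        /* Non-blocking variant, used e.g. from the keyhandler dump path. */
        return read_trylock(&evtchn->lock);
    }

    static inline void evtchn_read_unlock(struct evtchn *evtchn)
    {
        read_unlock(&evtchn->lock);
    }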

Comments

Julien Grall Jan. 8, 2021, 8:32 p.m. UTC | #1
Hi Jan,

On 05/01/2021 13:09, Jan Beulich wrote:
> Neither evtchn_status() nor domain_dump_evtchn_info() nor
> flask_get_peer_sid() need to hold the per-domain lock - they all only
> read a single channel's state (at a time, in the dump case).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v4: New.
> 
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>       if ( d == NULL )
>           return -ESRCH;
>   
> -    spin_lock(&d->event_lock);
> -
>       if ( !port_is_valid(d, port) )

There is one issue that is now becoming more apparent. To be clear, the 
problem is not in this patch, but I think it is the best place to 
discuss it as d->event_lock may be part of the solution.

After XSA-344, evtchn_destroy() will end up decrementing d->valid_evtchns.

Given that evtchn_status() can work on the non-current domain, it would 
be possible to run it concurrently with evtchn_destroy(). As a 
consequence, port_is_valid() will be unstable as a valid event channel 
may turn invalid.

AFAICT, we have been getting away with this so far, as the memory is
not freed until the domain is fully destroyed. However, we have
re-introduced XSA-338 in a different way.

To be clear, this is not the fault of this patch. But I don't think it
is sane to re-introduce a behavior that led us to an XSA.

I can see two solutions:
   1) Use d->event_lock to protect port_is_valid() when d != 
current->domain. This would require evtchn_destroy() to grab the lock 
when updating d->valid_evtchns.
   2) Never decrement d->valid_evtchns and use a different field for 
closing ports

I am not a big fan of 1) because this is muddying the already complex 
locking situation in the event channel code. But I suggested it because 
I wasn't sure whether you would be happy with 2).

If you are happy with 2), then the lock can be dropped here. I would be 
happy to send the patch for either 1) or 2) depending on the agreement here.
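
[For illustration, a minimal sketch of what option 2 could look like -
the ports_closed field is hypothetical, named here only to show the
idea; existing barriers and checks are elided:]

    /*
     * Option 2, sketched: d->valid_evtchns is never decremented, so
     * port_is_valid() stays stable even while the domain is being torn
     * down; a separate (hypothetical) counter accounts for closed ports.
     */
    int evtchn_destroy(struct domain *d)
    {
        unsigned int i;

        /* ... existing setup and barriers ... */

        for ( i = 1; port_is_valid(d, i); i++ )  /* port 0 is reserved */
        {
            evtchn_close(d, i, 0);

            /*
             * Instead of write_atomic(&d->valid_evtchns, d->valid_evtchns - 1),
             * leave valid_evtchns alone and only record teardown progress.
             */
            write_atomic(&d->ports_closed, d->ports_closed + 1);
        }

        /* ... */
        return 0;
    }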

Cheers,


>       {
> -        rc = -EINVAL;
> -        goto out;
> +        rcu_unlock_domain(d);
> +        return -EINVAL;
>       }
>   
>       chn = evtchn_from_port(d, port);
> +
> +    evtchn_read_lock(chn);
> +
>       if ( consumer_is_xen(chn) )
>       {
>           rc = -EACCES;
> @@ -1021,7 +1022,7 @@ int evtchn_status(evtchn_status_t *statu
>       status->vcpu = chn->notify_vcpu_id;
>   
>    out:
> -    spin_unlock(&d->event_lock);
> +    evtchn_read_unlock(chn);
>       rcu_unlock_domain(d);
>   
>       return rc;
> @@ -1576,22 +1577,32 @@ void evtchn_move_pirqs(struct vcpu *v)
>   static void domain_dump_evtchn_info(struct domain *d)
>   {
>       unsigned int port;
> -    int irq;
>   
>       printk("Event channel information for domain %d:\n"
>              "Polling vCPUs: {%*pbl}\n"
>              "    port [p/m/s]\n", d->domain_id, d->max_vcpus, d->poll_mask);
>   
> -    spin_lock(&d->event_lock);
> -
>       for ( port = 1; port_is_valid(d, port); ++port )
>       {
> -        const struct evtchn *chn;
> +        struct evtchn *chn;
>           char *ssid;
>   
> +        if ( !(port & 0x3f) )
> +            process_pending_softirqs();
> +
>           chn = evtchn_from_port(d, port);
> +
> +        if ( !evtchn_read_trylock(chn) )
> +        {
> +            printk("    %4u in flux\n", port);
> +            continue;
> +        }
> +
>           if ( chn->state == ECS_FREE )
> +        {
> +            evtchn_read_unlock(chn);
>               continue;
> +        }
>   
>           printk("    %4u [%d/%d/",
>                  port,
> @@ -1601,26 +1612,49 @@ static void domain_dump_evtchn_info(stru
>           printk("]: s=%d n=%d x=%d",
>                  chn->state, chn->notify_vcpu_id, chn->xen_consumer);
>   
> +        ssid = xsm_show_security_evtchn(d, chn);
> +
>           switch ( chn->state )
>           {
>           case ECS_UNBOUND:
>               printk(" d=%d", chn->u.unbound.remote_domid);
>               break;
> +
>           case ECS_INTERDOMAIN:
>               printk(" d=%d p=%d",
>                      chn->u.interdomain.remote_dom->domain_id,
>                      chn->u.interdomain.remote_port);
>               break;
> -        case ECS_PIRQ:
> -            irq = domain_pirq_to_irq(d, chn->u.pirq.irq);
> -            printk(" p=%d i=%d", chn->u.pirq.irq, irq);
> +
> +        case ECS_PIRQ: {
> +            unsigned int pirq = chn->u.pirq.irq;
> +
> +            /*
> +             * The per-channel locks nest inside the per-domain one, so we
> +             * can't acquire the latter without first letting go of the former.
> +             */
> +            evtchn_read_unlock(chn);
> +            chn = NULL;
> +            if ( spin_trylock(&d->event_lock) )
> +            {
> +                int irq = domain_pirq_to_irq(d, pirq);
> +
> +                spin_unlock(&d->event_lock);
> +                printk(" p=%u i=%d", pirq, irq);
> +            }
> +            else
> +                printk(" p=%u i=?", pirq);
>               break;
> +        }
> +
>           case ECS_VIRQ:
>               printk(" v=%d", chn->u.virq);
>               break;
>           }
>   
> -        ssid = xsm_show_security_evtchn(d, chn);
> +        if ( chn )
> +            evtchn_read_unlock(chn);
> +
>           if (ssid) {
>               printk(" Z=%s\n", ssid);
>               xfree(ssid);
> @@ -1628,8 +1662,6 @@ static void domain_dump_evtchn_info(stru
>               printk("\n");
>           }
>       }
> -
> -    spin_unlock(&d->event_lock);
>   }
>   
>   static void dump_evtchn_info(unsigned char key)
> --- a/xen/xsm/flask/flask_op.c
> +++ b/xen/xsm/flask/flask_op.c
> @@ -555,12 +555,13 @@ static int flask_get_peer_sid(struct xen
>       struct evtchn *chn;
>       struct domain_security_struct *dsec;
>   
> -    spin_lock(&d->event_lock);
> -
>       if ( !port_is_valid(d, arg->evtchn) )
> -        goto out;
> +        return -EINVAL;
>   
>       chn = evtchn_from_port(d, arg->evtchn);
> +
> +    evtchn_read_lock(chn);
> +
>       if ( chn->state != ECS_INTERDOMAIN )
>           goto out;
>   
> @@ -573,7 +574,7 @@ static int flask_get_peer_sid(struct xen
>       rv = 0;
>   
>    out:
> -    spin_unlock(&d->event_lock);
> +    evtchn_read_unlock(chn);
>       return rv;
>   }
>   
>
Jan Beulich Jan. 11, 2021, 10:14 a.m. UTC | #2
On 08.01.2021 21:32, Julien Grall wrote:
> Hi Jan,
> 
> On 05/01/2021 13:09, Jan Beulich wrote:
>> Neither evtchn_status() nor domain_dump_evtchn_info() nor
>> flask_get_peer_sid() need to hold the per-domain lock - they all only
>> read a single channel's state (at a time, in the dump case).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v4: New.
>>
>> --- a/xen/common/event_channel.c
>> +++ b/xen/common/event_channel.c
>> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>>       if ( d == NULL )
>>           return -ESRCH;
>>   
>> -    spin_lock(&d->event_lock);
>> -
>>       if ( !port_is_valid(d, port) )
> 
> There is one issue that is now becoming more apparent. To be clear, the 
> problem is not in this patch, but I think it is the best place to 
> discuss it as d->event_lock may be part of the solution.
> 
> After XSA-344, evtchn_destroy() will end up decrementing d->valid_evtchns.
> 
> Given that evtchn_status() can work on the non-current domain, it would 
> be possible to run it concurrently with evtchn_destroy(). As a 
> consequence, port_is_valid() will be unstable as a valid event channel 
> may turn invalid.
> 
> AFAICT, we have been getting away with this so far, as the memory is
> not freed until the domain is fully destroyed. However, we have
> re-introduced XSA-338 in a different way.
> 
> To be clear, this is not the fault of this patch. But I don't think it
> is sane to re-introduce a behavior that led us to an XSA.

I'm getting confused, I'm afraid, from the varying statements above:
Are you suggesting this patch does re-introduce bad behavior or not?

Yes, the decrementing of ->valid_evtchns has a similar effect, but
I'm not convinced it gets us into XSA territory again. The problem
wasn't the reducing of ->max_evtchns as such, but the derived
assumptions elsewhere in the code. If there were any such again, I
suppose we'd have reason to issue another XSA.

Furthermore there are other paths already using port_is_valid()
without holding the domain's event lock; I've not been able to spot
a problem with this though, so far.

> I can see two solutions:
>    1) Use d->event_lock to protect port_is_valid() when d != 
> current->domain. This would require evtchn_destroy() to grab the lock 
> when updating d->valid_evtchns.
>    2) Never decrement d->valid_evtchns and use a different field for 
> closing ports
> 
> I am not a big fan of 1) because this is muddying the already complex 
> locking situation in the event channel code. But I suggested it because 
> I wasn't sure whether you would be happy with 2).

I agree 1) wouldn't be very nice, and you're right in assuming I
wouldn't like 2) very much. For the moment I'm not (yet) convinced
we need to do anything at all - as you say yourself, while the
result of port_is_valid() is potentially unstable when a domain is
in the process of being cleaned up, the state guarded by such
checks remains usable in (I think) a race-free manner.

Jan
Julien Grall Jan. 11, 2021, 11:06 a.m. UTC | #3
Hi Jan,

On 11/01/2021 10:14, Jan Beulich wrote:
> On 08.01.2021 21:32, Julien Grall wrote:
>> Hi Jan,
>>
>> On 05/01/2021 13:09, Jan Beulich wrote:
>>> Neither evtchn_status() nor domain_dump_evtchn_info() nor
>>> flask_get_peer_sid() need to hold the per-domain lock - they all only
>>> read a single channel's state (at a time, in the dump case).
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> v4: New.
>>>
>>> --- a/xen/common/event_channel.c
>>> +++ b/xen/common/event_channel.c
>>> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>>>        if ( d == NULL )
>>>            return -ESRCH;
>>>    
>>> -    spin_lock(&d->event_lock);
>>> -
>>>        if ( !port_is_valid(d, port) )
>>
>> There is one issue that is now becoming more apparent. To be clear, the
>> problem is not in this patch, but I think it is the best place to
>> discuss it as d->event_lock may be part of the solution.
>>
>> After XSA-344, evtchn_destroy() will end up decrementing d->valid_evtchns.
>>
>> Given that evtchn_status() can work on the non-current domain, it would
>> be possible to run it concurrently with evtchn_destroy(). As a
>> consequence, port_is_valid() will be unstable as a valid event channel
>> may turn invalid.
>>
>> AFAICT, we have been getting away with this so far, as the memory is
>> not freed until the domain is fully destroyed. However, we have
>> re-introduced XSA-338 in a different way.
>>
>> To be clear, this is not the fault of this patch. But I don't think it
>> is sane to re-introduce a behavior that led us to an XSA.
> 
> I'm getting confused, I'm afraid, from the varying statements above:
> Are you suggesting this patch does re-introduce bad behavior or not?

No. I am pointing out that this is widening the bad behavior (again).

> 
> Yes, the decrementing of ->valid_evtchns has a similar effect, but
> I'm not convinced it gets us into XSA territory again. The problem
> wasn't the reducing of ->max_evtchns as such, but the derived
> assumptions elsewhere in the code. If there were any such again, I
> suppose we'd have reason to issue another XSA.

I don't think it gets us into XSA territory yet. However, the
locking/interaction in the event channel code is quite complex.

To give a concrete example, below is the current implementation of
free_xen_event_channel():

     if ( !port_is_valid(d, port) )
     {
         /*
          * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
          * with the spin_barrier() and BUG_ON() in evtchn_destroy().
          */
         smp_rmb();
         BUG_ON(!d->is_dying);
         return;
     }

     evtchn_close(d, port, 0);

It would be fair for a developer to assume that after the check above, 
port_is_valid() would return true. However, this is not the case...

I am not aware of any issue so far... But I am not ready to bet that
this is not going to be missed. How about you?

 > If there were any such again, I
 > suppose we'd have reason to issue another XSA.

The point of my e-mail is to prevent such an XSA from happening. I am
pretty sure you want the same.

> 
> Furthermore there are other paths already using port_is_valid()
> without holding the domain's event lock; I've not been able to spot
> a problem with this though, so far.

Right. Most of them are fine because d == current. Therefore the
domain must still be running, and evtchn_destroy() cannot run concurrently.

> 
>> I can see two solutions:
>>     1) Use d->event_lock to protect port_is_valid() when d !=
>> current->domain. This would require evtchn_destroy() to grab the lock
>> when updating d->valid_evtchns.
>>     2) Never decrement d->valid_evtchns and use a different field for
>> closing ports
>>
>> I am not a big fan of 1) because this is muddying the already complex
>> locking situation in the event channel code. But I suggested it because
>> I wasn't sure whether you would be happy with 2).
> 
> I agree 1) wouldn't be very nice, and you're right in assuming I
> wouldn't like 2) very much. For the moment I'm not (yet) convinced
> we need to do anything at all - as you say yourself, while the
> result of port_is_valid() is potentially unstable when a domain is
> in the process of being cleaned up, the state guarded by such
> checks remains usable in (I think) a race-free manner.

It remains usable *today*; the question is how long that will last.

All the recent XSAs in the event channel code have taught me that the
locking/interaction there is extremely complex. This series is further
proof of that.

We would save ourselves quite a bit of trouble by making
port_is_valid() stable regardless of the state of the domain.

I think an extra field (option 2) is quite a good compromise between
space use, maintenance, and speed.

I would be interested to hear from others.

Cheers,
Jan Beulich Jan. 27, 2021, 7:39 a.m. UTC | #4
On 11.01.2021 12:06, Julien Grall wrote:
> On 11/01/2021 10:14, Jan Beulich wrote:
>> On 08.01.2021 21:32, Julien Grall wrote:
>>> On 05/01/2021 13:09, Jan Beulich wrote:
>>>> Neither evtchn_status() nor domain_dump_evtchn_info() nor
>>>> flask_get_peer_sid() need to hold the per-domain lock - they all only
>>>> read a single channel's state (at a time, in the dump case).
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> v4: New.
>>>>
>>>> --- a/xen/common/event_channel.c
>>>> +++ b/xen/common/event_channel.c
>>>> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>>>>        if ( d == NULL )
>>>>            return -ESRCH;
>>>>    
>>>> -    spin_lock(&d->event_lock);
>>>> -
>>>>        if ( !port_is_valid(d, port) )
>>>
>>> There is one issue that is now becoming more apparent. To be clear, the
>>> problem is not in this patch, but I think it is the best place to
>>> discuss it as d->event_lock may be part of the solution.
>>>
>>> After XSA-344, evtchn_destroy() will end up decrementing d->valid_evtchns.
>>>
>>> Given that evtchn_status() can work on the non-current domain, it would
>>> be possible to run it concurrently with evtchn_destroy(). As a
>>> consequence, port_is_valid() will be unstable as a valid event channel
>>> may turn invalid.
>>>
>>> AFAICT, we have been getting away with this so far, as the memory is
>>> not freed until the domain is fully destroyed. However, we have
>>> re-introduced XSA-338 in a different way.
>>>
>>> To be clear, this is not the fault of this patch. But I don't think it
>>> is sane to re-introduce a behavior that led us to an XSA.
>>
>> I'm getting confused, I'm afraid, from the varying statements above:
>> Are you suggesting this patch does re-introduce bad behavior or not?
> 
> No. I am pointing out that this is widening the bad behavior (again).

Since I'd really like to get in some more of this series before
the full freeze, and hence I want (need) to re-post, I thought
I'd reply here despite (or in light of) your request for input
from others not having been met.

I don't view this as "bad" behaviour, btw. The situation is
quite different to that which had led to the XSA: Here we
only deal with the "illusion" of a port having become invalid.
IOW yes, ...

>> Yes, the decrementing of ->valid_evtchns has a similar effect, but
>> I'm not convinced it gets us into XSA territory again. The problem
>> wasn't the reducing of ->max_evtchns as such, but the derived
>> assumptions elsewhere in the code. If there were any such again, I
>> suppose we'd have reason to issue another XSA.
> 
> I don't think it gets us into XSA territory yet. However, the 
> locking/interaction in the event channel code is quite complex.
> 
> To give a concrete example, below is the current implementation of 
> free_xen_event_channel():
> 
>      if ( !port_is_valid(d, port) )
>      {
>          /*
>           * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
>           * with the spin_barrier() and BUG_ON() in evtchn_destroy().
>           */
>          smp_rmb();
>          BUG_ON(!d->is_dying);
>          return;
>      }
> 
>      evtchn_close(d, port, 0);
> 
> It would be fair for a developer to assume that after the check above, 
> port_is_valid() would return true. However, this is not the case...

... there needs to be awareness that putting e.g.

    ASSERT(port_is_valid(d, port));

anywhere past the if() cannot be done without considering domain
cleanup logic.

> I am not aware of any issue so far... But I am not ready to bet that 
> this is not going to be missed. How about you?

There is a risk of this being overlooked, yes. But I'm unconvinced
this absolutely requires measures to be taken beyond, maybe, the
addition of a comment somewhere. I do, in particular, not think
this should stand in the way of the locking relaxation done by
this patch, especially since (just to repeat) it merely introduces
more instances of a pattern found elsewhere already.
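
[Such a comment might, for illustration, sit next to port_is_valid()
itself - the function body is shown only roughly, and the comment
wording is hypothetical:]

    static inline bool port_is_valid(struct domain *d, unsigned int p)
    {
        /*
         * Beware: during domain cleanup evtchn_destroy() decrements
         * d->valid_evtchns, so for d != current->domain a port that was
         * valid here can become invalid at any time. Don't assume the
         * result is stable unless the domain is known not to be dying.
         */
        if ( p >= read_atomic(&d->valid_evtchns) )
            return false;

        return true;
    }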

Jan

Patch

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -968,15 +968,16 @@  int evtchn_status(evtchn_status_t *statu
     if ( d == NULL )
         return -ESRCH;
 
-    spin_lock(&d->event_lock);
-
     if ( !port_is_valid(d, port) )
     {
-        rc = -EINVAL;
-        goto out;
+        rcu_unlock_domain(d);
+        return -EINVAL;
     }
 
     chn = evtchn_from_port(d, port);
+
+    evtchn_read_lock(chn);
+
     if ( consumer_is_xen(chn) )
     {
         rc = -EACCES;
@@ -1021,7 +1022,7 @@  int evtchn_status(evtchn_status_t *statu
     status->vcpu = chn->notify_vcpu_id;
 
  out:
-    spin_unlock(&d->event_lock);
+    evtchn_read_unlock(chn);
     rcu_unlock_domain(d);
 
     return rc;
@@ -1576,22 +1577,32 @@  void evtchn_move_pirqs(struct vcpu *v)
 static void domain_dump_evtchn_info(struct domain *d)
 {
     unsigned int port;
-    int irq;
 
     printk("Event channel information for domain %d:\n"
            "Polling vCPUs: {%*pbl}\n"
            "    port [p/m/s]\n", d->domain_id, d->max_vcpus, d->poll_mask);
 
-    spin_lock(&d->event_lock);
-
     for ( port = 1; port_is_valid(d, port); ++port )
     {
-        const struct evtchn *chn;
+        struct evtchn *chn;
         char *ssid;
 
+        if ( !(port & 0x3f) )
+            process_pending_softirqs();
+
         chn = evtchn_from_port(d, port);
+
+        if ( !evtchn_read_trylock(chn) )
+        {
+            printk("    %4u in flux\n", port);
+            continue;
+        }
+
         if ( chn->state == ECS_FREE )
+        {
+            evtchn_read_unlock(chn);
             continue;
+        }
 
         printk("    %4u [%d/%d/",
                port,
@@ -1601,26 +1612,49 @@  static void domain_dump_evtchn_info(stru
         printk("]: s=%d n=%d x=%d",
                chn->state, chn->notify_vcpu_id, chn->xen_consumer);
 
+        ssid = xsm_show_security_evtchn(d, chn);
+
         switch ( chn->state )
         {
         case ECS_UNBOUND:
             printk(" d=%d", chn->u.unbound.remote_domid);
             break;
+
         case ECS_INTERDOMAIN:
             printk(" d=%d p=%d",
                    chn->u.interdomain.remote_dom->domain_id,
                    chn->u.interdomain.remote_port);
             break;
-        case ECS_PIRQ:
-            irq = domain_pirq_to_irq(d, chn->u.pirq.irq);
-            printk(" p=%d i=%d", chn->u.pirq.irq, irq);
+
+        case ECS_PIRQ: {
+            unsigned int pirq = chn->u.pirq.irq;
+
+            /*
+             * The per-channel locks nest inside the per-domain one, so we
+             * can't acquire the latter without first letting go of the former.
+             */
+            evtchn_read_unlock(chn);
+            chn = NULL;
+            if ( spin_trylock(&d->event_lock) )
+            {
+                int irq = domain_pirq_to_irq(d, pirq);
+
+                spin_unlock(&d->event_lock);
+                printk(" p=%u i=%d", pirq, irq);
+            }
+            else
+                printk(" p=%u i=?", pirq);
             break;
+        }
+
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
             break;
         }
 
-        ssid = xsm_show_security_evtchn(d, chn);
+        if ( chn )
+            evtchn_read_unlock(chn);
+
         if (ssid) {
             printk(" Z=%s\n", ssid);
             xfree(ssid);
@@ -1628,8 +1662,6 @@  static void domain_dump_evtchn_info(stru
             printk("\n");
         }
     }
-
-    spin_unlock(&d->event_lock);
 }
 
 static void dump_evtchn_info(unsigned char key)
--- a/xen/xsm/flask/flask_op.c
+++ b/xen/xsm/flask/flask_op.c
@@ -555,12 +555,13 @@  static int flask_get_peer_sid(struct xen
     struct evtchn *chn;
     struct domain_security_struct *dsec;
 
-    spin_lock(&d->event_lock);
-
     if ( !port_is_valid(d, arg->evtchn) )
-        goto out;
+        return -EINVAL;
 
     chn = evtchn_from_port(d, arg->evtchn);
+
+    evtchn_read_lock(chn);
+
     if ( chn->state != ECS_INTERDOMAIN )
         goto out;
 
@@ -573,7 +574,7 @@  static int flask_get_peer_sid(struct xen
     rv = 0;
 
  out:
-    spin_unlock(&d->event_lock);
+    evtchn_read_unlock(chn);
     return rv;
 }