Message ID | e03cb246-c08b-5977-9137-a38974364445@suse.com
---|---
State | Superseded
Series | evtchn: (not so) recent XSAs follow-on
Hi Jan,

On 05/01/2021 13:09, Jan Beulich wrote:
> Neither evtchn_status() nor domain_dump_evtchn_info() nor
> flask_get_peer_sid() need to hold the per-domain lock - they all only
> read a single channel's state (at a time, in the dump case).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v4: New.
>
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>      if ( d == NULL )
>          return -ESRCH;
>
> -    spin_lock(&d->event_lock);
> -
>      if ( !port_is_valid(d, port) )

There is one issue that is now becoming more apparent. To be clear, the
problem is not in this patch, but I think it is the best place to
discuss it, as d->event_lock may be part of the solution.

After XSA-344, evtchn_destroy() will end up decrementing
d->valid_evtchns.

Given that evtchn_status() can work on a non-current domain, it would
be possible to run it concurrently with evtchn_destroy(). As a
consequence, port_is_valid() will be unstable, as a valid event channel
may turn invalid.

AFAICT, we have been getting away with this so far, as the memory is
not freed until the domain is fully destroyed. However, we have
re-introduced XSA-338 in a different way.

To be clear, this is not the fault of this patch. But I don't think it
is sane to re-introduce a behavior that led us to an XSA.

I can see two solutions:
 1) Use d->event_lock to protect port_is_valid() when d !=
    current->domain. This would require evtchn_destroy() to grab the
    lock when updating d->valid_evtchns.
 2) Never decrement d->valid_evtchns and use a different field for
    closing ports.

I am not a big fan of 1) because it muddies the already complex
locking situation in the event channel code. But I suggested it because
I wasn't sure whether you would be happy with 2).

If you are happy with 2), then the lock can be dropped here. I would be
happy to send the patch for either 1) or 2) depending on the agreement
here.
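Just to make 2) a little more concrete, a very rough sketch follows. Note
that "active_evtchns" is a field name made up purely for this illustration
(nothing of that name exists in the tree today), the helper name
evtchn_teardown_ports() is equally invented, and both functions are
reduced to the parts relevant to the argument:

    /*
     * Sketch only: assumes struct domain gains a hypothetical
     * "unsigned int active_evtchns" field counting ports not yet torn down.
     */
    static inline bool port_is_valid(const struct domain *d, evtchn_port_t p)
    {
        /* valid_evtchns never shrinks in this scheme, so this can't go stale. */
        return p < read_atomic(&d->valid_evtchns);
    }

    /* Teardown (from evtchn_destroy()) would then track its own progress: */
    static void evtchn_teardown_ports(struct domain *d)
    {
        unsigned int port;

        for ( port = 0; port_is_valid(d, port); ++port )
        {
            evtchn_close(d, port, 0);
            write_atomic(&d->active_evtchns, d->active_evtchns - 1);
        }
        /* "All ports gone" then becomes !read_atomic(&d->active_evtchns). */
    }

That way readers such as evtchn_status() could keep using port_is_valid()
without extra locking, while evtchn_destroy() still has a way to tell when
it is done.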
Cheers,
On 08.01.2021 21:32, Julien Grall wrote:
> Hi Jan,
>
> On 05/01/2021 13:09, Jan Beulich wrote:
>> Neither evtchn_status() nor domain_dump_evtchn_info() nor
>> flask_get_peer_sid() need to hold the per-domain lock - they all only
>> read a single channel's state (at a time, in the dump case).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v4: New.
>>
>> --- a/xen/common/event_channel.c
>> +++ b/xen/common/event_channel.c
>> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>>      if ( d == NULL )
>>          return -ESRCH;
>>
>> -    spin_lock(&d->event_lock);
>> -
>>      if ( !port_is_valid(d, port) )
>
> There is one issue that is now becoming more apparent. To be clear, the
> problem is not in this patch, but I think it is the best place to
> discuss it, as d->event_lock may be part of the solution.
>
> After XSA-344, evtchn_destroy() will end up decrementing
> d->valid_evtchns.
>
> Given that evtchn_status() can work on a non-current domain, it would
> be possible to run it concurrently with evtchn_destroy(). As a
> consequence, port_is_valid() will be unstable, as a valid event channel
> may turn invalid.
>
> AFAICT, we have been getting away with this so far, as the memory is
> not freed until the domain is fully destroyed. However, we have
> re-introduced XSA-338 in a different way.
>
> To be clear, this is not the fault of this patch. But I don't think it
> is sane to re-introduce a behavior that led us to an XSA.

I'm getting confused, I'm afraid, from the varying statements above:
Are you suggesting this patch does re-introduce bad behavior or not?

Yes, the decrementing of ->valid_evtchns has a similar effect, but
I'm not convinced it gets us into XSA territory again. The problem
wasn't the reducing of ->max_evtchns as such, but the derived
assumptions elsewhere in the code. If there were any such again, I
suppose we'd have reason to issue another XSA.

Furthermore, there are other paths already using port_is_valid()
without holding the domain's event lock; I've not been able to spot
a problem with this so far, though.

> I can see two solutions:
>  1) Use d->event_lock to protect port_is_valid() when d !=
>     current->domain. This would require evtchn_destroy() to grab the
>     lock when updating d->valid_evtchns.
>  2) Never decrement d->valid_evtchns and use a different field for
>     closing ports.
>
> I am not a big fan of 1) because it muddies the already complex
> locking situation in the event channel code. But I suggested it because
> I wasn't sure whether you would be happy with 2).

I agree 1) wouldn't be very nice, and you're right in assuming I
wouldn't like 2) very much. For the moment I'm not (yet) convinced
we need to do anything at all - as you say yourself, while the
result of port_is_valid() is potentially unstable when a domain is
in the process of being cleaned up, the state guarded by such
checks remains usable in (I think) a race-free manner.

Jan
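For reference, the pattern these readers are being switched to, condensed
from the evtchn_status() hunk of the patch (error paths and the actual
state reads trimmed):

    /* No d->event_lock taken; d is held via RCU by the caller. */
    if ( !port_is_valid(d, port) )
    {
        rcu_unlock_domain(d);
        return -EINVAL;
    }

    chn = evtchn_from_port(d, port);

    evtchn_read_lock(chn);   /* the per-channel lock guards what is read */

    /* ... read chn->state, chn->notify_vcpu_id, ... */

    evtchn_read_unlock(chn);
    rcu_unlock_domain(d);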
Hi Jan,

On 11/01/2021 10:14, Jan Beulich wrote:
> On 08.01.2021 21:32, Julien Grall wrote:
>> Hi Jan,
>>
>> On 05/01/2021 13:09, Jan Beulich wrote:
>>> Neither evtchn_status() nor domain_dump_evtchn_info() nor
>>> flask_get_peer_sid() need to hold the per-domain lock - they all only
>>> read a single channel's state (at a time, in the dump case).
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> v4: New.
>>>
>>> --- a/xen/common/event_channel.c
>>> +++ b/xen/common/event_channel.c
>>> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>>>      if ( d == NULL )
>>>          return -ESRCH;
>>>
>>> -    spin_lock(&d->event_lock);
>>> -
>>>      if ( !port_is_valid(d, port) )
>>
>> There is one issue that is now becoming more apparent. To be clear, the
>> problem is not in this patch, but I think it is the best place to
>> discuss it, as d->event_lock may be part of the solution.
>>
>> After XSA-344, evtchn_destroy() will end up decrementing
>> d->valid_evtchns.
>>
>> Given that evtchn_status() can work on a non-current domain, it would
>> be possible to run it concurrently with evtchn_destroy(). As a
>> consequence, port_is_valid() will be unstable, as a valid event channel
>> may turn invalid.
>>
>> AFAICT, we have been getting away with this so far, as the memory is
>> not freed until the domain is fully destroyed. However, we have
>> re-introduced XSA-338 in a different way.
>>
>> To be clear, this is not the fault of this patch. But I don't think it
>> is sane to re-introduce a behavior that led us to an XSA.
>
> I'm getting confused, I'm afraid, from the varying statements above:
> Are you suggesting this patch does re-introduce bad behavior or not?

No. I am pointing out that this is widening the bad behavior (again).

> Yes, the decrementing of ->valid_evtchns has a similar effect, but
> I'm not convinced it gets us into XSA territory again. The problem
> wasn't the reducing of ->max_evtchns as such, but the derived
> assumptions elsewhere in the code. If there were any such again, I
> suppose we'd have reason to issue another XSA.

I don't think it gets us into XSA territory yet. However, the
locking/interaction in the event channel code is quite complex.

To give a concrete example, below is the current implementation of
free_xen_event_channel():

    if ( !port_is_valid(d, port) )
    {
        /*
         * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
         * with the spin_barrier() and BUG_ON() in evtchn_destroy().
         */
        smp_rmb();
        BUG_ON(!d->is_dying);
        return;
    }

    evtchn_close(d, port, 0);

It would be fair for a developer to assume that, after the check above,
port_is_valid() would return true. However, this is not the case...

I am not aware of any issue so far... But I am not ready to bet that
this is not going to be missed. How about you?
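To put the pattern I am worried about into code form (purely an
illustration, not code from the tree):

    /*
     * d is NOT the current domain here, so evtchn_destroy() may be running
     * concurrently and decrementing d->valid_evtchns.
     */
    if ( port_is_valid(d, port) )
    {
        struct evtchn *chn = evtchn_from_port(d, port);

        /*
         * The check above may already be stale: re-evaluating
         * port_is_valid(d, port) at this point could return false.  The
         * struct evtchn itself is not freed before complete domain
         * destruction, but e.g. an ASSERT(port_is_valid(d, port)) added
         * here later would be a latent bug.
         */
        evtchn_read_lock(chn);
        /* ... inspect chn->state ... */
        evtchn_read_unlock(chn);
    }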
> If there were any such again, I
> suppose we'd have reason to issue another XSA.

The point of my e-mail is to prevent that XSA from happening. I am
pretty sure you want the same.

> Furthermore, there are other paths already using port_is_valid()
> without holding the domain's event lock; I've not been able to spot
> a problem with this so far, though.

Right. Most of them are fine because d == current->domain. Therefore
the domain must be running, and evtchn_destroy() couldn't happen
concurrently.

>> I can see two solutions:
>>  1) Use d->event_lock to protect port_is_valid() when d !=
>>     current->domain. This would require evtchn_destroy() to grab the
>>     lock when updating d->valid_evtchns.
>>  2) Never decrement d->valid_evtchns and use a different field for
>>     closing ports.
>>
>> I am not a big fan of 1) because it muddies the already complex
>> locking situation in the event channel code. But I suggested it because
>> I wasn't sure whether you would be happy with 2).
>
> I agree 1) wouldn't be very nice, and you're right in assuming I
> wouldn't like 2) very much. For the moment I'm not (yet) convinced
> we need to do anything at all - as you say yourself, while the
> result of port_is_valid() is potentially unstable when a domain is
> in the process of being cleaned up, the state guarded by such
> checks remains usable in (I think) a race-free manner.

It remains usable *today*; the question is how long this will last.
All the recent XSAs in the event channel code taught me that the
locking/interaction there is extremely complex. This series is another
proof of that.

We would save ourselves quite a bit of trouble by making
port_is_valid() stable no matter the state of the domain.

I think an extra field (option 2) is quite a good compromise between
space use, maintenance, and speed.

I would be interested to hear from others.

Cheers,
On 11.01.2021 12:06, Julien Grall wrote:
> On 11/01/2021 10:14, Jan Beulich wrote:
>> On 08.01.2021 21:32, Julien Grall wrote:
>>> On 05/01/2021 13:09, Jan Beulich wrote:
>>>> Neither evtchn_status() nor domain_dump_evtchn_info() nor
>>>> flask_get_peer_sid() need to hold the per-domain lock - they all only
>>>> read a single channel's state (at a time, in the dump case).
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> v4: New.
>>>>
>>>> --- a/xen/common/event_channel.c
>>>> +++ b/xen/common/event_channel.c
>>>> @@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
>>>>      if ( d == NULL )
>>>>          return -ESRCH;
>>>>
>>>> -    spin_lock(&d->event_lock);
>>>> -
>>>>      if ( !port_is_valid(d, port) )
>>>
>>> There is one issue that is now becoming more apparent. To be clear, the
>>> problem is not in this patch, but I think it is the best place to
>>> discuss it, as d->event_lock may be part of the solution.
>>>
>>> After XSA-344, evtchn_destroy() will end up decrementing
>>> d->valid_evtchns.
>>>
>>> Given that evtchn_status() can work on a non-current domain, it would
>>> be possible to run it concurrently with evtchn_destroy(). As a
>>> consequence, port_is_valid() will be unstable, as a valid event channel
>>> may turn invalid.
>>>
>>> AFAICT, we have been getting away with this so far, as the memory is
>>> not freed until the domain is fully destroyed. However, we have
>>> re-introduced XSA-338 in a different way.
>>>
>>> To be clear, this is not the fault of this patch. But I don't think it
>>> is sane to re-introduce a behavior that led us to an XSA.
>>
>> I'm getting confused, I'm afraid, from the varying statements above:
>> Are you suggesting this patch does re-introduce bad behavior or not?
>
> No. I am pointing out that this is widening the bad behavior (again).

Since I'd really like to get some more of this series in before the
full freeze, and hence I want (need) to re-post, I thought I'd reply
here despite (or in light of) your request for input from others not
having been met.

I don't view this as "bad" behaviour, btw. The situation is quite
different from the one which had led to the XSA: here we only deal
with the "illusion" of a port having become invalid. IOW yes, ...

>> Yes, the decrementing of ->valid_evtchns has a similar effect, but
>> I'm not convinced it gets us into XSA territory again. The problem
>> wasn't the reducing of ->max_evtchns as such, but the derived
>> assumptions elsewhere in the code. If there were any such again, I
>> suppose we'd have reason to issue another XSA.
>
> I don't think it gets us into XSA territory yet. However, the
> locking/interaction in the event channel code is quite complex.
>
> To give a concrete example, below is the current implementation of
> free_xen_event_channel():
>
>     if ( !port_is_valid(d, port) )
>     {
>         /*
>          * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
>          * with the spin_barrier() and BUG_ON() in evtchn_destroy().
>          */
>         smp_rmb();
>         BUG_ON(!d->is_dying);
>         return;
>     }
>
>     evtchn_close(d, port, 0);
>
> It would be fair for a developer to assume that, after the check above,
> port_is_valid() would return true. However, this is not the case...

... there needs to be awareness that putting e.g.

    ASSERT(port_is_valid(d, port));

anywhere past the if() cannot be done without considering domain
cleanup logic.

> I am not aware of any issue so far... But I am not ready to bet that
> this is not going to be missed. How about you?

There is a risk of this being overlooked, yes. But I'm unconvinced this
absolutely requires measures to be taken beyond, maybe, the addition of
a comment somewhere. I do, in particular, not think this should stand
in the way of the locking relaxation done by this patch, even more so
as (just to repeat) it merely introduces more instances of a pattern
found elsewhere already.

Jan
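For illustration, the kind of comment mentioned above might look like the
below (placement and wording of course up for discussion; the body of
port_is_valid() is reduced to its core check here):

    static inline bool port_is_valid(struct domain *d, evtchn_port_t p)
    {
        /*
         * Stability note: while a domain is being cleaned up,
         * evtchn_destroy() decrements d->valid_evtchns as it closes ports.
         * Hence, unless d == current->domain (the domain is still running
         * then, so evtchn_destroy() can't be in progress) or the caller
         * otherwise synchronises with domain destruction, a port reported
         * valid here may be reported invalid by a later call.  The
         * underlying struct evtchn is nevertheless not freed before
         * complete domain destruction, so a pointer obtained via
         * evtchn_from_port() remains safe to dereference, with the
         * per-channel lock guarding its contents.
         */
        return p < read_atomic(&d->valid_evtchns);
    }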
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -968,15 +968,16 @@ int evtchn_status(evtchn_status_t *statu
     if ( d == NULL )
         return -ESRCH;
 
-    spin_lock(&d->event_lock);
-
     if ( !port_is_valid(d, port) )
     {
-        rc = -EINVAL;
-        goto out;
+        rcu_unlock_domain(d);
+        return -EINVAL;
     }
 
     chn = evtchn_from_port(d, port);
+
+    evtchn_read_lock(chn);
+
     if ( consumer_is_xen(chn) )
     {
         rc = -EACCES;
@@ -1021,7 +1022,7 @@ int evtchn_status(evtchn_status_t *statu
     status->vcpu = chn->notify_vcpu_id;
 
  out:
-    spin_unlock(&d->event_lock);
+    evtchn_read_unlock(chn);
     rcu_unlock_domain(d);
 
     return rc;
@@ -1576,22 +1577,32 @@ void evtchn_move_pirqs(struct vcpu *v)
 static void domain_dump_evtchn_info(struct domain *d)
 {
     unsigned int port;
-    int irq;
 
     printk("Event channel information for domain %d:\n"
            "Polling vCPUs: {%*pbl}\n"
            "    port [p/m/s]\n", d->domain_id, d->max_vcpus, d->poll_mask);
 
-    spin_lock(&d->event_lock);
-
     for ( port = 1; port_is_valid(d, port); ++port )
     {
-        const struct evtchn *chn;
+        struct evtchn *chn;
         char *ssid;
 
+        if ( !(port & 0x3f) )
+            process_pending_softirqs();
+
         chn = evtchn_from_port(d, port);
+
+        if ( !evtchn_read_trylock(chn) )
+        {
+            printk("    %4u in flux\n", port);
+            continue;
+        }
+
         if ( chn->state == ECS_FREE )
+        {
+            evtchn_read_unlock(chn);
             continue;
+        }
 
         printk("    %4u [%d/%d/",
                port,
@@ -1601,26 +1612,49 @@ static void domain_dump_evtchn_info(stru
         printk("]: s=%d n=%d x=%d",
                chn->state, chn->notify_vcpu_id, chn->xen_consumer);
 
+        ssid = xsm_show_security_evtchn(d, chn);
+
         switch ( chn->state )
         {
         case ECS_UNBOUND:
             printk(" d=%d", chn->u.unbound.remote_domid);
             break;
+
         case ECS_INTERDOMAIN:
             printk(" d=%d p=%d",
                    chn->u.interdomain.remote_dom->domain_id,
                    chn->u.interdomain.remote_port);
             break;
-        case ECS_PIRQ:
-            irq = domain_pirq_to_irq(d, chn->u.pirq.irq);
-            printk(" p=%d i=%d", chn->u.pirq.irq, irq);
+
+        case ECS_PIRQ: {
+            unsigned int pirq = chn->u.pirq.irq;
+
+            /*
+             * The per-channel locks nest inside the per-domain one, so we
+             * can't acquire the latter without first letting go of the former.
+             */
+            evtchn_read_unlock(chn);
+            chn = NULL;
+            if ( spin_trylock(&d->event_lock) )
+            {
+                int irq = domain_pirq_to_irq(d, pirq);
+
+                spin_unlock(&d->event_lock);
+                printk(" p=%u i=%d", pirq, irq);
+            }
+            else
+                printk(" p=%u i=?", pirq);
             break;
+        }
+
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
             break;
         }
 
-        ssid = xsm_show_security_evtchn(d, chn);
+        if ( chn )
+            evtchn_read_unlock(chn);
+
         if (ssid) {
             printk(" Z=%s\n", ssid);
             xfree(ssid);
@@ -1628,8 +1662,6 @@ static void domain_dump_evtchn_info(stru
             printk("\n");
         }
     }
-
-    spin_unlock(&d->event_lock);
 }
 
 static void dump_evtchn_info(unsigned char key)
--- a/xen/xsm/flask/flask_op.c
+++ b/xen/xsm/flask/flask_op.c
@@ -555,12 +555,13 @@ static int flask_get_peer_sid(struct xen
     struct evtchn *chn;
     struct domain_security_struct *dsec;
 
-    spin_lock(&d->event_lock);
-
     if ( !port_is_valid(d, arg->evtchn) )
-        goto out;
+        return -EINVAL;
 
     chn = evtchn_from_port(d, arg->evtchn);
+
+    evtchn_read_lock(chn);
+
     if ( chn->state != ECS_INTERDOMAIN )
         goto out;
 
@@ -573,7 +574,7 @@ static int flask_get_peer_sid(struct xen
     rv = 0;
 
  out:
-    spin_unlock(&d->event_lock);
+    evtchn_read_unlock(chn);
     return rv;
 }
Neither evtchn_status() nor domain_dump_evtchn_info() nor
flask_get_peer_sid() need to hold the per-domain lock - they all only
read a single channel's state (at a time, in the dump case).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.