@@ -605,15 +605,15 @@ static long evtchn_bind_pirq(evtchn_bind
int evtchn_close(struct domain *d1, int port1, bool guest)
{
struct domain *d2 = NULL;
- struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2;
+ struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2 = NULL;
long rc = 0;
if ( !chn1 )
return -EINVAL;
- again:
write_lock(&d1->event_lock);
+ again:
/* Guest cannot close a Xen-attached event channel. */
if ( unlikely(consumer_is_xen(chn1)) && guest )
{
@@ -634,6 +634,22 @@ int evtchn_close(struct domain *d1, int
case ECS_PIRQ: {
struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq);
+ /*
+ * We don't require the per-channel lock here, so in case a race
+ * happened on the interdomain path below better release both.
+ */
+ if ( unlikely(chn2) )
+ {
+ /*
+ * See evtchn_write_unlock() vs plain write_unlock() comment in
+ * ECS_INTERDOMAIN handling below.
+ */
+ write_unlock(&chn1->lock);
+ write_unlock(&chn2->lock);
+ put_domain(d2);
+ chn2 = NULL;
+ }
+
if ( pirq )
{
if ( !is_hvm_domain(d1) )
@@ -653,6 +669,22 @@ int evtchn_close(struct domain *d1, int
struct vcpu *v;
unsigned long flags;
+ /*
+ * The per-channel locks nest inside the vIRQ ones, so we must release
+ * them if a race happened on the interdomain path below.
+ */
+ if ( unlikely(chn2) )
+ {
+ /*
+ * See evtchn_write_unlock() vs plain write_unlock() comment in
+ * ECS_INTERDOMAIN handling below.
+ */
+ write_unlock(&chn1->lock);
+ write_unlock(&chn2->lock);
+ put_domain(d2);
+ chn2 = NULL;
+ }
+
v = d1->vcpu[virq_is_global(chn1->u.virq) ? 0 : chn1->notify_vcpu_id];
write_lock_irqsave(&v->virq_lock, flags);
@@ -669,63 +701,87 @@ int evtchn_close(struct domain *d1, int
case ECS_INTERDOMAIN:
if ( d2 == NULL )
{
- d2 = chn1->u.interdomain.remote_dom;
+ evtchn_write_lock(chn1);
- /* If we unlock d1 then we could lose d2. Must get a reference. */
- if ( unlikely(!get_domain(d2)) )
- BUG();
-
- if ( d1 < d2 )
- write_lock(&d2->event_lock);
- else if ( d1 != d2 )
- {
- write_unlock(&d1->event_lock);
- write_lock(&d2->event_lock);
- goto again;
- }
+ if ( chn1->state == ECS_INTERDOMAIN )
+ d2 = chn1->u.interdomain.remote_dom;
+ else
+ /* See comment further down. */
+ write_unlock(&chn1->lock);
}
- else if ( d2 != chn1->u.interdomain.remote_dom )
+
+ if ( d2 != chn1->u.interdomain.remote_dom )
{
/*
- * We can only get here if the port was closed and re-bound after
- * unlocking d1 but before locking d2 above. We could retry but
- * it is easier to return the same error as if we had seen the
- * port in ECS_FREE. It must have passed through that state for
- * us to end up here, so it's a valid error to return.
+ * We can only get here if the port was closed and re-bound
+ * - before locking chn1 or
+ * - after unlocking chn1 but before locking both channels
+ * above. We could retry but it is easier to return the same error
+ * as if we had seen the port in ECS_FREE. It must have passed
+ * through that state for us to end up here, so it's a valid error
+ * to return.
*/
+ if ( d2 && !chn2 )
+ write_unlock(&chn1->lock);
rc = -EINVAL;
goto out;
}
- chn2 = _evtchn_from_port(d2, chn1->u.interdomain.remote_port);
- BUG_ON(!chn2);
- BUG_ON(chn2->state != ECS_INTERDOMAIN);
- BUG_ON(chn2->u.interdomain.remote_dom != d1);
+ if ( !chn2 )
+ {
+ /* If we unlock chn1 then we could lose d2. Must get a reference. */
+ if ( unlikely(!get_domain(d2)) )
+ BUG();
- double_evtchn_lock(chn1, chn2);
+ chn2 = _evtchn_from_port(d2, chn1->u.interdomain.remote_port);
+ BUG_ON(!chn2);
- evtchn_free(d1, chn1);
+ if ( chn1 > chn2 )
+ {
+ /*
+ * Cannot use evtchn_write_unlock() here, as its assertion
+ * likely won't hold. However, a race - as per the comment
+ * below - indicates a transition through ECS_FREE must
+ * have occurred, so the assumptions by users of
+ * evtchn_read_trylock() still hold (i.e. they're similarly
+ * fine to bail).
+ */
+ write_unlock(&chn1->lock);
+ double_evtchn_lock(chn2, chn1);
+ goto again;
+ }
+
+ evtchn_write_lock(chn2);
+ }
+
+ BUG_ON(chn2->state != ECS_INTERDOMAIN);
+ BUG_ON(chn2->u.interdomain.remote_dom != d1);
chn2->state = ECS_UNBOUND;
chn2->u.unbound.remote_domid = d1->domain_id;
- double_evtchn_unlock(chn1, chn2);
-
- goto out;
+ break;
default:
BUG();
}
- evtchn_write_lock(chn1);
+ if ( !chn2 )
+ evtchn_write_lock(chn1);
evtchn_free(d1, chn1);
- evtchn_write_unlock(chn1);
+ if ( !chn2 )
+ evtchn_write_unlock(chn1);
out:
- if ( d2 != NULL )
+ if ( chn2 )
{
- if ( d1 != d2 )
- write_unlock(&d2->event_lock);
+ /*
+ * See evtchn_write_unlock() vs plain write_unlock() comment in
+ * ECS_INTERDOMAIN handling above. In principle we could use
+ * double_evtchn_unlock() on the ECS_INTERDOMAIN success path.
+ */
+ write_unlock(&chn1->lock);
+ write_unlock(&chn2->lock);
put_domain(d2);
}
The local domain's lock is needed for the port freeing, but for the
remote side the per-channel lock is sufficient. Other logic then needs
rearranging, though, including the early dropping of both locks (and
the remote domain ref) in the ECS_PIRQ and ECS_VIRQ cases. Note in
particular that there is no real race with evtchn_bind_vcpu():
ECS_INTERDOMAIN and ECS_UNBOUND get treated identically there, and
evtchn_close() doesn't care about the notification vCPU ID. Note also
that we can't use double_evtchn_unlock() or evtchn_write_unlock() when
releasing locks to cover for possible races. See the respective code
comment.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.
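
For reference, the retry in the ECS_INTERDOMAIN path boils down to the
usual address-ordering discipline for taking two locks of the same
class: if the channel we already hold sorts after its peer, we have to
drop it, reacquire both in address order, and then revalidate the
state, since it may have changed while no per-channel lock was held.
Below is a minimal standalone sketch of that pattern, using pthread
rwlocks and made-up names (struct channel, chan_pair_lock(),
lock_remote_and_revalidate()); it is an illustration only, not the Xen
helpers themselves, and it collapses the switch-based revalidation of
the real code into a single state check.

#include <pthread.h>
#include <stdbool.h>

struct channel {
    pthread_rwlock_t lock;
    int state;                         /* stands in for chn->state */
};

/* Always lock the lower-addressed channel first, to avoid ABBA deadlock. */
static void chan_pair_lock(struct channel *a, struct channel *b)
{
    if ( a > b )
    {
        struct channel *tmp = a;

        a = b;
        b = tmp;
    }

    pthread_rwlock_wrlock(&a->lock);
    if ( a != b )
        pthread_rwlock_wrlock(&b->lock);
}

/*
 * Caller holds local->lock and has just looked up remote.  Returns true
 * with both locks held, or false with neither held if the state changed
 * while the local lock was briefly dropped - mirroring the
 * "write_unlock(); double_evtchn_lock(); goto again;" flow above.
 */
static bool lock_remote_and_revalidate(struct channel *local,
                                       struct channel *remote,
                                       int expected_state)
{
    if ( local > remote )
    {
        /* Wrong order: drop what we hold and reacquire both in order. */
        pthread_rwlock_unlock(&local->lock);
        chan_pair_lock(local, remote);

        /* Revalidate: the state may have changed in the unlocked window. */
        if ( local->state != expected_state )
        {
            pthread_rwlock_unlock(&remote->lock);
            pthread_rwlock_unlock(&local->lock);
            return false;              /* caller bails, as with -EINVAL */
        }
    }
    else
        /* Already in address order: just take the peer's lock. */
        pthread_rwlock_wrlock(&remote->lock);

    return true;
}

Ordering purely by address is what lets two CPUs closing opposite ends
of the same interdomain channel acquire the same pair of locks
concurrently without deadlocking against each other.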