From patchwork Mon Sep 30 05:21:21 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: =?utf-8?b?SsO8cmdlbiBHcm/Dnw==?= <jgross@suse.com>
X-Patchwork-Id: 11166009
Return-Path: <SRS0=D6zB=XZ=lists.xenproject.org=xen-devel-bounces@kernel.org>
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org
 [172.30.200.123])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 29E7A13B1
	for <patchwork-xen-devel@patchwork.kernel.org>;
 Mon, 30 Sep 2019 05:23:03 +0000 (UTC)
Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id 1014220815
	for <patchwork-xen-devel@patchwork.kernel.org>;
 Mon, 30 Sep 2019 05:23:03 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 1014220815
Authentication-Results: mail.kernel.org;
 dmarc=none (p=none dis=none) header.from=suse.com
Authentication-Results: mail.kernel.org;
 spf=none smtp.mailfrom=xen-devel-bounces@lists.xenproject.org
Received: from localhost ([127.0.0.1] helo=lists.xenproject.org)
	by lists.xenproject.org with esmtp (Exim 4.89)
	(envelope-from <xen-devel-bounces@lists.xenproject.org>)
	id 1iEo89-0001yi-MP; Mon, 30 Sep 2019 05:21:53 +0000
Received: from all-amaz-eas1.inumbo.com ([34.197.232.57]
 helo=us1-amaz-eas2.inumbo.com)
 by lists.xenproject.org with esmtp (Exim 4.89)
 (envelope-from <SRS0=hbFO=XZ=suse.com=jgross@srs-us1.protection.inumbo.net>)
 id 1iEo88-0001y2-Fd
 for xen-devel@lists.xenproject.org; Mon, 30 Sep 2019 05:21:52 +0000
X-Inumbo-ID: 300d1d30-e342-11e9-96c8-12813bfff9fa
Received: from mx1.suse.de (unknown [195.135.220.15])
 by localhost (Halon) with ESMTPS
 id 300d1d30-e342-11e9-96c8-12813bfff9fa;
 Mon, 30 Sep 2019 05:21:43 +0000 (UTC)
X-Virus-Scanned: by amavisd-new at test-mx.suse.de
Received: from relay2.suse.de (unknown [195.135.220.254])
 by mx1.suse.de (Postfix) with ESMTP id 32308B044;
 Mon, 30 Sep 2019 05:21:40 +0000 (UTC)
From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Date: Mon, 30 Sep 2019 07:21:21 +0200
Message-Id: <20190930052135.11257-6-jgross@suse.com>
X-Mailer: git-send-email 2.16.4
In-Reply-To: <20190930052135.11257-1-jgross@suse.com>
References: <20190930052135.11257-1-jgross@suse.com>
Subject: [Xen-devel] [PATCH v5 05/19] xen/sched: support allocating multiple
 vcpus into one sched unit
X-BeenThere: xen-devel@lists.xenproject.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: Xen developer discussion <xen-devel.lists.xenproject.org>
List-Unsubscribe: <https://lists.xenproject.org/mailman/options/xen-devel>,
 <mailto:xen-devel-request@lists.xenproject.org?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xenproject.org>
List-Help: <mailto:xen-devel-request@lists.xenproject.org?subject=help>
List-Subscribe: <https://lists.xenproject.org/mailman/listinfo/xen-devel>,
 <mailto:xen-devel-request@lists.xenproject.org?subject=subscribe>
Cc: Juergen Gross <jgross@suse.com>,
 George Dunlap <george.dunlap@eu.citrix.com>,
 Dario Faggioli <dfaggioli@suse.com>
MIME-Version: 1.0
Errors-To: xen-devel-bounces@lists.xenproject.org
Sender: "Xen-devel" <xen-devel-bounces@lists.xenproject.org>

With a scheduling granularity greater than 1, multiple vcpus share the
same struct sched_unit. Support that.

Setting the initial processor must be done carefully: we can't use
sched_set_res() as that relies on for_each_sched_unit_vcpu(), which in
turn needs the vcpu to already be a member of the domain's vcpu linked
list, which isn't the case.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Dario Faggioli <dfaggioli@suse.com>
---
V4:
- merge patch 36 of V3 into this one (Jan Beulich)
- add some comments (Jan Beulich)
- use unit_id instead of vcpu_list->vcpu_id (Jan Beulich)
---
 xen/common/schedule.c | 97 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 76 insertions(+), 21 deletions(-)

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 36b1d3df6e..37002b4c0e 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -349,7 +349,7 @@ static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
     spin_unlock_irqrestore(lock1, flags);
 }
 
-static void sched_free_unit(struct sched_unit *unit)
+static void sched_free_unit_mem(struct sched_unit *unit)
 {
     struct sched_unit *prev_unit;
     struct domain *d = unit->domain;
@@ -368,8 +368,6 @@ static void sched_free_unit(struct sched_unit *unit)
         }
     }
 
-    unit->vcpu_list->sched_unit = NULL;
-
     free_cpumask_var(unit->cpu_hard_affinity);
     free_cpumask_var(unit->cpu_hard_affinity_saved);
     free_cpumask_var(unit->cpu_soft_affinity);
@@ -377,18 +375,65 @@ static void sched_free_unit(struct sched_unit *unit)
     xfree(unit);
 }
 
+static void sched_free_unit(struct sched_unit *unit, struct vcpu *v)
+{
+    struct vcpu *vunit;
+    unsigned int cnt = 0;
+
+    /* Don't count the vcpu being released, it might not be in the list yet. */
+    for_each_sched_unit_vcpu ( unit, vunit )
+        if ( vunit != v )
+            cnt++;
+
+    v->sched_unit = NULL;
+    unit->runstate_cnt[v->runstate.state]--;
+
+    if ( unit->vcpu_list == v )
+        unit->vcpu_list = v->next_in_list;
+
+    if ( !cnt )
+        sched_free_unit_mem(unit);
+}
+
+static void sched_unit_add_vcpu(struct sched_unit *unit, struct vcpu *v)
+{
+    v->sched_unit = unit;
+
+    /* All but idle vcpus are allocated with sequential vcpu_id. */
+    if ( !unit->vcpu_list || unit->vcpu_list->vcpu_id > v->vcpu_id )
+    {
+        unit->vcpu_list = v;
+        /*
+         * unit_id is always the same as lowest vcpu_id of unit.
+         * This is used for stopping for_each_sched_unit_vcpu() loop and in
+         * order to support cpupools with different granularities.
+         */
+        unit->unit_id = v->vcpu_id;
+    }
+    unit->runstate_cnt[v->runstate.state]++;
+}
+
 static struct sched_unit *sched_alloc_unit(struct vcpu *v)
 {
     struct sched_unit *unit, **prev_unit;
     struct domain *d = v->domain;
 
+    for_each_sched_unit ( d, unit )
+        if ( unit->unit_id / sched_granularity ==
+             v->vcpu_id / sched_granularity )
+            break;
+
+    if ( unit )
+    {
+        sched_unit_add_vcpu(unit, v);
+        return unit;
+    }
+
     if ( (unit = xzalloc(struct sched_unit)) == NULL )
         return NULL;
 
-    unit->vcpu_list = v;
-    unit->unit_id = v->vcpu_id;
     unit->domain = d;
-    unit->runstate_cnt[v->runstate.state]++;
+    sched_unit_add_vcpu(unit, v);
 
     for ( prev_unit = &d->sched_unit_list; *prev_unit;
           prev_unit = &(*prev_unit)->next_in_list )
@@ -404,12 +449,10 @@ static struct sched_unit *sched_alloc_unit(struct vcpu *v)
          !zalloc_cpumask_var(&unit->cpu_soft_affinity) )
         goto fail;
 
-    v->sched_unit = unit;
-
     return unit;
 
  fail:
-    sched_free_unit(unit);
+    sched_free_unit(unit, v);
     return NULL;
 }
 
@@ -459,21 +502,26 @@ int sched_init_vcpu(struct vcpu *v)
     else
         processor = sched_select_initial_cpu(v);
 
-    sched_set_res(unit, get_sched_res(processor));
-
     /* Initialise the per-vcpu timers. */
     spin_lock_init(&v->periodic_timer_lock);
-    init_timer(&v->periodic_timer, vcpu_periodic_timer_fn,
-               v, v->processor);
-    init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn,
-               v, v->processor);
-    init_timer(&v->poll_timer, poll_timer_fn,
-               v, v->processor);
+    init_timer(&v->periodic_timer, vcpu_periodic_timer_fn, v, processor);
+    init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn, v, processor);
+    init_timer(&v->poll_timer, poll_timer_fn, v, processor);
+
+    /* If this is not the first vcpu of the unit we are done. */
+    if ( unit->priv != NULL )
+    {
+        v->processor = processor;
+        return 0;
+    }
+
+    /* The first vcpu of a unit can be set via sched_set_res(). */
+    sched_set_res(unit, get_sched_res(processor));
 
     unit->priv = sched_alloc_udata(dom_scheduler(d), unit, d->sched_priv);
     if ( unit->priv == NULL )
     {
-        sched_free_unit(unit);
+        sched_free_unit(unit, v);
         return 1;
     }
 
@@ -633,9 +681,16 @@ void sched_destroy_vcpu(struct vcpu *v)
     kill_timer(&v->poll_timer);
     if ( test_and_clear_bool(v->is_urgent) )
         atomic_dec(&per_cpu(sched_urgent_count, v->processor));
-    sched_remove_unit(vcpu_scheduler(v), unit);
-    sched_free_udata(vcpu_scheduler(v), unit->priv);
-    sched_free_unit(unit);
+    /*
+     * Vcpus are being destroyed top-down. So being the first vcpu of a unit
+     * is the same as being the only one.
+     */
+    if ( unit->vcpu_list == v )
+    {
+        sched_remove_unit(vcpu_scheduler(v), unit);
+        sched_free_udata(vcpu_scheduler(v), unit->priv);
+        sched_free_unit(unit, v);
+    }
 }
 
 int sched_init_domain(struct domain *d, int poolid)