[v3,2/2] xen: merge temporary vcpu pinning scenarios

Message ID	20190724112658.31495-3-jgross@suse.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <xen-devel-bounces@lists.xenproject.org> From: Juergen Gross <jgross@suse.com> To: xen-devel@lists.xenproject.org Date: Wed, 24 Jul 2019 13:26:58 +0200 Message-Id: <20190724112658.31495-3-jgross@suse.com> In-Reply-To: <20190724112658.31495-1-jgross@suse.com> References: <20190724112658.31495-1-jgross@suse.com> Subject: [Xen-devel] [PATCH v3 2/2] xen: merge temporary vcpu pinning scenarios Precedence: list Cc: Juergen Gross <jgross@suse.com>, Stefano Stabellini <sstabellini@kernel.org>, Wei Liu <wl@xen.org>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>, George Dunlap <George.Dunlap@eu.citrix.com>, Andrew Cooper <andrew.cooper3@citrix.com>, Ian Jackson <ian.jackson@eu.citrix.com>, Tim Deegan <tim@xen.org>, Julien Grall <julien.grall@arm.com>, Jan Beulich <jbeulich@suse.com>, Dario Faggioli <dfaggioli@suse.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" <xen-devel-bounces@lists.xenproject.org>
Series	xen: enhance temporary vcpu pinning \| expand [v3,0/2] xen: enhance temporary vcpu pinning [v3,1/2] xen/x86: cleanup unused NMI/MCE code [v3,2/2] xen: merge temporary vcpu pinning scenarios

diff --git a/xen/common/domain.c b/xen/common/domain.c index bc56a51815..e8e850796e 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -1267,6 +1267,7 @@ int vcpu_reset(struct vcpu *v) v->async_exception_mask = 0; memset(v->async_exception_state, 0, sizeof(v->async_exception_state)); #endif + v->affinity_broken = 0; clear_bit(_VPF_blocked, &v->pause_flags); clear_bit(_VPF_in_reset, &v->pause_flags); diff --git a/xen/common/domctl.c b/xen/common/domctl.c index 72a44953d0..fa260ce5fb 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -654,7 +654,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) /* Undo a stuck SCHED_pin_override? */ if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE ) - vcpu_pin_override(v, -1); + vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE); ret = 0; diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 349f9624f5..130b97d875 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -1106,43 +1106,59 @@ void watchdog_domain_destroy(struct domain *d) kill_timer(&d->watchdog_timer[i]); } -int vcpu_pin_override(struct vcpu *v, int cpu) +/* + * Pin a vcpu temporarily to a specific CPU (or restore old pinning state if + * cpu is NR_CPUS). + * Temporary pinning can be done due to two reasons, which may be nested: + * - VCPU_AFFINITY_OVERRIDE (requested by guest): is allowed to fail in case + * of a conflict (e.g. in case cpupool doesn't include requested CPU, or + * another conflicting temporary pinning is already in effect. + * - VCPU_AFFINITY_WAIT (called by wait_event()): only used to pin vcpu to the + * CPU it is just running on. Can't fail if used properly. + */ +int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason) { spinlock_t *lock; int ret = -EINVAL; + bool migrate; lock = vcpu_schedule_lock_irq(v); - if ( cpu < 0 ) + if ( cpu == NR_CPUS ) { - if ( v->affinity_broken ) + if ( v->affinity_broken & reason ) { - sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL); - v->affinity_broken = 0; ret = 0; + v->affinity_broken &= ~reason; } + if ( !ret && !v->affinity_broken ) + sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL); } else if ( cpu < nr_cpu_ids ) { - if ( v->affinity_broken ) + if ( (v->affinity_broken & reason) || + (v->affinity_broken && v->processor != cpu) ) ret = -EBUSY; else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) ) { - cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity); - v->affinity_broken = 1; - sched_set_affinity(v, cpumask_of(cpu), NULL); + if ( !v->affinity_broken ) + { + cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity); + sched_set_affinity(v, cpumask_of(cpu), NULL); + } + v->affinity_broken |= reason; ret = 0; } } - if ( ret == 0 ) + migrate = !ret && !cpumask_test_cpu(v->processor, v->cpu_hard_affinity); + if ( migrate ) vcpu_migrate_start(v); vcpu_schedule_unlock_irq(lock, v); - domain_update_node_affinity(v->domain); - - vcpu_migrate_finish(v); + if ( migrate ) + vcpu_migrate_finish(v); return ret; } @@ -1258,6 +1274,7 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) case SCHEDOP_pin_override: { struct sched_pin_override sched_pin_override; + unsigned int cpu; ret = -EPERM; if ( !is_hardware_domain(current->domain) ) @@ -1267,7 +1284,12 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) if ( copy_from_guest(&sched_pin_override, arg, 1) ) break; - ret = vcpu_pin_override(current, sched_pin_override.pcpu); + ret = -EINVAL; + if ( sched_pin_override.pcpu >= NR_CPUS ) + break; + + cpu = sched_pin_override.pcpu < 0 ? NR_CPUS : sched_pin_override.pcpu; + ret = vcpu_temporary_affinity(current, cpu, VCPU_AFFINITY_OVERRIDE); break; } diff --git a/xen/common/wait.c b/xen/common/wait.c index 4f830a14e8..3fc5f68611 100644 --- a/xen/common/wait.c +++ b/xen/common/wait.c @@ -34,8 +34,6 @@ struct waitqueue_vcpu { */ void *esp; char *stack; - cpumask_t saved_affinity; - unsigned int wakeup_cpu; #endif }; @@ -131,12 +129,10 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) ASSERT(wqv->esp == 0); /* Save current VCPU affinity; force wakeup on *this* CPU only. */ - wqv->wakeup_cpu = smp_processor_id(); - cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity); - if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) ) + if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) ) { gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n"); - domain_crash(current->domain); + domain_crash(curr->domain); for ( ; ; ) do_softirq(); @@ -170,7 +166,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) if ( unlikely(wqv->esp == 0) ) { gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__); - domain_crash(current->domain); + domain_crash(curr->domain); for ( ; ; ) do_softirq(); @@ -182,30 +178,24 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) static void __finish_wait(struct waitqueue_vcpu *wqv) { wqv->esp = NULL; - (void)vcpu_set_hard_affinity(current, &wqv->saved_affinity); + vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT); } void check_wakeup_from_wait(void) { - struct waitqueue_vcpu *wqv = current->waitqueue_vcpu; + struct vcpu *curr = current; + struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu; ASSERT(list_empty(&wqv->list)); if ( likely(wqv->esp == NULL) ) return; - /* Check if we woke up on the wrong CPU. */ - if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) ) + /* Check if we are still pinned. */ + if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) ) { - /* Re-set VCPU affinity and re-enter the scheduler. */ - struct vcpu *curr = current; - cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity); - if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) ) - { - gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n"); - domain_crash(current->domain); - } - wait(); /* takes us back into the scheduler */ + gdprintk(XENLOG_ERR, "vcpu affinity lost\n"); + domain_crash(curr->domain); } /* diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index c197e93d73..9578628c6a 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -200,7 +200,9 @@ struct vcpu /* VCPU is paused following shutdown request (d->is_shutting_down)? */ bool paused_for_shutdown; /* VCPU need affinity restored */ - bool affinity_broken; + uint8_t affinity_broken; +#define VCPU_AFFINITY_OVERRIDE 0x01 +#define VCPU_AFFINITY_WAIT 0x02 /* A hypercall has been preempted. */ bool hcall_preempted; @@ -245,7 +247,7 @@ struct vcpu /* Bitmask of CPUs on which this VCPU may run. */ cpumask_var_t cpu_hard_affinity; - /* Used to restore affinity across S3. */ + /* Used to save affinity during temporary pinning. */ cpumask_var_t cpu_hard_affinity_saved; /* Bitmask of CPUs on which this VCPU prefers to run. */ @@ -873,10 +875,10 @@ int cpu_disable_scheduler(unsigned int cpu); /* We need it in dom0_setup_vcpu */ void sched_set_affinity(struct vcpu *v, const cpumask_t *hard, const cpumask_t *soft); +int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason); int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity); int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity); void restore_vcpu_affinity(struct domain *d); -int vcpu_pin_override(struct vcpu *v, int cpu); void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate); uint64_t get_cpu_idle_time(unsigned int cpu);

[v3,2/2] xen: merge temporary vcpu pinning scenarios

Commit Message

Comments

Patch