
[RFC,6/6] schedule: account all the hypervisor time to the idle vcpu

Message ID: 1564137460-25629-8-git-send-email-andrii.anisov@gmail.com
State: New, archived
Series: XEN scheduling hardening

Commit Message

Andrii Anisov July 26, 2019, 10:37 a.m. UTC
From: Andrii Anisov <andrii_anisov@epam.com>

Account to the guest vcpu:
 - guest running time
 - time spent serving guest sync traps (hypercalls, trapped emulated IOMEM accesses, etc.)
 - per-vcpu work done in leave_hypervisor_tail()

Account to the hypervisor (the idle vcpu):
 - IRQ processing
 - softirq processing
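
The pairing is reference counted per pcpu so that entries may nest. Below is a
stand-alone sketch (illustration only, not part of the patch) of the counter
behind hyp_tacc_head()/hyp_tacc_tail(); account_to() is a stand-in for the
runstate changes the real code performs under the scheduler lock:

  #include <assert.h>
  #include <stdio.h>

  /* Stand-alone model of the per-cpu nesting counter: only the outermost
   * head/tail pair actually switches accounting between the guest vcpu
   * and the idle vcpu. */
  static int hyp_tacc_cnt;

  static void account_to(const char *who)
  {
      printf("accounting to %s\n", who);
  }

  static void hyp_tacc_head(int place)
  {
      (void)place;                        /* debug marker only */
      assert(hyp_tacc_cnt >= 0);
      if ( hyp_tacc_cnt == 0 )
          account_to("idle vcpu (hyp)");  /* outermost entry */
      hyp_tacc_cnt++;
  }

  static void hyp_tacc_tail(int place)
  {
      (void)place;
      assert(hyp_tacc_cnt > 0);
      if ( hyp_tacc_cnt == 1 )
          account_to("guest vcpu");       /* outermost exit */
      hyp_tacc_cnt--;
  }

  int main(void)
  {
      hyp_tacc_head(3);    /* guest IRQ traps into the hypervisor */
      hyp_tacc_head(5);    /* nested hyp IRQ: no accounting switch */
      hyp_tacc_tail(5);
      hyp_tacc_tail(1234); /* outermost exit, as in leave_hypervisor_tail() */
      return 0;
  }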

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/traps.c       | 49 ++++++++++++++++++++++++++----
 xen/common/sched_credit.c  |  2 +-
 xen/common/sched_credit2.c |  4 +--
 xen/common/sched_rt.c      |  2 +-
 xen/common/schedule.c      | 74 +++++++++++++++++++++++++++++++++++++++-------
 xen/include/xen/sched.h    |  5 ++++
 6 files changed, 116 insertions(+), 20 deletions(-)
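
For illustration, a worked timeline for the scheduler hunks below. The numbers
are invented; the sketch assumes, as the patch implements, that hyp_tacc_head()
folds elapsed guest time into vcpu->runtime and that schedule() zeroes runtime
when switching a vcpu in:

  /*
   * Hypothetical timeline on one pcpu (ms):
   *   t=0   schedule() switches in vcpu A     -> A->runtime = 0
   *   t=5   guest trap, hyp_tacc_head()       -> A->runtime += 5 (= 5);
   *         the idle vcpu becomes RUNSTATE_running (hyp time)
   *   t=7   hyp_tacc_tail() in leave_hypervisor_tail(); the 2ms of trap
   *         handling were charged to the idle vcpu, A runs again
   *   t=12  timer trap, hyp_tacc_head()       -> A->runtime += 5 (= 10);
   *         csched_schedule() then reads runtime = A->runtime = 10,
   *         i.e. pure guest execution with hypervisor time excluded
   */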

Patch

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 13726db..f978b94 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -2064,7 +2064,7 @@  void do_trap_guest_sync(struct cpu_user_regs *regs)
         if ( !check_conditional_instr(regs, hsr) )
         {
             advance_pc(regs, hsr);
-            return;
+            break;
         }
         if ( hsr.wfi_wfe.ti ) {
             /* Yield the VCPU for WFE */
@@ -2126,10 +2126,16 @@  void do_trap_guest_sync(struct cpu_user_regs *regs)
         perfc_incr(trap_hvc32);
 #ifndef NDEBUG
         if ( (hsr.iss & 0xff00) == 0xff00 )
-            return do_debug_trap(regs, hsr.iss & 0x00ff);
+        {
+            do_debug_trap(regs, hsr.iss & 0x00ff);
+            break;
+        }
 #endif
         if ( hsr.iss == 0 )
-            return do_trap_hvc_smccc(regs);
+        {
+            do_trap_hvc_smccc(regs);
+            break;
+        }
         nr = regs->r12;
         do_trap_hypercall(regs, &nr, hsr);
         regs->r12 = (uint32_t)nr;
@@ -2141,10 +2147,16 @@  void do_trap_guest_sync(struct cpu_user_regs *regs)
         perfc_incr(trap_hvc64);
 #ifndef NDEBUG
         if ( (hsr.iss & 0xff00) == 0xff00 )
-            return do_debug_trap(regs, hsr.iss & 0x00ff);
+        {
+            do_debug_trap(regs, hsr.iss & 0x00ff);
+            break;
+        }
 #endif
         if ( hsr.iss == 0 )
-            return do_trap_hvc_smccc(regs);
+        {
+            do_trap_hvc_smccc(regs);
+            break;
+        }
         do_trap_hypercall(regs, &regs->x16, hsr);
         break;
     case HSR_EC_SMC64:
@@ -2179,6 +2191,11 @@  void do_trap_guest_sync(struct cpu_user_regs *regs)
                 hsr.bits, hsr.ec, hsr.len, hsr.iss);
         inject_undef_exception(regs, hsr);
     }
+
+    local_irq_disable();
+    hyp_tacc_head(1);
+
+    /* hyp_tacc_tail() will be called from leave_hypervisor_tail() */
 }
 
 void do_trap_hyp_sync(struct cpu_user_regs *regs)
@@ -2219,6 +2236,7 @@  void do_trap_hyp_sync(struct cpu_user_regs *regs)
                hsr.bits, hsr.ec, hsr.len, hsr.iss);
         do_unexpected_trap("Hypervisor", regs);
     }
+
 }
 
 void do_trap_hyp_serror(struct cpu_user_regs *regs)
@@ -2234,28 +2252,47 @@  void do_trap_guest_serror(struct cpu_user_regs *regs)
     local_irq_enable();
 
     __do_trap_serror(regs, true);
+
+    local_irq_disable();
+    hyp_tacc_head(2);
 }
 
 void do_trap_guest_irq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(3);
+
     enter_hypervisor_head();
     gic_interrupt(regs, 0);
+
+    /* hyp_tacc_tail() will be called from leave_hypervisor_tail() */
 }
 
 void do_trap_guest_fiq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(4);
+
     enter_hypervisor_head();
     gic_interrupt(regs, 1);
+
+    /* hyp_tacc_tail() will be called from leave_hypervisor_tail() */
 }
 
 void do_trap_hyp_irq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(5);
+
     gic_interrupt(regs, 0);
+
+    hyp_tacc_tail(5);
 }
 
 void do_trap_hyp_fiq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(6);
+
     gic_interrupt(regs, 1);
+
+    hyp_tacc_tail(6);
 }
 
 static void check_for_pcpu_work(void)
@@ -2318,6 +2355,8 @@  void leave_hypervisor_tail(void)
      */
     SYNCHRONIZE_SERROR(SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT);
 
+    hyp_tacc_tail(1234); /* pairs with hyp_tacc_head() places 1-4 */
+
     /*
      * The hypervisor runs with the workaround always present.
      * If the guest wants it disabled, so be it...
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 3c0d7c7..b8d866b 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1856,7 +1856,7 @@  csched_schedule(
                     (unsigned char *)&d);
     }
 
-    runtime = now - current->runstate.state_entry_time;
+    runtime = current->runtime;
     if ( runtime < 0 ) /* Does this ever happen? */
         runtime = 0;
 
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 8e4381d..2d11a5f 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -3285,7 +3285,7 @@  runq_candidate(struct csched2_runqueue_data *rqd,
      * no point forcing it to do so until rate limiting expires.
      */
     if ( !yield && prv->ratelimit_us && vcpu_runnable(scurr->vcpu) &&
-         (now - scurr->vcpu->runstate.state_entry_time) <
+          scurr->vcpu->runtime <
           MICROSECS(prv->ratelimit_us) )
     {
         if ( unlikely(tb_init_done) )
@@ -3296,7 +3296,7 @@  runq_candidate(struct csched2_runqueue_data *rqd,
             } d;
             d.dom = scurr->vcpu->domain->domain_id;
             d.vcpu = scurr->vcpu->vcpu_id;
-            d.runtime = now - scurr->vcpu->runstate.state_entry_time;
+            d.runtime = scurr->vcpu->runtime;
             __trace_var(TRC_CSCHED2_RATELIMIT, 1,
                         sizeof(d),
                         (unsigned char *)&d);
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 0acfc3d..f1de511 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -947,7 +947,7 @@  burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now)
         return;
 
     /* burn at nanoseconds level */
-    delta = now - svc->last_start;
+    delta = svc->vcpu->runtime;
     /*
      * delta < 0 only happens in nested virtualization;
      * TODO: how should we handle delta < 0 in a better way?
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 9e8805d..d3246f9 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1504,20 +1504,16 @@  static void schedule(void)
              (now - next->runstate.state_entry_time) : 0,
              next_slice.time);
 
-    ASSERT(prev->runstate.state == RUNSTATE_running);
-
     TRACE_4D(TRC_SCHED_SWITCH,
              prev->domain->domain_id, prev->vcpu_id,
              next->domain->domain_id, next->vcpu_id);
 
-    vcpu_runstate_change(
-        prev,
-        ((prev->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
-         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
-        now);
-
-    ASSERT(next->runstate.state != RUNSTATE_running);
-    vcpu_runstate_change(next, RUNSTATE_running, now);
+    if ( !vcpu_runnable(prev) )
+        vcpu_runstate_change(
+            prev,
+            ((prev->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
+             RUNSTATE_offline),
+            now);
 
     /*
      * NB. Don't add any trace records from here until the actual context
@@ -1526,6 +1522,7 @@  static void schedule(void)
 
     ASSERT(!next->is_running);
     next->is_running = 1;
+    next->runtime = 0;
 
     pcpu_schedule_unlock_irq(lock, cpu);
 
@@ -1541,6 +1538,58 @@  static void schedule(void)
     context_switch(prev, next);
 }
 
+DEFINE_PER_CPU(int, hyp_tacc_cnt);
+
+void hyp_tacc_head(int place)
+{
+    /* printk("\thead cpu %u, place %d, cnt %d\n", smp_processor_id(), place, this_cpu(hyp_tacc_cnt)); */
+
+    ASSERT(this_cpu(hyp_tacc_cnt) >= 0);
+
+    if ( this_cpu(hyp_tacc_cnt) == 0 )
+    {
+        s_time_t now = NOW();
+        spin_lock(per_cpu(schedule_data, smp_processor_id()).schedule_lock);
+        /*
+         * Stop time accounting for guest (guest vcpu)
+         */
+        ASSERT( (current->runstate.state_entry_time & XEN_RUNSTATE_UPDATE) == 0);
+        current->runtime += now - current->runstate.state_entry_time;
+        vcpu_runstate_change(current, RUNSTATE_runnable, now);
+        /*
+         * Start time accounting for hyp (idle vcpu)
+         */
+        vcpu_runstate_change(idle_vcpu[smp_processor_id()], RUNSTATE_running, now);
+        spin_unlock(per_cpu(schedule_data, smp_processor_id()).schedule_lock);
+    }
+
+    this_cpu(hyp_tacc_cnt)++;
+}
+
+void hyp_tacc_tail(int place)
+{
+    /* printk("\t\t\t\ttail cpu %u, place %d, cnt %d\n", smp_processor_id(), place, this_cpu(hyp_tacc_cnt)); */
+
+    ASSERT(this_cpu(hyp_tacc_cnt) > 0);
+
+    if ( this_cpu(hyp_tacc_cnt) == 1 )
+    {
+        s_time_t now = NOW();
+        spin_lock(per_cpu(schedule_data, smp_processor_id()).schedule_lock);
+        /*
+         * Stop time accounting for hyp (idle vcpu)
+         */
+        vcpu_runstate_change(idle_vcpu[smp_processor_id()], RUNSTATE_runnable, now);
+        /*
+         * Resume time accounting for guest (guest vcpu)
+         */
+        vcpu_runstate_change(current, RUNSTATE_running, now);
+        spin_unlock(per_cpu(schedule_data, smp_processor_id()).schedule_lock);
+    }
+
+    this_cpu(hyp_tacc_cnt)--;
+}
+
 void context_saved(struct vcpu *prev)
 {
     /* Clear running flag /after/ writing context to memory. */
@@ -1597,8 +1646,9 @@  static int cpu_schedule_up(unsigned int cpu)
     sd->curr = idle_vcpu[cpu];
     init_timer(&sd->s_timer, s_timer_fn, NULL, cpu);
     atomic_set(&sd->urgent_count, 0);
+    per_cpu(hyp_tacc_cnt, cpu) = 1; /* the pcpu comes up in hypervisor context */
 
-    /* Boot CPU is dealt with later in schedule_init(). */
+    /* Boot CPU is dealt with later in scheduler_init(). */
     if ( cpu == 0 )
         return 0;
 
@@ -1654,6 +1704,8 @@  static void cpu_schedule_down(unsigned int cpu)
     sd->sched_priv = NULL;
 
     kill_timer(&sd->s_timer);
+
+    per_cpu(hyp_tacc_cnt, cpu) = 0;
 }
 
 static int cpu_schedule_callback(
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5e28797..9391318 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -174,6 +174,8 @@  struct vcpu
     } runstate_guest; /* guest address */
 #endif
 
+    s_time_t runtime; /* guest execution time since last switch-in */
+
     /* Has the FPU been initialised? */
     bool             fpu_initialised;
     /* Has the FPU been used since it was last saved? */
@@ -998,6 +1000,9 @@  extern void dump_runq(unsigned char key);
 
 void arch_do_physinfo(struct xen_sysctl_physinfo *pi);
 
+void hyp_tacc_head(int place); /* 'place' identifies the call site (debug) */
+void hyp_tacc_tail(int place);
+
 #endif /* __SCHED_H__ */
 
 /*