@@ -1786,7 +1786,7 @@ static void park_vcpu(struct csched2_vcpu *svc)
*
* In both cases, we also add it to the list of parked vCPUs of the domain.
*/
- __set_bit(_VPF_parked, &v->pause_flags);
+ set_bit(_VPF_parked, &v->pause_flags);
if ( vcpu_on_runq(svc) )
{
runq_remove(svc);
@@ -1895,7 +1895,7 @@ unpark_parked_vcpus(const struct scheduler *ops, struct list_head *vcpus)
lock = vcpu_schedule_lock_irqsave(svc->vcpu, &flags);
- __clear_bit(_VPF_parked, &svc->vcpu->pause_flags);
+ clear_bit(_VPF_parked, &svc->vcpu->pause_flags);
if ( unlikely(svc->flags & CSFLAG_scheduled) )
{
/*
@@ -2492,7 +2492,7 @@ static void migrate(const struct scheduler *ops,
{
/* It's running; mark it to migrate. */
svc->migrate_rqd = trqd;
- __set_bit(_VPF_migrating, &svc->vcpu->pause_flags);
+ set_bit(_VPF_migrating, &svc->vcpu->pause_flags);
__set_bit(__CSFLAG_runq_migrate_request, &svc->flags);
SCHED_STAT_CRANK(migrate_requested);
tickle_cpu(cpu, svc->rqd);
The pause_flags field must always be modified atomically, as it is manupulated (e.g., in schedule.c) without any lock held. Credit2 code was not doing that, which causes races. Specifically, we have see cases where the unprotected setting of the _VPF_migrating flag in csched_credit2:migrate() was racing with the resetting and testing of the _VPF_blocked flag in schedule.c:vcpu_unblock() and schedule.c:vcpu_wake(). This caused the vCPU that was being unblocked to not be put back in the Credit2 runqueue, which then causes other issue. This unlocked accesses were introduced by ad4b3e1e9df ("xen: credit2: implement utilization cap") and in 222234f2ad1 ("xen: credit2: use non-atomic cpumask and bit operations"). Suggested-by: Juergen Gross <jgross@suse.com> Signed-off-by: Dario Faggioli <dfaggioli@suse.com> Cc: George Dunlap <george.dunlap@citrix.com> Cc: Jan Beulich <jbeulich@suse.com> --- This patch is only necessary for branches older than 4.13 because. In fact, in newer ones, the problem has been resolved indirectly by commit a76255b42665 "xen/sched: make credit2 scheduler vcpu agnostic." (which was part of Juergen's core-scheduling series). I do know that 4.12 and older are in security only mode and that this patch will therefore not be applied. I'm mainly posting it because I think it may be useful for users and downstreams to know that there's an issue there, and so that they can pick it up if they're still using such code-base (especially considering that, at least for 4.12, Credit2 was default already). Hope this is not a problem! Thanks and Regards --- xen/common/sched_credit2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)