
[v4,07/14] sched: Introduce restrict_cpus_allowed_ptr() to limit task CPU affinity

Message ID 20201124155039.13804-8-will@kernel.org (mailing list archive)
State New, archived
Series An alternative series for asymmetric AArch32 systems

Commit Message

Will Deacon Nov. 24, 2020, 3:50 p.m. UTC
Asymmetric systems may not offer the same level of userspace ISA support
across all CPUs, meaning that some applications cannot be executed by
some CPUs. As a concrete example, upcoming arm64 big.LITTLE designs do
not feature support for 32-bit applications on both clusters.

Although userspace can carefully manage the affinity masks for such
tasks, one place where it is particularly problematic is execve()
because the CPU on which the execve() is occurring may be incompatible
with the new application image. In such a situation, it is desirable to
restrict the affinity mask of the task and ensure that the new image is
entered on a compatible CPU. From userspace's point of view, this looks
the same as if the incompatible CPUs have been hotplugged off in its
affinity mask.

In preparation for restricting the affinity mask for compat tasks on
arm64 systems without uniform support for 32-bit applications, introduce
a restrict_cpus_allowed_ptr(), which allows the current affinity mask
for a task to be shrunk to its intersection with a parameter mask.

Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   | 73 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 59 insertions(+), 15 deletions(-)
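
As a rough illustration of how the new helper is meant to be used (this sketch is
not part of the patch): an architecture's exec path could shrink the affinity of a
task whose new image cannot run on every CPU. Both arch_restrict_compat_affinity()
and arch_task_cpu_possible_mask() below are hypothetical placeholders.

	/*
	 * Hypothetical arch hook, called from the execve() path once the new
	 * image is known to require 32-bit support. arch_task_cpu_possible_mask()
	 * stands in for however the architecture describes the CPUs that can
	 * run the new image.
	 */
	static void arch_restrict_compat_affinity(struct task_struct *p)
	{
		const struct cpumask *mask = arch_task_cpu_possible_mask(p);

		/* Shrink p->cpus_mask to its intersection with @mask. */
		if (restrict_cpus_allowed_ptr(p, mask))
			pr_warn("Failed to restrict affinity of pid %d\n", p->pid);
	}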

Comments

Quentin Perret Nov. 27, 2020, 9:49 a.m. UTC | #1
On Tuesday 24 Nov 2020 at 15:50:32 (+0000), Will Deacon wrote:
> Asymmetric systems may not offer the same level of userspace ISA support
> across all CPUs, meaning that some applications cannot be executed by
> some CPUs. As a concrete example, upcoming arm64 big.LITTLE designs do
> not feature support for 32-bit applications on both clusters.
> 
> Although userspace can carefully manage the affinity masks for such
> tasks, one place where it is particularly problematic is execve()
> because the CPU on which the execve() is occurring may be incompatible
> with the new application image. In such a situation, it is desirable to
> restrict the affinity mask of the task and ensure that the new image is
> entered on a compatible CPU. From userspace's point of view, this looks
> the same as if the incompatible CPUs have been hotplugged off in its
> affinity mask.
> 
> In preparation for restricting the affinity mask for compat tasks on
> arm64 systems without uniform support for 32-bit applications, introduce
> a restrict_cpus_allowed_ptr(), which allows the current affinity mask
> for a task to be shrunk to its intersection with a parameter mask.
> 
> Signed-off-by: Will Deacon <will@kernel.org>

Reviewed-by: Quentin Perret <qperret@google.com>

Thanks,
Quentin
Qais Yousef Nov. 27, 2020, 1:19 p.m. UTC | #2
On 11/24/20 15:50, Will Deacon wrote:

[...]

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d2003a7d5ab5..818c8f7bdf2a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1860,24 +1860,18 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
>  }
>  
>  /*
> - * Change a given task's CPU affinity. Migrate the thread to a
> - * proper CPU and schedule it away if the CPU it's executing on
> - * is removed from the allowed bitmask.
> - *
> - * NOTE: the caller must have a valid reference to the task, the
> - * task must not exit() & deallocate itself prematurely. The
> - * call is not atomic; no spinlocks may be held.
> + * Called with both p->pi_lock and rq->lock held; drops both before returning.

nit: wouldn't it be better for the caller to acquire and release the locks?
Not a big deal, but it's always confusing when half of the work is done outside
the function and the other half inside.

Thanks

--
Qais Yousef

>   */
> -static int __set_cpus_allowed_ptr(struct task_struct *p,
> -				  const struct cpumask *new_mask, bool check)
> +static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
> +					 const struct cpumask *new_mask,
> +					 bool check,
> +					 struct rq *rq,
> +					 struct rq_flags *rf)
>  {
>  	const struct cpumask *cpu_valid_mask = cpu_active_mask;
>  	unsigned int dest_cpu;
> -	struct rq_flags rf;
> -	struct rq *rq;
>  	int ret = 0;
>  
> -	rq = task_rq_lock(p, &rf);
>  	update_rq_clock(rq);
>  
>  	if (p->flags & PF_KTHREAD) {
> @@ -1929,7 +1923,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
>  	if (task_running(rq, p) || p->state == TASK_WAKING) {
>  		struct migration_arg arg = { p, dest_cpu };
>  		/* Need help from migration thread: drop lock and wait. */
> -		task_rq_unlock(rq, p, &rf);
> +		task_rq_unlock(rq, p, rf);
>  		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
>  		return 0;
>  	} else if (task_on_rq_queued(p)) {
> @@ -1937,20 +1931,69 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
>  		 * OK, since we're going to drop the lock immediately
>  		 * afterwards anyway.
>  		 */
> -		rq = move_queued_task(rq, &rf, p, dest_cpu);
> +		rq = move_queued_task(rq, rf, p, dest_cpu);
>  	}
>  out:
> -	task_rq_unlock(rq, p, &rf);
> +	task_rq_unlock(rq, p, rf);
>  
>  	return ret;
>  }
Will Deacon Dec. 1, 2020, 4:56 p.m. UTC | #3
On Fri, Nov 27, 2020 at 01:19:16PM +0000, Qais Yousef wrote:
> On 11/24/20 15:50, Will Deacon wrote:
> 
> [...]
> 
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index d2003a7d5ab5..818c8f7bdf2a 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -1860,24 +1860,18 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
> >  }
> >  
> >  /*
> > - * Change a given task's CPU affinity. Migrate the thread to a
> > - * proper CPU and schedule it away if the CPU it's executing on
> > - * is removed from the allowed bitmask.
> > - *
> > - * NOTE: the caller must have a valid reference to the task, the
> > - * task must not exit() & deallocate itself prematurely. The
> > - * call is not atomic; no spinlocks may be held.
> > + * Called with both p->pi_lock and rq->lock held; drops both before returning.
> 
> nit: wouldn't it be better for the caller to acquire and release the locks?
> Not a big deal, but it's always confusing when half of the work is done outside
> the function and the other half inside.

That came up in the last version of the patches iirc, but the problem is
that __set_cpus_allowed_ptr_locked() can trigger migration, which can
drop the lock and take another one for the new runqueue.

Given that this function is internal to the scheduler, I think we can
probably live with it.

Will
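
To make that calling convention concrete, the wrapper added later in this patch
ends up with the following shape (comments added here for illustration): the
caller takes both locks via task_rq_lock(), and __set_cpus_allowed_ptr_locked()
is then responsible for dropping them, possibly on a different runqueue if the
task was migrated.

	static int __set_cpus_allowed_ptr(struct task_struct *p,
					  const struct cpumask *new_mask, bool check)
	{
		struct rq_flags rf;
		struct rq *rq;

		/* Take p->pi_lock and the task's rq->lock... */
		rq = task_rq_lock(p, &rf);

		/*
		 * ...but do not release them here: the _locked helper either ends
		 * with task_rq_unlock() (possibly on a different rq after
		 * move_queued_task()) or drops the locks itself before waiting on
		 * the migration thread.
		 */
		return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
	}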
Qais Yousef Dec. 2, 2020, 1:06 p.m. UTC | #4
On 12/01/20 16:56, Will Deacon wrote:
> On Fri, Nov 27, 2020 at 01:19:16PM +0000, Qais Yousef wrote:
> > On 11/24/20 15:50, Will Deacon wrote:
> > 
> > [...]
> > 
> > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > index d2003a7d5ab5..818c8f7bdf2a 100644
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -1860,24 +1860,18 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
> > >  }
> > >  
> > >  /*
> > > - * Change a given task's CPU affinity. Migrate the thread to a
> > > - * proper CPU and schedule it away if the CPU it's executing on
> > > - * is removed from the allowed bitmask.
> > > - *
> > > - * NOTE: the caller must have a valid reference to the task, the
> > > - * task must not exit() & deallocate itself prematurely. The
> > > - * call is not atomic; no spinlocks may be held.
> > > + * Called with both p->pi_lock and rq->lock held; drops both before returning.
> > 
> > nit: wouldn't it be better for the caller to acquire and release the locks?
> > Not a big deal, but it's always confusing when half of the work is done outside
> > the function and the other half inside.
> 
> That came up in the last version of the patches iirc, but the problem is
> that __set_cpus_allowed_ptr_locked() can trigger migration, which can
> drop the lock and take another one for the new runqueue.
> 
> Given that this function is internal to the scheduler, I think we can
> probably live with it.

I guess task_rq_lock() always entails being prepared for surprises!

Works for me.

Thanks

--
Qais Yousef

Patch

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 063cd120b459..1cd12c3ce9ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1631,6 +1631,7 @@  extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
 #ifdef CONFIG_SMP
 extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
 extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
+extern int restrict_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *mask);
 #else
 static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d2003a7d5ab5..818c8f7bdf2a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1860,24 +1860,18 @@  void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 }
 
 /*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
  */
-static int __set_cpus_allowed_ptr(struct task_struct *p,
-				  const struct cpumask *new_mask, bool check)
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
+					 const struct cpumask *new_mask,
+					 bool check,
+					 struct rq *rq,
+					 struct rq_flags *rf)
 {
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
-	struct rq_flags rf;
-	struct rq *rq;
 	int ret = 0;
 
-	rq = task_rq_lock(p, &rf);
 	update_rq_clock(rq);
 
 	if (p->flags & PF_KTHREAD) {
@@ -1929,7 +1923,7 @@  static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &rf);
+		task_rq_unlock(rq, p, rf);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		return 0;
 	} else if (task_on_rq_queued(p)) {
@@ -1937,20 +1931,69 @@  static int __set_cpus_allowed_ptr(struct task_struct *p,
 		 * OK, since we're going to drop the lock immediately
 		 * afterwards anyway.
 		 */
-		rq = move_queued_task(rq, &rf, p, dest_cpu);
+		rq = move_queued_task(rq, rf, p, dest_cpu);
 	}
 out:
-	task_rq_unlock(rq, p, &rf);
+	task_rq_unlock(rq, p, rf);
 
 	return ret;
 }
 
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, bool check)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+	return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
+}
+
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	return __set_cpus_allowed_ptr(p, new_mask, false);
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
+/*
+ * Change a given task's CPU affinity to the intersection of its current
+ * affinity mask and @subset_mask. If the resulting mask is empty, leave
+ * the affinity unchanged and return -EINVAL.
+ */
+int restrict_cpus_allowed_ptr(struct task_struct *p,
+			      const struct cpumask *subset_mask)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	cpumask_var_t new_mask;
+	int retval;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rq = task_rq_lock(p, &rf);
+	if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
+		task_rq_unlock(rq, p, &rf);
+		retval = -EINVAL;
+		goto out_free_new_mask;
+	}
+
+	retval = __set_cpus_allowed_ptr_locked(p, new_mask, false, rq, &rf);
+
+out_free_new_mask:
+	free_cpumask_var(new_mask);
+	return retval;
+}
+
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG