diff mbox series

[RFC,3/3] arm64: Handle AArch32 tasks running on non AArch32 cpu

Message ID 20201008181641.32767-4-qais.yousef@arm.com (mailing list archive)
State New, archived
Headers show
Series Add support for Asymmetric AArch32 systems | expand

Commit Message

Qais Yousef Oct. 8, 2020, 6:16 p.m. UTC
On Asym AArch32 system, if a task has invalid cpus in its affinity, we
try to fix the cpumask silently and let it continue to run. If the
cpumask doesn't contain any valid cpu, we have no choice but to send
SIGKILL.

This patch can be omitted if user-space can guarantee the cpumask of
all AArch32 apps only contains AArch32 capable CPUs.

The biggest challenge in user space managing the affinity is handling
apps who try to modify their own CPU affinity via sched_setaffinity().
Without this change they could trigger a SIGKILL if they unknowingly
affine to the wrong set of CPUs. Only by enforcing all 32-bit apps into
a cpuset can user space guarantee that apps can't escape this affinity.

Signed-off-by: Qais Yousef <qais.yousef@arm.com>
---
 arch/arm64/Kconfig                  |  8 ++++----
 arch/arm64/include/asm/cpufeature.h |  2 ++
 arch/arm64/kernel/cpufeature.c      | 11 +++++++++++
 arch/arm64/kernel/signal.c          | 25 ++++++++++++++++++++-----
 4 files changed, 37 insertions(+), 9 deletions(-)

Comments

Peter Zijlstra Oct. 9, 2020, 7:29 a.m. UTC | #1
On Thu, Oct 08, 2020 at 07:16:41PM +0100, Qais Yousef wrote:
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index cf94cc248fbe..7e97f1589f33 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -908,13 +908,28 @@ static void do_signal(struct pt_regs *regs)
>  	restore_saved_sigmask();
>  }
>  
> +static void set_32bit_cpus_allowed(void)
>  {
> +	cpumask_var_t cpus_allowed;
> +	int ret = 0;
> +
> +	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
> +		return;
> +
>  	/*
> +	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
> +	 * we try to fix it by removing the invalid ones.
>  	 */
> +	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
> +		ret = -ENOMEM;
> +	} else {
> +		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
> +		ret = set_cpus_allowed_ptr(current, cpus_allowed);
> +		free_cpumask_var(cpus_allowed);
> +	}
> +
> +	if (ret) {
> +		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
>  		force_sig(SIGKILL);
>  	}
>  }

Yeah, no. Not going to happen.

Fundamentally, you're not supposed to change the userspace provided
affinity mask. If we want to do something like this, we'll have to teach
the scheduler about this second mask such that it can compute an
effective mask as the intersection between the 'feature' and user mask.

Also, practically, using ->cpus_ptr here to construct the mask will be
really dodgy vs the proposed migrate_disable() patches.
Morten Rasmussen Oct. 9, 2020, 8:13 a.m. UTC | #2
On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> On Thu, Oct 08, 2020 at 07:16:41PM +0100, Qais Yousef wrote:
> > diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> > index cf94cc248fbe..7e97f1589f33 100644
> > --- a/arch/arm64/kernel/signal.c
> > +++ b/arch/arm64/kernel/signal.c
> > @@ -908,13 +908,28 @@ static void do_signal(struct pt_regs *regs)
> >  	restore_saved_sigmask();
> >  }
> >  
> > +static void set_32bit_cpus_allowed(void)
> >  {
> > +	cpumask_var_t cpus_allowed;
> > +	int ret = 0;
> > +
> > +	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
> > +		return;
> > +
> >  	/*
> > +	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
> > +	 * we try to fix it by removing the invalid ones.
> >  	 */
> > +	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
> > +		ret = -ENOMEM;
> > +	} else {
> > +		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
> > +		ret = set_cpus_allowed_ptr(current, cpus_allowed);
> > +		free_cpumask_var(cpus_allowed);
> > +	}
> > +
> > +	if (ret) {
> > +		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
> >  		force_sig(SIGKILL);
> >  	}
> >  }
> 
> Yeah, no. Not going to happen.
> 
> Fundamentally, you're not supposed to change the userspace provided
> affinity mask. If we want to do something like this, we'll have to teach
> the scheduler about this second mask such that it can compute an
> effective mask as the intersection between the 'feature' and user mask.

I agree that we shouldn't mess with the user-space mask directly. Would it
be unthinkable to go down the route of maintaining a new mask which is
the intersection of the feature mask (controlled and updated by arch
code) and the user-space mask?

It shouldn't add overhead in the scheduler as it would use the
intersection mask instead of the user-space mask, the main complexity
would be around making sure the intersection mask is updated correctly
(cpusets, hotplug, ...).

Like the above tweak, this won't help if the intersection mask is empty,
task will still get killed but it will allow tasks to survive
user-space masks including some non-compatible CPUs. If we want to
prevent task killing in all cases (ignoring hotplug) it gets more ugly
as we would have to ignore the user-space mask in some cases.

Morten
Will Deacon Oct. 9, 2020, 8:31 a.m. UTC | #3
On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> > On Thu, Oct 08, 2020 at 07:16:41PM +0100, Qais Yousef wrote:
> > > diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> > > index cf94cc248fbe..7e97f1589f33 100644
> > > --- a/arch/arm64/kernel/signal.c
> > > +++ b/arch/arm64/kernel/signal.c
> > > @@ -908,13 +908,28 @@ static void do_signal(struct pt_regs *regs)
> > >  	restore_saved_sigmask();
> > >  }
> > >  
> > > +static void set_32bit_cpus_allowed(void)
> > >  {
> > > +	cpumask_var_t cpus_allowed;
> > > +	int ret = 0;
> > > +
> > > +	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
> > > +		return;
> > > +
> > >  	/*
> > > +	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
> > > +	 * we try to fix it by removing the invalid ones.
> > >  	 */
> > > +	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
> > > +		ret = -ENOMEM;
> > > +	} else {
> > > +		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
> > > +		ret = set_cpus_allowed_ptr(current, cpus_allowed);
> > > +		free_cpumask_var(cpus_allowed);
> > > +	}
> > > +
> > > +	if (ret) {
> > > +		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
> > >  		force_sig(SIGKILL);
> > >  	}
> > >  }
> > 
> > Yeah, no. Not going to happen.
> > 
> > Fundamentally, you're not supposed to change the userspace provided
> > affinity mask. If we want to do something like this, we'll have to teach
> > the scheduler about this second mask such that it can compute an
> > effective mask as the intersection between the 'feature' and user mask.
> 
> I agree that we shouldn't mess with the user-space mask directly. Would it
> be unthinkable to go down the route of maintaining a new mask which is
> the intersection of the feature mask (controlled and updated by arch
> code) and the user-space mask?
> 
> It shouldn't add overhead in the scheduler as it would use the
> intersection mask instead of the user-space mask, the main complexity
> would be around making sure the intersection mask is updated correctly
> (cpusets, hotplug, ...).
> 
> Like the above tweak, this won't help if the intersection mask is empty,
> task will still get killed but it will allow tasks to survive
> user-space masks including some non-compatible CPUs. If we want to
> prevent task killing in all cases (ignoring hotplug) it gets more ugly
> as we would have to ignore the user-space mask in some cases.

Honestly, I don't understand why we're trying to hide this asymmetry from
userspace by playing games with affinity masks in the kernel. Userspace
is likely to want to move things about _anyway_ because even amongst the
32-bit capable cores, you may well have different clock frequencies to
contend with.

So I'd be *much* happier to let the scheduler do its thing, and if one
of these 32-bit tasks ends up on a core that can't deal with it, then
tough, it gets killed. Give userspace the information it needs to avoid
that happening in the first place, rather than implicitly limit the mask.

That way, the kernel support really boils down to two parts:

  1. Remove the sanity checks we have to prevent 32-bit applications running
     on asymmetric systems

  2. Tell userspace about the problem

Will
Morten Rasmussen Oct. 9, 2020, 8:50 a.m. UTC | #4
On Fri, Oct 09, 2020 at 09:31:47AM +0100, Will Deacon wrote:
> On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> > On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> > > On Thu, Oct 08, 2020 at 07:16:41PM +0100, Qais Yousef wrote:
> > > > diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> > > > index cf94cc248fbe..7e97f1589f33 100644
> > > > --- a/arch/arm64/kernel/signal.c
> > > > +++ b/arch/arm64/kernel/signal.c
> > > > @@ -908,13 +908,28 @@ static void do_signal(struct pt_regs *regs)
> > > >  	restore_saved_sigmask();
> > > >  }
> > > >  
> > > > +static void set_32bit_cpus_allowed(void)
> > > >  {
> > > > +	cpumask_var_t cpus_allowed;
> > > > +	int ret = 0;
> > > > +
> > > > +	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
> > > > +		return;
> > > > +
> > > >  	/*
> > > > +	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
> > > > +	 * we try to fix it by removing the invalid ones.
> > > >  	 */
> > > > +	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
> > > > +		ret = -ENOMEM;
> > > > +	} else {
> > > > +		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
> > > > +		ret = set_cpus_allowed_ptr(current, cpus_allowed);
> > > > +		free_cpumask_var(cpus_allowed);
> > > > +	}
> > > > +
> > > > +	if (ret) {
> > > > +		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
> > > >  		force_sig(SIGKILL);
> > > >  	}
> > > >  }
> > > 
> > > Yeah, no. Not going to happen.
> > > 
> > > Fundamentally, you're not supposed to change the userspace provided
> > > affinity mask. If we want to do something like this, we'll have to teach
> > > the scheduler about this second mask such that it can compute an
> > > effective mask as the intersection between the 'feature' and user mask.
> > 
> > I agree that we shouldn't mess with the user-space mask directly. Would it
> > be unthinkable to go down the route of maintaining a new mask which is
> > the intersection of the feature mask (controlled and updated by arch
> > code) and the user-space mask?
> > 
> > It shouldn't add overhead in the scheduler as it would use the
> > intersection mask instead of the user-space mask, the main complexity
> > would be around making sure the intersection mask is updated correctly
> > (cpusets, hotplug, ...).
> > 
> > Like the above tweak, this won't help if the intersection mask is empty,
> > task will still get killed but it will allow tasks to survive
> > user-space masks including some non-compatible CPUs. If we want to
> > prevent task killing in all cases (ignoring hotplug) it gets more ugly
> > as we would have to ignore the user-space mask in some cases.
> 
> Honestly, I don't understand why we're trying to hide this asymmetry from
> userspace by playing games with affinity masks in the kernel. Userspace
> is likely to want to move things about _anyway_ because even amongst the
> 32-bit capable cores, you may well have different clock frequencies to
> contend with.

I agree it doesn't make sense to hide the asymmetry. The only argument I
see for tweaking the affinity is to be more friendly in case user-space
is unaware.

> So I'd be *much* happier to let the scheduler do its thing, and if one
> of these 32-bit tasks ends up on a core that can't deal with it, then
> tough, it gets killed. Give userspace the information it needs to avoid
> that happening in the first place, rather than implicitly limit the mask.
> 
> That way, the kernel support really boils down to two parts:
> 
>   1. Remove the sanity checks we have to prevent 32-bit applications running
>      on asymmetric systems
> 
>   2. Tell userspace about the problem

I'm fine with that. We just have to accept that existing unaware
user-space(s) may see tasks getting killed if they use task affinity.

Morten
Peter Zijlstra Oct. 9, 2020, 9:25 a.m. UTC | #5
On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:

> > Fundamentally, you're not supposed to change the userspace provided
> > affinity mask. If we want to do something like this, we'll have to teach
> > the scheduler about this second mask such that it can compute an
> > effective mask as the intersection between the 'feature' and user mask.
> 
> I agree that we shouldn't mess with the user-space mask directly. Would it
> be unthinkable to go down the route of maintaining a new mask which is
> the intersection of the feature mask (controlled and updated by arch
> code) and the user-space mask?
> 
> It shouldn't add overhead in the scheduler as it would use the
> intersection mask instead of the user-space mask, the main complexity
> would be around making sure the intersection mask is updated correctly
> (cpusets, hotplug, ...).

IFF we _need_ to go there, then yes that was the plan, compose the
intersection when either the (arch) feature(set) mask or the userspace
mask changes.
Catalin Marinas Oct. 9, 2020, 9:33 a.m. UTC | #6
On Fri, Oct 09, 2020 at 09:31:47AM +0100, Will Deacon wrote:
> On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> > On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> > > On Thu, Oct 08, 2020 at 07:16:41PM +0100, Qais Yousef wrote:
> > > > diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> > > > index cf94cc248fbe..7e97f1589f33 100644
> > > > --- a/arch/arm64/kernel/signal.c
> > > > +++ b/arch/arm64/kernel/signal.c
> > > > @@ -908,13 +908,28 @@ static void do_signal(struct pt_regs *regs)
> > > >  	restore_saved_sigmask();
> > > >  }
> > > >  
> > > > +static void set_32bit_cpus_allowed(void)
> > > >  {
> > > > +	cpumask_var_t cpus_allowed;
> > > > +	int ret = 0;
> > > > +
> > > > +	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
> > > > +		return;
> > > > +
> > > >  	/*
> > > > +	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
> > > > +	 * we try to fix it by removing the invalid ones.
> > > >  	 */
> > > > +	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
> > > > +		ret = -ENOMEM;
> > > > +	} else {
> > > > +		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
> > > > +		ret = set_cpus_allowed_ptr(current, cpus_allowed);
> > > > +		free_cpumask_var(cpus_allowed);
> > > > +	}
> > > > +
> > > > +	if (ret) {
> > > > +		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
> > > >  		force_sig(SIGKILL);
> > > >  	}
> > > >  }
> > > 
> > > Yeah, no. Not going to happen.
> > > 
> > > Fundamentally, you're not supposed to change the userspace provided
> > > affinity mask. If we want to do something like this, we'll have to teach
> > > the scheduler about this second mask such that it can compute an
> > > effective mask as the intersection between the 'feature' and user mask.
> > 
> > I agree that we shouldn't mess with the user-space mask directly. Would it
> > be unthinkable to go down the route of maintaining a new mask which is
> > the intersection of the feature mask (controlled and updated by arch
> > code) and the user-space mask?
> > 
> > It shouldn't add overhead in the scheduler as it would use the
> > intersection mask instead of the user-space mask, the main complexity
> > would be around making sure the intersection mask is updated correctly
> > (cpusets, hotplug, ...).
> > 
> > Like the above tweak, this won't help if the intersection mask is empty,
> > task will still get killed but it will allow tasks to survive
> > user-space masks including some non-compatible CPUs. If we want to
> > prevent task killing in all cases (ignoring hotplug) it gets more ugly
> > as we would have to ignore the user-space mask in some cases.
> 
> Honestly, I don't understand why we're trying to hide this asymmetry from
> userspace by playing games with affinity masks in the kernel. Userspace
> is likely to want to move things about _anyway_ because even amongst the
> 32-bit capable cores, you may well have different clock frequencies to
> contend with.
> 
> So I'd be *much* happier to let the scheduler do its thing, and if one
> of these 32-bit tasks ends up on a core that can't deal with it, then
> tough, it gets killed. Give userspace the information it needs to avoid
> that happening in the first place, rather than implicitly limit the mask.
> 
> That way, the kernel support really boils down to two parts:
> 
>   1. Remove the sanity checks we have to prevent 32-bit applications running
>      on asymmetric systems
> 
>   2. Tell userspace about the problem

This works for me as well as long as it is default off with a knob to
turn it on. I'd prefer a sysctl (which can be driven from the command
line in recent kernels IIRC) so that one can play with it at run-time.
This way it's also a userspace choice and not an admin or whoever
controls the cmdline (well, that's rather theoretical since the target
is Android).
Qais Yousef Oct. 9, 2020, 9:39 a.m. UTC | #7
On 10/09/20 11:25, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> > On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> 
> > > Fundamentally, you're not supposed to change the userspace provided
> > > affinity mask. If we want to do something like this, we'll have to teach
> > > the scheduler about this second mask such that it can compute an
> > > effective mask as the intersection between the 'feature' and user mask.
> > 
> > I agree that we shouldn't mess with the user-space mask directly. Would it
> > be unthinkable to go down the route of maintaining a new mask which is
> > the intersection of the feature mask (controlled and updated by arch
> > code) and the user-space mask?
> > 
> > It shouldn't add overhead in the scheduler as it would use the
> > intersection mask instead of the user-space mask, the main complexity
> > would be around making sure the intersection mask is updated correctly
> > (cpusets, hotplug, ...).
> 
> IFF we _need_ to go there, then yes that was the plan, compose the
> intersection when either the (arch) feature(set) mask or the userspace
> mask changes.

On such systems these tasks are only valid to run on a subset of cpus. It makes
a lot of sense to me if we want to go down that route to fixup the affinity
when a task is spawned and make sure sched_setaffinity() never allows it to go
outside this range. The tasks can't physically run on those cpus, so I don't
see us breaking user-space affinity here. Just reflecting the reality.

Only if it moved to a cpuset with no intersection it would be killed. Which
I think is the behavior anyway today.

Thanks

--
Qais Yousef
Greg Kroah-Hartman Oct. 9, 2020, 9:42 a.m. UTC | #8
On Fri, Oct 09, 2020 at 10:33:41AM +0100, Catalin Marinas wrote:
> On Fri, Oct 09, 2020 at 09:31:47AM +0100, Will Deacon wrote:
> > On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> > > On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> > > > On Thu, Oct 08, 2020 at 07:16:41PM +0100, Qais Yousef wrote:
> > > > > diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> > > > > index cf94cc248fbe..7e97f1589f33 100644
> > > > > --- a/arch/arm64/kernel/signal.c
> > > > > +++ b/arch/arm64/kernel/signal.c
> > > > > @@ -908,13 +908,28 @@ static void do_signal(struct pt_regs *regs)
> > > > >  	restore_saved_sigmask();
> > > > >  }
> > > > >  
> > > > > +static void set_32bit_cpus_allowed(void)
> > > > >  {
> > > > > +	cpumask_var_t cpus_allowed;
> > > > > +	int ret = 0;
> > > > > +
> > > > > +	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
> > > > > +		return;
> > > > > +
> > > > >  	/*
> > > > > +	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
> > > > > +	 * we try to fix it by removing the invalid ones.
> > > > >  	 */
> > > > > +	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
> > > > > +		ret = -ENOMEM;
> > > > > +	} else {
> > > > > +		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
> > > > > +		ret = set_cpus_allowed_ptr(current, cpus_allowed);
> > > > > +		free_cpumask_var(cpus_allowed);
> > > > > +	}
> > > > > +
> > > > > +	if (ret) {
> > > > > +		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
> > > > >  		force_sig(SIGKILL);
> > > > >  	}
> > > > >  }
> > > > 
> > > > Yeah, no. Not going to happen.
> > > > 
> > > > Fundamentally, you're not supposed to change the userspace provided
> > > > affinity mask. If we want to do something like this, we'll have to teach
> > > > the scheduler about this second mask such that it can compute an
> > > > effective mask as the intersection between the 'feature' and user mask.
> > > 
> > > I agree that we shouldn't mess with the user-space mask directly. Would it
> > > be unthinkable to go down the route of maintaining a new mask which is
> > > the intersection of the feature mask (controlled and updated by arch
> > > code) and the user-space mask?
> > > 
> > > It shouldn't add overhead in the scheduler as it would use the
> > > intersection mask instead of the user-space mask, the main complexity
> > > would be around making sure the intersection mask is updated correctly
> > > (cpusets, hotplug, ...).
> > > 
> > > Like the above tweak, this won't help if the intersection mask is empty,
> > > task will still get killed but it will allow tasks to survive
> > > user-space masks including some non-compatible CPUs. If we want to
> > > prevent task killing in all cases (ignoring hotplug) it gets more ugly
> > > as we would have to ignore the user-space mask in some cases.
> > 
> > Honestly, I don't understand why we're trying to hide this asymmetry from
> > userspace by playing games with affinity masks in the kernel. Userspace
> > is likely to want to move things about _anyway_ because even amongst the
> > 32-bit capable cores, you may well have different clock frequencies to
> > contend with.
> > 
> > So I'd be *much* happier to let the scheduler do its thing, and if one
> > of these 32-bit tasks ends up on a core that can't deal with it, then
> > tough, it gets killed. Give userspace the information it needs to avoid
> > that happening in the first place, rather than implicitly limit the mask.
> > 
> > That way, the kernel support really boils down to two parts:
> > 
> >   1. Remove the sanity checks we have to prevent 32-bit applications running
> >      on asymmetric systems
> > 
> >   2. Tell userspace about the problem
> 
> This works for me as well as long as it is default off with a knob to
> turn it on. I'd prefer a sysctl (which can be driven from the command
> line in recent kernels IIRC) so that one can play with it at run-time.
> This way it's also a userspace choice and not an admin or whoever
> controls the cmdline (well, that's rather theoretical since the target
> is Android).

The target _today_ is Android.  Chips and cores have a life of their own
and end up getting used all over the place over time, as you know :)

thanks,

greg k-h
Catalin Marinas Oct. 9, 2020, 9:51 a.m. UTC | #9
On Fri, Oct 09, 2020 at 11:25:59AM +0200, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 10:13:12AM +0200, Morten Rasmussen wrote:
> > On Fri, Oct 09, 2020 at 09:29:43AM +0200, Peter Zijlstra wrote:
> 
> > > Fundamentally, you're not supposed to change the userspace provided
> > > affinity mask. If we want to do something like this, we'll have to teach
> > > the scheduler about this second mask such that it can compute an
> > > effective mask as the intersection between the 'feature' and user mask.
> > 
> > I agree that we shouldn't mess with the user-space mask directly. Would it
> > be unthinkable to go down the route of maintaining a new mask which is
> > the intersection of the feature mask (controlled and updated by arch
> > code) and the user-space mask?
> > 
> > It shouldn't add overhead in the scheduler as it would use the
> > intersection mask instead of the user-space mask, the main complexity
> > would be around making sure the intersection mask is updated correctly
> > (cpusets, hotplug, ...).
> 
> IFF we _need_ to go there, then yes that was the plan, compose the
> intersection when either the (arch) feature(set) mask or the userspace
> mask changes.

Another thought: should the arm64 compat_elf_check_arch() and
sys_arm64_personality(PER_LINUX32) validate the user cpumask before
returning success/failure? It won't prevent the user from changing the
affinity at run-time but at least it won't randomly get killed just
because the kernel advertises 32-bit support.
Qais Yousef Oct. 9, 2020, 11:31 a.m. UTC | #10
On 10/09/20 10:33, Catalin Marinas wrote:
> On Fri, Oct 09, 2020 at 09:31:47AM +0100, Will Deacon wrote:
> > Honestly, I don't understand why we're trying to hide this asymmetry from
> > userspace by playing games with affinity masks in the kernel. Userspace
> > is likely to want to move things about _anyway_ because even amongst the
> > 32-bit capable cores, you may well have different clock frequencies to
> > contend with.
> > 
> > So I'd be *much* happier to let the scheduler do its thing, and if one
> > of these 32-bit tasks ends up on a core that can't deal with it, then
> > tough, it gets killed. Give userspace the information it needs to avoid
> > that happening in the first place, rather than implicitly limit the mask.
> > 
> > That way, the kernel support really boils down to two parts:
> > 
> >   1. Remove the sanity checks we have to prevent 32-bit applications running
> >      on asymmetric systems
> > 
> >   2. Tell userspace about the problem
> 
> This works for me as well as long as it is default off with a knob to
> turn it on. I'd prefer a sysctl (which can be driven from the command
> line in recent kernels IIRC) so that one can play with it at run-time.
> This way it's also a userspace choice and not an admin or whoever
> controls the cmdline (well, that's rather theoretical since the target
> is Android).

I like the cmdline option more. It implies a custom bootloader and user space
are required to enable this. Which in return implies they can write their own
custom driver to manage exporting this info to user-space. Relieving us from
maintaining any ABI in mainline kernel.

Thanks

--
Qais Yousef
Catalin Marinas Oct. 9, 2020, 12:40 p.m. UTC | #11
On Fri, Oct 09, 2020 at 12:31:56PM +0100, Qais Yousef wrote:
> On 10/09/20 10:33, Catalin Marinas wrote:
> > On Fri, Oct 09, 2020 at 09:31:47AM +0100, Will Deacon wrote:
> > > Honestly, I don't understand why we're trying to hide this asymmetry from
> > > userspace by playing games with affinity masks in the kernel. Userspace
> > > is likely to want to move things about _anyway_ because even amongst the
> > > 32-bit capable cores, you may well have different clock frequencies to
> > > contend with.
> > > 
> > > So I'd be *much* happier to let the scheduler do its thing, and if one
> > > of these 32-bit tasks ends up on a core that can't deal with it, then
> > > tough, it gets killed. Give userspace the information it needs to avoid
> > > that happening in the first place, rather than implicitly limit the mask.
> > > 
> > > That way, the kernel support really boils down to two parts:
> > > 
> > >   1. Remove the sanity checks we have to prevent 32-bit applications running
> > >      on asymmetric systems
> > > 
> > >   2. Tell userspace about the problem
> > 
> > This works for me as well as long as it is default off with a knob to
> > turn it on. I'd prefer a sysctl (which can be driven from the command
> > line in recent kernels IIRC) so that one can play with it at run-time.
> > This way it's also a userspace choice and not an admin or whoever
> > controls the cmdline (well, that's rather theoretical since the target
> > is Android).
> 
> I like the cmdline option more. It implies a custom bootloader and user space
> are required to enable this. Which in return implies they can write their own
> custom driver to manage exporting this info to user-space. Relieving us from
> maintaining any ABI in mainline kernel.

Regardless of whether it's cmdline or sysctl, I'm strongly opposed to
custom drivers for exposing this information to user. It leads to
custom incompatible ABIs scattered around.

Note that user can already check the MIDR_EL1 value if it knows which
CPU type and revision has 32-bit support.
Qais Yousef Oct. 13, 2020, 2:23 p.m. UTC | #12
On 10/09/20 13:40, Catalin Marinas wrote:
> On Fri, Oct 09, 2020 at 12:31:56PM +0100, Qais Yousef wrote:
> > On 10/09/20 10:33, Catalin Marinas wrote:
> > > On Fri, Oct 09, 2020 at 09:31:47AM +0100, Will Deacon wrote:
> > > > Honestly, I don't understand why we're trying to hide this asymmetry from
> > > > userspace by playing games with affinity masks in the kernel. Userspace
> > > > is likely to want to move things about _anyway_ because even amongst the
> > > > 32-bit capable cores, you may well have different clock frequencies to
> > > > contend with.
> > > > 
> > > > So I'd be *much* happier to let the scheduler do its thing, and if one
> > > > of these 32-bit tasks ends up on a core that can't deal with it, then
> > > > tough, it gets killed. Give userspace the information it needs to avoid
> > > > that happening in the first place, rather than implicitly limit the mask.
> > > > 
> > > > That way, the kernel support really boils down to two parts:
> > > > 
> > > >   1. Remove the sanity checks we have to prevent 32-bit applications running
> > > >      on asymmetric systems
> > > > 
> > > >   2. Tell userspace about the problem
> > > 
> > > This works for me as well as long as it is default off with a knob to
> > > turn it on. I'd prefer a sysctl (which can be driven from the command
> > > line in recent kernels IIRC) so that one can play with it at run-time.
> > > This way it's also a userspace choice and not an admin or whoever
> > > controls the cmdline (well, that's rather theoretical since the target
> > > is Android).
> > 
> > I like the cmdline option more. It implies a custom bootloader and user space
> > are required to enable this. Which in return implies they can write their own
> > custom driver to manage exporting this info to user-space. Relieving us from
> > maintaining any ABI in mainline kernel.
> 
> Regardless of whether it's cmdline or sysctl, I'm strongly opposed to
> custom drivers for exposing this information to user. It leads to
> custom incompatible ABIs scattered around.

Yeah I see what you mean.

So for now I'll remove the Kconfig dependency and replace it with a sysctl
instead such that system_supports_asym_32bit_el0() only returns true if the
system is asym AND the sysctl is true.

Thanks

--
Qais Yousef

> Note that user can already check the MIDR_EL1 value if it knows which
> CPU type and revision has 32-bit support.
> 
> -- 
> Catalin
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 591853504dc4..ad6d52dd8ac0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1875,10 +1875,10 @@  config ASYMMETRIC_AARCH32
 	  Enable this option to allow support for asymmetric AArch32 EL0
 	  CPU configurations. Once the AArch32 EL0 support is detected
 	  on a CPU, the feature is made available to user space to allow
-	  the execution of 32-bit (compat) applications. If the affinity
-	  of the 32-bit application contains a non-AArch32 capable CPU
-	  or the last AArch32 capable CPU is offlined, the application
-	  will be killed.
+	  the execution of 32-bit (compat) applications by migrating
+	  them to the capable CPUs. Offlining such CPUs leads to 32-bit
+	  applications being killed. Similarly if the affinity contains
+	  no 32-bit capable CPU.
 
 	  If unsure say N.
 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index fa2413715041..57275e47cd3d 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -376,6 +376,8 @@  cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,
 	return false;
 }
 
+extern cpumask_t aarch32_el0_mask;
+
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 extern struct static_key_false arm64_const_caps_ready;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d46732113305..4c0858c13e6d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1721,6 +1721,16 @@  cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
 	return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
 }
 
+#ifdef CONFIG_ASYMMETRIC_AARCH32
+cpumask_t aarch32_el0_mask;
+
+static void cpu_enable_aarch32_el0(struct arm64_cpu_capabilities const *cap)
+{
+	if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU))
+		cpumask_set_cpu(smp_processor_id(), &aarch32_el0_mask);
+}
+#endif
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -1799,6 +1809,7 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_32BIT_EL0,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_enable_aarch32_el0,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index cf94cc248fbe..7e97f1589f33 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -908,13 +908,28 @@  static void do_signal(struct pt_regs *regs)
 	restore_saved_sigmask();
 }
 
-static void check_aarch32_cpumask(void)
+static void set_32bit_cpus_allowed(void)
 {
+	cpumask_var_t cpus_allowed;
+	int ret = 0;
+
+	if (cpumask_subset(current->cpus_ptr, &aarch32_el0_mask))
+		return;
+
 	/*
-	 * If the task moved to uncapable CPU, SIGKILL it.
+	 * On asym aarch32 systems, if the task has invalid cpus in its mask,
+	 * we try to fix it by removing the invalid ones.
 	 */
-	if (!this_cpu_has_cap(ARM64_HAS_32BIT_EL0)) {
-		pr_warn_once("CPU affinity contains CPUs that are not capable of running 32-bit tasks\n");
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_ATOMIC)) {
+		ret = -ENOMEM;
+	} else {
+		cpumask_and(cpus_allowed, current->cpus_ptr, &aarch32_el0_mask);
+		ret = set_cpus_allowed_ptr(current, cpus_allowed);
+		free_cpumask_var(cpus_allowed);
+	}
+
+	if (ret) {
+		pr_warn_once("Failed to fixup affinity of running 32-bit task\n");
 		force_sig(SIGKILL);
 	}
 }
@@ -944,7 +959,7 @@  asmlinkage void do_notify_resume(struct pt_regs *regs,
 			if (IS_ENABLED(CONFIG_ASYMMETRIC_AARCH32) &&
 			    thread_flags & _TIF_CHECK_32BIT_AFFINITY) {
 				clear_thread_flag(TIF_CHECK_32BIT_AFFINITY);
-				check_aarch32_cpumask();
+				set_32bit_cpus_allowed();
 			}
 
 			if (thread_flags & _TIF_UPROBE)