diff mbox series

[v8,12/26] arm64: irqflags: Use ICC_PMR_EL1 for interrupt masking

Message ID 1546956464-48825-13-git-send-email-julien.thierry@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: provide pseudo NMI with GICv3 | expand

Commit Message

Julien Thierry Jan. 8, 2019, 2:07 p.m. UTC
Instead disabling interrupts by setting the PSR.I bit, use a priority
higher than the one used for interrupts to mask them via PMR.

When using PMR to disable interrupts, the value of PMR will be used
instead of PSR.[DAIF] for the irqflags.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 arch/arm64/include/asm/efi.h      |  11 ++++
 arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
 2 files changed, 106 insertions(+), 28 deletions(-)

Comments

Dave Martin Jan. 8, 2019, 3:40 p.m. UTC | #1
On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> Instead disabling interrupts by setting the PSR.I bit, use a priority
> higher than the one used for interrupts to mask them via PMR.
> 
> When using PMR to disable interrupts, the value of PMR will be used
> instead of PSR.[DAIF] for the irqflags.
> 
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/arm64/include/asm/efi.h      |  11 ++++
>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
>  2 files changed, 106 insertions(+), 28 deletions(-)

[...]

> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> index 24692ed..fa3b06f 100644
> --- a/arch/arm64/include/asm/irqflags.h
> +++ b/arch/arm64/include/asm/irqflags.h
> @@ -18,7 +18,9 @@

[...]

>  static inline void arch_local_irq_enable(void)
>  {
> -	asm volatile(
> -		"msr	daifclr, #2		// arch_local_irq_enable"
> -		:
> +	unsigned long unmasked = GIC_PRIO_IRQON;
> +
> +	asm volatile(ALTERNATIVE(
> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
> +		"nop",
> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
> +		"dsb	sy",

I'm still not convinced these dsbs are needed.

Without the dsb, we are probably not guaranteed to take a pending
interrupt _immediately_ on unmasking, but I'm not sure that's a
problem.

What goes wrong if we omit them?

(My attempts to answer these questions using the GIC architecture spec
have met with limited success so far...)

[...]

Cheers
---Dave
Marc Zyngier Jan. 8, 2019, 3:51 p.m. UTC | #2
On 08/01/2019 15:40, Dave Martin wrote:
> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
>> Instead disabling interrupts by setting the PSR.I bit, use a priority
>> higher than the one used for interrupts to mask them via PMR.
>>
>> When using PMR to disable interrupts, the value of PMR will be used
>> instead of PSR.[DAIF] for the irqflags.
>>
>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> Cc: Oleg Nesterov <oleg@redhat.com>
>> ---
>>  arch/arm64/include/asm/efi.h      |  11 ++++
>>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
>>  2 files changed, 106 insertions(+), 28 deletions(-)
> 
> [...]
> 
>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>> index 24692ed..fa3b06f 100644
>> --- a/arch/arm64/include/asm/irqflags.h
>> +++ b/arch/arm64/include/asm/irqflags.h
>> @@ -18,7 +18,9 @@
> 
> [...]
> 
>>  static inline void arch_local_irq_enable(void)
>>  {
>> -	asm volatile(
>> -		"msr	daifclr, #2		// arch_local_irq_enable"
>> -		:
>> +	unsigned long unmasked = GIC_PRIO_IRQON;
>> +
>> +	asm volatile(ALTERNATIVE(
>> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
>> +		"nop",
>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
>> +		"dsb	sy",
> 
> I'm still not convinced these dsbs are needed.
> 
> Without the dsb, we are probably not guaranteed to take a pending
> interrupt _immediately_ on unmasking, but I'm not sure that's a
> problem.
> 
> What goes wrong if we omit them?

Then the GIC doesn't know it can now deliver interrupts of a lower
priority. Only a dsb can guarantee that the GIC's view of PMR will get
updated.

See 9.1.6 (Observability of the effects of accesses to the GIC
registers), which states:

<quote>
Architectural execution of a DSB instruction guarantees that
— The last value written to ICC_PMR_EL1 or GICC_PMR is observed by the
associated Redistributor.
</quote>

So yes, DSB is required.

Thanks,

	M.
Dave Martin Jan. 8, 2019, 4:45 p.m. UTC | #3
On Tue, Jan 08, 2019 at 03:51:18PM +0000, Marc Zyngier wrote:
> On 08/01/2019 15:40, Dave Martin wrote:
> > On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> >> Instead disabling interrupts by setting the PSR.I bit, use a priority
> >> higher than the one used for interrupts to mask them via PMR.
> >>
> >> When using PMR to disable interrupts, the value of PMR will be used
> >> instead of PSR.[DAIF] for the irqflags.
> >>
> >> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> >> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
> >> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >> Cc: Will Deacon <will.deacon@arm.com>
> >> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> Cc: Oleg Nesterov <oleg@redhat.com>
> >> ---
> >>  arch/arm64/include/asm/efi.h      |  11 ++++
> >>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
> >>  2 files changed, 106 insertions(+), 28 deletions(-)
> > 
> > [...]
> > 
> >> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> >> index 24692ed..fa3b06f 100644
> >> --- a/arch/arm64/include/asm/irqflags.h
> >> +++ b/arch/arm64/include/asm/irqflags.h
> >> @@ -18,7 +18,9 @@
> > 
> > [...]
> > 
> >>  static inline void arch_local_irq_enable(void)
> >>  {
> >> -	asm volatile(
> >> -		"msr	daifclr, #2		// arch_local_irq_enable"
> >> -		:
> >> +	unsigned long unmasked = GIC_PRIO_IRQON;
> >> +
> >> +	asm volatile(ALTERNATIVE(
> >> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
> >> +		"nop",
> >> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
> >> +		"dsb	sy",
> > 
> > I'm still not convinced these dsbs are needed.
> > 
> > Without the dsb, we are probably not guaranteed to take a pending
> > interrupt _immediately_ on unmasking, but I'm not sure that's a
> > problem.
> > 
> > What goes wrong if we omit them?
> 
> Then the GIC doesn't know it can now deliver interrupts of a lower
> priority. Only a dsb can guarantee that the GIC's view of PMR will get
> updated.
> 
> See 9.1.6 (Observability of the effects of accesses to the GIC
> registers), which states:
> 
> <quote>
> Architectural execution of a DSB instruction guarantees that
> — The last value written to ICC_PMR_EL1 or GICC_PMR is observed by the
> associated Redistributor.
> </quote>
> 
> So yes, DSB is required.

But it says neither what is means for the PMR write to be "observed by
the redistributor", nor whether the DSB is required for the
redistributor to observe the write at all.  (So, is an implementation
allowed to cached in the CPU interface indefinitely until forcibly
flushed to the redistributor by a DSB, and in any case can the write's
reaching the distributor in finite time or not have any effect that we
care about in this case?).


My reason for querying this is that temporary local masking of classes
of interrupts seems an obvious use case for the PMR, and the DSB
requirement flies rather in the face of this.


Have we seen hardware where interrupts may stall forever upstream of the
CPU interface after a PMR write, until a dsb is executed by the CPU?

If so that is sad, but I guess we have to live with it.

Also, is it ever important in Linux that a pending interrupt be taken
immediately upon unmasking (and how do we know that said interrupt is
pending)?  If not, we don't care precisely when such interrupts are
pended to the PE, just that such an interrupt cannot be taken before
the PMR write that unmasks it.  It would be insane for the self-
synchronization of PMR writes to lack this guarantee (and a DSB after
the PMR write would do no good anyway in that case).

Happy to be put right -- I'm doubtless showing my ignorance here!

Cheers
---Dave
Marc Zyngier Jan. 8, 2019, 5:16 p.m. UTC | #4
On 08/01/2019 16:45, Dave Martin wrote:
> On Tue, Jan 08, 2019 at 03:51:18PM +0000, Marc Zyngier wrote:
>> On 08/01/2019 15:40, Dave Martin wrote:
>>> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
>>>> Instead disabling interrupts by setting the PSR.I bit, use a priority
>>>> higher than the one used for interrupts to mask them via PMR.
>>>>
>>>> When using PMR to disable interrupts, the value of PMR will be used
>>>> instead of PSR.[DAIF] for the irqflags.
>>>>
>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>>> Cc: Oleg Nesterov <oleg@redhat.com>
>>>> ---
>>>>  arch/arm64/include/asm/efi.h      |  11 ++++
>>>>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
>>>>  2 files changed, 106 insertions(+), 28 deletions(-)
>>>
>>> [...]
>>>
>>>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>>>> index 24692ed..fa3b06f 100644
>>>> --- a/arch/arm64/include/asm/irqflags.h
>>>> +++ b/arch/arm64/include/asm/irqflags.h
>>>> @@ -18,7 +18,9 @@
>>>
>>> [...]
>>>
>>>>  static inline void arch_local_irq_enable(void)
>>>>  {
>>>> -	asm volatile(
>>>> -		"msr	daifclr, #2		// arch_local_irq_enable"
>>>> -		:
>>>> +	unsigned long unmasked = GIC_PRIO_IRQON;
>>>> +
>>>> +	asm volatile(ALTERNATIVE(
>>>> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
>>>> +		"nop",
>>>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
>>>> +		"dsb	sy",
>>>
>>> I'm still not convinced these dsbs are needed.
>>>
>>> Without the dsb, we are probably not guaranteed to take a pending
>>> interrupt _immediately_ on unmasking, but I'm not sure that's a
>>> problem.
>>>
>>> What goes wrong if we omit them?
>>
>> Then the GIC doesn't know it can now deliver interrupts of a lower
>> priority. Only a dsb can guarantee that the GIC's view of PMR will get
>> updated.
>>
>> See 9.1.6 (Observability of the effects of accesses to the GIC
>> registers), which states:
>>
>> <quote>
>> Architectural execution of a DSB instruction guarantees that
>> — The last value written to ICC_PMR_EL1 or GICC_PMR is observed by the
>> associated Redistributor.
>> </quote>
>>
>> So yes, DSB is required.
> 
> But it says neither what is means for the PMR write to be "observed by
> the redistributor", nor whether the DSB is required for the
> redistributor to observe the write at all.

Well, it seems pretty clear to me that if the redistributor doesn't
observe the PMR value, it is unlikely to change its interpretation of
it· And conversely, the redistributor is allowed to sit pretty and not
give you any interrupt until you are actually telling it that something
has changed.

I really think that for once, the spec is pretty unambiguous about what
is required.

> (So, is an implementation
> allowed to cached in the CPU interface indefinitely until forcibly
> flushed to the redistributor by a DSB, and in any case can the write's
> reaching the distributor in finite time or not have any effect that we
> care about in this case?).

Nothing in the spec says that the system register write will magically
trickle down to the redistributor in the absence of a DSB.

> My reason for querying this is that temporary local masking of classes
> of interrupts seems an obvious use case for the PMR, and the DSB
> requirement flies rather in the face of this.

Are you implying that the GIC architecture should have any form of
sanity and be useful for general purpose software? Think again! ;-)

The PMR behavior you are describing only works in a single direction
(from low to high priority), because the CPU interface has to perform
some filtering. In the opposite direction, you need the big hammer.

> Have we seen hardware where interrupts may stall forever upstream of the
> CPU interface after a PMR write, until a dsb is executed by the CPU?

Yes. You even have to have a DSB right after a read of IAR to avoid
loosing interrupts. The short story is that there is hardly any
synchronization between redistributor and CPU interface. Implementations
are allowed a more closely coupled design, but that's not what the
architecture mandates.

> If so that is sad, but I guess we have to live with it.
> 
> Also, is it ever important in Linux that a pending interrupt be taken
> immediately upon unmasking (and how do we know that said interrupt is
> pending)?  If not, we don't care precisely when such interrupts are
> pended to the PE, just that such an interrupt cannot be taken before
> the PMR write that unmasks it.  It would be insane for the self-
> synchronization of PMR writes to lack this guarantee (and a DSB after
> the PMR write would do no good anyway in that case).

RT folks are usually quite picky on when they see their interrupts
firing. I can also imagine the following scenario:

	set_pmr(allow all interrupts)
	WFI

where things stop rather abruptly if this is the only CPU in the system.

Thanks,

	M.
Julien Thierry Jan. 8, 2019, 5:58 p.m. UTC | #5
On 08/01/2019 16:45, Dave Martin wrote:
> On Tue, Jan 08, 2019 at 03:51:18PM +0000, Marc Zyngier wrote:
>> On 08/01/2019 15:40, Dave Martin wrote:
>>> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
>>>> Instead disabling interrupts by setting the PSR.I bit, use a priority
>>>> higher than the one used for interrupts to mask them via PMR.
>>>>
>>>> When using PMR to disable interrupts, the value of PMR will be used
>>>> instead of PSR.[DAIF] for the irqflags.
>>>>
>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>>> Cc: Oleg Nesterov <oleg@redhat.com>
>>>> ---
>>>>  arch/arm64/include/asm/efi.h      |  11 ++++
>>>>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
>>>>  2 files changed, 106 insertions(+), 28 deletions(-)
>>>
>>> [...]
>>>
>>>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>>>> index 24692ed..fa3b06f 100644
>>>> --- a/arch/arm64/include/asm/irqflags.h
>>>> +++ b/arch/arm64/include/asm/irqflags.h
>>>> @@ -18,7 +18,9 @@
>>>
>>> [...]
>>>
>>>>  static inline void arch_local_irq_enable(void)
>>>>  {
>>>> -	asm volatile(
>>>> -		"msr	daifclr, #2		// arch_local_irq_enable"
>>>> -		:
>>>> +	unsigned long unmasked = GIC_PRIO_IRQON;
>>>> +
>>>> +	asm volatile(ALTERNATIVE(
>>>> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
>>>> +		"nop",
>>>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
>>>> +		"dsb	sy",
>>>
>>> I'm still not convinced these dsbs are needed.
>>>
>>> Without the dsb, we are probably not guaranteed to take a pending
>>> interrupt _immediately_ on unmasking, but I'm not sure that's a
>>> problem.
>>>
>>> What goes wrong if we omit them?
>>
>> Then the GIC doesn't know it can now deliver interrupts of a lower
>> priority. Only a dsb can guarantee that the GIC's view of PMR will get
>> updated.
>>
>> See 9.1.6 (Observability of the effects of accesses to the GIC
>> registers), which states:
>>
>> <quote>
>> Architectural execution of a DSB instruction guarantees that
>> — The last value written to ICC_PMR_EL1 or GICC_PMR is observed by the
>> associated Redistributor.
>> </quote>
>>
>> So yes, DSB is required.
> 
> But it says neither what is means for the PMR write to be "observed by
> the redistributor", nor whether the DSB is required for the
> redistributor to observe the write at all.  (So, is an implementation
> allowed to cached in the CPU interface indefinitely until forcibly
> flushed to the redistributor by a DSB, and in any case can the write's
> reaching the distributor in finite time or not have any effect that we
> care about in this case?).
> 
> 
> My reason for querying this is that temporary local masking of classes
> of interrupts seems an obvious use case for the PMR, and the DSB
> requirement flies rather in the face of this.
> 
> 
> Have we seen hardware where interrupts may stall forever upstream of the
> CPU interface after a PMR write, until a dsb is executed by the CPU?
> 

I don't have too much GICv3 hardware at hand but the one I tested
*seems* to work without the DSB. But of course this does not mean it is
correct even on that hardware.

As you said it is not clear what is meant by "observed by the
redistributor", it can mean that the redistributor is allowed to stop
forwarding interrupts of lower priority or it can mean that it always
forwards them as the CPU interface is required to prevent masked
priority interrupts to be signaled to the CPU. The hardware I'm using
might be in the latter case... or not...



> If so that is sad, but I guess we have to live with it.
> 
> Also, is it ever important in Linux that a pending interrupt be taken
> immediately upon unmasking (and how do we know that said interrupt is
> pending)?  If not, we don't care precisely when such interrupts are
> pended to the PE, just that such an interrupt cannot be taken before
> the PMR write that unmasks it.  It would be insane for the self-
> synchronization of PMR writes to lack this guarantee (and a DSB after
> the PMR write would do no good anyway in that case).
> 

The first thing that comes to mind for this would be the RT world, I'm
not sure it would it would be nice to have interrupts "actually
unmasked" at some random time in the future.

Otherwise, aren't there some parts of the kernel that expect being able
to take interrupts? (memory allocation comes to mind). What happens if
the interrupt might not happen?

Also, code that does:

while (!poll_update_from_irq()) {
	local_irq_disable();

	// do stuff

	local_irq_enable();
}

Might never see interrupts. It is not clear to me whether that is the
case for the loop in do_idle() and the check for need_resched().

I don't know if drivers might have such code patterns.

Cheers,
Dave Martin Jan. 8, 2019, 6:01 p.m. UTC | #6
On Tue, Jan 08, 2019 at 05:16:43PM +0000, Marc Zyngier wrote:
> On 08/01/2019 16:45, Dave Martin wrote:
> > On Tue, Jan 08, 2019 at 03:51:18PM +0000, Marc Zyngier wrote:
> >> On 08/01/2019 15:40, Dave Martin wrote:
> >>> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> >>>> Instead disabling interrupts by setting the PSR.I bit, use a priority
> >>>> higher than the one used for interrupts to mask them via PMR.
> >>>>
> >>>> When using PMR to disable interrupts, the value of PMR will be used
> >>>> instead of PSR.[DAIF] for the irqflags.
> >>>>
> >>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> >>>> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
> >>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >>>> Cc: Will Deacon <will.deacon@arm.com>
> >>>> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >>>> Cc: Oleg Nesterov <oleg@redhat.com>
> >>>> ---
> >>>>  arch/arm64/include/asm/efi.h      |  11 ++++
> >>>>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
> >>>>  2 files changed, 106 insertions(+), 28 deletions(-)
> >>>
> >>> [...]
> >>>
> >>>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> >>>> index 24692ed..fa3b06f 100644
> >>>> --- a/arch/arm64/include/asm/irqflags.h
> >>>> +++ b/arch/arm64/include/asm/irqflags.h
> >>>> @@ -18,7 +18,9 @@
> >>>
> >>> [...]
> >>>
> >>>>  static inline void arch_local_irq_enable(void)
> >>>>  {
> >>>> -	asm volatile(
> >>>> -		"msr	daifclr, #2		// arch_local_irq_enable"
> >>>> -		:
> >>>> +	unsigned long unmasked = GIC_PRIO_IRQON;
> >>>> +
> >>>> +	asm volatile(ALTERNATIVE(
> >>>> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
> >>>> +		"nop",
> >>>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
> >>>> +		"dsb	sy",
> >>>
> >>> I'm still not convinced these dsbs are needed.
> >>>
> >>> Without the dsb, we are probably not guaranteed to take a pending
> >>> interrupt _immediately_ on unmasking, but I'm not sure that's a
> >>> problem.
> >>>
> >>> What goes wrong if we omit them?
> >>
> >> Then the GIC doesn't know it can now deliver interrupts of a lower
> >> priority. Only a dsb can guarantee that the GIC's view of PMR will get
> >> updated.
> >>
> >> See 9.1.6 (Observability of the effects of accesses to the GIC
> >> registers), which states:
> >>
> >> <quote>
> >> Architectural execution of a DSB instruction guarantees that
> >> — The last value written to ICC_PMR_EL1 or GICC_PMR is observed by the
> >> associated Redistributor.
> >> </quote>
> >>
> >> So yes, DSB is required.
> > 
> > But it says neither what is means for the PMR write to be "observed by
> > the redistributor", nor whether the DSB is required for the
> > redistributor to observe the write at all.
> 
> Well, it seems pretty clear to me that if the redistributor doesn't
> observe the PMR value, it is unlikely to change its interpretation of
> it· And conversely, the redistributor is allowed to sit pretty and not
> give you any interrupt until you are actually telling it that something
> has changed.
> 
> I really think that for once, the spec is pretty unambiguous about what
> is required.

I think that there is some scope for clarification, but it sounds like
that doesn't impact this series.

> > (So, is an implementation
> > allowed to cached in the CPU interface indefinitely until forcibly
> > flushed to the redistributor by a DSB, and in any case can the write's
> > reaching the distributor in finite time or not have any effect that we
> > care about in this case?).
> 
> Nothing in the spec says that the system register write will magically
> trickle down to the redistributor in the absence of a DSB.
> 
> > My reason for querying this is that temporary local masking of classes
> > of interrupts seems an obvious use case for the PMR, and the DSB
> > requirement flies rather in the face of this.
> 
> Are you implying that the GIC architecture should have any form of
> sanity and be useful for general purpose software? Think again! ;-)
> 
> The PMR behavior you are describing only works in a single direction
> (from low to high priority), because the CPU interface has to perform
> some filtering. In the opposite direction, you need the big hammer.
> 
> > Have we seen hardware where interrupts may stall forever upstream of the
> > CPU interface after a PMR write, until a dsb is executed by the CPU?
> 
> Yes. You even have to have a DSB right after a read of IAR to avoid
> loosing interrupts. The short story is that there is hardly any
> synchronization between redistributor and CPU interface. Implementations
> are allowed a more closely coupled design, but that's not what the
> architecture mandates.

Well I guess that pretty much wraps it up!

If implementations require it, then we obviously need to have it.

> > Also, is it ever important in Linux that a pending interrupt be taken
> > immediately upon unmasking (and how do we know that said interrupt is
> > pending)?  If not, we don't care precisely when such interrupts are
> > pended to the PE, just that such an interrupt cannot be taken before
> > the PMR write that unmasks it.  It would be insane for the self-
> > synchronization of PMR writes to lack this guarantee (and a DSB after
> > the PMR write would do no good anyway in that case).
> 
> RT folks are usually quite picky on when they see their interrupts
> firing. I can also imagine the following scenario:
> 
> 	set_pmr(allow all interrupts)
> 	WFI
> 
> where things stop rather abruptly if this is the only CPU in the system.

This is a rather special case (and this case is indeed handled specially
by this series).  Here we don't need to expedite interrupt delivery,
but we want to make sure that the logic that will wake us back up knows
what it's supposed to be doing before we start powering things off.

However, I can see that for RT purposes explicit synchronisation on
interrupt unmasking may provided a more bounded interrupt blackout that
simply waiting for synchronisation to happen in the background (though
that might have better throughput).  Anyway, this is academic since
we have to have the synchyronisation anyway.

I will consider myself corrected and stop trolling...

Cheers
---Dave
Dave Martin Jan. 8, 2019, 6:37 p.m. UTC | #7
On Tue, Jan 08, 2019 at 05:58:59PM +0000, Julien Thierry wrote:
> 
> 
> On 08/01/2019 16:45, Dave Martin wrote:
> > On Tue, Jan 08, 2019 at 03:51:18PM +0000, Marc Zyngier wrote:
> >> On 08/01/2019 15:40, Dave Martin wrote:
> >>> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> >>>> Instead disabling interrupts by setting the PSR.I bit, use a priority
> >>>> higher than the one used for interrupts to mask them via PMR.
> >>>>
> >>>> When using PMR to disable interrupts, the value of PMR will be used
> >>>> instead of PSR.[DAIF] for the irqflags.
> >>>>
> >>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> >>>> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
> >>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >>>> Cc: Will Deacon <will.deacon@arm.com>
> >>>> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >>>> Cc: Oleg Nesterov <oleg@redhat.com>
> >>>> ---
> >>>>  arch/arm64/include/asm/efi.h      |  11 ++++
> >>>>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
> >>>>  2 files changed, 106 insertions(+), 28 deletions(-)
> >>>
> >>> [...]
> >>>
> >>>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> >>>> index 24692ed..fa3b06f 100644
> >>>> --- a/arch/arm64/include/asm/irqflags.h
> >>>> +++ b/arch/arm64/include/asm/irqflags.h
> >>>> @@ -18,7 +18,9 @@
> >>>
> >>> [...]
> >>>
> >>>>  static inline void arch_local_irq_enable(void)
> >>>>  {
> >>>> -	asm volatile(
> >>>> -		"msr	daifclr, #2		// arch_local_irq_enable"
> >>>> -		:
> >>>> +	unsigned long unmasked = GIC_PRIO_IRQON;
> >>>> +
> >>>> +	asm volatile(ALTERNATIVE(
> >>>> +		"msr	daifclr, #2		// arch_local_irq_enable\n"
> >>>> +		"nop",
> >>>> +		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
> >>>> +		"dsb	sy",
> >>>
> >>> I'm still not convinced these dsbs are needed.
> >>>
> >>> Without the dsb, we are probably not guaranteed to take a pending
> >>> interrupt _immediately_ on unmasking, but I'm not sure that's a
> >>> problem.
> >>>
> >>> What goes wrong if we omit them?
> >>
> >> Then the GIC doesn't know it can now deliver interrupts of a lower
> >> priority. Only a dsb can guarantee that the GIC's view of PMR will get
> >> updated.
> >>
> >> See 9.1.6 (Observability of the effects of accesses to the GIC
> >> registers), which states:
> >>
> >> <quote>
> >> Architectural execution of a DSB instruction guarantees that
> >> — The last value written to ICC_PMR_EL1 or GICC_PMR is observed by the
> >> associated Redistributor.
> >> </quote>
> >>
> >> So yes, DSB is required.
> > 
> > But it says neither what is means for the PMR write to be "observed by
> > the redistributor", nor whether the DSB is required for the
> > redistributor to observe the write at all.  (So, is an implementation
> > allowed to cached in the CPU interface indefinitely until forcibly
> > flushed to the redistributor by a DSB, and in any case can the write's
> > reaching the distributor in finite time or not have any effect that we
> > care about in this case?).
> > 
> > 
> > My reason for querying this is that temporary local masking of classes
> > of interrupts seems an obvious use case for the PMR, and the DSB
> > requirement flies rather in the face of this.
> > 
> > 
> > Have we seen hardware where interrupts may stall forever upstream of the
> > CPU interface after a PMR write, until a dsb is executed by the CPU?
> > 
> 
> I don't have too much GICv3 hardware at hand but the one I tested
> *seems* to work without the DSB. But of course this does not mean it is
> correct even on that hardware.
> 
> As you said it is not clear what is meant by "observed by the
> redistributor", it can mean that the redistributor is allowed to stop
> forwarding interrupts of lower priority or it can mean that it always
> forwards them as the CPU interface is required to prevent masked
> priority interrupts to be signaled to the CPU. The hardware I'm using
> might be in the latter case... or not...
> 
> 
> 
> > If so that is sad, but I guess we have to live with it.
> > 
> > Also, is it ever important in Linux that a pending interrupt be taken
> > immediately upon unmasking (and how do we know that said interrupt is
> > pending)?  If not, we don't care precisely when such interrupts are
> > pended to the PE, just that such an interrupt cannot be taken before
> > the PMR write that unmasks it.  It would be insane for the self-
> > synchronization of PMR writes to lack this guarantee (and a DSB after
> > the PMR write would do no good anyway in that case).
> > 
> 
> The first thing that comes to mind for this would be the RT world, I'm
> not sure it would it would be nice to have interrupts "actually
> unmasked" at some random time in the future.

If the PMR write were to start propagating immediately and DSB merely
waited for it to reach the redistributor, then there would be no
random delay -- rather, the DSB would generate some pointless system
load and prevent any useful work being done in the interim, without
actually speeding anything up.

Of course, it sounds like the GIC spec doesn't mandate such an
implementation, so we can't rely on this.

> Otherwise, aren't there some parts of the kernel that expect being able
> to take interrupts? (memory allocation comes to mind). What happens if
> the interrupt might not happen?
> 
> Also, code that does:
> 
> while (!poll_update_from_irq()) {
> 	local_irq_disable();
> 
> 	// do stuff
> 
> 	local_irq_enable();
> }
> 
> Might never see interrupts. It is not clear to me whether that is the
> case for the loop in do_idle() and the check for need_resched().
> 
> I don't know if drivers might have such code patterns.

For appropriate self-synchronising behaviour in PMR writes, this
problem is solvable: if you have a pending PMR write followed by a
PMR write with a more restrictive bound (so disabling some interrupts)
then the second write could be stalled until the first has synchronised,
providing an opportunity to take pending interrupts before they would
get masked.

There might be implementations that do something like this, but I guess
we can't rely on this either.

I'll defer to the experts...

Cheers
---Dave
Catalin Marinas Jan. 18, 2019, 4:09 p.m. UTC | #8
Hi Julien,

On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> + * Having two ways to control interrupt status is a bit complicated. Some
> + * locations like exception entries will have PSR.I bit set by the architecture
> + * while PMR is unmasked.
> + * We need the irqflags to represent that interrupts are disabled in such cases.
> + *
> + * For this, we lower the value read from PMR when the I bit is set so it is
> + * considered as an irq masking priority. (With PMR, lower value means masking
> + * more interrupts).
> + */
> +#define _get_irqflags(daif_bits, pmr)					\
> +({									\
> +	unsigned long flags;						\
> +									\
> +	BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));	\
> +	asm volatile(ALTERNATIVE(					\
> +		"mov	%0, %1\n"					\
> +		"nop\n"							\
> +		"nop",							\
> +		"and	%0, %1, #" __stringify(PSR_I_BIT) "\n"		\
> +		"mvn	%0, %0\n"					\
> +		"and	%0, %0, %2",					\
> +		ARM64_HAS_IRQ_PRIO_MASKING)				\

Can you write the last two instructions as a single:

		bic	%0, %2, %0

> +		: "=&r" (flags)						\
> +		: "r" (daif_bits), "r" (pmr)				\
> +		: "memory");						\
> +									\
> +	flags;								\
> +})
> +
> +/*
>   * Save the current interrupt enable state.
>   */
>  static inline unsigned long arch_local_save_flags(void)
>  {
> -	unsigned long flags;
> -	asm volatile(
> -		"mrs	%0, daif		// arch_local_save_flags"
> -		: "=r" (flags)
> +	unsigned long daif_bits;
> +	unsigned long pmr; // Only used if alternative is on
> +
> +	daif_bits = read_sysreg(daif);
> +
> +	// Get PMR

Nitpick: don't use C++ (or arm asm) comment style in C code.

> +	asm volatile(ALTERNATIVE(
> +			"nop",
> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
> +			ARM64_HAS_IRQ_PRIO_MASKING)
> +		: "=&r" (pmr)
>  		:
>  		: "memory");
> +
> +	return _get_irqflags(daif_bits, pmr);
> +}

I find this confusing spread over two inline asm statements. IIUC, you
want something like below (it could be written as inline asm but I need
to understand it first):

	daif_bits = read_sysreg(daif);

	if (system_uses_irq_prio_masking()) {
		pmr = read_gicreg(ICC_PMR_EL1);
		flags = pmr & ~(daif_bits & PSR_I_BIT);
	} else {
		flags = daif_bits;
	}

	return flags;

In the case where the interrupts are disabled at the PSR level, is the
PMR value still relevant? Could we just return the GIC_PRIO_IRQOFF?
Something like:

	flags = read_sysreg(daif);

	if (system_uses_irq_prio_masking())
		flags = flags & PSR_I_BIT ?
			GIC_PRIO_IRQOFF : read_gicreg(ICC_PMR_EL1);
Dave Martin Jan. 18, 2019, 4:35 p.m. UTC | #9
On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> Instead disabling interrupts by setting the PSR.I bit, use a priority
> higher than the one used for interrupts to mask them via PMR.
> 
> When using PMR to disable interrupts, the value of PMR will be used
> instead of PSR.[DAIF] for the irqflags.
> 
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/arm64/include/asm/efi.h      |  11 ++++
>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
>  2 files changed, 106 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
> index 7ed3208..134ff6e 100644
> --- a/arch/arm64/include/asm/efi.h
> +++ b/arch/arm64/include/asm/efi.h
> @@ -44,6 +44,17 @@
>  
>  #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
>  
> +#define arch_efi_save_flags(state_flags)		\
> +	do {						\
> +		(state_flags) =	read_sysreg(daif);	\
> +	} while (0)
> +
> +#define arch_efi_restore_flags(state_flags)		\
> +	do {						\
> +		write_sysreg(state_flags, daif);	\
> +	} while (0)
> +
> +

Randomly commenting a few minor nits as I glance down my mailbox...

There's no need to protect single statements with do { } while(0).

Just protect an expression statement that could be misparsed with ( ).

->

#define arch_efi_save_flags(state_flags) ((state_flags) = read_sysreg(daif))
#define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif)

[...]

> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> index 24692ed..fa3b06f 100644
> --- a/arch/arm64/include/asm/irqflags.h
> +++ b/arch/arm64/include/asm/irqflags.h
> @@ -18,7 +18,9 @@
>  
>  #ifdef __KERNEL__
>  
> +#include <asm/alternative.h>
>  #include <asm/ptrace.h>
> +#include <asm/sysreg.h>
>  
>  /*
>   * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
> @@ -36,47 +38,96 @@

[...]

>  /*
> + * Having two ways to control interrupt status is a bit complicated. Some
> + * locations like exception entries will have PSR.I bit set by the architecture
> + * while PMR is unmasked.
> + * We need the irqflags to represent that interrupts are disabled in such cases.
> + *
> + * For this, we lower the value read from PMR when the I bit is set so it is
> + * considered as an irq masking priority. (With PMR, lower value means masking
> + * more interrupts).
> + */
> +#define _get_irqflags(daif_bits, pmr)					\
> +({									\
> +	unsigned long flags;						\
> +									\
> +	BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));	\
> +	asm volatile(ALTERNATIVE(					\
> +		"mov	%0, %1\n"					\
> +		"nop\n"							\
> +		"nop",							\
> +		"and	%0, %1, #" __stringify(PSR_I_BIT) "\n"		\
> +		"mvn	%0, %0\n"					\
> +		"and	%0, %0, %2",					\
> +		ARM64_HAS_IRQ_PRIO_MASKING)				\
> +		: "=&r" (flags)						\
> +		: "r" (daif_bits), "r" (pmr)				\
> +		: "memory");						\
> +									\
> +	flags;								\
> +})

Nit: does this need to be a macro?

({ ... }) is mildly gross and it's preferable to avoid it if the code
works just as well without...

pmr would need to be passed as a pointer, with "r" (*pmr) in the asm,
but I think it would compile down to precisely the same code.

> +
> +/*
>   * Save the current interrupt enable state.
>   */
>  static inline unsigned long arch_local_save_flags(void)
>  {
> -	unsigned long flags;
> -	asm volatile(
> -		"mrs	%0, daif		// arch_local_save_flags"
> -		: "=r" (flags)
> +	unsigned long daif_bits;
> +	unsigned long pmr; // Only used if alternative is on
> +
> +	daif_bits = read_sysreg(daif);
> +
> +	// Get PMR
> +	asm volatile(ALTERNATIVE(
> +			"nop",
> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
> +			ARM64_HAS_IRQ_PRIO_MASKING)
> +		: "=&r" (pmr)

Why earlyclobber?

>  		:
>  		: "memory");

[...]

> @@ -85,16 +136,32 @@ static inline unsigned long arch_local_save_flags(void)

[...]

>  static inline int arch_irqs_disabled_flags(unsigned long flags)
>  {
> -	return flags & PSR_I_BIT;
> +	int res;
> +
> +	asm volatile(ALTERNATIVE(
> +			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
> +			"nop",
> +			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
> +			"cset	%w0, ls",
> +			ARM64_HAS_IRQ_PRIO_MASKING)
> +		: "=&r" (res)

Why earlyclobber?  %0 is not written before the reading of any input
argument so far as I can see, in either alternative.

Cheers
---Dave
Julien Thierry Jan. 18, 2019, 4:57 p.m. UTC | #10
Hi Catalin,

On 18/01/2019 16:09, Catalin Marinas wrote:
> Hi Julien,
> 
> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
>> + * Having two ways to control interrupt status is a bit complicated. Some
>> + * locations like exception entries will have PSR.I bit set by the architecture
>> + * while PMR is unmasked.
>> + * We need the irqflags to represent that interrupts are disabled in such cases.
>> + *
>> + * For this, we lower the value read from PMR when the I bit is set so it is
>> + * considered as an irq masking priority. (With PMR, lower value means masking
>> + * more interrupts).
>> + */
>> +#define _get_irqflags(daif_bits, pmr)					\
>> +({									\
>> +	unsigned long flags;						\
>> +									\
>> +	BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));	\
>> +	asm volatile(ALTERNATIVE(					\
>> +		"mov	%0, %1\n"					\
>> +		"nop\n"							\
>> +		"nop",							\
>> +		"and	%0, %1, #" __stringify(PSR_I_BIT) "\n"		\
>> +		"mvn	%0, %0\n"					\
>> +		"and	%0, %0, %2",					\
>> +		ARM64_HAS_IRQ_PRIO_MASKING)				\
> 
> Can you write the last two instructions as a single:
> 
> 		bic	%0, %2, %0

Yes, makes sense. Although we won't need it anymore with your suggestion
below.

> 
>> +		: "=&r" (flags)						\
>> +		: "r" (daif_bits), "r" (pmr)				\
>> +		: "memory");						\
>> +									\
>> +	flags;								\
>> +})
>> +
>> +/*
>>   * Save the current interrupt enable state.
>>   */
>>  static inline unsigned long arch_local_save_flags(void)
>>  {
>> -	unsigned long flags;
>> -	asm volatile(
>> -		"mrs	%0, daif		// arch_local_save_flags"
>> -		: "=r" (flags)
>> +	unsigned long daif_bits;
>> +	unsigned long pmr; // Only used if alternative is on
>> +
>> +	daif_bits = read_sysreg(daif);
>> +
>> +	// Get PMR
> 
> Nitpick: don't use C++ (or arm asm) comment style in C code.

Noted.

> 
>> +	asm volatile(ALTERNATIVE(
>> +			"nop",
>> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
>> +			ARM64_HAS_IRQ_PRIO_MASKING)
>> +		: "=&r" (pmr)
>>  		:
>>  		: "memory");
>> +
>> +	return _get_irqflags(daif_bits, pmr);
>> +}
> 
> I find this confusing spread over two inline asm statements. IIUC, you
> want something like below (it could be written as inline asm but I need
> to understand it first):
> 
> 	daif_bits = read_sysreg(daif);
> 
> 	if (system_uses_irq_prio_masking()) {
> 		pmr = read_gicreg(ICC_PMR_EL1);
> 		flags = pmr & ~(daif_bits & PSR_I_BIT);
> 	} else {
> 		flags = daif_bits;
> 	}
> 
> 	return flags;
> 
> In the case where the interrupts are disabled at the PSR level, is the
> PMR value still relevant? Could we just return the GIC_PRIO_IRQOFF?
> Something like:
> 
> 	flags = read_sysreg(daif);
> 
> 	if (system_uses_irq_prio_masking())
> 		flags = flags & PSR_I_BIT ?
> 			GIC_PRIO_IRQOFF : read_gicreg(ICC_PMR_EL1);
> 

You're right, returning GIC_PRIO_IRQOFF should be good enough (it is
actually what happens in this version because GIC_PRIO_IRQOFF ==
GIC_PRIO_IRQON & ~PSR_I_BIT happens to be true). Your suggestion would
make things easier to reason about. Maybe something like:


static inline unsigned long arch_local_save_flags(void)
{
	unsigned long daif_bits;
	unsigned long prio_off = GIC_PRIO_IRQOFF;

	daif_bits = read_sysreg(daif);

	asm volatile(ALTERNATIVE(
		"mov	%0, %1\n"
		"nop\n"
		"nop",
		"mrs	%0, SYS_ICC_PMR_EL1\n"
		"ands	%1, %1, PSR_I_BIT\n"
		"csel	%0, %0, %2, eq")
	: "=&r" (flags)
	: "r" (daif_bits), "r" (prio_off)
	: "memory");

	return flags;
}

(Looks like it removes one nop from the alternative as well, unless I
messed up something)

Does that seem better to you?

Thanks,
Julien Thierry Jan. 18, 2019, 5:27 p.m. UTC | #11
On 18/01/2019 16:35, Dave Martin wrote:
> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
>> Instead disabling interrupts by setting the PSR.I bit, use a priority
>> higher than the one used for interrupts to mask them via PMR.
>>
>> When using PMR to disable interrupts, the value of PMR will be used
>> instead of PSR.[DAIF] for the irqflags.
>>
>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> Cc: Oleg Nesterov <oleg@redhat.com>
>> ---
>>  arch/arm64/include/asm/efi.h      |  11 ++++
>>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
>>  2 files changed, 106 insertions(+), 28 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
>> index 7ed3208..134ff6e 100644
>> --- a/arch/arm64/include/asm/efi.h
>> +++ b/arch/arm64/include/asm/efi.h
>> @@ -44,6 +44,17 @@
>>  
>>  #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
>>  
>> +#define arch_efi_save_flags(state_flags)		\
>> +	do {						\
>> +		(state_flags) =	read_sysreg(daif);	\
>> +	} while (0)
>> +
>> +#define arch_efi_restore_flags(state_flags)		\
>> +	do {						\
>> +		write_sysreg(state_flags, daif);	\
>> +	} while (0)
>> +
>> +
> 
> Randomly commenting a few minor nits as I glance down my mailbox...
> 
> There's no need to protect single statements with do { } while(0).
> 
> Just protect an expression statement that could be misparsed with ( ).
> 
> ->
> 
> #define arch_efi_save_flags(state_flags) ((state_flags) = read_sysreg(daif))

For the efi_save_flags(), I wanted to avoid it getting used as an
expression.

Would casting the assignment expression to (void) be acceptable?

> #define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif)

For this one, write_sysreg() is already a statement, so yes, I
definitely don't need a do { } while (0) here.

> 
> [...]
> 
>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>> index 24692ed..fa3b06f 100644
>> --- a/arch/arm64/include/asm/irqflags.h
>> +++ b/arch/arm64/include/asm/irqflags.h
>> @@ -18,7 +18,9 @@
>>  
>>  #ifdef __KERNEL__
>>  
>> +#include <asm/alternative.h>
>>  #include <asm/ptrace.h>
>> +#include <asm/sysreg.h>
>>  
>>  /*
>>   * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
>> @@ -36,47 +38,96 @@
> 
> [...]
> 
>>  /*
>> + * Having two ways to control interrupt status is a bit complicated. Some
>> + * locations like exception entries will have PSR.I bit set by the architecture
>> + * while PMR is unmasked.
>> + * We need the irqflags to represent that interrupts are disabled in such cases.
>> + *
>> + * For this, we lower the value read from PMR when the I bit is set so it is
>> + * considered as an irq masking priority. (With PMR, lower value means masking
>> + * more interrupts).
>> + */
>> +#define _get_irqflags(daif_bits, pmr)					\
>> +({									\
>> +	unsigned long flags;						\
>> +									\
>> +	BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));	\
>> +	asm volatile(ALTERNATIVE(					\
>> +		"mov	%0, %1\n"					\
>> +		"nop\n"							\
>> +		"nop",							\
>> +		"and	%0, %1, #" __stringify(PSR_I_BIT) "\n"		\
>> +		"mvn	%0, %0\n"					\
>> +		"and	%0, %0, %2",					\
>> +		ARM64_HAS_IRQ_PRIO_MASKING)				\
>> +		: "=&r" (flags)						\
>> +		: "r" (daif_bits), "r" (pmr)				\
>> +		: "memory");						\
>> +									\
>> +	flags;								\
>> +})
> 
> Nit: does this need to be a macro?
> 
> ({ ... }) is mildly gross and it's preferable to avoid it if the code
> works just as well without...
> 
> pmr would need to be passed as a pointer, with "r" (*pmr) in the asm,
> but I think it would compile down to precisely the same code.
> 

The only motivation for it to be a macro was to be able to #undef it
after its use.

But with Catalin's suggestion, looks like we can makes things simple and
avoid having a separate macro/function.

>> +
>> +/*
>>   * Save the current interrupt enable state.
>>   */
>>  static inline unsigned long arch_local_save_flags(void)
>>  {
>> -	unsigned long flags;
>> -	asm volatile(
>> -		"mrs	%0, daif		// arch_local_save_flags"
>> -		: "=r" (flags)
>> +	unsigned long daif_bits;
>> +	unsigned long pmr; // Only used if alternative is on
>> +
>> +	daif_bits = read_sysreg(daif);
>> +
>> +	// Get PMR
>> +	asm volatile(ALTERNATIVE(
>> +			"nop",
>> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
>> +			ARM64_HAS_IRQ_PRIO_MASKING)
>> +		: "=&r" (pmr)
> 
> Why earlyclobber?
>>>  		:
>>  		: "memory");
> 
> [...]
> 
>> @@ -85,16 +136,32 @@ static inline unsigned long arch_local_save_flags(void)
> 
> [...]
> 
>>  static inline int arch_irqs_disabled_flags(unsigned long flags)
>>  {
>> -	return flags & PSR_I_BIT;
>> +	int res;
>> +
>> +	asm volatile(ALTERNATIVE(
>> +			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
>> +			"nop",
>> +			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
>> +			"cset	%w0, ls",
>> +			ARM64_HAS_IRQ_PRIO_MASKING)
>> +		: "=&r" (res)
> 
> Why earlyclobber?  %0 is not written before the reading of any input
> argument so far as I can see, in either alternative.
> 

I didn't really understand what the earlyclobber semantic was, thanks
for explaining it.

Thanks,
Catalin Marinas Jan. 18, 2019, 5:30 p.m. UTC | #12
On Fri, Jan 18, 2019 at 04:57:32PM +0000, Julien Thierry wrote:
> On 18/01/2019 16:09, Catalin Marinas wrote:
> > On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> >> +	asm volatile(ALTERNATIVE(
> >> +			"nop",
> >> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
> >> +			ARM64_HAS_IRQ_PRIO_MASKING)
> >> +		: "=&r" (pmr)
> >>  		:
> >>  		: "memory");
> >> +
> >> +	return _get_irqflags(daif_bits, pmr);
> >> +}
> > 
> > I find this confusing spread over two inline asm statements. IIUC, you
> > want something like below (it could be written as inline asm but I need
> > to understand it first):
> > 
> > 	daif_bits = read_sysreg(daif);
> > 
> > 	if (system_uses_irq_prio_masking()) {
> > 		pmr = read_gicreg(ICC_PMR_EL1);
> > 		flags = pmr & ~(daif_bits & PSR_I_BIT);
> > 	} else {
> > 		flags = daif_bits;
> > 	}
> > 
> > 	return flags;
> > 
> > In the case where the interrupts are disabled at the PSR level, is the
> > PMR value still relevant? Could we just return the GIC_PRIO_IRQOFF?
> > Something like:
> > 
> > 	flags = read_sysreg(daif);
> > 
> > 	if (system_uses_irq_prio_masking())
> > 		flags = flags & PSR_I_BIT ?
> > 			GIC_PRIO_IRQOFF : read_gicreg(ICC_PMR_EL1);
> > 
> 
> You're right, returning GIC_PRIO_IRQOFF should be good enough (it is
> actually what happens in this version because GIC_PRIO_IRQOFF ==
> GIC_PRIO_IRQON & ~PSR_I_BIT happens to be true).

This wasn't entirely clear to me, I got confused by:

+       BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));  \

and I thought there isn't necessarily an equality between the two.

> Your suggestion would
> make things easier to reason about. Maybe something like:
> 
> 
> static inline unsigned long arch_local_save_flags(void)
> {
> 	unsigned long daif_bits;
> 	unsigned long prio_off = GIC_PRIO_IRQOFF;
> 
> 	daif_bits = read_sysreg(daif);
> 
> 	asm volatile(ALTERNATIVE(
> 		"mov	%0, %1\n"
> 		"nop\n"
> 		"nop",
> 		"mrs	%0, SYS_ICC_PMR_EL1\n"
> 		"ands	%1, %1, PSR_I_BIT\n"
> 		"csel	%0, %0, %2, eq")
> 	: "=&r" (flags)
> 	: "r" (daif_bits), "r" (prio_off)
> 	: "memory");
> 
> 	return flags;
> }

It looks fine. If you turn the BUILD_BUG_ON into a !=, you could
probably simplify the asm a bit (though the number of instructions
generated would probably be the same). Untested:

static inline unsigned long arch_local_save_flags(void)
{
	unsigned long flags;

	flags = read_sysreg(daif);

	asm volatile(ALTERNATIVE(
		"nop",
		"bic	%0, %1, %2")
	: "=&r" (flags)
	: "r" (flags & PSR_I_BIT), "r" (GIC_PRIO_IRQOFF)
	: "memory");

	return flags;
}
Catalin Marinas Jan. 18, 2019, 5:33 p.m. UTC | #13
On Fri, Jan 18, 2019 at 05:30:02PM +0000, Catalin Marinas wrote:
> On Fri, Jan 18, 2019 at 04:57:32PM +0000, Julien Thierry wrote:
> > On 18/01/2019 16:09, Catalin Marinas wrote:
> > > On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> > >> +	asm volatile(ALTERNATIVE(
> > >> +			"nop",
> > >> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
> > >> +			ARM64_HAS_IRQ_PRIO_MASKING)
> > >> +		: "=&r" (pmr)
> > >>  		:
> > >>  		: "memory");
> > >> +
> > >> +	return _get_irqflags(daif_bits, pmr);
> > >> +}
> > > 
> > > I find this confusing spread over two inline asm statements. IIUC, you
> > > want something like below (it could be written as inline asm but I need
> > > to understand it first):
> > > 
> > > 	daif_bits = read_sysreg(daif);
> > > 
> > > 	if (system_uses_irq_prio_masking()) {
> > > 		pmr = read_gicreg(ICC_PMR_EL1);
> > > 		flags = pmr & ~(daif_bits & PSR_I_BIT);
> > > 	} else {
> > > 		flags = daif_bits;
> > > 	}
> > > 
> > > 	return flags;
> > > 
> > > In the case where the interrupts are disabled at the PSR level, is the
> > > PMR value still relevant? Could we just return the GIC_PRIO_IRQOFF?
> > > Something like:
> > > 
> > > 	flags = read_sysreg(daif);
> > > 
> > > 	if (system_uses_irq_prio_masking())
> > > 		flags = flags & PSR_I_BIT ?
> > > 			GIC_PRIO_IRQOFF : read_gicreg(ICC_PMR_EL1);
> > > 
> > 
> > You're right, returning GIC_PRIO_IRQOFF should be good enough (it is
> > actually what happens in this version because GIC_PRIO_IRQOFF ==
> > GIC_PRIO_IRQON & ~PSR_I_BIT happens to be true).
> 
> This wasn't entirely clear to me, I got confused by:
> 
> +       BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));  \
> 
> and I thought there isn't necessarily an equality between the two.
> 
> > Your suggestion would
> > make things easier to reason about. Maybe something like:
> > 
> > 
> > static inline unsigned long arch_local_save_flags(void)
> > {
> > 	unsigned long daif_bits;
> > 	unsigned long prio_off = GIC_PRIO_IRQOFF;
> > 
> > 	daif_bits = read_sysreg(daif);
> > 
> > 	asm volatile(ALTERNATIVE(
> > 		"mov	%0, %1\n"
> > 		"nop\n"
> > 		"nop",
> > 		"mrs	%0, SYS_ICC_PMR_EL1\n"
> > 		"ands	%1, %1, PSR_I_BIT\n"
> > 		"csel	%0, %0, %2, eq")
> > 	: "=&r" (flags)
> > 	: "r" (daif_bits), "r" (prio_off)
> > 	: "memory");
> > 
> > 	return flags;
> > }
> 
> It looks fine. If you turn the BUILD_BUG_ON into a !=, you could
> probably simplify the asm a bit (though the number of instructions
> generated would probably be the same). Untested:
> 
> static inline unsigned long arch_local_save_flags(void)
> {
> 	unsigned long flags;
> 
> 	flags = read_sysreg(daif);
> 
> 	asm volatile(ALTERNATIVE(
> 		"nop",
> 		"bic	%0, %1, %2")
> 	: "=&r" (flags)
> 	: "r" (flags & PSR_I_BIT), "r" (GIC_PRIO_IRQOFF)
> 	: "memory");

Ah, I missed a read from SYS_ICC_PMR_EL1 here. Anyway, the idea was that
you don't need to set prio_off to a variable, just pass "r" (constant)
here and the compiler does the trick.
Dave Martin Jan. 18, 2019, 6:23 p.m. UTC | #14
On Fri, Jan 18, 2019 at 05:27:29PM +0000, Julien Thierry wrote:
> 
> 
> On 18/01/2019 16:35, Dave Martin wrote:
> > On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
> >> Instead disabling interrupts by setting the PSR.I bit, use a priority
> >> higher than the one used for interrupts to mask them via PMR.
> >>
> >> When using PMR to disable interrupts, the value of PMR will be used
> >> instead of PSR.[DAIF] for the irqflags.
> >>
> >> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> >> Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
> >> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >> Cc: Will Deacon <will.deacon@arm.com>
> >> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> Cc: Oleg Nesterov <oleg@redhat.com>
> >> ---
> >>  arch/arm64/include/asm/efi.h      |  11 ++++
> >>  arch/arm64/include/asm/irqflags.h | 123 +++++++++++++++++++++++++++++---------
> >>  2 files changed, 106 insertions(+), 28 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
> >> index 7ed3208..134ff6e 100644
> >> --- a/arch/arm64/include/asm/efi.h
> >> +++ b/arch/arm64/include/asm/efi.h
> >> @@ -44,6 +44,17 @@
> >>  
> >>  #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
> >>  
> >> +#define arch_efi_save_flags(state_flags)		\
> >> +	do {						\
> >> +		(state_flags) =	read_sysreg(daif);	\
> >> +	} while (0)
> >> +
> >> +#define arch_efi_restore_flags(state_flags)		\
> >> +	do {						\
> >> +		write_sysreg(state_flags, daif);	\
> >> +	} while (0)
> >> +
> >> +
> > 
> > Randomly commenting a few minor nits as I glance down my mailbox...
> > 
> > There's no need to protect single statements with do { } while(0).
> > 
> > Just protect an expression statement that could be misparsed with ( ).
> > 
> > ->
> > 
> > #define arch_efi_save_flags(state_flags) ((state_flags) = read_sysreg(daif))
> 
> For the efi_save_flags(), I wanted to avoid it getting used as an
> expression.
> 
> Would casting the assignment expression to (void) be acceptable?

Yep, that's a common way of achieving that, so

	((void)((state_flags) = read_sysreg(daif)))

should be OK.

> > #define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif)
> 
> For this one, write_sysreg() is already a statement, so yes, I
> definitely don't need a do { } while (0) here.
> 
> > 
> > [...]
> > 
> >> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> >> index 24692ed..fa3b06f 100644
> >> --- a/arch/arm64/include/asm/irqflags.h
> >> +++ b/arch/arm64/include/asm/irqflags.h
> >> @@ -18,7 +18,9 @@
> >>  
> >>  #ifdef __KERNEL__
> >>  
> >> +#include <asm/alternative.h>
> >>  #include <asm/ptrace.h>
> >> +#include <asm/sysreg.h>
> >>  
> >>  /*
> >>   * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
> >> @@ -36,47 +38,96 @@
> > 
> > [...]
> > 
> >>  /*
> >> + * Having two ways to control interrupt status is a bit complicated. Some
> >> + * locations like exception entries will have PSR.I bit set by the architecture
> >> + * while PMR is unmasked.
> >> + * We need the irqflags to represent that interrupts are disabled in such cases.
> >> + *
> >> + * For this, we lower the value read from PMR when the I bit is set so it is
> >> + * considered as an irq masking priority. (With PMR, lower value means masking
> >> + * more interrupts).
> >> + */
> >> +#define _get_irqflags(daif_bits, pmr)					\
> >> +({									\
> >> +	unsigned long flags;						\
> >> +									\
> >> +	BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));	\
> >> +	asm volatile(ALTERNATIVE(					\
> >> +		"mov	%0, %1\n"					\
> >> +		"nop\n"							\
> >> +		"nop",							\
> >> +		"and	%0, %1, #" __stringify(PSR_I_BIT) "\n"		\
> >> +		"mvn	%0, %0\n"					\
> >> +		"and	%0, %0, %2",					\
> >> +		ARM64_HAS_IRQ_PRIO_MASKING)				\
> >> +		: "=&r" (flags)						\
> >> +		: "r" (daif_bits), "r" (pmr)				\
> >> +		: "memory");						\
> >> +									\
> >> +	flags;								\
> >> +})
> > 
> > Nit: does this need to be a macro?
> > 
> > ({ ... }) is mildly gross and it's preferable to avoid it if the code
> > works just as well without...
> > 
> > pmr would need to be passed as a pointer, with "r" (*pmr) in the asm,
> > but I think it would compile down to precisely the same code.
> > 
> 
> The only motivation for it to be a macro was to be able to #undef it
> after its use.
> 
> But with Catalin's suggestion, looks like we can makes things simple and
> avoid having a separate macro/function.

Fair enough.  This wasn't a big deal in any case.

> 
> >> +
> >> +/*
> >>   * Save the current interrupt enable state.
> >>   */
> >>  static inline unsigned long arch_local_save_flags(void)
> >>  {
> >> -	unsigned long flags;
> >> -	asm volatile(
> >> -		"mrs	%0, daif		// arch_local_save_flags"
> >> -		: "=r" (flags)
> >> +	unsigned long daif_bits;
> >> +	unsigned long pmr; // Only used if alternative is on
> >> +
> >> +	daif_bits = read_sysreg(daif);
> >> +
> >> +	// Get PMR
> >> +	asm volatile(ALTERNATIVE(
> >> +			"nop",
> >> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
> >> +			ARM64_HAS_IRQ_PRIO_MASKING)
> >> +		: "=&r" (pmr)
> > 
> > Why earlyclobber?
> >>>  		:
> >>  		: "memory");
> > 
> > [...]
> > 
> >> @@ -85,16 +136,32 @@ static inline unsigned long arch_local_save_flags(void)
> > 
> > [...]
> > 
> >>  static inline int arch_irqs_disabled_flags(unsigned long flags)
> >>  {
> >> -	return flags & PSR_I_BIT;
> >> +	int res;
> >> +
> >> +	asm volatile(ALTERNATIVE(
> >> +			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
> >> +			"nop",
> >> +			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
> >> +			"cset	%w0, ls",
> >> +			ARM64_HAS_IRQ_PRIO_MASKING)
> >> +		: "=&r" (res)
> > 
> > Why earlyclobber?  %0 is not written before the reading of any input
> > argument so far as I can see, in either alternative.
> > 
> 
> I didn't really understand what the earlyclobber semantic was, thanks
> for explaining it.

It basically means "you can't put any input argument in the same
register, because it can get overwritten by some output argument before
use".

When unsure, it's safer to include "&", but it may cause the compiler to
allocate registers suboptimally.

In the above,

	"mov %w0, #" __stringify(PSR_I_BIT) "\n"
	"and %w0, %w0, %w1"

would require the earlyclobber, so that the compiler can't allocate the
same register for %0 and %1.

But in your code, allocating the same register looks safe.  (Don't take
my word for it, though!)

Cheers
---Dave
Julien Thierry Jan. 21, 2019, 8:45 a.m. UTC | #15
On 18/01/2019 17:33, Catalin Marinas wrote:
> On Fri, Jan 18, 2019 at 05:30:02PM +0000, Catalin Marinas wrote:
>> On Fri, Jan 18, 2019 at 04:57:32PM +0000, Julien Thierry wrote:
>>> On 18/01/2019 16:09, Catalin Marinas wrote:
>>>> On Tue, Jan 08, 2019 at 02:07:30PM +0000, Julien Thierry wrote:
>>>>> +	asm volatile(ALTERNATIVE(
>>>>> +			"nop",
>>>>> +			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
>>>>> +			ARM64_HAS_IRQ_PRIO_MASKING)
>>>>> +		: "=&r" (pmr)
>>>>>  		:
>>>>>  		: "memory");
>>>>> +
>>>>> +	return _get_irqflags(daif_bits, pmr);
>>>>> +}
>>>>
>>>> I find this confusing spread over two inline asm statements. IIUC, you
>>>> want something like below (it could be written as inline asm but I need
>>>> to understand it first):
>>>>
>>>> 	daif_bits = read_sysreg(daif);
>>>>
>>>> 	if (system_uses_irq_prio_masking()) {
>>>> 		pmr = read_gicreg(ICC_PMR_EL1);
>>>> 		flags = pmr & ~(daif_bits & PSR_I_BIT);
>>>> 	} else {
>>>> 		flags = daif_bits;
>>>> 	}
>>>>
>>>> 	return flags;
>>>>
>>>> In the case where the interrupts are disabled at the PSR level, is the
>>>> PMR value still relevant? Could we just return the GIC_PRIO_IRQOFF?
>>>> Something like:
>>>>
>>>> 	flags = read_sysreg(daif);
>>>>
>>>> 	if (system_uses_irq_prio_masking())
>>>> 		flags = flags & PSR_I_BIT ?
>>>> 			GIC_PRIO_IRQOFF : read_gicreg(ICC_PMR_EL1);
>>>>
>>>
>>> You're right, returning GIC_PRIO_IRQOFF should be good enough (it is
>>> actually what happens in this version because GIC_PRIO_IRQOFF ==
>>> GIC_PRIO_IRQON & ~PSR_I_BIT happens to be true).
>>
>> This wasn't entirely clear to me, I got confused by:
>>
>> +       BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));  \
>>
>> and I thought there isn't necessarily an equality between the two.
>>
>>> Your suggestion would
>>> make things easier to reason about. Maybe something like:
>>>
>>>
>>> static inline unsigned long arch_local_save_flags(void)
>>> {
>>> 	unsigned long daif_bits;
>>> 	unsigned long prio_off = GIC_PRIO_IRQOFF;
>>>
>>> 	daif_bits = read_sysreg(daif);
>>>
>>> 	asm volatile(ALTERNATIVE(
>>> 		"mov	%0, %1\n"
>>> 		"nop\n"
>>> 		"nop",
>>> 		"mrs	%0, SYS_ICC_PMR_EL1\n"
>>> 		"ands	%1, %1, PSR_I_BIT\n"
>>> 		"csel	%0, %0, %2, eq")
>>> 	: "=&r" (flags)
>>> 	: "r" (daif_bits), "r" (prio_off)
>>> 	: "memory");
>>>
>>> 	return flags;
>>> }
>>
>> It looks fine. If you turn the BUILD_BUG_ON into a !=, you could
>> probably simplify the asm a bit (though the number of instructions
>> generated would probably be the same). Untested:
>>
>> static inline unsigned long arch_local_save_flags(void)
>> {
>> 	unsigned long flags;
>>
>> 	flags = read_sysreg(daif);
>>
>> 	asm volatile(ALTERNATIVE(
>> 		"nop",
>> 		"bic	%0, %1, %2")
>> 	: "=&r" (flags)
>> 	: "r" (flags & PSR_I_BIT), "r" (GIC_PRIO_IRQOFF)
>> 	: "memory");
> 
> Ah, I missed a read from SYS_ICC_PMR_EL1 here. Anyway, the idea was that
> you don't need to set prio_off to a variable, just pass "r" (constant)
> here and the compiler does the trick.
> 

I see, thanks. I'll avoid that superfluous variable.

Thanks,
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 7ed3208..134ff6e 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -44,6 +44,17 @@ 
 
 #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
 
+#define arch_efi_save_flags(state_flags)		\
+	do {						\
+		(state_flags) =	read_sysreg(daif);	\
+	} while (0)
+
+#define arch_efi_restore_flags(state_flags)		\
+	do {						\
+		write_sysreg(state_flags, daif);	\
+	} while (0)
+
+
 /* arch specific definitions used by the stub code */
 
 /*
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 24692ed..fa3b06f 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -18,7 +18,9 @@ 
 
 #ifdef __KERNEL__
 
+#include <asm/alternative.h>
 #include <asm/ptrace.h>
+#include <asm/sysreg.h>
 
 /*
  * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
@@ -36,47 +38,96 @@ 
 /*
  * CPU interrupt mask handling.
  */
-static inline unsigned long arch_local_irq_save(void)
-{
-	unsigned long flags;
-	asm volatile(
-		"mrs	%0, daif		// arch_local_irq_save\n"
-		"msr	daifset, #2"
-		: "=r" (flags)
-		:
-		: "memory");
-	return flags;
-}
-
 static inline void arch_local_irq_enable(void)
 {
-	asm volatile(
-		"msr	daifclr, #2		// arch_local_irq_enable"
-		:
+	unsigned long unmasked = GIC_PRIO_IRQON;
+
+	asm volatile(ALTERNATIVE(
+		"msr	daifclr, #2		// arch_local_irq_enable\n"
+		"nop",
+		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
+		"dsb	sy",
+		ARM64_HAS_IRQ_PRIO_MASKING)
 		:
+		: "r" (unmasked)
 		: "memory");
 }
 
 static inline void arch_local_irq_disable(void)
 {
-	asm volatile(
-		"msr	daifset, #2		// arch_local_irq_disable"
-		:
+	unsigned long masked = GIC_PRIO_IRQOFF;
+
+	asm volatile(ALTERNATIVE(
+		"msr	daifset, #2		// arch_local_irq_disable",
+		"msr_s  " __stringify(SYS_ICC_PMR_EL1) ", %0",
+		ARM64_HAS_IRQ_PRIO_MASKING)
 		:
+		: "r" (masked)
 		: "memory");
 }
 
 /*
+ * Having two ways to control interrupt status is a bit complicated. Some
+ * locations like exception entries will have PSR.I bit set by the architecture
+ * while PMR is unmasked.
+ * We need the irqflags to represent that interrupts are disabled in such cases.
+ *
+ * For this, we lower the value read from PMR when the I bit is set so it is
+ * considered as an irq masking priority. (With PMR, lower value means masking
+ * more interrupts).
+ */
+#define _get_irqflags(daif_bits, pmr)					\
+({									\
+	unsigned long flags;						\
+									\
+	BUILD_BUG_ON(GIC_PRIO_IRQOFF < (GIC_PRIO_IRQON & ~PSR_I_BIT));	\
+	asm volatile(ALTERNATIVE(					\
+		"mov	%0, %1\n"					\
+		"nop\n"							\
+		"nop",							\
+		"and	%0, %1, #" __stringify(PSR_I_BIT) "\n"		\
+		"mvn	%0, %0\n"					\
+		"and	%0, %0, %2",					\
+		ARM64_HAS_IRQ_PRIO_MASKING)				\
+		: "=&r" (flags)						\
+		: "r" (daif_bits), "r" (pmr)				\
+		: "memory");						\
+									\
+	flags;								\
+})
+
+/*
  * Save the current interrupt enable state.
  */
 static inline unsigned long arch_local_save_flags(void)
 {
-	unsigned long flags;
-	asm volatile(
-		"mrs	%0, daif		// arch_local_save_flags"
-		: "=r" (flags)
+	unsigned long daif_bits;
+	unsigned long pmr; // Only used if alternative is on
+
+	daif_bits = read_sysreg(daif);
+
+	// Get PMR
+	asm volatile(ALTERNATIVE(
+			"nop",
+			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1),
+			ARM64_HAS_IRQ_PRIO_MASKING)
+		: "=&r" (pmr)
 		:
 		: "memory");
+
+	return _get_irqflags(daif_bits, pmr);
+}
+
+#undef _get_irqflags
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags;
+
+	flags = arch_local_save_flags();
+
+	arch_local_irq_disable();
+
 	return flags;
 }
 
@@ -85,16 +136,32 @@  static inline unsigned long arch_local_save_flags(void)
  */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-	asm volatile(
-		"msr	daif, %0		// arch_local_irq_restore"
-	:
-	: "r" (flags)
-	: "memory");
+	asm volatile(ALTERNATIVE(
+			"msr	daif, %0\n"
+			"nop",
+			"msr_s	" __stringify(SYS_ICC_PMR_EL1) ", %0\n"
+			"dsb	sy",
+			ARM64_HAS_IRQ_PRIO_MASKING)
+		: "+r" (flags)
+		:
+		: "memory");
 }
 
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-	return flags & PSR_I_BIT;
+	int res;
+
+	asm volatile(ALTERNATIVE(
+			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
+			"nop",
+			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
+			"cset	%w0, ls",
+			ARM64_HAS_IRQ_PRIO_MASKING)
+		: "=&r" (res)
+		: "r" ((int) flags)
+		: "memory");
+
+	return res;
 }
 #endif
 #endif