
[v2] arm64: enable EDAC on arm64

Message ID 1385154308-31335-1-git-send-email-robherring2@gmail.com (mailing list archive)
State New, archived

Commit Message

Rob Herring Nov. 22, 2013, 9:05 p.m. UTC
From: Rob Herring <rob.herring@calxeda.com>

Implement atomic_scrub and enable EDAC for arm64.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
v2:
- Add loop for exclusive store success
- Fix size to be 32-bits at a time. The framework gives no alignment
  guarantees.

 arch/arm64/Kconfig            |  1 +
 arch/arm64/include/asm/edac.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 arch/arm64/include/asm/edac.h

Comments

Will Deacon Nov. 25, 2013, 6:20 p.m. UTC | #1
On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
> diff --git a/arch/arm64/include/asm/edac.h b/arch/arm64/include/asm/edac.h
> new file mode 100644
> index 0000000..6d2d262
> --- /dev/null
> +++ b/arch/arm64/include/asm/edac.h
> @@ -0,0 +1,43 @@
> +/*
> + * Copyright 2013 Calxeda, Inc.
> + * Based on PPC version Copyright 2007 MontaVista Software, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef ASM_EDAC_H
> +#define ASM_EDAC_H
> +/*
> + * ECC atomic, DMA, SMP and interrupt safe scrub function.
> + * Implements the per arch atomic_scrub() that EDAC uses for software
> + * ECC scrubbing.  It reads memory and then writes back the original
> + * value, allowing the hardware to detect and correct memory errors.
> + */
> +static inline void atomic_scrub(void *va, u32 size)
> +{
> +	unsigned int *virt_addr = va;
> +	unsigned int i;
> +
> +	for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
> +		long result;
> +		unsigned long tmp;
> +
> +		asm volatile("// atomic_scrub\n"
> +		"1:	ldxr	%w0, %2\n"
> +		"	stxr	%w1, %w0, %2\n"
> +		"	cbnz	%w1, 1b"
> +			: "=&r" (result), "=&r" (tmp), "+Q" (*virt_addr)
> +			: : "cc");

You still don't need the "cc" clobber (cbnz doesn't update the flags and,
yes, we have redundant clobbers in our atomic.h which I'll fix...).

Will
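
For reference, the loop body with the "cc" clobber dropped, as Will suggests, would read as follows (a minimal sketch of just that change to the v2 code, not a posted revision):

		asm volatile("// atomic_scrub\n"
		"1:	ldxr	%w0, %2\n"
		"	stxr	%w1, %w0, %2\n"
		"	cbnz	%w1, 1b"
			: "=&r" (result), "=&r" (tmp), "+Q" (*virt_addr));
		/* no clobber list: ldxr/stxr/cbnz leave the flags untouched */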
Rob Herring Nov. 26, 2013, 3:24 p.m. UTC | #2
On Mon, Nov 25, 2013 at 12:20 PM, Will Deacon <will.deacon@arm.com> wrote:
> On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
>> diff --git a/arch/arm64/include/asm/edac.h b/arch/arm64/include/asm/edac.h
>> new file mode 100644
>> index 0000000..6d2d262
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/edac.h
>> @@ -0,0 +1,43 @@
>> +/*
>> + * Copyright 2013 Calxeda, Inc.
>> + * Based on PPC version Copyright 2007 MontaVista Software, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along with
>> + * this program.  If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +#ifndef ASM_EDAC_H
>> +#define ASM_EDAC_H
>> +/*
>> + * ECC atomic, DMA, SMP and interrupt safe scrub function.
>> + * Implements the per arch atomic_scrub() that EDAC uses for software
>> + * ECC scrubbing.  It reads memory and then writes back the original
>> + * value, allowing the hardware to detect and correct memory errors.
>> + */
>> +static inline void atomic_scrub(void *va, u32 size)
>> +{
>> +     unsigned int *virt_addr = va;
>> +     unsigned int i;
>> +
>> +     for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>> +             long result;
>> +             unsigned long tmp;
>> +
>> +             asm volatile("// atomic_scrub\n"
>> +             "1:     ldxr    %w0, %2\n"
>> +             "       stxr    %w1, %w0, %2\n"
>> +             "       cbnz    %w1, 1b"
>> +                     : "=&r" (result), "=&r" (tmp), "+Q" (*virt_addr)
>> +                     : : "cc");
>
> You still don't need the "cc" clobber (cbnz doesn't update the flags and,
> yes, we have redundant clobbers in our atomic.h which I'll fix...).

Okay, I thought the fact that the first version did not have the cbnz
was why you said to remove it.

Rob
Catalin Marinas Nov. 26, 2013, 3:37 p.m. UTC | #3
On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
> +static inline void atomic_scrub(void *va, u32 size)
> +{
> +	unsigned int *virt_addr = va;
> +	unsigned int i;
> +
> +	for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {

BTW, maybe the compiler is smart enough to drop the i, but why not just
use an int *last_addr = va + size - 3; and compare against that?
Rob Herring Dec. 5, 2013, 12:53 a.m. UTC | #4
On Tue, Nov 26, 2013 at 9:37 AM, Catalin Marinas
<catalin.marinas@arm.com> wrote:
> On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
>> +static inline void atomic_scrub(void *va, u32 size)
>> +{
>> +     unsigned int *virt_addr = va;
>> +     unsigned int i;
>> +
>> +     for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>
> BTW, maybe the compiler is smart enough to drop the i, but why not just
> use an int *last_addr = va + size - 3; and compare against that?

Evidently, the compiler is not that smart. Here's what I ended up
with. There's no need to subtract 3 that I can see:

unsigned int *last_addr = va + size;
for (; virt_addr < last_addr; virt_addr++) {

original:

ffffffc0001b0280 <atomic_scrub>:
ffffffc0001b0280:       53027c21        lsr     w1, w1, #2
ffffffc0001b0284:       34000141        cbz     w1, ffffffc0001b02ac <atomic_scrub+0x2c>
ffffffc0001b0288:       51000421        sub     w1, w1, #0x1
ffffffc0001b028c:       91000421        add     x1, x1, #0x1
ffffffc0001b0290:       8b010801        add     x1, x0, x1, lsl #2
ffffffc0001b0294:       885f7c02        ldxr    w2, [x0]
ffffffc0001b0298:       88037c02        stxr    w3, w2, [x0]
ffffffc0001b029c:       35ffffc3        cbnz    w3, ffffffc0001b0294 <atomic_scrub+0x14>
ffffffc0001b02a0:       91001000        add     x0, x0, #0x4
ffffffc0001b02a4:       eb01001f        cmp     x0, x1
ffffffc0001b02a8:       54ffff61        b.ne    ffffffc0001b0294 <atomic_scrub+0x14>
ffffffc0001b02ac:       d65f03c0        ret

using last_addr:

ffffffc0001b0280 <atomic_scrub>:
ffffffc0001b0280:       8b214001        add     x1, x0, w1, uxtw
ffffffc0001b0284:       eb01001f        cmp     x0, x1
ffffffc0001b0288:       540000e2        b.cs    ffffffc0001b02a4 <atomic_scrub+0x24>
ffffffc0001b028c:       885f7c02        ldxr    w2, [x0]
ffffffc0001b0290:       88037c02        stxr    w3, w2, [x0]
ffffffc0001b0294:       35ffffc3        cbnz    w3, ffffffc0001b028c <atomic_scrub+0xc>
ffffffc0001b0298:       91001000        add     x0, x0, #0x4
ffffffc0001b029c:       eb00003f        cmp     x1, x0
ffffffc0001b02a0:       54ffff68        b.hi    ffffffc0001b028c <atomic_scrub+0xc>
ffffffc0001b02a4:       d65f03c0        ret

Rob
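
Folding that fragment back into the function, the revised version would look roughly like this (a sketch reconstructed from the thread, with the "cc" clobber dropped per Will's earlier comment; the actual v3 posting may differ):

static inline void atomic_scrub(void *va, u32 size)
{
	unsigned int *virt_addr = va;
	unsigned int *last_addr = va + size;	/* void * arithmetic is a GNU extension */

	for (; virt_addr < last_addr; virt_addr++) {
		long result;
		unsigned long tmp;

		/* read each word and write it straight back, atomically */
		asm volatile("// atomic_scrub\n"
		"1:	ldxr	%w0, %2\n"
		"	stxr	%w1, %w0, %2\n"
		"	cbnz	%w1, 1b"
			: "=&r" (result), "=&r" (tmp), "+Q" (*virt_addr));
	}
}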
Catalin Marinas Dec. 5, 2013, 11:51 a.m. UTC | #5
On Thu, Dec 05, 2013 at 12:53:33AM +0000, Rob Herring wrote:
> On Tue, Nov 26, 2013 at 9:37 AM, Catalin Marinas
> <catalin.marinas@arm.com> wrote:
> > On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
> >> +static inline void atomic_scrub(void *va, u32 size)
> >> +{
> >> +     unsigned int *virt_addr = va;
> >> +     unsigned int i;
> >> +
> >> +     for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
> >
> > BTW, maybe the compiler is smart enough to drop the i, but why not just
> > use an int *last_addr = va + size - 3; and compare against that?
> 
> Evidently, the compiler is not that smart. Here's what I ended up
> with. There's no need to subtract 3 that I can see:
> 
> unsigned int *last_addr = va + size;
> for (; virt_addr < last_addr; virt_addr++) {

The 3 thing was for a size that is not a multiple of 4. Do we have any
guarantees here?
Rob Herring Dec. 5, 2013, 1:52 p.m. UTC | #6
On Thu, Dec 5, 2013 at 5:51 AM, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Thu, Dec 05, 2013 at 12:53:33AM +0000, Rob Herring wrote:
>> On Tue, Nov 26, 2013 at 9:37 AM, Catalin Marinas
>> <catalin.marinas@arm.com> wrote:
>> > On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
>> >> +static inline void atomic_scrub(void *va, u32 size)
>> >> +{
>> >> +     unsigned int *virt_addr = va;
>> >> +     unsigned int i;
>> >> +
>> >> +     for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>> >
>> > BTW, maybe the compiler is smart enough to drop the i, but why not just
>> > use an int *last_addr = va + size - 3; and compare against that?
>>
>> Evidently, the compiler is not that smart. Here's what I ended up
>> with. There's no need to subtract 3 that I can see:
>>
>> unsigned int *last_addr = va + size;
>> for (; virt_addr < last_addr; virt_addr++) {
>
> The 3 thing was for a size that is not a multiple of 4. Do we have any
> guarantees here?

Only that no one does ECC on less than 64-bit wide DDR. I'm only
assuming 32-bit alignment, which is what all the other
implementations assume.

If we have a size that is not a multiple of 4, then we want to expand
the size to include the partial bytes. This function is effectively a
nop, so the fact that we may touch 1-3 extra bytes at the end does not
matter. The only thing subtracting 3 does is mean the comparison needs
to be '<=' instead of '<'. It would also break if you had a size of 1
or 2. I think what you intended was to align the size, but you really
need to add 3 and mask it to do that. But that is not necessary in my
version.

Rob
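
For the record, the "add 3 and mask" rounding Rob describes would be the following (hypothetical; the thread concludes it is unnecessary, since writing up to 3 bytes past size is harmless here):

	/* round size up to a whole number of 32-bit words */
	size = (size + 3) & ~3U;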
Catalin Marinas Dec. 5, 2013, 3:34 p.m. UTC | #7
On Thu, Dec 05, 2013 at 01:52:23PM +0000, Rob Herring wrote:
> On Thu, Dec 5, 2013 at 5:51 AM, Catalin Marinas <catalin.marinas@arm.com> wrote:
> > On Thu, Dec 05, 2013 at 12:53:33AM +0000, Rob Herring wrote:
> >> On Tue, Nov 26, 2013 at 9:37 AM, Catalin Marinas
> >> <catalin.marinas@arm.com> wrote:
> >> > On Fri, Nov 22, 2013 at 09:05:08PM +0000, Rob Herring wrote:
> >> >> +static inline void atomic_scrub(void *va, u32 size)
> >> >> +{
> >> >> +     unsigned int *virt_addr = va;
> >> >> +     unsigned int i;
> >> >> +
> >> >> +     for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
> >> >
> >> > BTW, maybe the compiler is smart enough to drop the i, but why not just
> >> > use an int *last_addr = va + size - 3; and compare against that?
> >>
> >> Evidently, the compiler is not that smart. Here's what I ended up
> >> with. There's no need to subtract 3 that I can see:
> >>
> >> unsigned int *last_addr = va + size;
> >> for (; virt_addr < last_addr; virt_addr++) {
> >
> > The 3 thing was for a size that is not a multiple of 4. Do we have any
> > guarantees here?
> 
> Only that no one does ECC on less than 64-bit wide DDR. I'm only
> assuming 32-bit alignment, which is what all the other
> implementations assume.
> 
> If we have a size that is not a multiple of 4, then we want to expand
> the size to include the partial bytes. This function is effectively a
> nop, so the fact that we may touch 1-3 extra bytes at the end does not
> matter. The only thing subtracting 3 does is mean the comparison needs
> to be '<=' instead of '<'. It would also break if you had a size of 1
> or 2. I think what you intended was to align the size, but you really
> need to add 3 and mask it to do that. But that is not necessary in my
> version.

What I wanted was to make sure it doesn't write more bytes than size. If
you think it's safe to write the extra bytes, then you don't need any
subtraction.
Tomasz Nowicki April 10, 2014, 1:10 p.m. UTC | #8
Rob Herring <robherring2@gmail.com> writes:

> 
> From: Rob Herring <rob.herring@calxeda.com>
> 
> Implement atomic_scrub and enable EDAC for arm64.
> 
> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
> v2:
> - Add loop for exclusive store success
> - Fix size to be 32-bits at a time. The framework gives no alignment
>   guarantees.
> 

Hi Rob,

I am working on memory error handling for ARM64 and am wondering what
your plans are regarding this patch. It looks essential for my further
work.

Regards,
Tomasz
Rohit Vaswani April 14, 2014, 10:21 p.m. UTC | #9
On 4/10/2014 6:10 AM, Tomasz Nowicki wrote:
> Rob Herring <robherring2@gmail.com> writes:
>
>> From: Rob Herring <rob.herring@calxeda.com>
>>
>> Implement atomic_scrub and enable EDAC for arm64.
>>
>> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> ---
>> v2:
>> - Add loop for exclusive store success
>> - Fix size to be 32-bits at a time. The framework gives no alignment
>>    guarantees.
>>
> Hi Rob,
>
> I am working on memory error handling for ARM64 and am wondering what
> your plans are regarding this patch. It looks essential for my further
> work.
>
> Regards,
> Tomasz
>
Hi Tomasz, Rob,

I am looking for similar information on the A53 and A57 L1/L2 single-
and double-bit ECC errors.
Is there an upstream/Linaro effort for an EDAC driver in progress?

-Rohit


Thanks,
Rohit Vaswani
Rob Herring April 21, 2014, 4:19 p.m. UTC | #10
On Mon, Apr 14, 2014 at 5:21 PM, Rohit Vaswani <rvaswani@codeaurora.org> wrote:
> On 4/10/2014 6:10 AM, Tomasz Nowicki wrote:
>>
>> Rob Herring <robherring2@gmail.com> writes:
>>
>>> From: Rob Herring <rob.herring@calxeda.com>
>>>
>>> Implement atomic_scrub and enable EDAC for arm64.
>>>
>>> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will.deacon@arm.com>
>>> ---
>>> v2:
>>> - Add loop for exclusive store success
>>> - Fix size to be 32-bits at a time. The framework gives no alignment
>>>    guarantees.
>>>
>> Hi Rob,
>>
>> I am working on memory error handling for ARM64 and am wondering what
>> your plans are regarding this patch. It looks essential for my further
>> work.
>>
>> Regards,
>> Tomasz
>>
> Hi Tomasz, Rob,
>
> I am looking for similar information on the A53 and A57 L1/L2 single-
> and double-bit ECC errors.
> Is there an upstream/Linaro effort for an EDAC driver in progress?

I just sent out v3.

There is work to develop a generic RAS framework based on perf, which
would hopefully replace EDAC and mcelog (x86 only). Jean Pihet has
picked up this work recently. There probably needs to be some
infrastructure work to handle SError events, as I believe that is how
cache ECC errors are reported, although those may get handled by
firmware and then passed to the OS via some other mechanism like ACPI.

Rob
Tomasz Nowicki April 22, 2014, 1:01 p.m. UTC | #11
Hi Rob,

Thanks for taking care of that. Please see my question below.

On 21.04.2014 18:19, Rob Herring wrote:
> On Mon, Apr 14, 2014 at 5:21 PM, Rohit Vaswani <rvaswani@codeaurora.org> wrote:
>> On 4/10/2014 6:10 AM, Tomasz Nowicki wrote:
>>>
>>> Rob Herring <robherring2@gmail.com> writes:
>>>
>>>> From: Rob Herring <rob.herring@calxeda.com>
>>>>
>>>> Implement atomic_scrub and enable EDAC for arm64.
>>>>
>>>> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>> ---
>>>> v2:
>>>> - Add loop for exclusive store success
>>>> - Fix size to be 32-bits at a time. The framework gives no alignment
>>>>     guarantees.
>>>>
>>> Hi Rob,
>>>
>>> I am working on memory error handling for ARM64 and am wondering what
>>> your plans are regarding this patch. It looks essential for my further
>>> work.
>>>
>>> Regards,
>>> Tomasz
>>>
>> Hi Tomasz, Rob,
>>
>> I am looking for similar information on the A53 and A57 L1/L2 single-
>> and double-bit ECC errors.
>> Is there an upstream/Linaro effort for an EDAC driver in progress?
>
> I just sent out v3.
>
> There is work to develop a generic RAS framework based on perf, which
> would hopefully replace EDAC and mcelog (x86 only).

What do you mean by replacing EDAC and mcelog with the RAS framework?

> Jean Pihet has
> picked up this work recently. There probably needs to be some
> infrastructure work to handle SError events, as I believe that is how
> cache ECC errors are reported, although those may get handled by
> firmware and then passed to the OS via some other mechanism like ACPI.


Tomasz

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 88c8b6c1..109687a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -11,6 +11,7 @@  config ARM64
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
+	select EDAC_SUPPORT
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
diff --git a/arch/arm64/include/asm/edac.h b/arch/arm64/include/asm/edac.h
new file mode 100644
index 0000000..6d2d262
--- /dev/null
+++ b/arch/arm64/include/asm/edac.h
@@ -0,0 +1,43 @@ 
+/*
+ * Copyright 2013 Calxeda, Inc.
+ * Based on PPC version Copyright 2007 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef ASM_EDAC_H
+#define ASM_EDAC_H
+/*
+ * ECC atomic, DMA, SMP and interrupt safe scrub function.
+ * Implements the per arch atomic_scrub() that EDAC uses for software
+ * ECC scrubbing.  It reads memory and then writes back the original
+ * value, allowing the hardware to detect and correct memory errors.
+ */
+static inline void atomic_scrub(void *va, u32 size)
+{
+	unsigned int *virt_addr = va;
+	unsigned int i;
+
+	for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
+		long result;
+		unsigned long tmp;
+
+		asm volatile("// atomic_scrub\n"
+		"1:	ldxr	%w0, %2\n"
+		"	stxr	%w1, %w0, %2\n"
+		"	cbnz	%w1, 1b"
+			: "=&r" (result), "=&r" (tmp), "+Q" (*virt_addr)
+			: : "cc");
+	}
+}
+
+#endif
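
For context, a rough sketch of how the EDAC core consumes this hook (a hypothetical caller for illustration; scrub_one_error() is made up, while kmap_atomic()/kunmap_atomic() and atomic_scrub() are real):

static void scrub_one_error(struct page *pg, unsigned long offset, u32 grain)
{
	/* map the affected page and rewrite the reported bytes in place,
	 * letting the memory controller correct and re-check ECC */
	void *va = kmap_atomic(pg);

	atomic_scrub(va + offset, grain);
	kunmap_atomic(va);
}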