arm64 memory accesses may cause undefined fault on Fujitsu-A64FX
diff mbox series

Message ID 8898674D84E3B24BA3A2D289B872026A6A29FA8F@G01JPEXMBKW03
State New
Headers show
Series
  • arm64 memory accesses may cause undefined fault on Fujitsu-A64FX
Related show

Commit Message

Zhang, Lei Jan. 18, 2019, 12:52 p.m. UTC
On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), 
memory accesses may cause undefined fault (Data abort, DFSC=0b111111).
This problem will be fixed by next version of Fujitsu-A64FX.
I would like to post a workaround to avoid this problem 
on existing version.
The workaround is to replace the fault handler for Data abort
DFSC=0b111111 with a new one to ignore this undefined fault, 
which will only affect the Fujitsu-A64FX.

I have tested this patch on A64FX and QEMU (2.9.0). The tests passed.
I will test this patch on ThunderX and report the result.
I would greatly appreciate it if someone could test this patch on
different chips to verify that it has no harmful effect on them.

If there is no problem on other chips, please merge this patch.

Below is my patch based on linux-5.0-rc2.

Signed-off-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
Tested-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
---
 Documentation/arm64/silicon-errata.txt |    1 +
 arch/arm64/Kconfig                     |   13 +++++++++++++
 arch/arm64/include/asm/cputype.h       |    4 ++++
 arch/arm64/mm/fault.c                  |   23 +++++++++++++++++++++++
 4 files changed, 41 insertions(+)

---

Best regards,
Lei Zhang
zhang.lei@jp.fujitsu.com

Comments

Mark Rutland Jan. 18, 2019, 2:17 p.m. UTC | #1
Hi,

On Fri, Jan 18, 2019 at 12:52:38PM +0000, Zhang, Lei wrote:
> On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), 
> memory accesses may cause undefined fault (Data abort, DFSC=0b111111).

So that we can better understand the problem, could you please let us
know the following:

* Under what conditions can the fault occur? e.g. is this in place of
  some other fault, or completely spurious?

* Does this only occur for data abort? i.e. not instruction aborts?

* How often does this fault occur?

* Does this only apply to Stage-1, or can the same faults be taken at
  Stage-2?

> This problem will be fixed by next version of Fujitsu-A64FX.
> I would like to post a workaround to avoid this problem 
> on existing version.
> The workaround is to replace the fault handler for Data abort
> DFSC=0b111111 with a new one to ignore this undefined fault, 
> which will only affect the Fujitsu-A64FX.
> 
> I have tested this patch on A64FX and QEMU(2.9.0).The test passed.
> I will test this patch on ThunderX and report the result.
> I fully appreciate that if someone can test this patch on different 
> chips to verity no harmful effect on other chips.
> 
> If there is no problem on other chips, please merge this patch.
> 
> Below is my patch based on linux-5.0-rc2.
> 
> Signed-off-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
> Tested-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
> ---
>  Documentation/arm64/silicon-errata.txt |    1 +
>  arch/arm64/Kconfig                     |   13 +++++++++++++
>  arch/arm64/include/asm/cputype.h       |    4 ++++
>  arch/arm64/mm/fault.c                  |   23 +++++++++++++++++++++++
>  4 files changed, 41 insertions(+)
> 
> diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
> index 1f09d04..26d64e9 100644
> --- a/Documentation/arm64/silicon-errata.txt
> +++ b/Documentation/arm64/silicon-errata.txt
> @@ -80,3 +80,4 @@ stable kernels.
>  | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
>  | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
>  | Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
> +| Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index a4168d3..9c09b2b 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -643,6 +643,19 @@ config QCOM_FALKOR_ERRATUM_E1041
>  
>  	  If unsure, say Y.
>  
> +config FUJITSU_ERRATUM_010001
> +	bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
> +	default y
> +	help
> +	  This option adds workaround for Fujitsu-A64FX erratum E#010001.
> +	  On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory accesses
> +	  may cause undefined fault (Data abort, DFSC=0b111111).
> +	  The workaround is to replace the fault handler for Data abort DFSC=0b111111
> +	  with a new one to ignore this undefined fault, which will only affect
> +	  the Fujitsu-A64FX.
> +
> +	  If unsure, say Y.
> +
>  endmenu
>  
>  
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 951ed1a..166aa50 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -76,6 +76,7 @@
>  #define ARM_CPU_IMP_BRCM		0x42
>  #define ARM_CPU_IMP_QCOM		0x51
>  #define ARM_CPU_IMP_NVIDIA		0x4E
> +#define ARM_CPU_IMP_FUJITSU		0x46
>  
>  #define ARM_CPU_PART_AEM_V8		0xD0F
>  #define ARM_CPU_PART_FOUNDATION		0xD00
> @@ -104,6 +105,8 @@
>  #define NVIDIA_CPU_PART_DENVER		0x003
>  #define NVIDIA_CPU_PART_CARMEL		0x004
>  
> +#define FUJTISU_CPU_PART_A64FX		0x001
> +
>  #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
>  #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
>  #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
> @@ -122,6 +125,7 @@
>  #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
>  #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
>  #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
> +#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJTISU_CPU_PART_A64FX)
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index efb7b2c..c465b2f 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -666,6 +666,25 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
>  	return 0;
>  }
>  
> +static bool do_bad_ignore_first = FALSE;
> +static int do_bad_ignore(unsigned long addr, unsigned int esr, struct pt_regs *regs)
> +{
> +	if (do_bad_ignore_first == TRUE)
> +		return 0;
> +	if (do_bad_ignore_first == FALSE) {
> +		unsigned int current_cpu_midr = read_cpuid_id();
> +		const struct midr_range fujitsu_a64fx_midr_range = {
> +			MIDR_FUJITSU_A64FX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, 0)
> +		};
> +
> +		if (is_midr_in_range(current_cpu_midr, &fujitsu_a64fx_midr_range) == TRUE) {
> +			do_bad_ignore_first = TRUE;
> +			return 0;
> +		}
> +	}
> +	return 1; /* "fault" same as do_bad */
> +}

I'm a bit surprised by the single retry. Is there any guarantee that a
thread will eventually stop delivering this fault code?

Note that all CPUs and threads share the do_bad_ignore_first variable,
so this is going to behave non-deterministically and kill threads in
some cases.

This code is also preemptible, so checking the MIDR here doesn't make
much sense. Either this is always uniform (and we can check once in the
errata framework), or it's variable (e.g. on a big.LITTLE system) and we
need to avoid preemption up until this point.

Rather than dynamically checking the MIDR, this should use the errata
framework, and if any A64FX CPU is discovered, set an erratum cap like
ARM64_WORKAROUND_CONFIG_FUJITSU_ERRATUM_010001, so we can do something
like:

/*
 * Fault handler for data aborts reported with DFSC=0b111111 ("unknown 63").
 *
 * Returns 0 when the Fujitsu erratum E#010001 workaround is configured and
 * its erratum capability is set; otherwise returns the result of do_bad()
 * for the same fault.
 */
static int do_bad_unknown_63(unsigned long addr, unsigned int esr,
			     struct pt_regs *regs)
{
	/*
	 * Workaround not built in, or erratum capability not set: treat
	 * this fault code like any other bad fault.
	 */
	if (!IS_ENABLED(CONFIG_FUJITSU_ERRATUM_010001) ||
	    !cpus_have_const_cap(ARM64_WORKAROUND_E010001))
		return do_bad(addr, esr, regs);

	/*
	 * On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1),
	 * memory accesses may spuriously trigger data aborts with
	 * DFSC=0b111111, so the fault is ignored here.
	 */
	return 0;
}

> +
>  static const struct fault_info fault_info[] = {
>  	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
>  	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
> @@ -730,7 +749,11 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
>  	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 60"			},
>  	{ do_bad,		SIGKILL, SI_KERNEL,	"section domain fault"		},
>  	{ do_bad,		SIGKILL, SI_KERNEL,	"page domain fault"		},
> +#ifdef	CONFIG_FUJITSU_ERRATUM_010001
> +	{ do_bad_ignore,	SIGKILL, SI_KERNEL,	"unknown 63"			},
> +#else
>  	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
> +#endif

... with this unconditionally using do_bad_unknown_63.

Thanks,
Mark.
Zhang, Lei Jan. 22, 2019, 2:05 a.m. UTC | #2
Hi, Mark

Thanks for your comments, and sorry for the late reply.

> -----Original Message-----
> * Under what conditions can the fault occur? e.g. is this in place of
>   some other fault, or completely spurious?
This fault can occur completely spuriously under
a specific hardware condition and instruction order.
 
> * Does this only occur for data abort? i.e. not instruction aborts?
Yes. This fault only occurs for data abort.

> * How often does this fault occur?
In my test, this fault occurs once every several times 
in the OS boot sequence, and after the completion of OS boot, 
this fault has never occurred.
In my opinion, this fault rarely occurs 
after the completion of OS boot.

> * Does this only apply to Stage-1, or can the same faults be taken at
>   Stage-2?
This fault can be taken only at Stage-1.

> I'm a bit surprised by the single retry. Is there any guarantee that a
> thread will eventually stop delivering this fault code?
I guarantee that, with this patch, a thread will stop
delivering this fault code.
The hardware condition which causes this fault is 
reset at exception entry, therefore execution of at 
least one instruction is guaranteed by this single retry.

> Note that all CPUs and threads share the do_bad_ignore_first variable,
> so this is going to behave non-deterministically and kill threads in
> some cases.
> 
> This code is also preemptible, so checking the MIDR here doesn't make
> much sense. Either this is always uniform (and we can check once in the
> errata framework), or it's variable (e.g. on a big.LITTLE system) and
> we
> need to avoid preemption up until this point.
> 
> Rather than dynamically checking the MIDR, this should use the errata
> framework, and if any A64FX CPU is discovered, set an erratum cap like
> ARM64_WORKAROUND_CONFIG_FUJITSU_ERRATUM_010001, so we can do something
> like:
I will try to provide a new patch reflecting your comments today.
Unfortunately, this bug may occur before
init_cpu_hwcaps_indirect_list is called.
This means the errata cap may not be available yet. I am trying to
figure out the best way to resolve this problem.

---
Best regards,
Lei Zhang
zhang.lei@jp.fujitsu.com
James Morse Jan. 22, 2019, 2:42 p.m. UTC | #3
Hello,

On 22/01/2019 02:05, Zhang, Lei wrote:
> Mark Rutland wrote:
>> * How often does this fault occur?
> In my test, this fault occurs once every several times
> in the OS boot sequence, and after the completion of OS boot,
> this fault have never occurred.
> In my opinion, this fault rarely occurs
> after the completion of OS boot.

Can you share anything about why this is? You mention a hardware-condition
that is reset at exception entry....


>> I'm a bit surprised by the single retry. Is there any guarantee that a
>> thread will eventually stop delivering this fault code?

> I guarantee that a thread will stop delivering this 
> fault code by the this patch.
> The hardware condition which cause this fault is 
> reset at exception entry, therefore execution of at 
> least one instruction is guaranteed by this single retry.

... so it's possible to take this fault during kernel_entry when we've taken an irq?

This will overwrite the ELR and SPSR (and possibly the FAR and ESR), meaning we've
lost that information and can't return to the point in the kernel that took the
irq.

If we try, we might end up spinning through the irq handler, as the ELR might
now point to el1_irq's kernel_entry.

We can spot we took an exception from the entry text ... but all we can do then
is panic().
I'm not sure it's worth working around this if it's just a matter of time before this
happens. (You mention it's less likely after boot; it would be good to know why...)


Thanks,

James
Mark Rutland Jan. 22, 2019, 3:23 p.m. UTC | #4
On Tue, Jan 22, 2019 at 02:05:26AM +0000, Zhang, Lei wrote:
> Hi, Mark
> 
> Thanks for your comments, and sorry for late.
> 
> > -----Original Message-----
> > * Under what conditions can the fault occur? e.g. is this in place of
> >   some other fault, or completely spurious?

> This fault can occur completely spurious under a specific hardware
> condition and instructions order.

Ok.

Can you be more specific regarding the conditions under which this
occurs? e.g. can this only occur with certain instruction sequences?

> > * Does this only occur for data abort? i.e. not instruction aborts?

> Yes. This fault only occurs for data abort.
> 
> > * How often does this fault occur?

> In my test, this fault occurs once every several times in the OS boot
> sequence, and after the completion of OS boot, this fault have never
> occurred.
> In my opinion, this fault rarely occurs after the completion of OS
> boot.

I'm very concerned that this could occur during boot (even if rarely),
as that implies this is being taken EL1->EL1 or EL2->EL2.

Which exception levels can the fault be taken from?

e.g. is it possible for this fault to be taken from EL2 to EL2, or from
EL3 to EL3?

> > * Does this only apply to Stage-1, or can the same faults be taken at
> >   Stage-2?
> This fault can be taken only at Stage-1.
> 
> > I'm a bit surprised by the single retry. Is there any guarantee that a
> > thread will eventually stop delivering this fault code?

> I guarantee that a thread will stop delivering this fault code by the
> this patch.
> The hardware condition which cause this fault is reset at exception
> entry, therefore execution of at least one instruction is guaranteed
> by this single retry.

Ok, so we can guarantee forward progress, but in the worst case that's
down to single-step performance levels.

> > Note that all CPUs and threads share the do_bad_ignore_first variable,
> > so this is going to behave non-deterministically and kill threads in
> > some cases.

I see now that I'd misread the code, and we'll always retry the fault
(on A64FX), so this is not true.

> > This code is also preemptible, so checking the MIDR here doesn't make
> > much sense. Either this is always uniform (and we can check once in the
> > errata framework), or it's variable (e.g. on a big.LITTLE system)
> > and we need to avoid preemption up until this point.

... though this may be a problem if A64FX is integrated into a
non-uniform system (and we could unwittingly kill threads).

> > Rather than dynamically checking the MIDR, this should use the errata
> > framework, and if any A64FX CPU is discovered, set an erratum cap like
> > ARM64_WORKAROUND_CONFIG_FUJITSU_ERRATUM_010001, so we can do something
> > like:

> I try to provide a new patch to reflect your comments in today.
> Unfortunately this bug may occurs before init_cpu_hwcaps_indirect_list
> called.

As above, I'm very concerned that this could be taken from kernel
context. There are a number of cases where we cannot handle such faults:

* During boot, when we hand-over between agents (e.g. UEFI->kernel).

* Before VBAR_EL1 is initialized.

* During exception entry/return sequences (including when the KPTI
  trampoline vectors are installed).

* While the KVM vectors are installed (for VHE).

Are there any constraints on when the fault can be raised? Under which
conditions does this happen?

Thanks,
Mark.
Zhang, Lei Jan. 23, 2019, 12:51 p.m. UTC | #5
Hi, Mark, James
> -----Original Message-----
> From: Mark Rutland [mailto:mark.rutland@arm.com]
> Sent: Wednesday, January 23, 2019 12:24 AM
> To: Zhang, Lei/張 雷
> Cc: 'catalin.marinas@arm.com'; 'will.deacon@arm.com';
> 'linux-arm-kernel@lists.infradead.org';
> 'linux-kernel@vger.kernel.org'
> Subject: Re: [PATCH] arm64 memory accesses may cause undefined fault on
> Fujitsu-A64FX
> 
 
> As above, I'm very concerned that this could be taken from kernel
> context. There are a number of cases where we cannot handle such faults:
First of all, thanks for your comments.
I think James's comment is quite similar to the comment above.
I am reviewing this point now.
I will respond to your questions after I have checked it.

Thanks
Lei Zhang
Zhang, Lei Jan. 25, 2019, 6:51 a.m. UTC | #6
Hi, Mark, James

> -----Original Message-----
> From: linux-arm-kernel
> [mailto:linux-arm-kernel-bounces@lists.infradead.org] On Behalf Of
> Zhang, Lei
> Sent: Wednesday, January 23, 2019 9:51 PM
> To: 'Mark Rutland'; 'james.morse@arm.com'
> Cc: 'catalin.marinas@arm.com'; 'will.deacon@arm.com';
> 'linux-kernel@vger.kernel.org';
> 'linux-arm-kernel@lists.infradead.org'; Zhang, Lei/張 雷
> Subject: RE: [PATCH] arm64 memory accesses may cause undefined fault on
> Fujitsu-A64FX
> 
> At first thanks for your comments.
> I thinks James's comments is quite similar with above comment.
> I am reviewing this point now.
> I will respond to your questions after I check this.
As you pointed out, this patch cannot avoid the problem
completely, so I will post a new patch that resolves the
problem in a different way.

Lei Zhang
zhang.lei@jp.fujitsu.com

Patch
diff mbox series

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 1f09d04..26d64e9 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -80,3 +80,4 @@  stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
 | Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
+| Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a4168d3..9c09b2b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -643,6 +643,19 @@  config QCOM_FALKOR_ERRATUM_E1041
 
 	  If unsure, say Y.
 
+config FUJITSU_ERRATUM_010001
+	bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
+	default y
+	help
+	  This option adds workaround for Fujitsu-A64FX erratum E#010001.
+	  On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory accesses
+	  may cause undefined fault (Data abort, DFSC=0b111111).
+	  The workaround is to replace the fault handler for Data abort DFSC=0b111111
+	  with a new one to ignore this undefined fault, which will only affect
+	  the Fujitsu-A64FX.
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 951ed1a..166aa50 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -76,6 +76,7 @@ 
 #define ARM_CPU_IMP_BRCM		0x42
 #define ARM_CPU_IMP_QCOM		0x51
 #define ARM_CPU_IMP_NVIDIA		0x4E
+#define ARM_CPU_IMP_FUJITSU		0x46
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
@@ -104,6 +105,8 @@ 
 #define NVIDIA_CPU_PART_DENVER		0x003
 #define NVIDIA_CPU_PART_CARMEL		0x004
 
+#define FUJTISU_CPU_PART_A64FX		0x001
+
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -122,6 +125,7 @@ 
 #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
 #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
+#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJTISU_CPU_PART_A64FX)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index efb7b2c..c465b2f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -666,6 +666,25 @@  static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	return 0;
 }
 
+static bool do_bad_ignore_first = FALSE;
+static int do_bad_ignore(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+	if (do_bad_ignore_first == TRUE)
+		return 0;
+	if (do_bad_ignore_first == FALSE) {
+		unsigned int current_cpu_midr = read_cpuid_id();
+		const struct midr_range fujitsu_a64fx_midr_range = {
+			MIDR_FUJITSU_A64FX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, 0)
+		};
+
+		if (is_midr_in_range(current_cpu_midr, &fujitsu_a64fx_midr_range) == TRUE) {
+			do_bad_ignore_first = TRUE;
+			return 0;
+		}
+	}
+	return 1; /* "fault" same as do_bad */
+}
+
 static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
@@ -730,7 +749,11 @@  static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 60"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"section domain fault"		},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"page domain fault"		},
+#ifdef	CONFIG_FUJITSU_ERRATUM_010001
+	{ do_bad_ignore,	SIGKILL, SI_KERNEL,	"unknown 63"			},
+#else
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
+#endif
 };
 
 int handle_guest_sea(phys_addr_t addr, unsigned int esr)