diff mbox series

[v2] ppc/spapr: Set the effective address provided flag in mc error log.

Message ID 158451653844.22972.17999316676230071087.stgit@jupiter (mailing list archive)
State New, archived
Headers show
Series [v2] ppc/spapr: Set the effective address provided flag in mc error log. | expand

Commit Message

Mahesh Salgaonkar March 18, 2020, 7:34 a.m. UTC
Per PAPR, the "effective address provided" flag must be set in the
sub_err_type member of the mc extended error log (i.e.
rtas_event_log_v6_mc.sub_err_type). This was missed in the original
fwnmi-mce patch series. The current code updates the effective address
but does not set the flag indicating that it is available. Hence the guest
fails to extract the effective address from the MCE RTAS log. This patch
fixes that.

Without this patch, the guest MCE log fails to print the DAR value:

[   11.933608] Disabling lock debugging due to kernel taint
[   11.933773] MCE: CPU0: machine check (Severe) Host TLB Multihit [Recovered]
[   11.933979] MCE: CPU0: NIP: [c000000000090b34] radix__flush_tlb_range_psize+0x194/0xf00
[   11.934223] MCE: CPU0: Initiator CPU
[   11.934341] MCE: CPU0: Unknown

After the change:

[   22.454149] Disabling lock debugging due to kernel taint
[   22.454316] MCE: CPU0: machine check (Severe) Host TLB Multihit DAR: deadbeefdeadbeef [Recovered]
[   22.454605] MCE: CPU0: NIP: [c0000000003e5804] kmem_cache_alloc+0x84/0x330
[   22.454820] MCE: CPU0: Initiator CPU
[   22.454944] MCE: CPU0: Unknown


Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
---
Change in v2:
- Fixed coding style issues.
---
 hw/ppc/spapr_events.c |   26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

Comments

Nicholas Piggin March 19, 2020, 3:24 a.m. UTC | #1
Mahesh Salgaonkar's on March 18, 2020 5:34 pm:
> Per PAPR, it is expected to set effective address provided flag in
> sub_err_type member of mc extended error log (i.e
> rtas_event_log_v6_mc.sub_err_type). This somehow got missed in original
> fwnmi-mce patch series. The current code just updates the effective address
> but does not set the flag to indicate that it is available. Hence guest
> fails to extract effective address from mce rtas log. This patch fixes
> that.
> 
> Without this patch guest MCE logs fails print DAR value:
> 
> [   11.933608] Disabling lock debugging due to kernel taint
> [   11.933773] MCE: CPU0: machine check (Severe) Host TLB Multihit [Recovered]
> [   11.933979] MCE: CPU0: NIP: [c000000000090b34] radix__flush_tlb_range_psize+0x194/0xf00
> [   11.934223] MCE: CPU0: Initiator CPU
> [   11.934341] MCE: CPU0: Unknown
> 
> After the change:
> 
> [   22.454149] Disabling lock debugging due to kernel taint
> [   22.454316] MCE: CPU0: machine check (Severe) Host TLB Multihit DAR: deadbeefdeadbeef [Recovered]
> [   22.454605] MCE: CPU0: NIP: [c0000000003e5804] kmem_cache_alloc+0x84/0x330
> [   22.454820] MCE: CPU0: Initiator CPU
> [   22.454944] MCE: CPU0: Unknown

Thanks, I was wondering why my MCEs weren't printing a DAR!

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> 
> 
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> ---
> Change in v2:
> - Fixed coding style issues.
> ---
>  hw/ppc/spapr_events.c |   26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 8b32b7eea5..cb6bfedc53 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -243,6 +243,14 @@ struct rtas_event_log_v6_mc {
>  #define RTAS_LOG_V6_MC_TLB_PARITY                        1
>  #define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
>  #define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
> +/*
> + * Per PAPR,
> + * For UE error type, set bit 1 of sub_err_type to indicate effective addr is
> + * provided. For other error types (SLB/ERAT/TLB), set bit 0 to indicate
> + * same.
> + */
> +#define RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED               0x40
> +#define RTAS_LOG_V6_MC_EA_ADDR_PROVIDED                  0x80
>      uint8_t reserved_1[6];
>      uint64_t effective_address;
>      uint64_t logical_address;
> @@ -726,6 +734,22 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
> +static void spapr_mc_set_ea_provided_flag(struct mc_extended_log *ext_elog)
> +{
> +    switch (ext_elog->mc.error_type) {
> +    case RTAS_LOG_V6_MC_TYPE_UE:
> +        ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED;
> +        break;
> +    case RTAS_LOG_V6_MC_TYPE_SLB:
> +    case RTAS_LOG_V6_MC_TYPE_ERAT:
> +    case RTAS_LOG_V6_MC_TYPE_TLB:
> +        ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_EA_ADDR_PROVIDED;
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
>  static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>                                          struct mc_extended_log *ext_elog)
>  {
> @@ -751,6 +775,7 @@ static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>              ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
>              if (mc_derror_table[i].dar_valid) {
>                  ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
> +                spapr_mc_set_ea_provided_flag(ext_elog);
>              }
>  
>              summary |= mc_derror_table[i].initiator
> @@ -769,6 +794,7 @@ static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>              ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
>              if (mc_ierror_table[i].nip_valid) {
>                  ext_elog->mc.effective_address = cpu_to_be64(env->nip);
> +                spapr_mc_set_ea_provided_flag(ext_elog);
>              }
>  
>              summary |= mc_ierror_table[i].initiator
> 
>
David Gibson March 19, 2020, 5:19 a.m. UTC | #2
On Wed, Mar 18, 2020 at 01:04:20PM +0530, Mahesh Salgaonkar wrote:
> Per PAPR, it is expected to set effective address provided flag in
> sub_err_type member of mc extended error log (i.e
> rtas_event_log_v6_mc.sub_err_type). This somehow got missed in original
> fwnmi-mce patch series. The current code just updates the effective address
> but does not set the flag to indicate that it is available. Hence guest
> fails to extract effective address from mce rtas log. This patch fixes
> that.
> 
> Without this patch guest MCE logs fails print DAR value:
> 
> [   11.933608] Disabling lock debugging due to kernel taint
> [   11.933773] MCE: CPU0: machine check (Severe) Host TLB Multihit [Recovered]
> [   11.933979] MCE: CPU0: NIP: [c000000000090b34] radix__flush_tlb_range_psize+0x194/0xf00
> [   11.934223] MCE: CPU0: Initiator CPU
> [   11.934341] MCE: CPU0: Unknown
> 
> After the change:
> 
> [   22.454149] Disabling lock debugging due to kernel taint
> [   22.454316] MCE: CPU0: machine check (Severe) Host TLB Multihit DAR: deadbeefdeadbeef [Recovered]
> [   22.454605] MCE: CPU0: NIP: [c0000000003e5804] kmem_cache_alloc+0x84/0x330
> [   22.454820] MCE: CPU0: Initiator CPU
> [   22.454944] MCE: CPU0: Unknown
> 
> 
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>

Applied to ppc-for-5.0, thanks.

> ---
> Change in v2:
> - Fixed coding style issues.
> ---
>  hw/ppc/spapr_events.c |   26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 8b32b7eea5..cb6bfedc53 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -243,6 +243,14 @@ struct rtas_event_log_v6_mc {
>  #define RTAS_LOG_V6_MC_TLB_PARITY                        1
>  #define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
>  #define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
> +/*
> + * Per PAPR,
> + * For UE error type, set bit 1 of sub_err_type to indicate effective addr is
> + * provided. For other error types (SLB/ERAT/TLB), set bit 0 to indicate
> + * same.
> + */
> +#define RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED               0x40
> +#define RTAS_LOG_V6_MC_EA_ADDR_PROVIDED                  0x80
>      uint8_t reserved_1[6];
>      uint64_t effective_address;
>      uint64_t logical_address;
> @@ -726,6 +734,22 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
> +static void spapr_mc_set_ea_provided_flag(struct mc_extended_log *ext_elog)
> +{
> +    switch (ext_elog->mc.error_type) {
> +    case RTAS_LOG_V6_MC_TYPE_UE:
> +        ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED;
> +        break;
> +    case RTAS_LOG_V6_MC_TYPE_SLB:
> +    case RTAS_LOG_V6_MC_TYPE_ERAT:
> +    case RTAS_LOG_V6_MC_TYPE_TLB:
> +        ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_EA_ADDR_PROVIDED;
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
>  static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>                                          struct mc_extended_log *ext_elog)
>  {
> @@ -751,6 +775,7 @@ static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>              ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
>              if (mc_derror_table[i].dar_valid) {
>                  ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
> +                spapr_mc_set_ea_provided_flag(ext_elog);
>              }
>  
>              summary |= mc_derror_table[i].initiator
> @@ -769,6 +794,7 @@ static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>              ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
>              if (mc_ierror_table[i].nip_valid) {
>                  ext_elog->mc.effective_address = cpu_to_be64(env->nip);
> +                spapr_mc_set_ea_provided_flag(ext_elog);
>              }
>  
>              summary |= mc_ierror_table[i].initiator
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 8b32b7eea5..cb6bfedc53 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -243,6 +243,14 @@  struct rtas_event_log_v6_mc {
 #define RTAS_LOG_V6_MC_TLB_PARITY                        1
 #define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
 #define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
+/*
+ * Per PAPR,
+ * For UE error type, set bit 1 of sub_err_type to indicate effective addr is
+ * provided. For other error types (SLB/ERAT/TLB), set bit 0 to indicate
+ * same.
+ */
+#define RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED               0x40
+#define RTAS_LOG_V6_MC_EA_ADDR_PROVIDED                  0x80
     uint8_t reserved_1[6];
     uint64_t effective_address;
     uint64_t logical_address;
@@ -726,6 +734,22 @@  void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
                             RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+static void spapr_mc_set_ea_provided_flag(struct mc_extended_log *ext_elog)
+{
+    switch (ext_elog->mc.error_type) {
+    case RTAS_LOG_V6_MC_TYPE_UE:
+        ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED;
+        break;
+    case RTAS_LOG_V6_MC_TYPE_SLB:
+    case RTAS_LOG_V6_MC_TYPE_ERAT:
+    case RTAS_LOG_V6_MC_TYPE_TLB:
+        ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_EA_ADDR_PROVIDED;
+        break;
+    default:
+        break;
+    }
+}
+
 static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
                                         struct mc_extended_log *ext_elog)
 {
@@ -751,6 +775,7 @@  static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
             ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
             if (mc_derror_table[i].dar_valid) {
                 ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
+                spapr_mc_set_ea_provided_flag(ext_elog);
             }
 
             summary |= mc_derror_table[i].initiator
@@ -769,6 +794,7 @@  static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
             ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
             if (mc_ierror_table[i].nip_valid) {
                 ext_elog->mc.effective_address = cpu_to_be64(env->nip);
+                spapr_mc_set_ea_provided_flag(ext_elog);
             }
 
             summary |= mc_ierror_table[i].initiator