diff mbox series

[v3,2/3] i386: Explicitly ignore unsupported BUS_MCEERR_AO MCE on AMD guest

Message ID 20230906205308.50334-3-john.allen@amd.com (mailing list archive)
State New, archived
Headers show
Series Fix MCE handling on AMD hosts | expand

Commit Message

John Allen Sept. 6, 2023, 8:53 p.m. UTC
From: William Roche <william.roche@oracle.com>

AMD guests can't currently deal with BUS_MCEERR_AO MCE injection
as it panics the VM kernel. We filter this event and provide a
warning message.

Signed-off-by: William Roche <william.roche@oracle.com>
---
v3:
  - New patch
---
 target/i386/kvm/kvm.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

Comments

Gupta, Pankaj Sept. 7, 2023, 11:12 a.m. UTC | #1
On 9/6/2023 10:53 PM, John Allen wrote:
> From: William Roche <william.roche@oracle.com>
> 
> AMD guests can't currently deal with BUS_MCEERR_AO MCE injection
> as it panics the VM kernel. We filter this event and provide a
> warning message.
> 
> Signed-off-by: William Roche <william.roche@oracle.com>
> ---
> v3:
>    - New patch
> ---
>   target/i386/kvm/kvm.c | 13 ++++++++++---
>   1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 5fce74aac5..4d42d3ed4c 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -604,6 +604,10 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
>               mcg_status |= MCG_STATUS_RIPV;
>           }
>       } else {
> +        if (code == BUS_MCEERR_AO) {
> +            /* XXX we don't support BUS_MCEERR_AO injection on AMD yet */
> +            return;
> +        }
>           mcg_status |= MCG_STATUS_EIPV | MCG_STATUS_RIPV;
>       }
>   
> @@ -655,7 +659,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>           if (ram_addr != RAM_ADDR_INVALID &&
>               kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
>               kvm_hwpoison_page_add(ram_addr);
> -            kvm_mce_inject(cpu, paddr, code);
> +            if (!IS_AMD_CPU(env) || code != BUS_MCEERR_AO) {

Isn't the 'optional' case we already handle inside kvm_mce_inject()?
So this check seems repetitive to me.

Thanks,
Pankaj
> +                kvm_mce_inject(cpu, paddr, code);
> +            }
>   
>               /*
>                * Use different logging severity based on error type.
> @@ -668,8 +674,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>                       addr, paddr, "BUS_MCEERR_AR");
>               } else {
>                    warn_report("Guest MCE Memory Error at QEMU addr %p and "
> -                     "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
> -                     addr, paddr, "BUS_MCEERR_AO");
> +                     "GUEST addr 0x%" HWADDR_PRIx " of type %s %s",
> +                     addr, paddr, "BUS_MCEERR_AO",
> +                     IS_AMD_CPU(env) ? "ignored on AMD guest" : "injected");
>               }
>   
>               return;
William Roche Sept. 7, 2023, 2 p.m. UTC | #2
On 9/7/23 13:12, Gupta, Pankaj wrote:
>
>>
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index 5fce74aac5..4d42d3ed4c 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -604,6 +604,10 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr 
>> paddr, int code)
>>               mcg_status |= MCG_STATUS_RIPV;
>>           }
>>       } else {
>> +        if (code == BUS_MCEERR_AO) {
>> +            /* XXX we don't support BUS_MCEERR_AO injection on AMD 
>> yet */
>> +            return;
>> +        }
>>           mcg_status |= MCG_STATUS_EIPV | MCG_STATUS_RIPV;
>>       }
>> @@ -655,7 +659,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int 
>> code, void *addr)
>>           if (ram_addr != RAM_ADDR_INVALID &&
>>               kvm_physical_memory_addr_from_host(c->kvm_state, addr, 
>> &paddr)) {
>>               kvm_hwpoison_page_add(ram_addr);
>> -            kvm_mce_inject(cpu, paddr, code);
>> +            if (!IS_AMD_CPU(env) || code != BUS_MCEERR_AO) {
> 
> Isn't the 'optional' case we already handle inside kvm_mce_inject()?
> So this check seems repetitive to me.

You are right, it is repetitive, but can be considered as a reminder of 
the situation and an explanation of the "ignored on AMD guest" message 
later in this function.

Of course it can be removed if you think that the code is easier to read 
without it. When the AMD BUS_MCEERR_AO support is integrated, both 
locations would need to be cleared, but this sounds reasonable to me.

John, it's up to you.

William.
Gupta, Pankaj Sept. 8, 2023, 9:35 a.m. UTC | #3
>>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>>> index 5fce74aac5..4d42d3ed4c 100644
>>> --- a/target/i386/kvm/kvm.c
>>> +++ b/target/i386/kvm/kvm.c
>>> @@ -604,6 +604,10 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr 
>>> paddr, int code)
>>>               mcg_status |= MCG_STATUS_RIPV;
>>>           }
>>>       } else {
>>> +        if (code == BUS_MCEERR_AO) {
>>> +            /* XXX we don't support BUS_MCEERR_AO injection on AMD 
>>> yet */
>>> +            return;
>>> +        }
>>>           mcg_status |= MCG_STATUS_EIPV | MCG_STATUS_RIPV;
>>>       }
>>> @@ -655,7 +659,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int 
>>> code, void *addr)
>>>           if (ram_addr != RAM_ADDR_INVALID &&
>>>               kvm_physical_memory_addr_from_host(c->kvm_state, addr, 
>>> &paddr)) {
>>>               kvm_hwpoison_page_add(ram_addr);
>>> -            kvm_mce_inject(cpu, paddr, code);
>>> +            if (!IS_AMD_CPU(env) || code != BUS_MCEERR_AO) {
>>
>> Isn't the 'optional' case we already handle inside kvm_mce_inject()?
>> So this check seems repetitive to me.
> 
> You are right, it is repetitive, but can be considered as a reminder of 
> the situation and an explanation of the "ignored on AMD guest" message 
> later in this function.
> 
> Of course it can be removed if you think that the code is easier to read 
> without it. When the AMD BUS_MCEERR_AO support is integrated, both 
> locations would need to be cleared, but this sounds reasonable to me.

Yes, I think it helps to read the code better and less conditional.

Thanks,
Pankaj

> 
> John, it's up to you.
> 
> William.
diff mbox series

Patch

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5fce74aac5..4d42d3ed4c 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -604,6 +604,10 @@  static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
             mcg_status |= MCG_STATUS_RIPV;
         }
     } else {
+        if (code == BUS_MCEERR_AO) {
+            /* XXX we don't support BUS_MCEERR_AO injection on AMD yet */
+            return;
+        }
         mcg_status |= MCG_STATUS_EIPV | MCG_STATUS_RIPV;
     }
 
@@ -655,7 +659,9 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
             kvm_hwpoison_page_add(ram_addr);
-            kvm_mce_inject(cpu, paddr, code);
+            if (!IS_AMD_CPU(env) || code != BUS_MCEERR_AO) {
+                kvm_mce_inject(cpu, paddr, code);
+            }
 
             /*
              * Use different logging severity based on error type.
@@ -668,8 +674,9 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
                     addr, paddr, "BUS_MCEERR_AR");
             } else {
                  warn_report("Guest MCE Memory Error at QEMU addr %p and "
-                     "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
-                     addr, paddr, "BUS_MCEERR_AO");
+                     "GUEST addr 0x%" HWADDR_PRIx " of type %s %s",
+                     addr, paddr, "BUS_MCEERR_AO",
+                     IS_AMD_CPU(env) ? "ignored on AMD guest" : "injected");
             }
 
             return;