diff mbox series

x86/SMP: don't try to stop already stopped CPUs

Message ID 5CEE5C4D02000078002335A0@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show
Series x86/SMP: don't try to stop already stopped CPUs | expand

Commit Message

Jan Beulich May 29, 2019, 10:17 a.m. UTC
In particular with an enabled IOMMU (but not really limited to this
case), trying to invoke fixup_irqs() after having already done
disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:

 RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
 RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
 rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
 rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
 rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
 r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
 r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
 r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
 cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
 fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
 ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
 Xen code around <ffff82d08026a036> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
  ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
 Xen stack trace from rsp=ffff83009e8a79a8:
 ...
 Xen call trace:
    [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
    [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
    [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
    [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
    [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
    [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
    [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
    [<ffff82d080252242>] console_suspend+0/0x28
    [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
    [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
    [<00000000aa5b526b>] 00000000aa5b526b
    [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
    [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
    [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
    [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
    [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
    [<ffff82d080385432>] lstar_enter+0x112/0x120

Don't call fixup_irqs() and don't send any IPI if there's only one
online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
we're on was already marked offline (by a prior invocation of
__stop_this_cpu()).

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Roger Pau Monné June 3, 2019, 2:12 p.m. UTC | #1
On Wed, May 29, 2019 at 04:17:49AM -0600, Jan Beulich wrote:
> In particular with an enabled IOMMU (but not really limited to this
> case), trying to invoke fixup_irqs() after having already done
> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
> 
>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>  Xen code around <ffff82d08026a036> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>  Xen stack trace from rsp=ffff83009e8a79a8:
>  ...
>  Xen call trace:
>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>     [<ffff82d080252242>] console_suspend+0/0x28
>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>     [<00000000aa5b526b>] 00000000aa5b526b
>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>     [<ffff82d080385432>] lstar_enter+0x112/0x120
> 
> Don't call fixup_irqs() and don't send any IPI if there's only one
> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
> we're on was already marked offline (by a prior invocation of
> __stop_this_cpu()).
> 
> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/arch/x86/smp.c
> +++ b/xen/arch/x86/smp.c
> @@ -302,23 +302,31 @@ static void stop_this_cpu(void *dummy)
>   */
>  void smp_send_stop(void)
>  {
> -    int timeout = 10;
> +    unsigned int cpu = smp_processor_id();
>  
> -    local_irq_disable();
> -    fixup_irqs(cpumask_of(smp_processor_id()), 0);
> -    local_irq_enable();
> -
> -    smp_call_function(stop_this_cpu, NULL, 0);
> -
> -    /* Wait 10ms for all other CPUs to go offline. */
> -    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
> -        mdelay(1);
> -
> -    local_irq_disable();
> -    disable_IO_APIC();
> -    hpet_disable();
> -    __stop_this_cpu();
> -    local_irq_enable();
> +    if ( num_online_cpus() > 1 )
> +    {
> +        int timeout = 10;
> +
> +        local_irq_disable();
> +        fixup_irqs(cpumask_of(cpu), 0);
> +        local_irq_enable();
> +
> +        smp_call_function(stop_this_cpu, NULL, 0);
> +
> +        /* Wait 10ms for all other CPUs to go offline. */
> +        while ( (num_online_cpus() > 1) && (timeout-- > 0) )
> +            mdelay(1);
> +    }
> +
> +    if ( cpu_online(cpu) )

Won't this be better placed inside the previous if? Is it valid to
have a single CPU and try to offline it?

Thanks, Roger.
Jan Beulich June 3, 2019, 3:40 p.m. UTC | #2
>>> On 03.06.19 at 16:12, <roger.pau@citrix.com> wrote:
> On Wed, May 29, 2019 at 04:17:49AM -0600, Jan Beulich wrote:
>> In particular with an enabled IOMMU (but not really limited to this
>> case), trying to invoke fixup_irqs() after having already done
>> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
>> 
>>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>>  Xen code around <ffff82d08026a036> 
> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>>  Xen stack trace from rsp=ffff83009e8a79a8:
>>  ...
>>  Xen call trace:
>>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>>     [<ffff82d080252242>] console_suspend+0/0x28
>>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>>     [<00000000aa5b526b>] 00000000aa5b526b
>>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>>     [<ffff82d080385432>] lstar_enter+0x112/0x120
>> 
>> Don't call fixup_irqs() and don't send any IPI if there's only one
>> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
>> we're on was already marked offline (by a prior invocation of
>> __stop_this_cpu()).
>> 
>> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> 
>> --- a/xen/arch/x86/smp.c
>> +++ b/xen/arch/x86/smp.c
>> @@ -302,23 +302,31 @@ static void stop_this_cpu(void *dummy)
>>   */
>>  void smp_send_stop(void)
>>  {
>> -    int timeout = 10;
>> +    unsigned int cpu = smp_processor_id();
>>  
>> -    local_irq_disable();
>> -    fixup_irqs(cpumask_of(smp_processor_id()), 0);
>> -    local_irq_enable();
>> -
>> -    smp_call_function(stop_this_cpu, NULL, 0);
>> -
>> -    /* Wait 10ms for all other CPUs to go offline. */
>> -    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
>> -        mdelay(1);
>> -
>> -    local_irq_disable();
>> -    disable_IO_APIC();
>> -    hpet_disable();
>> -    __stop_this_cpu();
>> -    local_irq_enable();
>> +    if ( num_online_cpus() > 1 )
>> +    {
>> +        int timeout = 10;
>> +
>> +        local_irq_disable();
>> +        fixup_irqs(cpumask_of(cpu), 0);
>> +        local_irq_enable();
>> +
>> +        smp_call_function(stop_this_cpu, NULL, 0);
>> +
>> +        /* Wait 10ms for all other CPUs to go offline. */
>> +        while ( (num_online_cpus() > 1) && (timeout-- > 0) )
>> +            mdelay(1);
>> +    }
>> +
>> +    if ( cpu_online(cpu) )
> 
> Won't this be better placed inside the previous if? Is it valid to
> have a single CPU and try to offline it?

No to the first question, and I'm not sure I see how you came to
the 2nd one. If there's just a single online CPU, then there's no
need to fixup_irqs(), and there's no-one to IPI. Yet the local CPU
still needs to do everything that should happen once in UP mode,
unless this CPU has been offlined already before (as was the
case in Andrew's report, at least as far as I was able to deduce).

Jan
Roger Pau Monné June 3, 2019, 4:35 p.m. UTC | #3
On Mon, Jun 03, 2019 at 09:40:19AM -0600, Jan Beulich wrote:
> >>> On 03.06.19 at 16:12, <roger.pau@citrix.com> wrote:
> > On Wed, May 29, 2019 at 04:17:49AM -0600, Jan Beulich wrote:
> >> In particular with an enabled IOMMU (but not really limited to this
> >> case), trying to invoke fixup_irqs() after having already done
> >> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
> >> 
> >>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
> >>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
> >>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
> >>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
> >>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
> >>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
> >>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
> >>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
> >>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
> >>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
> >>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
> >>  Xen code around <ffff82d08026a036> 
> > (amd_iommu_read_ioapic_from_ire+0xde/0x113):
> >>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
> >>  Xen stack trace from rsp=ffff83009e8a79a8:
> >>  ...
> >>  Xen call trace:
> >>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
> >>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
> >>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
> >>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
> >>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
> >>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
> >>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
> >>     [<ffff82d080252242>] console_suspend+0/0x28
> >>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
> >>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
> >>     [<00000000aa5b526b>] 00000000aa5b526b
> >>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
> >>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
> >>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
> >>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
> >>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
> >>     [<ffff82d080385432>] lstar_enter+0x112/0x120
> >> 
> >> Don't call fixup_irqs() and don't send any IPI if there's only one
> >> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
> >> we're on was already marked offline (by a prior invocation of
> >> __stop_this_cpu()).
> >> 
> >> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> 
> >> --- a/xen/arch/x86/smp.c
> >> +++ b/xen/arch/x86/smp.c
> >> @@ -302,23 +302,31 @@ static void stop_this_cpu(void *dummy)
> >>   */
> >>  void smp_send_stop(void)
> >>  {
> >> -    int timeout = 10;
> >> +    unsigned int cpu = smp_processor_id();
> >>  
> >> -    local_irq_disable();
> >> -    fixup_irqs(cpumask_of(smp_processor_id()), 0);
> >> -    local_irq_enable();
> >> -
> >> -    smp_call_function(stop_this_cpu, NULL, 0);
> >> -
> >> -    /* Wait 10ms for all other CPUs to go offline. */
> >> -    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
> >> -        mdelay(1);
> >> -
> >> -    local_irq_disable();
> >> -    disable_IO_APIC();
> >> -    hpet_disable();
> >> -    __stop_this_cpu();
> >> -    local_irq_enable();
> >> +    if ( num_online_cpus() > 1 )
> >> +    {
> >> +        int timeout = 10;
> >> +
> >> +        local_irq_disable();
> >> +        fixup_irqs(cpumask_of(cpu), 0);
> >> +        local_irq_enable();
> >> +
> >> +        smp_call_function(stop_this_cpu, NULL, 0);
> >> +
> >> +        /* Wait 10ms for all other CPUs to go offline. */
> >> +        while ( (num_online_cpus() > 1) && (timeout-- > 0) )
> >> +            mdelay(1);
> >> +    }
> >> +
> >> +    if ( cpu_online(cpu) )
> > 
> > Won't this be better placed inside the previous if? Is it valid to
> > have a single CPU and try to offline it?
> 
> No to the first question, and I'm not sure I see how you came to
> the 2nd one. If there's just a single online CPU, then there's no
> need to fixup_irqs(), and there's no-one to IPI. Yet the local CPU
> still needs to do everything that should happen once in UP mode,
> unless this CPU has been offlined already before (as was the
> case in Andrew's report, at least as far as I was able to deduce).

Sorry, I got confused. AFAICT the logic is correct:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.
Andrew Cooper June 17, 2019, 5:39 p.m. UTC | #4
On 29/05/2019 11:17, Jan Beulich wrote:
> In particular with an enabled IOMMU (but not really limited to this
> case), trying to invoke fixup_irqs() after having already done
> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
>
>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>  Xen code around <ffff82d08026a036> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>  Xen stack trace from rsp=ffff83009e8a79a8:
>  ...
>  Xen call trace:
>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>     [<ffff82d080252242>] console_suspend+0/0x28
>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>     [<00000000aa5b526b>] 00000000aa5b526b
>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>     [<ffff82d080385432>] lstar_enter+0x112/0x120
>
> Don't call fixup_irqs() and don't send any IPI if there's only one
> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
> we're on was already marked offline (by a prior invocation of
> __stop_this_cpu()).
>
> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

It is probably worth noting that the above stack trace is a cascade
fault, where we took a #GP fault in the middle of the EFI firmware, and
then tried restarting a second time.

For the change it is an improvement, so Acked-by: Andrew Cooper
<andrew.cooper3@citrix.com>

There are further fixes needing (which have been on my todo list for
rather too long) to avoid any local_irq_enable() on the shutdown path,
because during a crash (especially one in the middle of a vcpu context
switch), its not safe to re-enable interrupts.

The only solution I've got involves using NMI based IPIs/shootdowns.

~Andrew
Andrew Cooper June 17, 2019, 5:55 p.m. UTC | #5
On 17/06/2019 18:39, Andrew Cooper wrote:
> On 29/05/2019 11:17, Jan Beulich wrote:
>> In particular with an enabled IOMMU (but not really limited to this
>> case), trying to invoke fixup_irqs() after having already done
>> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
>>
>>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>>  Xen code around <ffff82d08026a036> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>>  Xen stack trace from rsp=ffff83009e8a79a8:
>>  ...
>>  Xen call trace:
>>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>>     [<ffff82d080252242>] console_suspend+0/0x28
>>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>>     [<00000000aa5b526b>] 00000000aa5b526b
>>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>>     [<ffff82d080385432>] lstar_enter+0x112/0x120
>>
>> Don't call fixup_irqs() and don't send any IPI if there's only one
>> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
>> we're on was already marked offline (by a prior invocation of
>> __stop_this_cpu()).
>>
>> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> It is probably worth noting that the above stack trace is a cascade
> fault, where we took a #GP fault in the middle of the EFI firmware, and
> then tried restarting a second time.
>
> For the change it is an improvement, so Acked-by: Andrew Cooper
> <andrew.cooper3@citrix.com>
>
> There are further fixes needing (which have been on my todo list for
> rather too long) to avoid any local_irq_enable() on the shutdown path,
> because during a crash (especially one in the middle of a vcpu context
> switch), its not safe to re-enable interrupts.
>
> The only solution I've got involves using NMI based IPIs/shootdowns.

/sigh and no sooner as I tried testing this, I found the next piece of
fallout:

[   90.447906] reboot: Restarting system
(XEN) Hardware Dom0 shutdown: rebooting machine
(XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[<00000000aa5b526b>] 00000000aa5b526b
(XEN) RFLAGS: 0000000000010246   CONTEXT: hypervisor
(XEN) rax: 00000000aa985950   rbx: 0000000000000000   rcx: 00000000aa5b7010
(XEN) rdx: 0000000000000000   rsi: ffff83009e827fff   rdi: 00000000003506e0
(XEN) rbp: ffff83009e827c70   rsp: ffff83009e827bb0   r8:  00000000aa5b7048
(XEN) r9:  0000000000000000   r10: ffff83009e827c88   r11: 0f0f0f0f0f0f0f0f
(XEN) r12: 00000000fffffffe   r13: 0000000000000cf9   r14: 0000000000000000
(XEN) r15: 0000000000000065   cr0: 0000000080050033   cr4: 00000000003506e0
(XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
(XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around <00000000aa5b526b> (00000000aa5b526b):
(XEN)  8d 0d a7 1d 00 00 33 d2 <ff> 90 40 01 00 00 48 8b 05 d0 1d 00 00 48 83 c4
(XEN) Xen stack trace from rsp=ffff83009e827bb0:
(XEN)    000000000000001f 00000000003506e0 ffff83009e827c00 0000000000000206
(XEN)    ffff82d08027d96a 00000000aa5b51f7 0000000000000286 ffff83009e827c40
(XEN)    000000009e817000 0000000000000cf9 ffff83009e827c30 ffff82d080201327
(XEN)    00000000ffffffff ffff82d08020162e 0000000000000000 00000000fffffffe
(XEN)    ffff83009e827c70 ffff82d0802015ff 000000009e817000 ffff83009e827c78
(XEN)    ffff82d0802a372a 0000000080000000 0000000000000000 ffff83009e827c88
(XEN)    ffff83009e827cd8 ffff82d0802a3045 ffff82d0802a3045 ffff83009e827c98
(XEN)    000000008028454c 000082d080387851 0000000000000000 ffff82d080387851
(XEN)    0000000000000000 ffff83009e827d98 00000000000000fb 0000000080000000
(XEN)    0000000000000000 ffff83009e827ce8 ffff82d0802a3105 ffff83009e827d08
(XEN)    ffff82d08023cdaa ffff82d080387851 0000000000000000 ffff83009e827d18
(XEN)    ffff82d0802a37da ffff83009e827d88 ffff82d080283fb4 ffff82d080387851
(XEN)    ffff82d080387845 0000000000000000 ffff82d080387845 ffff82d080387851
(XEN)    ffff82d080387845 ffff82d080387851 0000000000000000 0000000000000000
(XEN)    0000000000000000 ffff83009e827fff 0000000000000000 00007cff617d8247
(XEN)    ffff82d0803878ba ffff82d080933900 0000000000000000 000000204b161644
(XEN)    ffff8310f5206ef8 ffff83009e827e40 ffff8310f5206ea0 0000002065fe762e
(XEN)    000000204bbfce6e ffff82d08095c3e0 ffff83009e827ef8 0000000000000000
(XEN)    0000000000000048 0000000000000000 ffff83009e827fff ffff8310f5206ef8
(XEN)    000000fb00000000 ffff82d0802e1bc5 000000000000e008 0000000000000206
(XEN) Xen call trace:
(XEN)    [<00000000aa5b526b>] 00000000aa5b526b
(XEN)    [<ffff82d0802a3045>] machine_restart+0x1ef/0x2a4
(XEN)    [<ffff82d0802a3105>] send_IPI_mask+0/0xc
(XEN)    [<ffff82d08023cdaa>] smp_call_function_interrupt+0x95/0xb8
(XEN)    [<ffff82d0802a37da>] call_function_interrupt+0x35/0x37
(XEN)    [<ffff82d080283fb4>] do_IRQ+0xa7/0x697
(XEN)    [<ffff82d0803878ba>] common_interrupt+0x10a/0x120
(XEN)    [<ffff82d0802e1bc5>] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
(XEN)    [<ffff82d0802e20ae>] cpu_idle.c#acpi_processor_idle+0x313/0x590
(XEN)    [<ffff82d080274f8c>] domain.c#idle_loop+0xa2/0xb1
(XEN) 
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) GENERAL PROTECTION FAULT
(XEN) [error_code=0000]
(XEN) ****************************************
(XEN) 
(XEN) Reboot in five seconds...
(XEN) Executing kexec image on cpu0
(XEN) Shot down all CPUs
(XEN) Assertion 'offset == (val & (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
(XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[<ffff82d080266844>] amd_iommu_read_ioapic_from_ire+0xd0/0x131
(XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: 0000000000000014   rcx: ffff832005d68010
(XEN) rdx: ffff832005d68000   rsi: 0000000000000000   rdi: ffff82d080942a00
(XEN) rbp: ffff83009e827948   rsp: ffff83009e827928   r8:  ffff82d0808074c0
(XEN) r9:  ffff82d080942a08   r10: 0000000000000000   r11: 0000000000000001
(XEN) r12: 0000000000010000   r13: 0000000000000001   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000003506e0
(XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
(XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around <ffff82d080266844> (amd_iommu_read_ioapic_from_ire+0xd0/0x131):
(XEN)  07 00 00 41 39 c5 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 0f b6 11 c0 ea 02 83
(XEN) Xen stack trace from rsp=ffff83009e827928:
(XEN)    ffff82d08027c244 0000000000000014 ffff82d080806980 0000000000000000
(XEN)    ffff83009e827958 ffff82d0802687a7 ffff83009e827968 ffff82d08027c25b
(XEN)    ffff83009e827998 ffff82d08027cefb 0000000000000000 0000000000000002
(XEN)    ffff82d080806980 ffff82d080806980 ffff83009e8279c8 ffff82d08027d8c8
(XEN)    ffff83009e827fff 0000000000010000 0000000000000002 0000000000000000
(XEN)    ffff83009e8279f8 ffff82d08027d9ce 00000000000003e8 ffff82d080943b80
(XEN)    0000000000000000 ffff83009e827fff ffff83009e827a18 ffff82d08027dd30
(XEN)    ffff83009e827a18 ffff82d0802a3626 ffff83009e827a38 ffff82d080270a79
(XEN)    0000000000000003 ffff82d08043f858 ffff83009e827a58 ffff82d08021dd74
(XEN)    0000000000000206 0000000000000296 ffff83009e827ac8 ffff82d08024db40
(XEN)    ffff83009e827aa8 ffff82d000000010 ffff83009e827ad8 ffff83009e827a88
(XEN)    ffff83009e827aa8 0000000000000000 ffff83009e827fff 0000000000000000
(XEN)    ffff8310f3e00000 0000000000000002 ffff83009e827b08 0000000000000000
(XEN)    ffff83009e827af8 ffff82d0802ab1b8 ffff82d080387979 ffff82d08038796d
(XEN)    ffff8310f4c08000 0000000000000000 00007cff617d84d7 ffff82d080387a3d
(XEN)    0000000000000065 0000000000000000 0000000000000cf9 00000000fffffffe
(XEN)    ffff83009e827c70 0000000000000000 0f0f0f0f0f0f0f0f ffff83009e827c88
(XEN)    0000000000000000 00000000aa5b7048 00000000aa985950 00000000aa5b7010
(XEN)    0000000000000000 ffff83009e827fff 00000000003506e0 0000000d00000000
(XEN)    00000000aa5b526b 000000000000e008 0000000000010246 ffff83009e827bb0
(XEN) Xen call trace:
(XEN)    [<ffff82d080266844>] amd_iommu_read_ioapic_from_ire+0xd0/0x131
(XEN)    [<ffff82d0802687a7>] iommu_read_apic_from_ire+0x10/0x12
(XEN)    [<ffff82d08027c25b>] io_apic.c#io_apic_read+0x17/0x5f
(XEN)    [<ffff82d08027cefb>] __ioapic_read_entry+0x2f/0x55
(XEN)    [<ffff82d08027d8c8>] io_apic.c#clear_IO_APIC_pin+0x1a/0xf3
(XEN)    [<ffff82d08027d9ce>] io_apic.c#clear_IO_APIC+0x2d/0x60
(XEN)    [<ffff82d08027dd30>] disable_IO_APIC+0xd/0x7e
(XEN)    [<ffff82d080270a79>] machine_crash_shutdown+0x228/0x292
(XEN)    [<ffff82d08021dd74>] kexec_crash+0x3f/0x5b
(XEN)    [<ffff82d08024db40>] panic+0x117/0x12f
(XEN)    [<ffff82d0802ab1b8>] do_general_protection+0x22b/0x234
(XEN)    [<ffff82d080387a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
(XEN)    [<00000000aa5b526b>] 00000000aa5b526b
(XEN)    [<ffff82d0802a3045>] machine_restart+0x1ef/0x2a4
(XEN)    [<ffff82d0802a3105>] send_IPI_mask+0/0xc
(XEN)    [<ffff82d08023cdaa>] smp_call_function_interrupt+0x95/0xb8
(XEN)    [<ffff82d0802a37da>] call_function_interrupt+0x35/0x37
(XEN)    [<ffff82d080283fb4>] do_IRQ+0xa7/0x697
(XEN)    [<ffff82d0803878ba>] common_interrupt+0x10a/0x120
(XEN)    [<ffff82d0802e1bc5>] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
(XEN)    [<ffff82d0802e20ae>] cpu_idle.c#acpi_processor_idle+0x313/0x590
(XEN)    [<ffff82d080274f8c>] domain.c#idle_loop+0xa2/0xb1
(XEN) 
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) Assertion 'offset == (val & (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
(XEN) ****************************************
(XEN) 


I think we need a similar adjustment in nmi_shootdown_cpus()

~Andrew
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">On 17/06/2019 18:39, Andrew Cooper
      wrote:<br>
    </div>
    <blockquote type="cite"
      cite="mid:caa80cec-7b46-b31a-88e2-2554a495e159@citrix.com">
      <pre class="moz-quote-pre" wrap="">On 29/05/2019 11:17, Jan Beulich wrote:
</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">In particular with an enabled IOMMU (but not really limited to this
case), trying to invoke fixup_irqs() after having already done
disable_IO_APIC() -&gt; clear_IO_APIC() is a rather bad idea:

 RIP:    e008:[&lt;ffff82d08026a036&gt;] amd_iommu_read_ioapic_from_ire+0xde/0x113
 RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
 rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
 rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
 rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
 r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
 r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
 r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
 cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
 fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
 ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
 Xen code around &lt;ffff82d08026a036&gt; (amd_iommu_read_ioapic_from_ire+0xde/0x113):
  ff 07 00 00 39 d3 74 02 &lt;0f&gt; 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
 Xen stack trace from rsp=ffff83009e8a79a8:
 ...
 Xen call trace:
    [&lt;ffff82d08026a036&gt;] amd_iommu_read_ioapic_from_ire+0xde/0x113
    [&lt;ffff82d08026bf7b&gt;] iommu_read_apic_from_ire+0x10/0x12
    [&lt;ffff82d08027f718&gt;] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
    [&lt;ffff82d08027f9c5&gt;] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
    [&lt;ffff82d080289bc7&gt;] fixup_irqs+0x320/0x40b
    [&lt;ffff82d0802a82c4&gt;] smp_send_stop+0x4b/0xa8
    [&lt;ffff82d0802a7b2f&gt;] machine_restart+0x98/0x288
    [&lt;ffff82d080252242&gt;] console_suspend+0/0x28
    [&lt;ffff82d0802b01da&gt;] do_general_protection+0x204/0x24e
    [&lt;ffff82d080385a3d&gt;] x86_64/entry.S#handle_exception_saved+0x68/0x94
    [&lt;00000000aa5b526b&gt;] 00000000aa5b526b
    [&lt;ffff82d0802a7c7d&gt;] machine_restart+0x1e6/0x288
    [&lt;ffff82d080240f75&gt;] hwdom_shutdown+0xa2/0x11d
    [&lt;ffff82d08020baa2&gt;] domain_shutdown+0x4f/0xd8
    [&lt;ffff82d08023fe98&gt;] do_sched_op+0x12f/0x42a
    [&lt;ffff82d08037e404&gt;] pv_hypercall+0x1e4/0x564
    [&lt;ffff82d080385432&gt;] lstar_enter+0x112/0x120

Don't call fixup_irqs() and don't send any IPI if there's only one
online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
we're on was already marked offline (by a prior invocation of
__stop_this_cpu()).

Reported-by: Andrew Cooper <a class="moz-txt-link-rfc2396E" href="mailto:andrew.cooper3@citrix.com">&lt;andrew.cooper3@citrix.com&gt;</a>
Signed-off-by: Jan Beulich <a class="moz-txt-link-rfc2396E" href="mailto:jbeulich@suse.com">&lt;jbeulich@suse.com&gt;</a>
</pre>
      </blockquote>
      <pre class="moz-quote-pre" wrap="">
It is probably worth noting that the above stack trace is a cascade
fault, where we took a #GP fault in the middle of the EFI firmware, and
then tried restarting a second time.

For the change it is an improvement, so Acked-by: Andrew Cooper
<a class="moz-txt-link-rfc2396E" href="mailto:andrew.cooper3@citrix.com">&lt;andrew.cooper3@citrix.com&gt;</a>

There are further fixes needing (which have been on my todo list for
rather too long) to avoid any local_irq_enable() on the shutdown path,
because during a crash (especially one in the middle of a vcpu context
switch), its not safe to re-enable interrupts.

The only solution I've got involves using NMI based IPIs/shootdowns.</pre>
    </blockquote>
    <br>
    /sigh and no sooner as I tried testing this, I found the next piece
    of fallout:<br>
    <br>
    <pre>[   90.447906] reboot: Restarting system
(XEN) Hardware Dom0 shutdown: rebooting machine
(XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[&lt;00000000aa5b526b&gt;] 00000000aa5b526b
(XEN) RFLAGS: 0000000000010246   CONTEXT: hypervisor
(XEN) rax: 00000000aa985950   rbx: 0000000000000000   rcx: 00000000aa5b7010
(XEN) rdx: 0000000000000000   rsi: ffff83009e827fff   rdi: 00000000003506e0
(XEN) rbp: ffff83009e827c70   rsp: ffff83009e827bb0   r8:  00000000aa5b7048
(XEN) r9:  0000000000000000   r10: ffff83009e827c88   r11: 0f0f0f0f0f0f0f0f
(XEN) r12: 00000000fffffffe   r13: 0000000000000cf9   r14: 0000000000000000
(XEN) r15: 0000000000000065   cr0: 0000000080050033   cr4: 00000000003506e0
(XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
(XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around &lt;00000000aa5b526b&gt; (00000000aa5b526b):
(XEN)  8d 0d a7 1d 00 00 33 d2 &lt;ff&gt; 90 40 01 00 00 48 8b 05 d0 1d 00 00 48 83 c4
(XEN) Xen stack trace from rsp=ffff83009e827bb0:
(XEN)    000000000000001f 00000000003506e0 ffff83009e827c00 0000000000000206
(XEN)    ffff82d08027d96a 00000000aa5b51f7 0000000000000286 ffff83009e827c40
(XEN)    000000009e817000 0000000000000cf9 ffff83009e827c30 ffff82d080201327
(XEN)    00000000ffffffff ffff82d08020162e 0000000000000000 00000000fffffffe
(XEN)    ffff83009e827c70 ffff82d0802015ff 000000009e817000 ffff83009e827c78
(XEN)    ffff82d0802a372a 0000000080000000 0000000000000000 ffff83009e827c88
(XEN)    ffff83009e827cd8 ffff82d0802a3045 ffff82d0802a3045 ffff83009e827c98
(XEN)    000000008028454c 000082d080387851 0000000000000000 ffff82d080387851
(XEN)    0000000000000000 ffff83009e827d98 00000000000000fb 0000000080000000
(XEN)    0000000000000000 ffff83009e827ce8 ffff82d0802a3105 ffff83009e827d08
(XEN)    ffff82d08023cdaa ffff82d080387851 0000000000000000 ffff83009e827d18
(XEN)    ffff82d0802a37da ffff83009e827d88 ffff82d080283fb4 ffff82d080387851
(XEN)    ffff82d080387845 0000000000000000 ffff82d080387845 ffff82d080387851
(XEN)    ffff82d080387845 ffff82d080387851 0000000000000000 0000000000000000
(XEN)    0000000000000000 ffff83009e827fff 0000000000000000 00007cff617d8247
(XEN)    ffff82d0803878ba ffff82d080933900 0000000000000000 000000204b161644
(XEN)    ffff8310f5206ef8 ffff83009e827e40 ffff8310f5206ea0 0000002065fe762e
(XEN)    000000204bbfce6e ffff82d08095c3e0 ffff83009e827ef8 0000000000000000
(XEN)    0000000000000048 0000000000000000 ffff83009e827fff ffff8310f5206ef8
(XEN)    000000fb00000000 ffff82d0802e1bc5 000000000000e008 0000000000000206
(XEN) Xen call trace:
(XEN)    [&lt;00000000aa5b526b&gt;] 00000000aa5b526b
(XEN)    [&lt;ffff82d0802a3045&gt;] machine_restart+0x1ef/0x2a4
(XEN)    [&lt;ffff82d0802a3105&gt;] send_IPI_mask+0/0xc
(XEN)    [&lt;ffff82d08023cdaa&gt;] smp_call_function_interrupt+0x95/0xb8
(XEN)    [&lt;ffff82d0802a37da&gt;] call_function_interrupt+0x35/0x37
(XEN)    [&lt;ffff82d080283fb4&gt;] do_IRQ+0xa7/0x697
(XEN)    [&lt;ffff82d0803878ba&gt;] common_interrupt+0x10a/0x120
(XEN)    [&lt;ffff82d0802e1bc5&gt;] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
(XEN)    [&lt;ffff82d0802e20ae&gt;] cpu_idle.c#acpi_processor_idle+0x313/0x590
(XEN)    [&lt;ffff82d080274f8c&gt;] domain.c#idle_loop+0xa2/0xb1
(XEN) 
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) GENERAL PROTECTION FAULT
(XEN) [error_code=0000]
(XEN) ****************************************
(XEN) 
(XEN) Reboot in five seconds...
(XEN) Executing kexec image on cpu0
(XEN) Shot down all CPUs
(XEN) Assertion 'offset == (val &amp; (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
(XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[&lt;ffff82d080266844&gt;] amd_iommu_read_ioapic_from_ire+0xd0/0x131
(XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: 0000000000000014   rcx: ffff832005d68010
(XEN) rdx: ffff832005d68000   rsi: 0000000000000000   rdi: ffff82d080942a00
(XEN) rbp: ffff83009e827948   rsp: ffff83009e827928   r8:  ffff82d0808074c0
(XEN) r9:  ffff82d080942a08   r10: 0000000000000000   r11: 0000000000000001
(XEN) r12: 0000000000010000   r13: 0000000000000001   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000003506e0
(XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
(XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around &lt;ffff82d080266844&gt; (amd_iommu_read_ioapic_from_ire+0xd0/0x131):
(XEN)  07 00 00 41 39 c5 74 02 &lt;0f&gt; 0b 41 81 e4 00 f8 ff ff 0f b6 11 c0 ea 02 83
(XEN) Xen stack trace from rsp=ffff83009e827928:
(XEN)    ffff82d08027c244 0000000000000014 ffff82d080806980 0000000000000000
(XEN)    ffff83009e827958 ffff82d0802687a7 ffff83009e827968 ffff82d08027c25b
(XEN)    ffff83009e827998 ffff82d08027cefb 0000000000000000 0000000000000002
(XEN)    ffff82d080806980 ffff82d080806980 ffff83009e8279c8 ffff82d08027d8c8
(XEN)    ffff83009e827fff 0000000000010000 0000000000000002 0000000000000000
(XEN)    ffff83009e8279f8 ffff82d08027d9ce 00000000000003e8 ffff82d080943b80
(XEN)    0000000000000000 ffff83009e827fff ffff83009e827a18 ffff82d08027dd30
(XEN)    ffff83009e827a18 ffff82d0802a3626 ffff83009e827a38 ffff82d080270a79
(XEN)    0000000000000003 ffff82d08043f858 ffff83009e827a58 ffff82d08021dd74
(XEN)    0000000000000206 0000000000000296 ffff83009e827ac8 ffff82d08024db40
(XEN)    ffff83009e827aa8 ffff82d000000010 ffff83009e827ad8 ffff83009e827a88
(XEN)    ffff83009e827aa8 0000000000000000 ffff83009e827fff 0000000000000000
(XEN)    ffff8310f3e00000 0000000000000002 ffff83009e827b08 0000000000000000
(XEN)    ffff83009e827af8 ffff82d0802ab1b8 ffff82d080387979 ffff82d08038796d
(XEN)    ffff8310f4c08000 0000000000000000 00007cff617d84d7 ffff82d080387a3d
(XEN)    0000000000000065 0000000000000000 0000000000000cf9 00000000fffffffe
(XEN)    ffff83009e827c70 0000000000000000 0f0f0f0f0f0f0f0f ffff83009e827c88
(XEN)    0000000000000000 00000000aa5b7048 00000000aa985950 00000000aa5b7010
(XEN)    0000000000000000 ffff83009e827fff 00000000003506e0 0000000d00000000
(XEN)    00000000aa5b526b 000000000000e008 0000000000010246 ffff83009e827bb0
(XEN) Xen call trace:
(XEN)    [&lt;ffff82d080266844&gt;] amd_iommu_read_ioapic_from_ire+0xd0/0x131
(XEN)    [&lt;ffff82d0802687a7&gt;] iommu_read_apic_from_ire+0x10/0x12
(XEN)    [&lt;ffff82d08027c25b&gt;] io_apic.c#io_apic_read+0x17/0x5f
(XEN)    [&lt;ffff82d08027cefb&gt;] __ioapic_read_entry+0x2f/0x55
(XEN)    [&lt;ffff82d08027d8c8&gt;] io_apic.c#clear_IO_APIC_pin+0x1a/0xf3
(XEN)    [&lt;ffff82d08027d9ce&gt;] io_apic.c#clear_IO_APIC+0x2d/0x60
(XEN)    [&lt;ffff82d08027dd30&gt;] disable_IO_APIC+0xd/0x7e
(XEN)    [&lt;ffff82d080270a79&gt;] machine_crash_shutdown+0x228/0x292
(XEN)    [&lt;ffff82d08021dd74&gt;] kexec_crash+0x3f/0x5b
(XEN)    [&lt;ffff82d08024db40&gt;] panic+0x117/0x12f
(XEN)    [&lt;ffff82d0802ab1b8&gt;] do_general_protection+0x22b/0x234
(XEN)    [&lt;ffff82d080387a3d&gt;] x86_64/entry.S#handle_exception_saved+0x68/0x94
(XEN)    [&lt;00000000aa5b526b&gt;] 00000000aa5b526b
(XEN)    [&lt;ffff82d0802a3045&gt;] machine_restart+0x1ef/0x2a4
(XEN)    [&lt;ffff82d0802a3105&gt;] send_IPI_mask+0/0xc
(XEN)    [&lt;ffff82d08023cdaa&gt;] smp_call_function_interrupt+0x95/0xb8
(XEN)    [&lt;ffff82d0802a37da&gt;] call_function_interrupt+0x35/0x37
(XEN)    [&lt;ffff82d080283fb4&gt;] do_IRQ+0xa7/0x697
(XEN)    [&lt;ffff82d0803878ba&gt;] common_interrupt+0x10a/0x120
(XEN)    [&lt;ffff82d0802e1bc5&gt;] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
(XEN)    [&lt;ffff82d0802e20ae&gt;] cpu_idle.c#acpi_processor_idle+0x313/0x590
(XEN)    [&lt;ffff82d080274f8c&gt;] domain.c#idle_loop+0xa2/0xb1
(XEN) 
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) Assertion 'offset == (val &amp; (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
(XEN) ****************************************
(XEN) 

</pre>
    <br>
    I think we need a similar adjustment in nmi_shootdown_cpus()<br>
    <br>
    ~Andrew<br>
  </body>
</html>
Andrew Cooper June 17, 2019, 6:27 p.m. UTC | #6
On 17/06/2019 18:55, Andrew Cooper wrote:
> On 17/06/2019 18:39, Andrew Cooper wrote:
>> On 29/05/2019 11:17, Jan Beulich wrote:
>>> In particular with an enabled IOMMU (but not really limited to this
>>> case), trying to invoke fixup_irqs() after having already done
>>> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
>>>
>>>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>>>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>>>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>>>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>>>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>>>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>>>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>>>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>>>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>>>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>>>  Xen code around <ffff82d08026a036> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>>>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>>>  Xen stack trace from rsp=ffff83009e8a79a8:
>>>  ...
>>>  Xen call trace:
>>>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>>>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>>>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>>>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>>>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>>>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>>>     [<ffff82d080252242>] console_suspend+0/0x28
>>>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>>>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>>>     [<00000000aa5b526b>] 00000000aa5b526b
>>>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>>>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>>>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>>>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>>>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>>>     [<ffff82d080385432>] lstar_enter+0x112/0x120
>>>
>>> Don't call fixup_irqs() and don't send any IPI if there's only one
>>> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
>>> we're on was already marked offline (by a prior invocation of
>>> __stop_this_cpu()).
>>>
>>> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> It is probably worth noting that the above stack trace is a cascade
>> fault, where we took a #GP fault in the middle of the EFI firmware, and
>> then tried restarting a second time.
>>
>> For the change it is an improvement, so Acked-by: Andrew Cooper
>> <andrew.cooper3@citrix.com>
>>
>> There are further fixes needing (which have been on my todo list for
>> rather too long) to avoid any local_irq_enable() on the shutdown path,
>> because during a crash (especially one in the middle of a vcpu context
>> switch), its not safe to re-enable interrupts.
>>
>> The only solution I've got involves using NMI based IPIs/shootdowns.
>
> /sigh and no sooner as I tried testing this, I found the next piece of
> fallout:
>
> [   90.447906] reboot: Restarting system
> (XEN) Hardware Dom0 shutdown: rebooting machine
> (XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
> (XEN) CPU:    0
> (XEN) RIP:    e008:[<00000000aa5b526b>] 00000000aa5b526b
> (XEN) RFLAGS: 0000000000010246   CONTEXT: hypervisor
> (XEN) rax: 00000000aa985950   rbx: 0000000000000000   rcx: 00000000aa5b7010
> (XEN) rdx: 0000000000000000   rsi: ffff83009e827fff   rdi: 00000000003506e0
> (XEN) rbp: ffff83009e827c70   rsp: ffff83009e827bb0   r8:  00000000aa5b7048
> (XEN) r9:  0000000000000000   r10: ffff83009e827c88   r11: 0f0f0f0f0f0f0f0f
> (XEN) r12: 00000000fffffffe   r13: 0000000000000cf9   r14: 0000000000000000
> (XEN) r15: 0000000000000065   cr0: 0000000080050033   cr4: 00000000003506e0
> (XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
> (XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
> (XEN) Xen code around <00000000aa5b526b> (00000000aa5b526b):
> (XEN)  8d 0d a7 1d 00 00 33 d2 <ff> 90 40 01 00 00 48 8b 05 d0 1d 00 00 48 83 c4
> (XEN) Xen stack trace from rsp=ffff83009e827bb0:
> (XEN)    000000000000001f 00000000003506e0 ffff83009e827c00 0000000000000206
> (XEN)    ffff82d08027d96a 00000000aa5b51f7 0000000000000286 ffff83009e827c40
> (XEN)    000000009e817000 0000000000000cf9 ffff83009e827c30 ffff82d080201327
> (XEN)    00000000ffffffff ffff82d08020162e 0000000000000000 00000000fffffffe
> (XEN)    ffff83009e827c70 ffff82d0802015ff 000000009e817000 ffff83009e827c78
> (XEN)    ffff82d0802a372a 0000000080000000 0000000000000000 ffff83009e827c88
> (XEN)    ffff83009e827cd8 ffff82d0802a3045 ffff82d0802a3045 ffff83009e827c98
> (XEN)    000000008028454c 000082d080387851 0000000000000000 ffff82d080387851
> (XEN)    0000000000000000 ffff83009e827d98 00000000000000fb 0000000080000000
> (XEN)    0000000000000000 ffff83009e827ce8 ffff82d0802a3105 ffff83009e827d08
> (XEN)    ffff82d08023cdaa ffff82d080387851 0000000000000000 ffff83009e827d18
> (XEN)    ffff82d0802a37da ffff83009e827d88 ffff82d080283fb4 ffff82d080387851
> (XEN)    ffff82d080387845 0000000000000000 ffff82d080387845 ffff82d080387851
> (XEN)    ffff82d080387845 ffff82d080387851 0000000000000000 0000000000000000
> (XEN)    0000000000000000 ffff83009e827fff 0000000000000000 00007cff617d8247
> (XEN)    ffff82d0803878ba ffff82d080933900 0000000000000000 000000204b161644
> (XEN)    ffff8310f5206ef8 ffff83009e827e40 ffff8310f5206ea0 0000002065fe762e
> (XEN)    000000204bbfce6e ffff82d08095c3e0 ffff83009e827ef8 0000000000000000
> (XEN)    0000000000000048 0000000000000000 ffff83009e827fff ffff8310f5206ef8
> (XEN)    000000fb00000000 ffff82d0802e1bc5 000000000000e008 0000000000000206
> (XEN) Xen call trace:
> (XEN)    [<00000000aa5b526b>] 00000000aa5b526b
> (XEN)    [<ffff82d0802a3045>] machine_restart+0x1ef/0x2a4
> (XEN)    [<ffff82d0802a3105>] send_IPI_mask+0/0xc
> (XEN)    [<ffff82d08023cdaa>] smp_call_function_interrupt+0x95/0xb8
> (XEN)    [<ffff82d0802a37da>] call_function_interrupt+0x35/0x37
> (XEN)    [<ffff82d080283fb4>] do_IRQ+0xa7/0x697
> (XEN)    [<ffff82d0803878ba>] common_interrupt+0x10a/0x120
> (XEN)    [<ffff82d0802e1bc5>] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
> (XEN)    [<ffff82d0802e20ae>] cpu_idle.c#acpi_processor_idle+0x313/0x590
> (XEN)    [<ffff82d080274f8c>] domain.c#idle_loop+0xa2/0xb1
> (XEN) 
> (XEN) 
> (XEN) ****************************************
> (XEN) Panic on CPU 0:
> (XEN) GENERAL PROTECTION FAULT
> (XEN) [error_code=0000]
> (XEN) ****************************************
> (XEN) 
> (XEN) Reboot in five seconds...
> (XEN) Executing kexec image on cpu0
> (XEN) Shot down all CPUs
> (XEN) Assertion 'offset == (val & (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
> (XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
> (XEN) CPU:    0
> (XEN) RIP:    e008:[<ffff82d080266844>] amd_iommu_read_ioapic_from_ire+0xd0/0x131
> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
> (XEN) rax: 0000000000000000   rbx: 0000000000000014   rcx: ffff832005d68010
> (XEN) rdx: ffff832005d68000   rsi: 0000000000000000   rdi: ffff82d080942a00
> (XEN) rbp: ffff83009e827948   rsp: ffff83009e827928   r8:  ffff82d0808074c0
> (XEN) r9:  ffff82d080942a08   r10: 0000000000000000   r11: 0000000000000001
> (XEN) r12: 0000000000010000   r13: 0000000000000001   r14: 0000000000000000
> (XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000003506e0
> (XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
> (XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
> (XEN) Xen code around <ffff82d080266844> (amd_iommu_read_ioapic_from_ire+0xd0/0x131):
> (XEN)  07 00 00 41 39 c5 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 0f b6 11 c0 ea 02 83
> (XEN) Xen stack trace from rsp=ffff83009e827928:
> (XEN)    ffff82d08027c244 0000000000000014 ffff82d080806980 0000000000000000
> (XEN)    ffff83009e827958 ffff82d0802687a7 ffff83009e827968 ffff82d08027c25b
> (XEN)    ffff83009e827998 ffff82d08027cefb 0000000000000000 0000000000000002
> (XEN)    ffff82d080806980 ffff82d080806980 ffff83009e8279c8 ffff82d08027d8c8
> (XEN)    ffff83009e827fff 0000000000010000 0000000000000002 0000000000000000
> (XEN)    ffff83009e8279f8 ffff82d08027d9ce 00000000000003e8 ffff82d080943b80
> (XEN)    0000000000000000 ffff83009e827fff ffff83009e827a18 ffff82d08027dd30
> (XEN)    ffff83009e827a18 ffff82d0802a3626 ffff83009e827a38 ffff82d080270a79
> (XEN)    0000000000000003 ffff82d08043f858 ffff83009e827a58 ffff82d08021dd74
> (XEN)    0000000000000206 0000000000000296 ffff83009e827ac8 ffff82d08024db40
> (XEN)    ffff83009e827aa8 ffff82d000000010 ffff83009e827ad8 ffff83009e827a88
> (XEN)    ffff83009e827aa8 0000000000000000 ffff83009e827fff 0000000000000000
> (XEN)    ffff8310f3e00000 0000000000000002 ffff83009e827b08 0000000000000000
> (XEN)    ffff83009e827af8 ffff82d0802ab1b8 ffff82d080387979 ffff82d08038796d
> (XEN)    ffff8310f4c08000 0000000000000000 00007cff617d84d7 ffff82d080387a3d
> (XEN)    0000000000000065 0000000000000000 0000000000000cf9 00000000fffffffe
> (XEN)    ffff83009e827c70 0000000000000000 0f0f0f0f0f0f0f0f ffff83009e827c88
> (XEN)    0000000000000000 00000000aa5b7048 00000000aa985950 00000000aa5b7010
> (XEN)    0000000000000000 ffff83009e827fff 00000000003506e0 0000000d00000000
> (XEN)    00000000aa5b526b 000000000000e008 0000000000010246 ffff83009e827bb0
> (XEN) Xen call trace:
> (XEN)    [<ffff82d080266844>] amd_iommu_read_ioapic_from_ire+0xd0/0x131
> (XEN)    [<ffff82d0802687a7>] iommu_read_apic_from_ire+0x10/0x12
> (XEN)    [<ffff82d08027c25b>] io_apic.c#io_apic_read+0x17/0x5f
> (XEN)    [<ffff82d08027cefb>] __ioapic_read_entry+0x2f/0x55
> (XEN)    [<ffff82d08027d8c8>] io_apic.c#clear_IO_APIC_pin+0x1a/0xf3
> (XEN)    [<ffff82d08027d9ce>] io_apic.c#clear_IO_APIC+0x2d/0x60
> (XEN)    [<ffff82d08027dd30>] disable_IO_APIC+0xd/0x7e
> (XEN)    [<ffff82d080270a79>] machine_crash_shutdown+0x228/0x292
> (XEN)    [<ffff82d08021dd74>] kexec_crash+0x3f/0x5b
> (XEN)    [<ffff82d08024db40>] panic+0x117/0x12f
> (XEN)    [<ffff82d0802ab1b8>] do_general_protection+0x22b/0x234
> (XEN)    [<ffff82d080387a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
> (XEN)    [<00000000aa5b526b>] 00000000aa5b526b
> (XEN)    [<ffff82d0802a3045>] machine_restart+0x1ef/0x2a4
> (XEN)    [<ffff82d0802a3105>] send_IPI_mask+0/0xc
> (XEN)    [<ffff82d08023cdaa>] smp_call_function_interrupt+0x95/0xb8
> (XEN)    [<ffff82d0802a37da>] call_function_interrupt+0x35/0x37
> (XEN)    [<ffff82d080283fb4>] do_IRQ+0xa7/0x697
> (XEN)    [<ffff82d0803878ba>] common_interrupt+0x10a/0x120
> (XEN)    [<ffff82d0802e1bc5>] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
> (XEN)    [<ffff82d0802e20ae>] cpu_idle.c#acpi_processor_idle+0x313/0x590
> (XEN)    [<ffff82d080274f8c>] domain.c#idle_loop+0xa2/0xb1
> (XEN) 
> (XEN) 
> (XEN) ****************************************
> (XEN) Panic on CPU 0:
> (XEN) Assertion 'offset == (val & (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
> (XEN) ****************************************
> (XEN) 
>
>
> I think we need a similar adjustment in nmi_shootdown_cpus()

Yes.  With this fix included, then we successfully transition into the
crash kernel.  I think it would be best to fold into this patch, given
its direct relevance.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index 01e48a1..f9772dc 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -169,15 +169,20 @@ static void nmi_shootdown_cpus(void)
      */
     iommu_crash_shutdown();
 
-    __stop_this_cpu();
+    if ( num_online_cpus() > 1 )
+    {
+        __stop_this_cpu();
 
-    /* This is a bit of a hack due to the problems with the x2apic_enabled
-     * variable, but we can't do any better without a significant refactoring
-     * of the APIC code */
-    x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
+        /*
+         * This is a bit of a hack due to the problems with the x2apic_enabled
+         * variable, but we can't do any better without a significant
+         * refactoring of the APIC code
+         */
+        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
 
-    disable_IO_APIC();
-    hpet_disable();
+        disable_IO_APIC();
+        hpet_disable();
+    }
 }
 
 void machine_crash_shutdown(void)
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">On 17/06/2019 18:55, Andrew Cooper
      wrote:<br>
    </div>
    <blockquote type="cite"
      cite="mid:5183a4f3-69ff-076a-c15d-817472a42ab1@citrix.com">
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      <div class="moz-cite-prefix">On 17/06/2019 18:39, Andrew Cooper
        wrote:<br>
      </div>
      <blockquote type="cite"
        cite="mid:caa80cec-7b46-b31a-88e2-2554a495e159@citrix.com">
        <pre class="moz-quote-pre" wrap="">On 29/05/2019 11:17, Jan Beulich wrote:
</pre>
        <blockquote type="cite">
          <pre class="moz-quote-pre" wrap="">In particular with an enabled IOMMU (but not really limited to this
case), trying to invoke fixup_irqs() after having already done
disable_IO_APIC() -&gt; clear_IO_APIC() is a rather bad idea:

 RIP:    e008:[&lt;ffff82d08026a036&gt;] amd_iommu_read_ioapic_from_ire+0xde/0x113
 RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
 rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
 rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
 rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
 r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
 r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
 r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
 cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
 fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
 ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
 Xen code around &lt;ffff82d08026a036&gt; (amd_iommu_read_ioapic_from_ire+0xde/0x113):
  ff 07 00 00 39 d3 74 02 &lt;0f&gt; 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
 Xen stack trace from rsp=ffff83009e8a79a8:
 ...
 Xen call trace:
    [&lt;ffff82d08026a036&gt;] amd_iommu_read_ioapic_from_ire+0xde/0x113
    [&lt;ffff82d08026bf7b&gt;] iommu_read_apic_from_ire+0x10/0x12
    [&lt;ffff82d08027f718&gt;] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
    [&lt;ffff82d08027f9c5&gt;] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
    [&lt;ffff82d080289bc7&gt;] fixup_irqs+0x320/0x40b
    [&lt;ffff82d0802a82c4&gt;] smp_send_stop+0x4b/0xa8
    [&lt;ffff82d0802a7b2f&gt;] machine_restart+0x98/0x288
    [&lt;ffff82d080252242&gt;] console_suspend+0/0x28
    [&lt;ffff82d0802b01da&gt;] do_general_protection+0x204/0x24e
    [&lt;ffff82d080385a3d&gt;] x86_64/entry.S#handle_exception_saved+0x68/0x94
    [&lt;00000000aa5b526b&gt;] 00000000aa5b526b
    [&lt;ffff82d0802a7c7d&gt;] machine_restart+0x1e6/0x288
    [&lt;ffff82d080240f75&gt;] hwdom_shutdown+0xa2/0x11d
    [&lt;ffff82d08020baa2&gt;] domain_shutdown+0x4f/0xd8
    [&lt;ffff82d08023fe98&gt;] do_sched_op+0x12f/0x42a
    [&lt;ffff82d08037e404&gt;] pv_hypercall+0x1e4/0x564
    [&lt;ffff82d080385432&gt;] lstar_enter+0x112/0x120

Don't call fixup_irqs() and don't send any IPI if there's only one
online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
we're on was already marked offline (by a prior invocation of
__stop_this_cpu()).

Reported-by: Andrew Cooper <a class="moz-txt-link-rfc2396E" href="mailto:andrew.cooper3@citrix.com" moz-do-not-send="true">&lt;andrew.cooper3@citrix.com&gt;</a>
Signed-off-by: Jan Beulich <a class="moz-txt-link-rfc2396E" href="mailto:jbeulich@suse.com" moz-do-not-send="true">&lt;jbeulich@suse.com&gt;</a>
</pre>
        </blockquote>
        <pre class="moz-quote-pre" wrap="">It is probably worth noting that the above stack trace is a cascade
fault, where we took a #GP fault in the middle of the EFI firmware, and
then tried restarting a second time.

For the change it is an improvement, so Acked-by: Andrew Cooper
<a class="moz-txt-link-rfc2396E" href="mailto:andrew.cooper3@citrix.com" moz-do-not-send="true">&lt;andrew.cooper3@citrix.com&gt;</a>

There are further fixes needing (which have been on my todo list for
rather too long) to avoid any local_irq_enable() on the shutdown path,
because during a crash (especially one in the middle of a vcpu context
switch), its not safe to re-enable interrupts.

The only solution I've got involves using NMI based IPIs/shootdowns.</pre>
      </blockquote>
      <br>
      /sigh and no sooner as I tried testing this, I found the next
      piece of fallout:<br>
      <br>
      <pre>[   90.447906] reboot: Restarting system
(XEN) Hardware Dom0 shutdown: rebooting machine
(XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[&lt;00000000aa5b526b&gt;] 00000000aa5b526b
(XEN) RFLAGS: 0000000000010246   CONTEXT: hypervisor
(XEN) rax: 00000000aa985950   rbx: 0000000000000000   rcx: 00000000aa5b7010
(XEN) rdx: 0000000000000000   rsi: ffff83009e827fff   rdi: 00000000003506e0
(XEN) rbp: ffff83009e827c70   rsp: ffff83009e827bb0   r8:  00000000aa5b7048
(XEN) r9:  0000000000000000   r10: ffff83009e827c88   r11: 0f0f0f0f0f0f0f0f
(XEN) r12: 00000000fffffffe   r13: 0000000000000cf9   r14: 0000000000000000
(XEN) r15: 0000000000000065   cr0: 0000000080050033   cr4: 00000000003506e0
(XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
(XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around &lt;00000000aa5b526b&gt; (00000000aa5b526b):
(XEN)  8d 0d a7 1d 00 00 33 d2 &lt;ff&gt; 90 40 01 00 00 48 8b 05 d0 1d 00 00 48 83 c4
(XEN) Xen stack trace from rsp=ffff83009e827bb0:
(XEN)    000000000000001f 00000000003506e0 ffff83009e827c00 0000000000000206
(XEN)    ffff82d08027d96a 00000000aa5b51f7 0000000000000286 ffff83009e827c40
(XEN)    000000009e817000 0000000000000cf9 ffff83009e827c30 ffff82d080201327
(XEN)    00000000ffffffff ffff82d08020162e 0000000000000000 00000000fffffffe
(XEN)    ffff83009e827c70 ffff82d0802015ff 000000009e817000 ffff83009e827c78
(XEN)    ffff82d0802a372a 0000000080000000 0000000000000000 ffff83009e827c88
(XEN)    ffff83009e827cd8 ffff82d0802a3045 ffff82d0802a3045 ffff83009e827c98
(XEN)    000000008028454c 000082d080387851 0000000000000000 ffff82d080387851
(XEN)    0000000000000000 ffff83009e827d98 00000000000000fb 0000000080000000
(XEN)    0000000000000000 ffff83009e827ce8 ffff82d0802a3105 ffff83009e827d08
(XEN)    ffff82d08023cdaa ffff82d080387851 0000000000000000 ffff83009e827d18
(XEN)    ffff82d0802a37da ffff83009e827d88 ffff82d080283fb4 ffff82d080387851
(XEN)    ffff82d080387845 0000000000000000 ffff82d080387845 ffff82d080387851
(XEN)    ffff82d080387845 ffff82d080387851 0000000000000000 0000000000000000
(XEN)    0000000000000000 ffff83009e827fff 0000000000000000 00007cff617d8247
(XEN)    ffff82d0803878ba ffff82d080933900 0000000000000000 000000204b161644
(XEN)    ffff8310f5206ef8 ffff83009e827e40 ffff8310f5206ea0 0000002065fe762e
(XEN)    000000204bbfce6e ffff82d08095c3e0 ffff83009e827ef8 0000000000000000
(XEN)    0000000000000048 0000000000000000 ffff83009e827fff ffff8310f5206ef8
(XEN)    000000fb00000000 ffff82d0802e1bc5 000000000000e008 0000000000000206
(XEN) Xen call trace:
(XEN)    [&lt;00000000aa5b526b&gt;] 00000000aa5b526b
(XEN)    [&lt;ffff82d0802a3045&gt;] machine_restart+0x1ef/0x2a4
(XEN)    [&lt;ffff82d0802a3105&gt;] send_IPI_mask+0/0xc
(XEN)    [&lt;ffff82d08023cdaa&gt;] smp_call_function_interrupt+0x95/0xb8
(XEN)    [&lt;ffff82d0802a37da&gt;] call_function_interrupt+0x35/0x37
(XEN)    [&lt;ffff82d080283fb4&gt;] do_IRQ+0xa7/0x697
(XEN)    [&lt;ffff82d0803878ba&gt;] common_interrupt+0x10a/0x120
(XEN)    [&lt;ffff82d0802e1bc5&gt;] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
(XEN)    [&lt;ffff82d0802e20ae&gt;] cpu_idle.c#acpi_processor_idle+0x313/0x590
(XEN)    [&lt;ffff82d080274f8c&gt;] domain.c#idle_loop+0xa2/0xb1
(XEN) 
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) GENERAL PROTECTION FAULT
(XEN) [error_code=0000]
(XEN) ****************************************
(XEN) 
(XEN) Reboot in five seconds...
(XEN) Executing kexec image on cpu0
(XEN) Shot down all CPUs
(XEN) Assertion 'offset == (val &amp; (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
(XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[&lt;ffff82d080266844&gt;] amd_iommu_read_ioapic_from_ire+0xd0/0x131
(XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: 0000000000000014   rcx: ffff832005d68010
(XEN) rdx: ffff832005d68000   rsi: 0000000000000000   rdi: ffff82d080942a00
(XEN) rbp: ffff83009e827948   rsp: ffff83009e827928   r8:  ffff82d0808074c0
(XEN) r9:  ffff82d080942a08   r10: 0000000000000000   r11: 0000000000000001
(XEN) r12: 0000000000010000   r13: 0000000000000001   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000003506e0
(XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
(XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around &lt;ffff82d080266844&gt; (amd_iommu_read_ioapic_from_ire+0xd0/0x131):
(XEN)  07 00 00 41 39 c5 74 02 &lt;0f&gt; 0b 41 81 e4 00 f8 ff ff 0f b6 11 c0 ea 02 83
(XEN) Xen stack trace from rsp=ffff83009e827928:
(XEN)    ffff82d08027c244 0000000000000014 ffff82d080806980 0000000000000000
(XEN)    ffff83009e827958 ffff82d0802687a7 ffff83009e827968 ffff82d08027c25b
(XEN)    ffff83009e827998 ffff82d08027cefb 0000000000000000 0000000000000002
(XEN)    ffff82d080806980 ffff82d080806980 ffff83009e8279c8 ffff82d08027d8c8
(XEN)    ffff83009e827fff 0000000000010000 0000000000000002 0000000000000000
(XEN)    ffff83009e8279f8 ffff82d08027d9ce 00000000000003e8 ffff82d080943b80
(XEN)    0000000000000000 ffff83009e827fff ffff83009e827a18 ffff82d08027dd30
(XEN)    ffff83009e827a18 ffff82d0802a3626 ffff83009e827a38 ffff82d080270a79
(XEN)    0000000000000003 ffff82d08043f858 ffff83009e827a58 ffff82d08021dd74
(XEN)    0000000000000206 0000000000000296 ffff83009e827ac8 ffff82d08024db40
(XEN)    ffff83009e827aa8 ffff82d000000010 ffff83009e827ad8 ffff83009e827a88
(XEN)    ffff83009e827aa8 0000000000000000 ffff83009e827fff 0000000000000000
(XEN)    ffff8310f3e00000 0000000000000002 ffff83009e827b08 0000000000000000
(XEN)    ffff83009e827af8 ffff82d0802ab1b8 ffff82d080387979 ffff82d08038796d
(XEN)    ffff8310f4c08000 0000000000000000 00007cff617d84d7 ffff82d080387a3d
(XEN)    0000000000000065 0000000000000000 0000000000000cf9 00000000fffffffe
(XEN)    ffff83009e827c70 0000000000000000 0f0f0f0f0f0f0f0f ffff83009e827c88
(XEN)    0000000000000000 00000000aa5b7048 00000000aa985950 00000000aa5b7010
(XEN)    0000000000000000 ffff83009e827fff 00000000003506e0 0000000d00000000
(XEN)    00000000aa5b526b 000000000000e008 0000000000010246 ffff83009e827bb0
(XEN) Xen call trace:
(XEN)    [&lt;ffff82d080266844&gt;] amd_iommu_read_ioapic_from_ire+0xd0/0x131
(XEN)    [&lt;ffff82d0802687a7&gt;] iommu_read_apic_from_ire+0x10/0x12
(XEN)    [&lt;ffff82d08027c25b&gt;] io_apic.c#io_apic_read+0x17/0x5f
(XEN)    [&lt;ffff82d08027cefb&gt;] __ioapic_read_entry+0x2f/0x55
(XEN)    [&lt;ffff82d08027d8c8&gt;] io_apic.c#clear_IO_APIC_pin+0x1a/0xf3
(XEN)    [&lt;ffff82d08027d9ce&gt;] io_apic.c#clear_IO_APIC+0x2d/0x60
(XEN)    [&lt;ffff82d08027dd30&gt;] disable_IO_APIC+0xd/0x7e
(XEN)    [&lt;ffff82d080270a79&gt;] machine_crash_shutdown+0x228/0x292
(XEN)    [&lt;ffff82d08021dd74&gt;] kexec_crash+0x3f/0x5b
(XEN)    [&lt;ffff82d08024db40&gt;] panic+0x117/0x12f
(XEN)    [&lt;ffff82d0802ab1b8&gt;] do_general_protection+0x22b/0x234
(XEN)    [&lt;ffff82d080387a3d&gt;] x86_64/entry.S#handle_exception_saved+0x68/0x94
(XEN)    [&lt;00000000aa5b526b&gt;] 00000000aa5b526b
(XEN)    [&lt;ffff82d0802a3045&gt;] machine_restart+0x1ef/0x2a4
(XEN)    [&lt;ffff82d0802a3105&gt;] send_IPI_mask+0/0xc
(XEN)    [&lt;ffff82d08023cdaa&gt;] smp_call_function_interrupt+0x95/0xb8
(XEN)    [&lt;ffff82d0802a37da&gt;] call_function_interrupt+0x35/0x37
(XEN)    [&lt;ffff82d080283fb4&gt;] do_IRQ+0xa7/0x697
(XEN)    [&lt;ffff82d0803878ba&gt;] common_interrupt+0x10a/0x120
(XEN)    [&lt;ffff82d0802e1bc5&gt;] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
(XEN)    [&lt;ffff82d0802e20ae&gt;] cpu_idle.c#acpi_processor_idle+0x313/0x590
(XEN)    [&lt;ffff82d080274f8c&gt;] domain.c#idle_loop+0xa2/0xb1
(XEN) 
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) Assertion 'offset == (val &amp; (INTREMAP_ENTRIES - 1))' failed at iommu_intr.c:567
(XEN) ****************************************
(XEN) 

</pre>
      <br>
      I think we need a similar adjustment in nmi_shootdown_cpus()<br>
    </blockquote>
    <br>
    Yes.  With this fix included, then we successfully transition into
    the crash kernel.  I think it would be best to fold into this patch,
    given its direct relevance.<br>
    <br>
    <pre>Signed-off-by: Andrew Cooper <a class="moz-txt-link-rfc2396E" href="mailto:andrew.cooper3@citrix.com">&lt;andrew.cooper3@citrix.com&gt;</a>

diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index 01e48a1..f9772dc 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -169,15 +169,20 @@ static void nmi_shootdown_cpus(void)
      */
     iommu_crash_shutdown();
 
-    __stop_this_cpu();
+    if ( num_online_cpus() &gt; 1 )
+    {
+        __stop_this_cpu();
 
-    /* This is a bit of a hack due to the problems with the x2apic_enabled
-     * variable, but we can't do any better without a significant refactoring
-     * of the APIC code */
-    x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
+        /*
+         * This is a bit of a hack due to the problems with the x2apic_enabled
+         * variable, but we can't do any better without a significant
+         * refactoring of the APIC code
+         */
+        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
 
-    disable_IO_APIC();
-    hpet_disable();
+        disable_IO_APIC();
+        hpet_disable();
+    }
 }
 
 void machine_crash_shutdown(void)

</pre>
    <br>
  </body>
</html>
Jan Beulich June 18, 2019, 9:23 a.m. UTC | #7
>>> On 17.06.19 at 19:39, <andrew.cooper3@citrix.com> wrote:
> On 29/05/2019 11:17, Jan Beulich wrote:
>> In particular with an enabled IOMMU (but not really limited to this
>> case), trying to invoke fixup_irqs() after having already done
>> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
>>
>>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>>  Xen code around <ffff82d08026a036> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>>  Xen stack trace from rsp=ffff83009e8a79a8:
>>  ...
>>  Xen call trace:
>>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>>     [<ffff82d080252242>] console_suspend+0/0x28
>>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>>     [<00000000aa5b526b>] 00000000aa5b526b
>>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>>     [<ffff82d080385432>] lstar_enter+0x112/0x120
>>
>> Don't call fixup_irqs() and don't send any IPI if there's only one
>> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
>> we're on was already marked offline (by a prior invocation of
>> __stop_this_cpu()).
>>
>> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> It is probably worth noting that the above stack trace is a cascade
> fault, where we took a #GP fault in the middle of the EFI firmware, and
> then tried restarting a second time.

Well, I've taken this as implied from the stack trace (in particular with
two machine_restart() instances on it) plus the "already stopped" in
the title: I don't see why else CPUs may have got stopped.

> For the change it is an improvement, so Acked-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Thanks.

> There are further fixes needing (which have been on my todo list for
> rather too long) to avoid any local_irq_enable() on the shutdown path,
> because during a crash (especially one in the middle of a vcpu context
> switch), its not safe to re-enable interrupts.

Indeed, and I thought I too had pointed this out recently (as I
did notice this both here and in the context of the fixup_irqs()
work).  But I can't seem to find any record of it. I guess I had
meant to add this as a post-commit-message remark here, but
then forgot.

> The only solution I've got involves using NMI based IPIs/shootdowns.

Nor could we use smp_call_function() anymore anyway, as it
expects IRQs to be enabled. One question would be whether it
makes sense at all to call e.g. fixup_irqs() when shutting down
from a crash.

Jan
Jan Beulich June 18, 2019, 10:26 a.m. UTC | #8
>>> On 17.06.19 at 20:27, <andrew.cooper3@citrix.com> wrote:
> On 17/06/2019 18:55, Andrew Cooper wrote:
>> On 17/06/2019 18:39, Andrew Cooper wrote:
>>> On 29/05/2019 11:17, Jan Beulich wrote:
>>>> In particular with an enabled IOMMU (but not really limited to this
>>>> case), trying to invoke fixup_irqs() after having already done
>>>> disable_IO_APIC() -> clear_IO_APIC() is a rather bad idea:
>>>>
>>>>  RIP:    e008:[<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>>>  RFLAGS: 0000000000010006   CONTEXT: hypervisor (d0v0)
>>>>  rax: ffff8320291de00c   rbx: 0000000000000003   rcx: ffff832035000000
>>>>  rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff82d0805ca840
>>>>  rbp: ffff83009e8a79c8   rsp: ffff83009e8a79a8   r8:  0000000000000000
>>>>  r9:  0000000000000004   r10: 000000000008b9f9   r11: 0000000000000006
>>>>  r12: 0000000000010000   r13: 0000000000000003   r14: 0000000000000000
>>>>  r15: 00000000fffeffff   cr0: 0000000080050033   cr4: 00000000003406e0
>>>>  cr3: 0000002035d59000   cr2: ffff88824ccb4ee0
>>>>  fsb: 00007f2143f08840   gsb: ffff888256a00000   gss: 0000000000000000
>>>>  ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>>>>  Xen code around <ffff82d08026a036> 
> (amd_iommu_read_ioapic_from_ire+0xde/0x113):
>>>>   ff 07 00 00 39 d3 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 8b 10 89 d0 25 00 00
>>>>  Xen stack trace from rsp=ffff83009e8a79a8:
>>>>  ...
>>>>  Xen call trace:
>>>>     [<ffff82d08026a036>] amd_iommu_read_ioapic_from_ire+0xde/0x113
>>>>     [<ffff82d08026bf7b>] iommu_read_apic_from_ire+0x10/0x12
>>>>     [<ffff82d08027f718>] io_apic.c#modify_IO_APIC_irq+0x5e/0x126
>>>>     [<ffff82d08027f9c5>] io_apic.c#unmask_IO_APIC_irq+0x2d/0x41
>>>>     [<ffff82d080289bc7>] fixup_irqs+0x320/0x40b
>>>>     [<ffff82d0802a82c4>] smp_send_stop+0x4b/0xa8
>>>>     [<ffff82d0802a7b2f>] machine_restart+0x98/0x288
>>>>     [<ffff82d080252242>] console_suspend+0/0x28
>>>>     [<ffff82d0802b01da>] do_general_protection+0x204/0x24e
>>>>     [<ffff82d080385a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>>>>     [<00000000aa5b526b>] 00000000aa5b526b
>>>>     [<ffff82d0802a7c7d>] machine_restart+0x1e6/0x288
>>>>     [<ffff82d080240f75>] hwdom_shutdown+0xa2/0x11d
>>>>     [<ffff82d08020baa2>] domain_shutdown+0x4f/0xd8
>>>>     [<ffff82d08023fe98>] do_sched_op+0x12f/0x42a
>>>>     [<ffff82d08037e404>] pv_hypercall+0x1e4/0x564
>>>>     [<ffff82d080385432>] lstar_enter+0x112/0x120
>>>>
>>>> Don't call fixup_irqs() and don't send any IPI if there's only one
>>>> online CPU anyway, and don't call __stop_this_cpu() at all when the CPU
>>>> we're on was already marked offline (by a prior invocation of
>>>> __stop_this_cpu()).
>>>>
>>>> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> It is probably worth noting that the above stack trace is a cascade
>>> fault, where we took a #GP fault in the middle of the EFI firmware, and
>>> then tried restarting a second time.
>>>
>>> For the change it is an improvement, so Acked-by: Andrew Cooper
>>> <andrew.cooper3@citrix.com>
>>>
>>> There are further fixes needing (which have been on my todo list for
>>> rather too long) to avoid any local_irq_enable() on the shutdown path,
>>> because during a crash (especially one in the middle of a vcpu context
>>> switch), its not safe to re-enable interrupts.
>>>
>>> The only solution I've got involves using NMI based IPIs/shootdowns.
>>
>> /sigh and no sooner as I tried testing this, I found the next piece of
>> fallout:
>>
>> [   90.447906] reboot: Restarting system
>> (XEN) Hardware Dom0 shutdown: rebooting machine
>> (XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
>> (XEN) CPU:    0
>> (XEN) RIP:    e008:[<00000000aa5b526b>] 00000000aa5b526b
>> (XEN) RFLAGS: 0000000000010246   CONTEXT: hypervisor
>> (XEN) rax: 00000000aa985950   rbx: 0000000000000000   rcx: 00000000aa5b7010
>> (XEN) rdx: 0000000000000000   rsi: ffff83009e827fff   rdi: 00000000003506e0
>> (XEN) rbp: ffff83009e827c70   rsp: ffff83009e827bb0   r8:  00000000aa5b7048
>> (XEN) r9:  0000000000000000   r10: ffff83009e827c88   r11: 0f0f0f0f0f0f0f0f
>> (XEN) r12: 00000000fffffffe   r13: 0000000000000cf9   r14: 0000000000000000
>> (XEN) r15: 0000000000000065   cr0: 0000000080050033   cr4: 00000000003506e0
>> (XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
>> (XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>> (XEN) Xen code around <00000000aa5b526b> (00000000aa5b526b):
>> (XEN)  8d 0d a7 1d 00 00 33 d2 <ff> 90 40 01 00 00 48 8b 05 d0 1d 00 00 48 83 
> c4
>> (XEN) Xen stack trace from rsp=ffff83009e827bb0:
>> (XEN)    000000000000001f 00000000003506e0 ffff83009e827c00 0000000000000206
>> (XEN)    ffff82d08027d96a 00000000aa5b51f7 0000000000000286 ffff83009e827c40
>> (XEN)    000000009e817000 0000000000000cf9 ffff83009e827c30 ffff82d080201327
>> (XEN)    00000000ffffffff ffff82d08020162e 0000000000000000 00000000fffffffe
>> (XEN)    ffff83009e827c70 ffff82d0802015ff 000000009e817000 ffff83009e827c78
>> (XEN)    ffff82d0802a372a 0000000080000000 0000000000000000 ffff83009e827c88
>> (XEN)    ffff83009e827cd8 ffff82d0802a3045 ffff82d0802a3045 ffff83009e827c98
>> (XEN)    000000008028454c 000082d080387851 0000000000000000 ffff82d080387851
>> (XEN)    0000000000000000 ffff83009e827d98 00000000000000fb 0000000080000000
>> (XEN)    0000000000000000 ffff83009e827ce8 ffff82d0802a3105 ffff83009e827d08
>> (XEN)    ffff82d08023cdaa ffff82d080387851 0000000000000000 ffff83009e827d18
>> (XEN)    ffff82d0802a37da ffff83009e827d88 ffff82d080283fb4 ffff82d080387851
>> (XEN)    ffff82d080387845 0000000000000000 ffff82d080387845 ffff82d080387851
>> (XEN)    ffff82d080387845 ffff82d080387851 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 ffff83009e827fff 0000000000000000 00007cff617d8247
>> (XEN)    ffff82d0803878ba ffff82d080933900 0000000000000000 000000204b161644
>> (XEN)    ffff8310f5206ef8 ffff83009e827e40 ffff8310f5206ea0 0000002065fe762e
>> (XEN)    000000204bbfce6e ffff82d08095c3e0 ffff83009e827ef8 0000000000000000
>> (XEN)    0000000000000048 0000000000000000 ffff83009e827fff ffff8310f5206ef8
>> (XEN)    000000fb00000000 ffff82d0802e1bc5 000000000000e008 0000000000000206
>> (XEN) Xen call trace:
>> (XEN)    [<00000000aa5b526b>] 00000000aa5b526b
>> (XEN)    [<ffff82d0802a3045>] machine_restart+0x1ef/0x2a4
>> (XEN)    [<ffff82d0802a3105>] send_IPI_mask+0/0xc
>> (XEN)    [<ffff82d08023cdaa>] smp_call_function_interrupt+0x95/0xb8
>> (XEN)    [<ffff82d0802a37da>] call_function_interrupt+0x35/0x37
>> (XEN)    [<ffff82d080283fb4>] do_IRQ+0xa7/0x697
>> (XEN)    [<ffff82d0803878ba>] common_interrupt+0x10a/0x120
>> (XEN)    [<ffff82d0802e1bc5>] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
>> (XEN)    [<ffff82d0802e20ae>] cpu_idle.c#acpi_processor_idle+0x313/0x590
>> (XEN)    [<ffff82d080274f8c>] domain.c#idle_loop+0xa2/0xb1
>> (XEN) 
>> (XEN) 
>> (XEN) ****************************************
>> (XEN) Panic on CPU 0:
>> (XEN) GENERAL PROTECTION FAULT
>> (XEN) [error_code=0000]
>> (XEN) ****************************************
>> (XEN) 
>> (XEN) Reboot in five seconds...
>> (XEN) Executing kexec image on cpu0
>> (XEN) Shot down all CPUs
>> (XEN) Assertion 'offset == (val & (INTREMAP_ENTRIES - 1))' failed at 
> iommu_intr.c:567
>> (XEN) ----[ Xen-4.13-unstable  x86_64  debug=y   Not tainted ]----
>> (XEN) CPU:    0
>> (XEN) RIP:    e008:[<ffff82d080266844>] 
> amd_iommu_read_ioapic_from_ire+0xd0/0x131
>> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
>> (XEN) rax: 0000000000000000   rbx: 0000000000000014   rcx: ffff832005d68010
>> (XEN) rdx: ffff832005d68000   rsi: 0000000000000000   rdi: ffff82d080942a00
>> (XEN) rbp: ffff83009e827948   rsp: ffff83009e827928   r8:  ffff82d0808074c0
>> (XEN) r9:  ffff82d080942a08   r10: 0000000000000000   r11: 0000000000000001
>> (XEN) r12: 0000000000010000   r13: 0000000000000001   r14: 0000000000000000
>> (XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000003506e0
>> (XEN) cr3: 00000010f5218000   cr2: ffff88825990c800
>> (XEN) fsb: 0000000000000000   gsb: ffff888266a00000   gss: 0000000000000000
>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
>> (XEN) Xen code around <ffff82d080266844> 
> (amd_iommu_read_ioapic_from_ire+0xd0/0x131):
>> (XEN)  07 00 00 41 39 c5 74 02 <0f> 0b 41 81 e4 00 f8 ff ff 0f b6 11 c0 ea 02 
> 83
>> (XEN) Xen stack trace from rsp=ffff83009e827928:
>> (XEN)    ffff82d08027c244 0000000000000014 ffff82d080806980 0000000000000000
>> (XEN)    ffff83009e827958 ffff82d0802687a7 ffff83009e827968 ffff82d08027c25b
>> (XEN)    ffff83009e827998 ffff82d08027cefb 0000000000000000 0000000000000002
>> (XEN)    ffff82d080806980 ffff82d080806980 ffff83009e8279c8 ffff82d08027d8c8
>> (XEN)    ffff83009e827fff 0000000000010000 0000000000000002 0000000000000000
>> (XEN)    ffff83009e8279f8 ffff82d08027d9ce 00000000000003e8 ffff82d080943b80
>> (XEN)    0000000000000000 ffff83009e827fff ffff83009e827a18 ffff82d08027dd30
>> (XEN)    ffff83009e827a18 ffff82d0802a3626 ffff83009e827a38 ffff82d080270a79
>> (XEN)    0000000000000003 ffff82d08043f858 ffff83009e827a58 ffff82d08021dd74
>> (XEN)    0000000000000206 0000000000000296 ffff83009e827ac8 ffff82d08024db40
>> (XEN)    ffff83009e827aa8 ffff82d000000010 ffff83009e827ad8 ffff83009e827a88
>> (XEN)    ffff83009e827aa8 0000000000000000 ffff83009e827fff 0000000000000000
>> (XEN)    ffff8310f3e00000 0000000000000002 ffff83009e827b08 0000000000000000
>> (XEN)    ffff83009e827af8 ffff82d0802ab1b8 ffff82d080387979 ffff82d08038796d
>> (XEN)    ffff8310f4c08000 0000000000000000 00007cff617d84d7 ffff82d080387a3d
>> (XEN)    0000000000000065 0000000000000000 0000000000000cf9 00000000fffffffe
>> (XEN)    ffff83009e827c70 0000000000000000 0f0f0f0f0f0f0f0f ffff83009e827c88
>> (XEN)    0000000000000000 00000000aa5b7048 00000000aa985950 00000000aa5b7010
>> (XEN)    0000000000000000 ffff83009e827fff 00000000003506e0 0000000d00000000
>> (XEN)    00000000aa5b526b 000000000000e008 0000000000010246 ffff83009e827bb0
>> (XEN) Xen call trace:
>> (XEN)    [<ffff82d080266844>] amd_iommu_read_ioapic_from_ire+0xd0/0x131
>> (XEN)    [<ffff82d0802687a7>] iommu_read_apic_from_ire+0x10/0x12
>> (XEN)    [<ffff82d08027c25b>] io_apic.c#io_apic_read+0x17/0x5f
>> (XEN)    [<ffff82d08027cefb>] __ioapic_read_entry+0x2f/0x55
>> (XEN)    [<ffff82d08027d8c8>] io_apic.c#clear_IO_APIC_pin+0x1a/0xf3
>> (XEN)    [<ffff82d08027d9ce>] io_apic.c#clear_IO_APIC+0x2d/0x60
>> (XEN)    [<ffff82d08027dd30>] disable_IO_APIC+0xd/0x7e
>> (XEN)    [<ffff82d080270a79>] machine_crash_shutdown+0x228/0x292
>> (XEN)    [<ffff82d08021dd74>] kexec_crash+0x3f/0x5b
>> (XEN)    [<ffff82d08024db40>] panic+0x117/0x12f
>> (XEN)    [<ffff82d0802ab1b8>] do_general_protection+0x22b/0x234
>> (XEN)    [<ffff82d080387a3d>] x86_64/entry.S#handle_exception_saved+0x68/0x94
>> (XEN)    [<00000000aa5b526b>] 00000000aa5b526b
>> (XEN)    [<ffff82d0802a3045>] machine_restart+0x1ef/0x2a4
>> (XEN)    [<ffff82d0802a3105>] send_IPI_mask+0/0xc
>> (XEN)    [<ffff82d08023cdaa>] smp_call_function_interrupt+0x95/0xb8
>> (XEN)    [<ffff82d0802a37da>] call_function_interrupt+0x35/0x37
>> (XEN)    [<ffff82d080283fb4>] do_IRQ+0xa7/0x697
>> (XEN)    [<ffff82d0803878ba>] common_interrupt+0x10a/0x120
>> (XEN)    [<ffff82d0802e1bc5>] cpu_idle.c#acpi_idle_do_entry+0xa4/0xb5
>> (XEN)    [<ffff82d0802e20ae>] cpu_idle.c#acpi_processor_idle+0x313/0x590
>> (XEN)    [<ffff82d080274f8c>] domain.c#idle_loop+0xa2/0xb1
>> (XEN) 
>> (XEN) 
>> (XEN) ****************************************
>> (XEN) Panic on CPU 0:
>> (XEN) Assertion 'offset == (val & (INTREMAP_ENTRIES - 1))' failed at 
> iommu_intr.c:567
>> (XEN) ****************************************
>> (XEN) 
>>
>>
>> I think we need a similar adjustment in nmi_shootdown_cpus()
> 
> Yes.  With this fix included, then we successfully transition into the
> crash kernel.  I think it would be best to fold into this patch, given
> its direct relevance.

Will do.

Jan

> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> 
> diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
> index 01e48a1..f9772dc 100644
> --- a/xen/arch/x86/crash.c
> +++ b/xen/arch/x86/crash.c
> @@ -169,15 +169,20 @@ static void nmi_shootdown_cpus(void)
>       */
>      iommu_crash_shutdown();
>  
> -    __stop_this_cpu();
> +    if ( num_online_cpus() > 1 )
> +    {
> +        __stop_this_cpu();
>  
> -    /* This is a bit of a hack due to the problems with the x2apic_enabled
> -     * variable, but we can't do any better without a significant 
> refactoring
> -     * of the APIC code */
> -    x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
> +        /*
> +         * This is a bit of a hack due to the problems with the 
> x2apic_enabled
> +         * variable, but we can't do any better without a significant
> +         * refactoring of the APIC code
> +         */
> +        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
>  
> -    disable_IO_APIC();
> -    hpet_disable();
> +        disable_IO_APIC();
> +        hpet_disable();
> +    }
>  }
>  
>  void machine_crash_shutdown(void)
diff mbox series

Patch

--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -302,23 +302,31 @@  static void stop_this_cpu(void *dummy)
  */
 void smp_send_stop(void)
 {
-    int timeout = 10;
+    unsigned int cpu = smp_processor_id();
 
-    local_irq_disable();
-    fixup_irqs(cpumask_of(smp_processor_id()), 0);
-    local_irq_enable();
-
-    smp_call_function(stop_this_cpu, NULL, 0);
-
-    /* Wait 10ms for all other CPUs to go offline. */
-    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
-        mdelay(1);
-
-    local_irq_disable();
-    disable_IO_APIC();
-    hpet_disable();
-    __stop_this_cpu();
-    local_irq_enable();
+    if ( num_online_cpus() > 1 )
+    {
+        int timeout = 10;
+
+        local_irq_disable();
+        fixup_irqs(cpumask_of(cpu), 0);
+        local_irq_enable();
+
+        smp_call_function(stop_this_cpu, NULL, 0);
+
+        /* Wait 10ms for all other CPUs to go offline. */
+        while ( (num_online_cpus() > 1) && (timeout-- > 0) )
+            mdelay(1);
+    }
+
+    if ( cpu_online(cpu) )
+    {
+        local_irq_disable();
+        disable_IO_APIC();
+        hpet_disable();
+        __stop_this_cpu();
+        local_irq_enable();
+    }
 }
 
 void smp_send_nmi_allbutself(void)