diff mbox series

[RFC,v2,3/3] target/ppc: support single stepping with KVM HV

Message ID 20181121181347.24035-4-farosas@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series target/ppc: single step for KVM HV | expand

Commit Message

Fabiano Rosas Nov. 21, 2018, 6:13 p.m. UTC
The hardware singlestep mechanism in POWER works via a Trace Interrupt
(0xd00) that happens after any instruction executes, whenever MSR_SE =
1 (PowerISA Section 6.5.15 - Trace Interrupt).

However, with kvm_hv, the Trace Interrupt happens inside the guest and
KVM has no visibility of it. Therefore, when the gdbstub uses the
KVM_SET_GUEST_DEBUG ioctl to enable singlestep, KVM simply ignores it.

This patch takes advantage of the Trace Interrupt to perform the step
inside the guest, but uses a breakpoint at the Trace Interrupt handler
to return control to KVM. The exit is treated by KVM as a regular
breakpoint and it returns to the host (and QEMU eventually).

Before signalling GDB, QEMU sets the Next Instruction Pointer to the
instruction following the one being stepped, effectively skipping the
interrupt handler execution and hiding the trace interrupt breakpoint
from GDB.

This approach works with both of GDB's 'scheduler-locking' options
(off, step).

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
---
 target/ppc/kvm.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

Comments

David Gibson Nov. 26, 2018, 7:41 a.m. UTC | #1
On Wed, Nov 21, 2018 at 04:13:47PM -0200, Fabiano Rosas wrote:
> The hardware singlestep mechanism in POWER works via a Trace Interrupt
> (0xd00) that happens after any instruction executes, whenever MSR_SE =
> 1 (PowerISA Section 6.5.15 - Trace Interrupt).
> 
> However, with kvm_hv, the Trace Interrupt happens inside the guest and
> KVM has no visibility of it. Therefore, when the gdbstub uses the
> KVM_SET_GUEST_DEBUG ioctl to enable singlestep, KVM simply ignores it.
> 
> This patch takes advantage of the Trace Interrupt to perform the step
> inside the guest, but uses a breakpoint at the Trace Interrupt handler
> to return control to KVM. The exit is treated by KVM as a regular
> breakpoint and it returns to the host (and QEMU eventually).
> 
> Before signalling GDB, QEMU sets the Next Instruction Pointer to the
> instruction following the one being stepped, effectively skipping the
> interrupt handler execution and hiding the trace interrupt breakpoint
> from GDB.
> 
> This approach works with both of GDB's 'scheduler-locking' options
> (off, step).
> 
> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
> ---
>  target/ppc/kvm.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 9d0b4f1f3f..93c20099e6 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -94,6 +94,7 @@ static int cap_ppc_safe_indirect_branch;
>  static int cap_ppc_nested_kvm_hv;
>  
>  static uint32_t debug_inst_opcode;
> +static target_ulong trace_handler_addr;
>  
>  /* XXX We have a race condition where we actually have a level triggered
>   *     interrupt, but the infrastructure can't expose that yet, so the guest
> @@ -509,6 +510,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
>      kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
>      kvmppc_hw_debug_points_init(cenv);
>  
> +    trace_handler_addr = (cenv->excp_vectors[POWERPC_EXCP_TRACE] |
> +                          AIL_C000_0000_0000_4000_OFFSET);

Effectively caching this as a global variable is pretty horrible.  A
function to make the above calculation when you need it would be
better.

Also, won't the calculation above be wrong if the guest is using the
low AIL value?

> +
>      return ret;
>  }
>  
> @@ -1553,6 +1557,24 @@ void kvm_arch_remove_all_hw_breakpoints(void)
>  
>  void kvm_arch_set_singlestep(CPUState *cs, int enabled)
>  {
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (kvmppc_is_pr(kvm_state)) {

Explicitly testing for KVM PR is generally wrong (except as a
fallback).  Instead you should add a capability flag to KVM which you
use to test the feature's presence in qemu.  That capability can then
be set in HV, but not PR.

> +            return;
> +    }
> +
> +    if (enabled) {
> +        /* MSR_SE = 1 will cause a Trace Interrupt in the guest after
> +         * the next instruction executes. */
> +        env->msr |= (1ULL << MSR_SE);
> +
> +        /* We set a breakpoint at the interrupt handler address so
> +         * that the singlestep will be seen by KVM (this is treated by
> +         * KVM like an ordinary breakpoint) and control is returned to
> +         * QEMU. */
> +        kvm_insert_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
> +    }
>  }
>  
>  void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
> @@ -1594,6 +1616,43 @@ void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
>      }
>  }
>  
> +static int kvm_handle_singlestep(CPUState *cs,
> +                                 struct kvm_debug_exit_arch *arch_info)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    target_ulong msr = env->msr;
> +    uint32_t insn;
> +    int ret = 1;

You never set this to anything other than 1...

> +    int reg;
> +
> +    if (kvmppc_is_pr(kvm_state)) {
> +        return ret;
> +    }
> +
> +    if (arch_info->address == trace_handler_addr) {
> +        cpu_synchronize_state(cs);
> +        kvm_remove_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
> +
> +        cpu_memory_rw_debug(cs, env->spr[SPR_SRR0] - 4, (uint8_t *)&insn,
> +                            sizeof(insn), 0);
> +
> +        /* If the last instruction was a mfmsr, make sure that the
> +         * MSR_SE bit is not set to avoid the guest kernel knowing
> +         * that it is being single-stepped */
> +        if (extract32(insn, 26, 6) == 31 && extract32(insn, 1, 10) == 83) {
> +            reg = extract32(insn, 21, 5);
> +            env->gpr[reg] &= ~(1ULL << MSR_SE);
> +        }

Hm.  What happens if both qemu and the guest itself attempt to single
step at the same time?  How do you distinguish between the cases?

> +
> +        env->nip = env->spr[SPR_SRR0];
> +        env->msr = msr &= ~(1ULL << MSR_SE);

You clear SE after tracing one instruction; is that intentional?

> +        cpu_synchronize_state(cs);

You're effectively aborting the entry to the single step exception
vector; don't you need to set MSR to SRR1, not to the current MSR
value which will have been modified for the singlestep exception entry?

You only need the first call to sync_state.  It sets a flag causing
the state to be written back before returning to the guest.

> +    }
> +
> +    return ret;
> +}
> +
>  static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
>  {
>      CPUState *cs = CPU(cpu);
> @@ -1604,7 +1663,7 @@ static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
>      int flag = 0;
>  
>      if (cs->singlestep_enabled) {
> -        handle = 1;
> +        handle = kvm_handle_singlestep(cs, arch_info);
>      } else if (arch_info->status) {
>          if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
>              if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
Fabiano Rosas Nov. 30, 2018, 8:46 p.m. UTC | #2
David Gibson <david@gibson.dropbear.id.au> writes:

>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -94,6 +94,7 @@ static int cap_ppc_safe_indirect_branch;
>>  static int cap_ppc_nested_kvm_hv;
>>  
>>  static uint32_t debug_inst_opcode;
>> +static target_ulong trace_handler_addr;
>>  
>>  /* XXX We have a race condition where we actually have a level triggered
>>   *     interrupt, but the infrastructure can't expose that yet, so the guest
>> @@ -509,6 +510,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>      kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
>>      kvmppc_hw_debug_points_init(cenv);
>>  
>> +    trace_handler_addr = (cenv->excp_vectors[POWERPC_EXCP_TRACE] |
>> +                          AIL_C000_0000_0000_4000_OFFSET);
>
> Effectively caching this as a global variable is pretty horrible.  A
> function to make the above calculation when you need it would be
> better.
>
> Also, won't the calculation above be wrong if the guest is using the
> low AIL value?

Yes. As you suggested, I'll change this to calculate the offset based on AIL.

>>  void kvm_arch_set_singlestep(CPUState *cs, int enabled)
>>  {
>> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +    CPUPPCState *env = &cpu->env;
>> +
>> +    if (kvmppc_is_pr(kvm_state)) {
>
> Explicitly testing for KVM PR is generally wrong (except as a
> fallback).  Instead you should add a capability flag to KVM which you
> use to test the feature's presence in qemu.  That capability can then
> be set in HV, but not PR.

My apologies, I did read the notice on the code about it. I was trying
to keep things simple for this RFC and forgot to mention that in the
cover letter.

>> +    if (arch_info->address == trace_handler_addr) {
>> +        cpu_synchronize_state(cs);
>> +        kvm_remove_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
>> +
>> +        cpu_memory_rw_debug(cs, env->spr[SPR_SRR0] - 4, (uint8_t *)&insn,
>> +                            sizeof(insn), 0);
>> +
>> +        /* If the last instruction was a mfmsr, make sure that the
>> +         * MSR_SE bit is not set to avoid the guest kernel knowing
>> +         * that it is being single-stepped */
>> +        if (extract32(insn, 26, 6) == 31 && extract32(insn, 1, 10) == 83) {
>> +            reg = extract32(insn, 21, 5);
>> +            env->gpr[reg] &= ~(1ULL << MSR_SE);
>> +        }
>
> Hm.  What happens if both qemu and the guest itself attempt to single
> step at the same time?  How do you distinguish between the cases?

There is currently no distinction being made. Since MSR_SE is only set
for the duration of the step (see my answer below) this mostly just
works and it is possible to single step inside/outside the guest while
the control changes from one debugger to the other as expected. However
there is a race as follows:

If the vcpu being stepped is preempted right after vm enter, with the
breakpoint at 0xd00 already in place *and* the other debugger starts
doing a step, it will cause a vm exit when it shouldn't:

    outside guest                  inside guest
set breakpoint at 0xd00     |
msr_se = 1                  |
vm enter                    |
                            |  msr_se = 1
                            |  exec next instruction
                            |  trace interrupt
                            |  jump to 0xd00
                            |  --- wrong from now on ---
                            |  emulation assist interrupt
                            |  vm exit

One way to avoid this race is by using GDB's `scheduler-locking step`
option which effectively negates the issue by running only the vcpu
being stepped.

Regardless of that workaround, I am exploring the idea of sending the
cpu back into the guest if we happen to break at the trace interrupt
handler while there is no cpu being stepped by QEMU
(cs->singlestep_enabled). That way, a step inside the guest would never
happen before the outside step has finished.

>
>> +
>> +        env->nip = env->spr[SPR_SRR0];
>> +        env->msr = msr &= ~(1ULL << MSR_SE);
>
> You clear SE after tracing one instruction; is that intentional?

Yes, both the kernel and KVM PR do the same:

from arch/powerpc/kernel/traps.c
void single_step_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

->      clear_single_step(regs);
        ...
}

from arch/powerpc/kvm/book3s_pr.c
static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
        ...
        ret = __kvmppc_vcpu_run(kvm_run, vcpu);

->      kvmppc_clear_debug(vcpu);
        ...
}

>
>> +        cpu_synchronize_state(cs);
>
> You're effectively aborting the entry to the single step exception
> vector; don't you need to set MSR to SRR1, not to the current MSR
> value which will have been modified for the singlestep exception entry?

I'm using the old MSR value (before synchronizing) so the effect is the same:

(gdb) p/x $msr
$2 = 0x8000000002803033
(gdb) si
1892            LOAD_REG_ADDR(r11, replay_interrupt_return)
(gdb) p/x $msr
$3 = 0x8000000002803033

Using SRR1 would also work but it would require clearing the bits set by
the interrupt (33:36, 44:47):

(gdb) p/x env->spr[0x01b]
$1 = 0x8000000042803433

I could do the latter, if you prefer.


Thank you for your attention.
Cheers.
David Gibson Dec. 2, 2018, 9:13 a.m. UTC | #3
On Fri, Nov 30, 2018 at 06:46:21PM -0200, Fabiano Rosas wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> >> --- a/target/ppc/kvm.c
> >> +++ b/target/ppc/kvm.c
> >> @@ -94,6 +94,7 @@ static int cap_ppc_safe_indirect_branch;
> >>  static int cap_ppc_nested_kvm_hv;
> >>  
> >>  static uint32_t debug_inst_opcode;
> >> +static target_ulong trace_handler_addr;
> >>  
> >>  /* XXX We have a race condition where we actually have a level triggered
> >>   *     interrupt, but the infrastructure can't expose that yet, so the guest
> >> @@ -509,6 +510,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >>      kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
> >>      kvmppc_hw_debug_points_init(cenv);
> >>  
> >> +    trace_handler_addr = (cenv->excp_vectors[POWERPC_EXCP_TRACE] |
> >> +                          AIL_C000_0000_0000_4000_OFFSET);
> >
> > Effectively caching this as a global variable is pretty horrible.  A
> > function to make the above calculation when you need it would be
> > better.
> >
> > Also, won't the calculation above be wrong if the guest is using the
> > low AIL value?
> 
> Yes. As you suggested, I'll change this to calculate the offset based on AIL.
> 
> >>  void kvm_arch_set_singlestep(CPUState *cs, int enabled)
> >>  {
> >> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +    CPUPPCState *env = &cpu->env;
> >> +
> >> +    if (kvmppc_is_pr(kvm_state)) {
> >
> > Explicitly testing for KVM PR is generally wrong (except as a
> > fallback).  Instead you should add a capability flag to KVM which you
> > use to test the feature's presence in qemu.  That capability can then
> > be set in HV, but not PR.
> 
> My apologies, I did read the notice on the code about it. I was trying
> to keep things simple for this RFC and forgot to mention that in the
> cover letter.

Just put a comment on it explaining that you expect to replace this
before the final version.

> >> +    if (arch_info->address == trace_handler_addr) {
> >> +        cpu_synchronize_state(cs);
> >> +        kvm_remove_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
> >> +
> >> +        cpu_memory_rw_debug(cs, env->spr[SPR_SRR0] - 4, (uint8_t *)&insn,
> >> +                            sizeof(insn), 0);
> >> +
> >> +        /* If the last instruction was a mfmsr, make sure that the
> >> +         * MSR_SE bit is not set to avoid the guest kernel knowing
> >> +         * that it is being single-stepped */
> >> +        if (extract32(insn, 26, 6) == 31 && extract32(insn, 1, 10) == 83) {
> >> +            reg = extract32(insn, 21, 5);
> >> +            env->gpr[reg] &= ~(1ULL << MSR_SE);
> >> +        }
> >
> > Hm.  What happens if both qemu and the guest itself attempt to single
> > step at the same time?  How do you distinguish between the cases?
> 
> There is currently no distinction being made.

This seems incorrect to me.  Basically you're restoring !MSR_SE
unconditionaly when you finish the hypervisor side step, which might
not be correct if the guest is also attempting to single step itself.
AFAICT it should be possible to track what the guest thinks is the
value of MSR_SE and restore that.  If both hypervisor and guest
attempt to single step, I'd expect to have the hypervisor trap first,
then return to the guest's single step vector.

> Since MSR_SE is only set
> for the duration of the step (see my answer below) this mostly just
> works and it is possible to single step inside/outside the guest while
> the control changes from one debugger to the other as expected. However
> there is a race as follows:
> 
> If the vcpu being stepped is preempted right after vm enter, with the
> breakpoint at 0xd00 already in place *and* the other debugger starts
> doing a step, it will cause a vm exit when it shouldn't:
> 
>     outside guest                  inside guest
> set breakpoint at 0xd00     |
> msr_se = 1                  |
> vm enter                    |
>                             |  msr_se = 1
>                             |  exec next instruction
>                             |  trace interrupt
>                             |  jump to 0xd00
>                             |  --- wrong from now on ---
>                             |  emulation assist interrupt
>                             |  vm exit
> 
> One way to avoid this race is by using GDB's `scheduler-locking step`
> option which effectively negates the issue by running only the vcpu
> being stepped.
> 
> Regardless of that workaround, I am exploring the idea of sending the
> cpu back into the guest if we happen to break at the trace interrupt
> handler while there is no cpu being stepped by QEMU
> (cs->singlestep_enabled). That way, a step inside the guest would never
> happen before the outside step has finished.
> 
> >
> >> +
> >> +        env->nip = env->spr[SPR_SRR0];
> >> +        env->msr = msr &= ~(1ULL << MSR_SE);
> >
> > You clear SE after tracing one instruction; is that intentional?
> 
> Yes, both the kernel and KVM PR do the same:
> 
> from arch/powerpc/kernel/traps.c
> void single_step_exception(struct pt_regs *regs)
> {
>         enum ctx_state prev_state = exception_enter();
> 
> ->      clear_single_step(regs);
>         ...
> }
> 
> from arch/powerpc/kvm/book3s_pr.c
> static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> {
>         ...
>         ret = __kvmppc_vcpu_run(kvm_run, vcpu);
> 
> ->      kvmppc_clear_debug(vcpu);
>         ...
> }
> 
> >
> >> +        cpu_synchronize_state(cs);
> >
> > You're effectively aborting the entry to the single step exception
> > vector; don't you need to set MSR to SRR1, not to the current MSR
> > value which will have been modified for the singlestep exception entry?
> 
> I'm using the old MSR value (before synchronizing) so the effect is the same:
> 
> (gdb) p/x $msr
> $2 = 0x8000000002803033
> (gdb) si
> 1892            LOAD_REG_ADDR(r11, replay_interrupt_return)
> (gdb) p/x $msr
> $3 = 0x8000000002803033
> 
> Using SRR1 would also work but it would require clearing the bits set by
> the interrupt (33:36, 44:47):
> 
> (gdb) p/x env->spr[0x01b]
> $1 = 0x8000000042803433
> 
> I could do the latter, if you prefer.

I think that's better - I don't think it's safe to assume that you're
*not* synchronized with KVM.
Fabiano Rosas Dec. 10, 2018, 12:52 p.m. UTC | #4
David Gibson <david@gibson.dropbear.id.au> writes:

>> >> +    if (arch_info->address == trace_handler_addr) {
>> >> +        cpu_synchronize_state(cs);
>> >> +        kvm_remove_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
>> >> +
>> >> +        cpu_memory_rw_debug(cs, env->spr[SPR_SRR0] - 4, (uint8_t *)&insn,
>> >> +                            sizeof(insn), 0);
>> >> +
>> >> +        /* If the last instruction was a mfmsr, make sure that the
>> >> +         * MSR_SE bit is not set to avoid the guest kernel knowing
>> >> +         * that it is being single-stepped */
>> >> +        if (extract32(insn, 26, 6) == 31 && extract32(insn, 1, 10) == 83) {
>> >> +            reg = extract32(insn, 21, 5);
>> >> +            env->gpr[reg] &= ~(1ULL << MSR_SE);
>> >> +        }
>> >
>> > Hm.  What happens if both qemu and the guest itself attempt to single
>> > step at the same time?  How do you distinguish between the cases?
>> 
>> There is currently no distinction being made.
>
> This seems incorrect to me.  Basically you're restoring !MSR_SE
> unconditionaly when you finish the hypervisor side step, which might
> not be correct if the guest is also attempting to single step itself.
> AFAICT it should be possible to track what the guest thinks is the
> value of MSR_SE and restore that.

I was skeptical of being able to do both single steps at the same time
but I found a way to reproduce it by stepping over an rfid when
SRR1_SE is already 1.

>  If both hypervisor and guest
> attempt to single step, I'd expect to have the hypervisor trap first,
> then return to the guest's single step vector.

With the fix you suggest above, QEMU will be able to single step the
interrupt handler during the guest's single step. That means I'll have
to restore the SRRs as well so that the handler returns to the correct
place.

>> I could do the latter, if you prefer.
>
> I think that's better - I don't think it's safe to assume that you're
> *not* synchronized with KVM.

Ok, that's better indeed.

Thanks for the comments, I'll prepare another version with the
appropriate corrections.
David Gibson Dec. 11, 2018, 1:20 a.m. UTC | #5
On Mon, Dec 10, 2018 at 10:52:18AM -0200, Fabiano Rosas wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> >> >> +    if (arch_info->address == trace_handler_addr) {
> >> >> +        cpu_synchronize_state(cs);
> >> >> +        kvm_remove_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
> >> >> +
> >> >> +        cpu_memory_rw_debug(cs, env->spr[SPR_SRR0] - 4, (uint8_t *)&insn,
> >> >> +                            sizeof(insn), 0);
> >> >> +
> >> >> +        /* If the last instruction was a mfmsr, make sure that the
> >> >> +         * MSR_SE bit is not set to avoid the guest kernel knowing
> >> >> +         * that it is being single-stepped */
> >> >> +        if (extract32(insn, 26, 6) == 31 && extract32(insn, 1, 10) == 83) {
> >> >> +            reg = extract32(insn, 21, 5);
> >> >> +            env->gpr[reg] &= ~(1ULL << MSR_SE);
> >> >> +        }
> >> >
> >> > Hm.  What happens if both qemu and the guest itself attempt to single
> >> > step at the same time?  How do you distinguish between the cases?
> >> 
> >> There is currently no distinction being made.
> >
> > This seems incorrect to me.  Basically you're restoring !MSR_SE
> > unconditionaly when you finish the hypervisor side step, which might
> > not be correct if the guest is also attempting to single step itself.
> > AFAICT it should be possible to track what the guest thinks is the
> > value of MSR_SE and restore that.
> 
> I was skeptical of being able to do both single steps at the same time
> but I found a way to reproduce it by stepping over an rfid when
> SRR1_SE is already 1.
> 
> >  If both hypervisor and guest
> > attempt to single step, I'd expect to have the hypervisor trap first,
> > then return to the guest's single step vector.
> 
> With the fix you suggest above, QEMU will be able to single step the
> interrupt handler during the guest's single step. That means I'll have
> to restore the SRRs as well so that the handler returns to the correct
> place.
> 
> >> I could do the latter, if you prefer.
> >
> > I think that's better - I don't think it's safe to assume that you're
> > *not* synchronized with KVM.
> 
> Ok, that's better indeed.
> 
> Thanks for the comments, I'll prepare another version with the
> appropriate corrections.

Ok, great.
diff mbox series

Patch

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 9d0b4f1f3f..93c20099e6 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -94,6 +94,7 @@  static int cap_ppc_safe_indirect_branch;
 static int cap_ppc_nested_kvm_hv;
 
 static uint32_t debug_inst_opcode;
+static target_ulong trace_handler_addr;
 
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -509,6 +510,9 @@  int kvm_arch_init_vcpu(CPUState *cs)
     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
     kvmppc_hw_debug_points_init(cenv);
 
+    trace_handler_addr = (cenv->excp_vectors[POWERPC_EXCP_TRACE] |
+                          AIL_C000_0000_0000_4000_OFFSET);
+
     return ret;
 }
 
@@ -1553,6 +1557,24 @@  void kvm_arch_remove_all_hw_breakpoints(void)
 
 void kvm_arch_set_singlestep(CPUState *cs, int enabled)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    if (kvmppc_is_pr(kvm_state)) {
+            return;
+    }
+
+    if (enabled) {
+        /* MSR_SE = 1 will cause a Trace Interrupt in the guest after
+         * the next instruction executes. */
+        env->msr |= (1ULL << MSR_SE);
+
+        /* We set a breakpoint at the interrupt handler address so
+         * that the singlestep will be seen by KVM (this is treated by
+         * KVM like an ordinary breakpoint) and control is returned to
+         * QEMU. */
+        kvm_insert_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
+    }
 }
 
 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
@@ -1594,6 +1616,43 @@  void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
     }
 }
 
+static int kvm_handle_singlestep(CPUState *cs,
+                                 struct kvm_debug_exit_arch *arch_info)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    target_ulong msr = env->msr;
+    uint32_t insn;
+    int ret = 1;
+    int reg;
+
+    if (kvmppc_is_pr(kvm_state)) {
+        return ret;
+    }
+
+    if (arch_info->address == trace_handler_addr) {
+        cpu_synchronize_state(cs);
+        kvm_remove_breakpoint(cs, trace_handler_addr, 4, GDB_BREAKPOINT_SW);
+
+        cpu_memory_rw_debug(cs, env->spr[SPR_SRR0] - 4, (uint8_t *)&insn,
+                            sizeof(insn), 0);
+
+        /* If the last instruction was a mfmsr, make sure that the
+         * MSR_SE bit is not set to avoid the guest kernel knowing
+         * that it is being single-stepped */
+        if (extract32(insn, 26, 6) == 31 && extract32(insn, 1, 10) == 83) {
+            reg = extract32(insn, 21, 5);
+            env->gpr[reg] &= ~(1ULL << MSR_SE);
+        }
+
+        env->nip = env->spr[SPR_SRR0];
+        env->msr = msr &= ~(1ULL << MSR_SE);
+        cpu_synchronize_state(cs);
+    }
+
+    return ret;
+}
+
 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
 {
     CPUState *cs = CPU(cpu);
@@ -1604,7 +1663,7 @@  static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
     int flag = 0;
 
     if (cs->singlestep_enabled) {
-        handle = 1;
+        handle = kvm_handle_singlestep(cs, arch_info);
     } else if (arch_info->status) {
         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {