diff mbox

[1/2] kvm-unit-tests: Add a func to run instruction in emulator

Message ID 1371654057-17169-1-git-send-email-yzt356@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arthur Chunqi Li June 19, 2013, 3 p.m. UTC
Add a function trap_emulator to run an instruction in the emulator.
Set inregs first (%rax is invalid because it is used as the return
address), put the instruction code in alt_insn and call the function
with alt_insn_length. Get the results in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 mode change 100644 => 100755 x86/emulator.c

Comments

Arthur Chunqi Li June 19, 2013, 3:07 p.m. UTC | #1
Hi Gleb,
This version can set %rsp before trapping into the emulator, because
insn_page and alt_insn_page are statically defined and their position
relative to (save) is fixed during execution.

With this approach, the test_mmx_movq_mf test case needs to pre-define
its own stack; that change is in the next patch.

In this version, insn_ram is initially mapped to insn_page and then
every call into insn_page/alt_insn_page is made via insn_ram. This trick
works well, but I don't know why my previous version caused an error.

Arthur.
On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 110 insertions(+)
>  mode change 100644 => 100755 x86/emulator.c
>
> diff --git a/x86/emulator.c b/x86/emulator.c
> old mode 100644
> new mode 100755
> index 96576e5..48d45c8
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,15 @@ int fails, tests;
>
>  static int exceptions;
>
> +struct regs {
> +       u64 rax, rbx, rcx, rdx;
> +       u64 rsi, rdi, rsp, rbp;
> +       u64 r8, r9, r10, r11;
> +       u64 r12, r13, r14, r15;
> +       u64 rip, rflags;
> +};
> +struct regs inregs, outregs, save;
> +
>  void report(const char *name, int result)
>  {
>         ++tests;
> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>
> +#define INSN_SAVE                      \
> +       "ret\n\t"                               \
> +       "pushf\n\t"                     \
> +       "push 136+save \n\t"            \
> +       "popf \n\t"                     \
> +       "xchg %rax, 0+save \n\t"                \
> +       "xchg %rbx, 8+save \n\t"                \
> +       "xchg %rcx, 16+save \n\t"               \
> +       "xchg %rdx, 24+save \n\t"               \
> +       "xchg %rsi, 32+save \n\t"               \
> +       "xchg %rdi, 40+save \n\t"               \
> +       "xchg %rsp, 48+save \n\t"               \
> +       "xchg %rbp, 56+save \n\t"               \
> +       "xchg %r8, 64+save \n\t"                \
> +       "xchg %r9, 72+save \n\t"                \
> +       "xchg %r10, 80+save \n\t"               \
> +       "xchg %r11, 88+save \n\t"               \
> +       "xchg %r12, 96+save \n\t"               \
> +       "xchg %r13, 104+save \n\t"              \
> +       "xchg %r14, 112+save \n\t"              \
> +       "xchg %r15, 120+save \n\t"              \
> +
> +#define INSN_RESTORE                   \
> +       "xchg %rax, 0+save \n\t"                \
> +       "xchg %rbx, 8+save \n\t"                \
> +       "xchg %rcx, 16+save \n\t"               \
> +       "xchg %rdx, 24+save \n\t"               \
> +       "xchg %rsi, 32+save \n\t"               \
> +       "xchg %rdi, 40+save \n\t"               \
> +       "xchg %rsp, 48+save \n\t"               \
> +       "xchg %rbp, 56+save \n\t"               \
> +       "xchg %r8, 64+save \n\t"                \
> +       "xchg %r9, 72+save \n\t"                \
> +       "xchg %r10, 80+save \n\t"               \
> +       "xchg %r11, 88+save \n\t"               \
> +       "xchg %r12, 96+save \n\t"               \
> +       "xchg %r13, 104+save \n\t"              \
> +       "xchg %r14, 112+save \n\t"              \
> +       "xchg %r15, 120+save \n\t"              \
> +       "pushf \n\t"                    \
> +       "pop 136+save \n\t"             \
> +       "popf \n\t"                     \
> +       "ret \n\t"                              \
> +
> +#define INSN_TRAP                      \
> +       "in  (%dx),%al\n\t"                     \
> +       ". = . + 31\n\t"                        \
> +
> +asm(
> +       ".align 4096\n\t"
> +       "insn_page:\n\t"
> +       INSN_SAVE
> +       "test_insn:\n\t"
> +       INSN_TRAP
> +       "test_insn_end:\n\t"
> +       INSN_RESTORE
> +       "insn_page_end:\n\t"
> +       ".align 4096\n\t"
> +
> +       "alt_insn_page:\n\t"
> +       INSN_SAVE
> +       "alt_test_insn:\n\t"
> +       INSN_TRAP
> +       "alt_test_insn_end:\n\t"
> +       INSN_RESTORE
> +       "alt_insn_page_end:\n\t"
> +       ".align 4096\n\t"
> +);
> +
> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> +{
> +       ulong *cr3 = (ulong *)read_cr3();
> +       void *insn_ram;
> +       int i;
> +       extern u8 insn_page[], test_insn[], test_insn_end[];
> +       extern u8 alt_insn_page[], alt_test_insn[];
> +
> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
> +       for (i=1; i<test_insn_end - test_insn; i++)
> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
> +       for (i=0; i<alt_insn_length; i++)
> +               alt_test_insn[i] = alt_insn[i];
> +       for(;i<test_insn_end - test_insn; i++)
> +               alt_test_insn[i] = 0x90; // nop
> +       save = inregs;
> +
> +       // Load the code TLB with insn_page, but point the page tables at
> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +       // This will make the CPU trap on the insn_page instruction but the
> +       // hypervisor will see alt_insn_page.
> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +       invlpg(insn_ram);
> +       // Load code TLB
> +       asm volatile("call *%0" : : "r"(insn_ram));
> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +       // Trap, let hypervisor emulate at alt_insn_page
> +       asm volatile("call *%0": : "r"(insn_ram+1));
> +
> +       outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> --
> 1.7.9.5
>



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 19, 2013, 4:03 p.m. UTC | #2
On Wed, Jun 19, 2013 at 11:07:18PM +0800, ??? <Arthur Chunqi Li> wrote:
> Hi Gleb,
> This version can set %rsp before trapping into emulator, because
> insn_page and alt_insn_page is statically defined and their relative
> position to (save) is fixed during execution.
> 
The position of the code is not fixed during execution since you execute
it from a virtual address obtained dynamically by vmap() and the address
is definitely different from the one the code was compiled for, but if
you look at the code that the compiler actually produces you will see that
it uses an absolute address to access "save" and this is why it works. I
wonder why the compiler decided to use an absolute address this time, Paolo?

> In this way, test case of test_mmx_movq_mf needs to pre-define its own
> stack, this change is in the next patch.
> 
> In this version, insn_ram is initially mapped to insn_page and them
> each call to insn_page/alt_insn_page are all via insn_ram. This trick
> runs well but I don't know why my previous version causes error.
> 
Because the previous version tried to use install_page() on a region
mapped with large pages, and the function does not know how to handle that.

> Arthur.
> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> > Add a function trap_emulator to run an instruction in emulator.
> > Set inregs first (%rax is invalid because it is used as return
> > address), put instruction codec in alt_insn and call func with
> > alt_insn_length. Get results in outregs.
> >
> > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> > ---
> >  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 110 insertions(+)
> >  mode change 100644 => 100755 x86/emulator.c
> >
> > diff --git a/x86/emulator.c b/x86/emulator.c
> > old mode 100644
> > new mode 100755
> > index 96576e5..48d45c8
> > --- a/x86/emulator.c
> > +++ b/x86/emulator.c
> > @@ -11,6 +11,15 @@ int fails, tests;
> >
> >  static int exceptions;
> >
> > +struct regs {
> > +       u64 rax, rbx, rcx, rdx;
> > +       u64 rsi, rdi, rsp, rbp;
> > +       u64 r8, r9, r10, r11;
> > +       u64 r12, r13, r14, r15;
> > +       u64 rip, rflags;
> > +};
> > +struct regs inregs, outregs, save;
> > +
> >  void report(const char *name, int result)
> >  {
> >         ++tests;
> > @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
> >      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >  }
> >
> > +#define INSN_SAVE                      \
> > +       "ret\n\t"                               \
> > +       "pushf\n\t"                     \
> > +       "push 136+save \n\t"            \
> > +       "popf \n\t"                     \
> > +       "xchg %rax, 0+save \n\t"                \
> > +       "xchg %rbx, 8+save \n\t"                \
> > +       "xchg %rcx, 16+save \n\t"               \
> > +       "xchg %rdx, 24+save \n\t"               \
> > +       "xchg %rsi, 32+save \n\t"               \
> > +       "xchg %rdi, 40+save \n\t"               \
> > +       "xchg %rsp, 48+save \n\t"               \
> > +       "xchg %rbp, 56+save \n\t"               \
> > +       "xchg %r8, 64+save \n\t"                \
> > +       "xchg %r9, 72+save \n\t"                \
> > +       "xchg %r10, 80+save \n\t"               \
> > +       "xchg %r11, 88+save \n\t"               \
> > +       "xchg %r12, 96+save \n\t"               \
> > +       "xchg %r13, 104+save \n\t"              \
> > +       "xchg %r14, 112+save \n\t"              \
> > +       "xchg %r15, 120+save \n\t"              \
> > +
> > +#define INSN_RESTORE                   \
> > +       "xchg %rax, 0+save \n\t"                \
> > +       "xchg %rbx, 8+save \n\t"                \
> > +       "xchg %rcx, 16+save \n\t"               \
> > +       "xchg %rdx, 24+save \n\t"               \
> > +       "xchg %rsi, 32+save \n\t"               \
> > +       "xchg %rdi, 40+save \n\t"               \
> > +       "xchg %rsp, 48+save \n\t"               \
> > +       "xchg %rbp, 56+save \n\t"               \
> > +       "xchg %r8, 64+save \n\t"                \
> > +       "xchg %r9, 72+save \n\t"                \
> > +       "xchg %r10, 80+save \n\t"               \
> > +       "xchg %r11, 88+save \n\t"               \
> > +       "xchg %r12, 96+save \n\t"               \
> > +       "xchg %r13, 104+save \n\t"              \
> > +       "xchg %r14, 112+save \n\t"              \
> > +       "xchg %r15, 120+save \n\t"              \
> > +       "pushf \n\t"                    \
> > +       "pop 136+save \n\t"             \
> > +       "popf \n\t"                     \
> > +       "ret \n\t"                              \
> > +
> > +#define INSN_TRAP                      \
> > +       "in  (%dx),%al\n\t"                     \
> > +       ". = . + 31\n\t"                        \
> > +
> > +asm(
> > +       ".align 4096\n\t"
> > +       "insn_page:\n\t"
> > +       INSN_SAVE
> > +       "test_insn:\n\t"
> > +       INSN_TRAP
> > +       "test_insn_end:\n\t"
> > +       INSN_RESTORE
> > +       "insn_page_end:\n\t"
> > +       ".align 4096\n\t"
> > +
> > +       "alt_insn_page:\n\t"
> > +       INSN_SAVE
> > +       "alt_test_insn:\n\t"
> > +       INSN_TRAP
> > +       "alt_test_insn_end:\n\t"
> > +       INSN_RESTORE
> > +       "alt_insn_page_end:\n\t"
> > +       ".align 4096\n\t"
> > +);
> > +
> > +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> > +{
> > +       ulong *cr3 = (ulong *)read_cr3();
> > +       void *insn_ram;
> > +       int i;
> > +       extern u8 insn_page[], test_insn[], test_insn_end[];
> > +       extern u8 alt_insn_page[], alt_test_insn[];
> > +
> > +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
> > +       for (i=1; i<test_insn_end - test_insn; i++)
> > +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
> > +       for (i=0; i<alt_insn_length; i++)
> > +               alt_test_insn[i] = alt_insn[i];
> > +       for(;i<test_insn_end - test_insn; i++)
> > +               alt_test_insn[i] = 0x90; // nop
> > +       save = inregs;
> > +
> > +       // Load the code TLB with insn_page, but point the page tables at
> > +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> > +       // This will make the CPU trap on the insn_page instruction but the
> > +       // hypervisor will see alt_insn_page.
> > +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> > +       invlpg(insn_ram);
> > +       // Load code TLB
> > +       asm volatile("call *%0" : : "r"(insn_ram));
> > +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> > +       // Trap, let hypervisor emulate at alt_insn_page
> > +       asm volatile("call *%0": : "r"(insn_ram+1));
> > +
> > +       outregs = save;
> > +}
> > +
> >  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >  {
> >      ++exceptions;
> > --
> > 1.7.9.5
> >
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arthur Chunqi Li June 19, 2013, 5:48 p.m. UTC | #3
On 2013-6-20, at 0:03, Gleb Natapov <gleb@redhat.com> wrote:

> On Wed, Jun 19, 2013 at 11:07:18PM +0800, ??? <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> This version can set %rsp before trapping into emulator, because
>> insn_page and alt_insn_page is statically defined and their relative
>> position to (save) is fixed during execution.
> The position of the code is not fixed during execution since you execute
> it from a virtual address obtained dynamically by vmap() and the address
> is definitely different from the one the code was compiled for, but if
> you look at the code that compile actually produce you will see that it
> uses absolute address to access "save" and this is why it works. I
> wounder why compiler decided to use absolute address this time, Paolo?
> 
>> In this way, test case of test_mmx_movq_mf needs to pre-define its own
>> stack, this change is in the next patch.
>> 
>> In this version, insn_ram is initially mapped to insn_page and them
>> each call to insn_page/alt_insn_page are all via insn_ram. This trick
>> runs well but I don't know why my previous version causes error.
> Because previous version tried to use install_page() on a large page
> mapped region and the function does not know how to handle that.
I don't quite understand what you mean here. What is the difference between a large page and a 4k page in this test case? Maybe I don't understand how install_pte() differs between 4k pages and 2m pages.
> 
>> Arthur.
>> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
>>> Add a function trap_emulator to run an instruction in emulator.
>>> Set inregs first (%rax is invalid because it is used as return
>>> address), put instruction codec in alt_insn and call func with
>>> alt_insn_length. Get results in outregs.
>>> 
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>> x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 110 insertions(+)
>>> mode change 100644 => 100755 x86/emulator.c
>>> 
>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>> old mode 100644
>>> new mode 100755
>>> index 96576e5..48d45c8
>>> --- a/x86/emulator.c
>>> +++ b/x86/emulator.c
>>> @@ -11,6 +11,15 @@ int fails, tests;
>>> 
>>> static int exceptions;
>>> 
>>> +struct regs {
>>> +       u64 rax, rbx, rcx, rdx;
>>> +       u64 rsi, rdi, rsp, rbp;
>>> +       u64 r8, r9, r10, r11;
>>> +       u64 r12, r13, r14, r15;
>>> +       u64 rip, rflags;
>>> +};
>>> +struct regs inregs, outregs, save;
>>> +
>>> void report(const char *name, int result)
>>> {
>>>       ++tests;
>>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>>>    report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>> }
>>> 
>>> +#define INSN_SAVE                      \
>>> +       "ret\n\t"                               \
>>> +       "pushf\n\t"                     \
>>> +       "push 136+save \n\t"            \
>>> +       "popf \n\t"                     \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +
>>> +#define INSN_RESTORE                   \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +       "pushf \n\t"                    \
>>> +       "pop 136+save \n\t"             \
>>> +       "popf \n\t"                     \
>>> +       "ret \n\t"                              \
>>> +
>>> +#define INSN_TRAP                      \
>>> +       "in  (%dx),%al\n\t"                     \
>>> +       ". = . + 31\n\t"                        \
>>> +
>>> +asm(
>>> +       ".align 4096\n\t"
>>> +       "insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +
>>> +       "alt_insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "alt_test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "alt_test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "alt_insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +);
>>> +
>>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>>> +{
>>> +       ulong *cr3 = (ulong *)read_cr3();
>>> +       void *insn_ram;
>>> +       int i;
>>> +       extern u8 insn_page[], test_insn[], test_insn_end[];
>>> +       extern u8 alt_insn_page[], alt_test_insn[];
>>> +
>>> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
>>> +       for (i=1; i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
>>> +       for (i=0; i<alt_insn_length; i++)
>>> +               alt_test_insn[i] = alt_insn[i];
>>> +       for(;i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = 0x90; // nop
>>> +       save = inregs;
>>> +
>>> +       // Load the code TLB with insn_page, but point the page tables at
>>> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>> +       // This will make the CPU trap on the insn_page instruction but the
>>> +       // hypervisor will see alt_insn_page.
>>> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>> +       invlpg(insn_ram);
>>> +       // Load code TLB
>>> +       asm volatile("call *%0" : : "r"(insn_ram));
>>> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>> +       // Trap, let hypervisor emulate at alt_insn_page
>>> +       asm volatile("call *%0": : "r"(insn_ram+1));
>>> +
>>> +       outregs = save;
>>> +}
>>> +
>>> static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>> {
>>>    ++exceptions;
>>> --
>>> 1.7.9.5
>> 
>> 
>> 
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
> 
> --
>           Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 20, 2013, 5:42 a.m. UTC | #4
On Thu, Jun 20, 2013 at 01:48:39AM +0800, Gmail wrote:
> 
> ? 2013-6-20?0:03?Gleb Natapov <gleb@redhat.com> ???
> 
> > On Wed, Jun 19, 2013 at 11:07:18PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> Hi Gleb,
> >> This version can set %rsp before trapping into emulator, because
> >> insn_page and alt_insn_page is statically defined and their relative
> >> position to (save) is fixed during execution.
> > The position of the code is not fixed during execution since you execute
> > it from a virtual address obtained dynamically by vmap() and the address
> > is definitely different from the one the code was compiled for, but if
> > you look at the code that compile actually produce you will see that it
> > uses absolute address to access "save" and this is why it works. I
> > wounder why compiler decided to use absolute address this time, Paolo?
> > 
> >> In this way, test case of test_mmx_movq_mf needs to pre-define its own
> >> stack, this change is in the next patch.
> >> 
> >> In this version, insn_ram is initially mapped to insn_page and them
> >> each call to insn_page/alt_insn_page are all via insn_ram. This trick
> >> runs well but I don't know why my previous version causes error.
> > Because previous version tried to use install_page() on a large page
> > mapped region and the function does not know how to handle that.
> I don't quite understand what you mean here. What is the differences between large page and 4k page in this test case?
The test assumes a 4k page size.

> Maybe I don't understand the differences of install_pte() with 4k page and 2m pages.
Maybe. You cannot install a 4k page in place of a 2m page before breaking
the latter into 512 4k pages.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 20, 2013, 8:29 a.m. UTC | #5
Il 19/06/2013 18:03, Gleb Natapov ha scritto:
> On Wed, Jun 19, 2013 at 11:07:18PM +0800, ??? <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> This version can set %rsp before trapping into emulator, because
>> insn_page and alt_insn_page is statically defined and their relative
>> position to (save) is fixed during execution.
>>
> The position of the code is not fixed during execution since you execute
> it from a virtual address obtained dynamically by vmap() and the address
> is definitely different from the one the code was compiled for, but if
> you look at the code that compile actually produce you will see that it
> uses absolute address to access "save" and this is why it works. I
> wounder why compiler decided to use absolute address this time, Paolo?

Because he's using assembly with operands that he wrote himself.  Before
he was using "m" and the compiler decided to express the memory operand
as "save(%rip)".

The assembler then emits different opcodes (of course) and also
different relocations.  In the current code, it tells the linker to
place an absolute address.  In the previous one, it tells the linker to
place a delta from %rip.

Paolo

>> In this way, test case of test_mmx_movq_mf needs to pre-define its own
>> stack, this change is in the next patch.
>>
>> In this version, insn_ram is initially mapped to insn_page and them
>> each call to insn_page/alt_insn_page are all via insn_ram. This trick
>> runs well but I don't know why my previous version causes error.
>>
> Because previous version tried to use install_page() on a large page
> mapped region and the function does not know how to handle that.
> 
>> Arthur.
>> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
>>> Add a function trap_emulator to run an instruction in emulator.
>>> Set inregs first (%rax is invalid because it is used as return
>>> address), put instruction codec in alt_insn and call func with
>>> alt_insn_length. Get results in outregs.
>>>
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 110 insertions(+)
>>>  mode change 100644 => 100755 x86/emulator.c
>>>
>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>> old mode 100644
>>> new mode 100755
>>> index 96576e5..48d45c8
>>> --- a/x86/emulator.c
>>> +++ b/x86/emulator.c
>>> @@ -11,6 +11,15 @@ int fails, tests;
>>>
>>>  static int exceptions;
>>>
>>> +struct regs {
>>> +       u64 rax, rbx, rcx, rdx;
>>> +       u64 rsi, rdi, rsp, rbp;
>>> +       u64 r8, r9, r10, r11;
>>> +       u64 r12, r13, r14, r15;
>>> +       u64 rip, rflags;
>>> +};
>>> +struct regs inregs, outregs, save;
>>> +
>>>  void report(const char *name, int result)
>>>  {
>>>         ++tests;
>>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>>  }
>>>
>>> +#define INSN_SAVE                      \
>>> +       "ret\n\t"                               \
>>> +       "pushf\n\t"                     \
>>> +       "push 136+save \n\t"            \
>>> +       "popf \n\t"                     \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +
>>> +#define INSN_RESTORE                   \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +       "pushf \n\t"                    \
>>> +       "pop 136+save \n\t"             \
>>> +       "popf \n\t"                     \
>>> +       "ret \n\t"                              \
>>> +
>>> +#define INSN_TRAP                      \
>>> +       "in  (%dx),%al\n\t"                     \
>>> +       ". = . + 31\n\t"                        \
>>> +
>>> +asm(
>>> +       ".align 4096\n\t"
>>> +       "insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +
>>> +       "alt_insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "alt_test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "alt_test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "alt_insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +);
>>> +
>>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>>> +{
>>> +       ulong *cr3 = (ulong *)read_cr3();
>>> +       void *insn_ram;
>>> +       int i;
>>> +       extern u8 insn_page[], test_insn[], test_insn_end[];
>>> +       extern u8 alt_insn_page[], alt_test_insn[];
>>> +
>>> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
>>> +       for (i=1; i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
>>> +       for (i=0; i<alt_insn_length; i++)
>>> +               alt_test_insn[i] = alt_insn[i];
>>> +       for(;i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = 0x90; // nop
>>> +       save = inregs;
>>> +
>>> +       // Load the code TLB with insn_page, but point the page tables at
>>> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>> +       // This will make the CPU trap on the insn_page instruction but the
>>> +       // hypervisor will see alt_insn_page.
>>> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>> +       invlpg(insn_ram);
>>> +       // Load code TLB
>>> +       asm volatile("call *%0" : : "r"(insn_ram));
>>> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>> +       // Trap, let hypervisor emulate at alt_insn_page
>>> +       asm volatile("call *%0": : "r"(insn_ram+1));
>>> +
>>> +       outregs = save;
>>> +}
>>> +
>>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>>  {
>>>      ++exceptions;
>>> --
>>> 1.7.9.5
>>>
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 20, 2013, 8:31 a.m. UTC | #6
On Thu, Jun 20, 2013 at 10:29:42AM +0200, Paolo Bonzini wrote:
> Il 19/06/2013 18:03, Gleb Natapov ha scritto:
> > On Wed, Jun 19, 2013 at 11:07:18PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> Hi Gleb,
> >> This version can set %rsp before trapping into emulator, because
> >> insn_page and alt_insn_page is statically defined and their relative
> >> position to (save) is fixed during execution.
> >>
> > The position of the code is not fixed during execution since you execute
> > it from a virtual address obtained dynamically by vmap() and the address
> > is definitely different from the one the code was compiled for, but if
> > you look at the code that the compiler actually produces you will see that
> > it uses an absolute address to access "save" and this is why it works. I
> > wonder why the compiler decided to use an absolute address this time, Paolo?
> 
> Because he's using assembly with operands that he wrote himself.  Before
> he was using "m" and the compiler decided to express the memory operand
> as "save(%rip)".
> 
> The assembler then emits different opcodes (of course) and also
> different relocations.  In the current code, it tells the linker to
> place an absolute address.  In the previous one, it tells the linker to
> place a delta from %rip.
> 
Heh, makes sense. OK, so we will go with that. I will comment on the patch
itself.

> Paolo
> 
> >> In this way, test case of test_mmx_movq_mf needs to pre-define its own
> >> stack, this change is in the next patch.
> >>
> >> In this version, insn_ram is initially mapped to insn_page and then
> >> all calls to insn_page/alt_insn_page go through insn_ram. This trick
> >> works well, but I don't know why my previous version caused an error.
> >>
> > > Because the previous version tried to use install_page() on a region
> > > mapped with large pages, and the function does not know how to handle that.
> > 
> >> Arthur.
> >> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> >>> Add a function trap_emulator to run an instruction in emulator.
> >>> Set inregs first (%rax is invalid because it is used as return
> >>> address), put instruction codec in alt_insn and call func with
> >>> alt_insn_length. Get results in outregs.
> >>>
> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >>> ---
> >>>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  1 file changed, 110 insertions(+)
> >>>  mode change 100644 => 100755 x86/emulator.c
> >>>
> >>> diff --git a/x86/emulator.c b/x86/emulator.c
> >>> old mode 100644
> >>> new mode 100755
> >>> index 96576e5..48d45c8
> >>> --- a/x86/emulator.c
> >>> +++ b/x86/emulator.c
> >>> @@ -11,6 +11,15 @@ int fails, tests;
> >>>
> >>>  static int exceptions;
> >>>
> >>> +struct regs {
> >>> +       u64 rax, rbx, rcx, rdx;
> >>> +       u64 rsi, rdi, rsp, rbp;
> >>> +       u64 r8, r9, r10, r11;
> >>> +       u64 r12, r13, r14, r15;
> >>> +       u64 rip, rflags;
> >>> +};
> >>> +struct regs inregs, outregs, save;
> >>> +
> >>>  void report(const char *name, int result)
> >>>  {
> >>>         ++tests;
> >>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >>>  }
> >>>
> >>> +#define INSN_SAVE                      \
> >>> +       "ret\n\t"                               \
> >>> +       "pushf\n\t"                     \
> >>> +       "push 136+save \n\t"            \
> >>> +       "popf \n\t"                     \
> >>> +       "xchg %rax, 0+save \n\t"                \
> >>> +       "xchg %rbx, 8+save \n\t"                \
> >>> +       "xchg %rcx, 16+save \n\t"               \
> >>> +       "xchg %rdx, 24+save \n\t"               \
> >>> +       "xchg %rsi, 32+save \n\t"               \
> >>> +       "xchg %rdi, 40+save \n\t"               \
> >>> +       "xchg %rsp, 48+save \n\t"               \
> >>> +       "xchg %rbp, 56+save \n\t"               \
> >>> +       "xchg %r8, 64+save \n\t"                \
> >>> +       "xchg %r9, 72+save \n\t"                \
> >>> +       "xchg %r10, 80+save \n\t"               \
> >>> +       "xchg %r11, 88+save \n\t"               \
> >>> +       "xchg %r12, 96+save \n\t"               \
> >>> +       "xchg %r13, 104+save \n\t"              \
> >>> +       "xchg %r14, 112+save \n\t"              \
> >>> +       "xchg %r15, 120+save \n\t"              \
> >>> +
> >>> +#define INSN_RESTORE                   \
> >>> +       "xchg %rax, 0+save \n\t"                \
> >>> +       "xchg %rbx, 8+save \n\t"                \
> >>> +       "xchg %rcx, 16+save \n\t"               \
> >>> +       "xchg %rdx, 24+save \n\t"               \
> >>> +       "xchg %rsi, 32+save \n\t"               \
> >>> +       "xchg %rdi, 40+save \n\t"               \
> >>> +       "xchg %rsp, 48+save \n\t"               \
> >>> +       "xchg %rbp, 56+save \n\t"               \
> >>> +       "xchg %r8, 64+save \n\t"                \
> >>> +       "xchg %r9, 72+save \n\t"                \
> >>> +       "xchg %r10, 80+save \n\t"               \
> >>> +       "xchg %r11, 88+save \n\t"               \
> >>> +       "xchg %r12, 96+save \n\t"               \
> >>> +       "xchg %r13, 104+save \n\t"              \
> >>> +       "xchg %r14, 112+save \n\t"              \
> >>> +       "xchg %r15, 120+save \n\t"              \
> >>> +       "pushf \n\t"                    \
> >>> +       "pop 136+save \n\t"             \
> >>> +       "popf \n\t"                     \
> >>> +       "ret \n\t"                              \
> >>> +
> >>> +#define INSN_TRAP                      \
> >>> +       "in  (%dx),%al\n\t"                     \
> >>> +       ". = . + 31\n\t"                        \
> >>> +
> >>> +asm(
> >>> +       ".align 4096\n\t"
> >>> +       "insn_page:\n\t"
> >>> +       INSN_SAVE
> >>> +       "test_insn:\n\t"
> >>> +       INSN_TRAP
> >>> +       "test_insn_end:\n\t"
> >>> +       INSN_RESTORE
> >>> +       "insn_page_end:\n\t"
> >>> +       ".align 4096\n\t"
> >>> +
> >>> +       "alt_insn_page:\n\t"
> >>> +       INSN_SAVE
> >>> +       "alt_test_insn:\n\t"
> >>> +       INSN_TRAP
> >>> +       "alt_test_insn_end:\n\t"
> >>> +       INSN_RESTORE
> >>> +       "alt_insn_page_end:\n\t"
> >>> +       ".align 4096\n\t"
> >>> +);
> >>> +
> >>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >>> +{
> >>> +       ulong *cr3 = (ulong *)read_cr3();
> >>> +       void *insn_ram;
> >>> +       int i;
> >>> +       extern u8 insn_page[], test_insn[], test_insn_end[];
> >>> +       extern u8 alt_insn_page[], alt_test_insn[];
> >>> +
> >>> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
> >>> +       for (i=1; i<test_insn_end - test_insn; i++)
> >>> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
> >>> +       for (i=0; i<alt_insn_length; i++)
> >>> +               alt_test_insn[i] = alt_insn[i];
> >>> +       for(;i<test_insn_end - test_insn; i++)
> >>> +               alt_test_insn[i] = 0x90; // nop
> >>> +       save = inregs;
> >>> +
> >>> +       // Load the code TLB with insn_page, but point the page tables at
> >>> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >>> +       // This will make the CPU trap on the insn_page instruction but the
> >>> +       // hypervisor will see alt_insn_page.
> >>> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >>> +       invlpg(insn_ram);
> >>> +       // Load code TLB
> >>> +       asm volatile("call *%0" : : "r"(insn_ram));
> >>> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >>> +       // Trap, let hypervisor emulate at alt_insn_page
> >>> +       asm volatile("call *%0": : "r"(insn_ram+1));
> >>> +
> >>> +       outregs = save;
> >>> +}
> >>> +
> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >>>  {
> >>>      ++exceptions;
> >>> --
> >>> 1.7.9.5
> >>>
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> > 
> > --
> > 			Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 20, 2013, 8:48 a.m. UTC | #7
On Wed, Jun 19, 2013 at 11:00:56PM +0800, Arthur Chunqi Li wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 110 insertions(+)
>  mode change 100644 => 100755 x86/emulator.c
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> old mode 100644
> new mode 100755
> index 96576e5..48d45c8
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,15 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 r8, r9, r10, r11;
> +	u64 r12, r13, r14, r15;
> +	u64 rip, rflags;
> +};
> +struct regs inregs, outregs, save;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +#define INSN_SAVE 			\
No need for all the defines. Put all the code into insn_page, allocate
alt_insn_page dynamically and copy the code there by memcpy.

> +	"ret\n\t"				\
> +	"pushf\n\t"			\
> +	"push 136+save \n\t"		\
> +	"popf \n\t"			\
> +	"xchg %rax, 0+save \n\t"		\
> +	"xchg %rbx, 8+save \n\t"		\
> +	"xchg %rcx, 16+save \n\t"		\
> +	"xchg %rdx, 24+save \n\t"		\
> +	"xchg %rsi, 32+save \n\t"		\
> +	"xchg %rdi, 40+save \n\t"		\
> +	"xchg %rsp, 48+save \n\t"		\
> +	"xchg %rbp, 56+save \n\t"		\
> +	"xchg %r8, 64+save \n\t"		\
> +	"xchg %r9, 72+save \n\t"		\
> +	"xchg %r10, 80+save \n\t"		\
> +	"xchg %r11, 88+save \n\t"		\
> +	"xchg %r12, 96+save \n\t"		\
> +	"xchg %r13, 104+save \n\t"		\
> +	"xchg %r14, 112+save \n\t"		\
> +	"xchg %r15, 120+save \n\t"		\
> +
> +#define INSN_RESTORE			\
> +	"xchg %rax, 0+save \n\t"		\
> +	"xchg %rbx, 8+save \n\t"		\
> +	"xchg %rcx, 16+save \n\t"		\
> +	"xchg %rdx, 24+save \n\t"		\
> +	"xchg %rsi, 32+save \n\t"		\
> +	"xchg %rdi, 40+save \n\t"		\
> +	"xchg %rsp, 48+save \n\t"		\
> +	"xchg %rbp, 56+save \n\t"		\
> +	"xchg %r8, 64+save \n\t"		\
> +	"xchg %r9, 72+save \n\t"		\
> +	"xchg %r10, 80+save \n\t"		\
> +	"xchg %r11, 88+save \n\t"		\
> +	"xchg %r12, 96+save \n\t"		\
> +	"xchg %r13, 104+save \n\t"		\
> +	"xchg %r14, 112+save \n\t"		\
> +	"xchg %r15, 120+save \n\t"		\
> +	"pushf \n\t"			\
> +	"pop 136+save \n\t"		\
> +	"popf \n\t"			\
> +	"ret \n\t"				\
> +
> +#define INSN_TRAP			\
> +	"in  (%dx),%al\n\t"			\
> +	". = . + 31\n\t"			\
If you use ".skip 31, 0x90\n\t" instead, you can drop the loop
that inserts nops below.

> +
> +asm(
> +	".align 4096\n\t"
> +	"insn_page:\n\t"
> +	INSN_SAVE
> +	"test_insn:\n\t"
> +	INSN_TRAP
> +	"test_insn_end:\n\t"
> +	INSN_RESTORE
> +	"insn_page_end:\n\t"
> +	".align 4096\n\t"
> +
> +	"alt_insn_page:\n\t"
> +	INSN_SAVE
> +	"alt_test_insn:\n\t"
> +	INSN_TRAP
> +	"alt_test_insn_end:\n\t"
> +	INSN_RESTORE
> +	"alt_insn_page_end:\n\t"
> +	".align 4096\n\t"
> +);
> +
> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	void *insn_ram;
> +	int i;
> +	extern u8 insn_page[], test_insn[], test_insn_end[];
> +	extern u8 alt_insn_page[], alt_test_insn[];
> +
> +	insn_ram = vmap(virt_to_phys(insn_page), 4096);
> +	for (i=1; i<test_insn_end - test_insn; i++)
> +		alt_test_insn[i] = test_insn[i] = 0x90; // nop
> +	for (i=0; i<alt_insn_length; i++)
> +		alt_test_insn[i] = alt_insn[i];
> +	for(;i<test_insn_end - test_insn; i++)
> +		alt_test_insn[i] = 0x90; // nop
> +	save = inregs;
> +
> +	// Load the code TLB with insn_page, but point the page tables at
> +	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	// This will make the CPU trap on the insn_page instruction but the
> +	// hypervisor will see alt_insn_page.
I prefer all the comments to be changed to /**/ style while we are at it.

> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	// Load code TLB
> +	asm volatile("call *%0" : : "r"(insn_ram));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	// Trap, let hypervisor emulate at alt_insn_page
> +	asm volatile("call *%0": : "r"(insn_ram+1));
> +
> +	outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> -- 
> 1.7.9.5

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arthur Chunqi Li June 20, 2013, 8:58 a.m. UTC | #8
OK, I will handle all of the above in the following commit.

Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

From my iPhone

On 2013-6-20, at 16:48, Gleb Natapov <gleb@redhat.com> wrote:

> On Wed, Jun 19, 2013 at 11:00:56PM +0800, Arthur Chunqi Li wrote:
>> Add a function trap_emulator to run an instruction in emulator.
>> Set inregs first (%rax is invalid because it is used as return
>> address), put instruction codec in alt_insn and call func with
>> alt_insn_length. Get results in outregs.
>> 
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>> x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 110 insertions(+)
>> mode change 100644 => 100755 x86/emulator.c
>> 
>> diff --git a/x86/emulator.c b/x86/emulator.c
>> old mode 100644
>> new mode 100755
>> index 96576e5..48d45c8
>> --- a/x86/emulator.c
>> +++ b/x86/emulator.c
>> @@ -11,6 +11,15 @@ int fails, tests;
>> 
>> static int exceptions;
>> 
>> +struct regs {
>> +    u64 rax, rbx, rcx, rdx;
>> +    u64 rsi, rdi, rsp, rbp;
>> +    u64 r8, r9, r10, r11;
>> +    u64 r12, r13, r14, r15;
>> +    u64 rip, rflags;
>> +};
>> +struct regs inregs, outregs, save;
>> +
>> void report(const char *name, int result)
>> {
>>    ++tests;
>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>>     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> }
>> 
>> +#define INSN_SAVE            \
> No need for all the defines. Put all the code into insn_page, allocate
> alt_insn_page dynamically and copy the code there by memcpy.
> 
>> +    "ret\n\t"                \
>> +    "pushf\n\t"            \
>> +    "push 136+save \n\t"        \
>> +    "popf \n\t"            \
>> +    "xchg %rax, 0+save \n\t"        \
>> +    "xchg %rbx, 8+save \n\t"        \
>> +    "xchg %rcx, 16+save \n\t"        \
>> +    "xchg %rdx, 24+save \n\t"        \
>> +    "xchg %rsi, 32+save \n\t"        \
>> +    "xchg %rdi, 40+save \n\t"        \
>> +    "xchg %rsp, 48+save \n\t"        \
>> +    "xchg %rbp, 56+save \n\t"        \
>> +    "xchg %r8, 64+save \n\t"        \
>> +    "xchg %r9, 72+save \n\t"        \
>> +    "xchg %r10, 80+save \n\t"        \
>> +    "xchg %r11, 88+save \n\t"        \
>> +    "xchg %r12, 96+save \n\t"        \
>> +    "xchg %r13, 104+save \n\t"        \
>> +    "xchg %r14, 112+save \n\t"        \
>> +    "xchg %r15, 120+save \n\t"        \
>> +
>> +#define INSN_RESTORE            \
>> +    "xchg %rax, 0+save \n\t"        \
>> +    "xchg %rbx, 8+save \n\t"        \
>> +    "xchg %rcx, 16+save \n\t"        \
>> +    "xchg %rdx, 24+save \n\t"        \
>> +    "xchg %rsi, 32+save \n\t"        \
>> +    "xchg %rdi, 40+save \n\t"        \
>> +    "xchg %rsp, 48+save \n\t"        \
>> +    "xchg %rbp, 56+save \n\t"        \
>> +    "xchg %r8, 64+save \n\t"        \
>> +    "xchg %r9, 72+save \n\t"        \
>> +    "xchg %r10, 80+save \n\t"        \
>> +    "xchg %r11, 88+save \n\t"        \
>> +    "xchg %r12, 96+save \n\t"        \
>> +    "xchg %r13, 104+save \n\t"        \
>> +    "xchg %r14, 112+save \n\t"        \
>> +    "xchg %r15, 120+save \n\t"        \
>> +    "pushf \n\t"            \
>> +    "pop 136+save \n\t"        \
>> +    "popf \n\t"            \
>> +    "ret \n\t"                \
>> +
>> +#define INSN_TRAP            \
>> +    "in  (%dx),%al\n\t"            \
>> +    ". = . + 31\n\t"            \
> If you will do ".skip 31, 0x90\n\t" instead you can drop loop
> that inserts nops bellow.
> 
>> +
>> +asm(
>> +    ".align 4096\n\t"
>> +    "insn_page:\n\t"
>> +    INSN_SAVE
>> +    "test_insn:\n\t"
>> +    INSN_TRAP
>> +    "test_insn_end:\n\t"
>> +    INSN_RESTORE
>> +    "insn_page_end:\n\t"
>> +    ".align 4096\n\t"
>> +
>> +    "alt_insn_page:\n\t"
>> +    INSN_SAVE
>> +    "alt_test_insn:\n\t"
>> +    INSN_TRAP
>> +    "alt_test_insn_end:\n\t"
>> +    INSN_RESTORE
>> +    "alt_insn_page_end:\n\t"
>> +    ".align 4096\n\t"
>> +);
>> +
>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> +{
>> +    ulong *cr3 = (ulong *)read_cr3();
>> +    void *insn_ram;
>> +    int i;
>> +    extern u8 insn_page[], test_insn[], test_insn_end[];
>> +    extern u8 alt_insn_page[], alt_test_insn[];
>> +
>> +    insn_ram = vmap(virt_to_phys(insn_page), 4096);
>> +    for (i=1; i<test_insn_end - test_insn; i++)
>> +        alt_test_insn[i] = test_insn[i] = 0x90; // nop
>> +    for (i=0; i<alt_insn_length; i++)
>> +        alt_test_insn[i] = alt_insn[i];
>> +    for(;i<test_insn_end - test_insn; i++)
>> +        alt_test_insn[i] = 0x90; // nop
>> +    save = inregs;
>> +
>> +    // Load the code TLB with insn_page, but point the page tables at
>> +    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> +    // This will make the CPU trap on the insn_page instruction but the
>> +    // hypervisor will see alt_insn_page.
> I prefer all the comments to be changed to /**/ style while we are at it.
> 
>> +    install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> +    invlpg(insn_ram);
>> +    // Load code TLB
>> +    asm volatile("call *%0" : : "r"(insn_ram));
>> +    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> +    // Trap, let hypervisor emulate at alt_insn_page
>> +    asm volatile("call *%0": : "r"(insn_ram+1));
>> +
>> +    outregs = save;
>> +}
>> +
>> static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> {
>>     ++exceptions;
>> -- 
>> 1.7.9.5
> 
> --
>            Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/x86/emulator.c b/x86/emulator.c
old mode 100644
new mode 100755
index 96576e5..48d45c8
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,15 @@  int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 r8, r9, r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 rip, rflags;
+};
+struct regs inregs, outregs, save;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +694,107 @@  static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+#define INSN_SAVE 			\
+	"ret\n\t"				\
+	"pushf\n\t"			\
+	"push 136+save \n\t"		\
+	"popf \n\t"			\
+	"xchg %rax, 0+save \n\t"		\
+	"xchg %rbx, 8+save \n\t"		\
+	"xchg %rcx, 16+save \n\t"		\
+	"xchg %rdx, 24+save \n\t"		\
+	"xchg %rsi, 32+save \n\t"		\
+	"xchg %rdi, 40+save \n\t"		\
+	"xchg %rsp, 48+save \n\t"		\
+	"xchg %rbp, 56+save \n\t"		\
+	"xchg %r8, 64+save \n\t"		\
+	"xchg %r9, 72+save \n\t"		\
+	"xchg %r10, 80+save \n\t"		\
+	"xchg %r11, 88+save \n\t"		\
+	"xchg %r12, 96+save \n\t"		\
+	"xchg %r13, 104+save \n\t"		\
+	"xchg %r14, 112+save \n\t"		\
+	"xchg %r15, 120+save \n\t"		\
+
+#define INSN_RESTORE			\
+	"xchg %rax, 0+save \n\t"		\
+	"xchg %rbx, 8+save \n\t"		\
+	"xchg %rcx, 16+save \n\t"		\
+	"xchg %rdx, 24+save \n\t"		\
+	"xchg %rsi, 32+save \n\t"		\
+	"xchg %rdi, 40+save \n\t"		\
+	"xchg %rsp, 48+save \n\t"		\
+	"xchg %rbp, 56+save \n\t"		\
+	"xchg %r8, 64+save \n\t"		\
+	"xchg %r9, 72+save \n\t"		\
+	"xchg %r10, 80+save \n\t"		\
+	"xchg %r11, 88+save \n\t"		\
+	"xchg %r12, 96+save \n\t"		\
+	"xchg %r13, 104+save \n\t"		\
+	"xchg %r14, 112+save \n\t"		\
+	"xchg %r15, 120+save \n\t"		\
+	"pushf \n\t"			\
+	"pop 136+save \n\t"		\
+	"popf \n\t"			\
+	"ret \n\t"				\
+
+#define INSN_TRAP			\
+	"in  (%dx),%al\n\t"			\
+	". = . + 31\n\t"			\
+
+asm(
+	".align 4096\n\t"
+	"insn_page:\n\t"
+	INSN_SAVE
+	"test_insn:\n\t"
+	INSN_TRAP
+	"test_insn_end:\n\t"
+	INSN_RESTORE
+	"insn_page_end:\n\t"
+	".align 4096\n\t"
+
+	"alt_insn_page:\n\t"
+	INSN_SAVE
+	"alt_test_insn:\n\t"
+	INSN_TRAP
+	"alt_test_insn_end:\n\t"
+	INSN_RESTORE
+	"alt_insn_page_end:\n\t"
+	".align 4096\n\t"
+);
+
+static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	void *insn_ram;
+	int i;
+	extern u8 insn_page[], test_insn[], test_insn_end[];
+	extern u8 alt_insn_page[], alt_test_insn[];
+
+	insn_ram = vmap(virt_to_phys(insn_page), 4096);
+	for (i=1; i<test_insn_end - test_insn; i++)
+		alt_test_insn[i] = test_insn[i] = 0x90; // nop
+	for (i=0; i<alt_insn_length; i++)
+		alt_test_insn[i] = alt_insn[i];
+	for(;i<test_insn_end - test_insn; i++)
+		alt_test_insn[i] = 0x90; // nop
+	save = inregs;
+
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile("call *%0": : "r"(insn_ram+1));
+
+	outregs = save;
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;