diff mbox

[1/2] kvm-unit-tests: Add a func to run instruction in emulator

Message ID 1370532278-22063-1-git-send-email-yzt356@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arthur Chunqi Li June 6, 2013, 3:24 p.m. UTC
Add a function trap_emulator to run an instruction in emulator.
Set inregs first (%rax is invalid because it is used as return
address), put instruction codec in alt_insn and call func with
alt_insn_length. Get results in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

Comments

??? June 7, 2013, 2:14 a.m. UTC | #1
This version of save/restore general register seems a bit too ugly, I
will change it and commit another patch.

Some of the registers cannot be set as realmode.c do, for example %rax
used to save return value, wrong %esp %ebp may cause crash, and I
think changed %rflags may cause some unknown error. So these registers
should not be set by caller.

Arthur

On Thu, Jun 6, 2013 at 11:24 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
>
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 96576e5..8ab9904 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,14 @@ int fails, tests;
>
>  static int exceptions;
>
> +struct regs {
> +       u64 rax, rbx, rcx, rdx;
> +       u64 rsi, rdi, rsp, rbp;
> +       u64 rip, rflags;
> +};
> +
> +static struct regs inregs, outregs;
> +
>  void report(const char *name, int result)
>  {
>         ++tests;
> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>
> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> +                            uint8_t *alt_insn_page, void *insn_ram,
> +                            uint8_t *alt_insn, int alt_insn_length)
> +{
> +       ulong *cr3 = (ulong *)read_cr3();
> +       int i;
> +
> +       // Pad with RET instructions
> +       memset(insn_page, 0xc3, 4096);
> +       memset(alt_insn_page, 0xc3, 4096);
> +
> +       // Place a trapping instruction in the page to trigger a VMEXIT
> +       insn_page[0] = 0x89; // mov %eax, (%rax)
> +       insn_page[1] = 0x00;
> +       insn_page[2] = 0x90; // nop
> +       insn_page[3] = 0xc3; // ret
> +
> +       // Place the instruction we want the hypervisor to see in the alternate page
> +       for (i=0; i<alt_insn_length; i++)
> +               alt_insn_page[i] = alt_insn[i];
> +
> +       // Save general registers
> +       asm volatile(
> +               "push %rax\n\r"
> +               "push %rbx\n\r"
> +               "push %rcx\n\r"
> +               "push %rdx\n\r"
> +               "push %rsi\n\r"
> +               "push %rdi\n\r"
> +               );
> +       // Load the code TLB with insn_page, but point the page tables at
> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +       // This will make the CPU trap on the insn_page instruction but the
> +       // hypervisor will see alt_insn_page.
> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +       invlpg(insn_ram);
> +       // Load code TLB
> +       asm volatile("call *%0" : : "r"(insn_ram + 3));
> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +       // Trap, let hypervisor emulate at alt_insn_page
> +       asm volatile(
> +               "call *%1\n\r"
> +
> +               "mov %%rax, 0+%[outregs] \n\t"
> +               "mov %%rbx, 8+%[outregs] \n\t"
> +               "mov %%rcx, 16+%[outregs] \n\t"
> +               "mov %%rdx, 24+%[outregs] \n\t"
> +               "mov %%rsi, 32+%[outregs] \n\t"
> +               "mov %%rdi, 40+%[outregs] \n\t"
> +               "mov %%rsp,48+ %[outregs] \n\t"
> +               "mov %%rbp, 56+%[outregs] \n\t"
> +
> +               /* Save RFLAGS in outregs*/
> +               "pushf \n\t"
> +               "popq 72+%[outregs] \n\t"
> +               : [outregs]"+m"(outregs)
> +               : "r"(insn_ram),
> +                       "a"(mem), "b"(inregs.rbx),
> +                       "c"(inregs.rcx), "d"(inregs.rdx),
> +                       "S"(inregs.rsi), "D"(inregs.rdi)
> +               : "memory", "cc"
> +               );
> +       // Restore general registers
> +       asm volatile(
> +               "pop %rax\n\r"
> +               "pop %rbx\n\r"
> +               "pop %rcx\n\r"
> +               "pop %rdx\n\r"
> +               "pop %rsi\n\r"
> +               "pop %rdi\n\r"
> +               );
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> --
> 1.7.9.5
>
Paolo Bonzini June 12, 2013, 8:50 p.m. UTC | #2
Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 96576e5..8ab9904 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,14 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 rip, rflags;
> +};
> +
> +static struct regs inregs, outregs;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> +			     uint8_t *alt_insn_page, void *insn_ram,
> +			     uint8_t *alt_insn, int alt_insn_length)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	int i;
> +
> +	// Pad with RET instructions
> +	memset(insn_page, 0xc3, 4096);
> +	memset(alt_insn_page, 0xc3, 4096);
> +
> +	// Place a trapping instruction in the page to trigger a VMEXIT
> +	insn_page[0] = 0x89; // mov %eax, (%rax)
> +	insn_page[1] = 0x00;
> +	insn_page[2] = 0x90; // nop
> +	insn_page[3] = 0xc3; // ret
> +
> +	// Place the instruction we want the hypervisor to see in the alternate page
> +	for (i=0; i<alt_insn_length; i++)
> +		alt_insn_page[i] = alt_insn[i];
> +
> +	// Save general registers
> +	asm volatile(
> +		"push %rax\n\r"
> +		"push %rbx\n\r"
> +		"push %rcx\n\r"
> +		"push %rdx\n\r"
> +		"push %rsi\n\r"
> +		"push %rdi\n\r"
> +		);

This will not work if GCC is using rsp-relative addresses to access
local variables.  You need to use mov instructions to load from inregs,
and put the push/pop sequences inside the "main" asm that does the "call
*%1".

Paolo

> +	// Load the code TLB with insn_page, but point the page tables at
> +	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	// This will make the CPU trap on the insn_page instruction but the
> +	// hypervisor will see alt_insn_page.
> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	// Load code TLB
> +	asm volatile("call *%0" : : "r"(insn_ram + 3));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	// Trap, let hypervisor emulate at alt_insn_page
> +	asm volatile(
> +		"call *%1\n\r"
> +
> +		"mov %%rax, 0+%[outregs] \n\t"
> +		"mov %%rbx, 8+%[outregs] \n\t"
> +		"mov %%rcx, 16+%[outregs] \n\t"
> +		"mov %%rdx, 24+%[outregs] \n\t"
> +		"mov %%rsi, 32+%[outregs] \n\t"
> +		"mov %%rdi, 40+%[outregs] \n\t"
> +		"mov %%rsp,48+ %[outregs] \n\t"
> +		"mov %%rbp, 56+%[outregs] \n\t"
> +
> +		/* Save RFLAGS in outregs*/
> +		"pushf \n\t"
> +		"popq 72+%[outregs] \n\t"
> +		: [outregs]"+m"(outregs)
> +		: "r"(insn_ram),
> +			"a"(mem), "b"(inregs.rbx),
> +			"c"(inregs.rcx), "d"(inregs.rdx),
> +			"S"(inregs.rsi), "D"(inregs.rdi)
> +		: "memory", "cc"
> +		);
> +	// Restore general registers
> +	asm volatile(
> +		"pop %rax\n\r"
> +		"pop %rbx\n\r"
> +		"pop %rcx\n\r"
> +		"pop %rdx\n\r"
> +		"pop %rsi\n\r"
> +		"pop %rdi\n\r"
> +		);
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 13, 2013, 4:50 a.m. UTC | #3
On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> Add a function trap_emulator to run an instruction in emulator.
>> Set inregs first (%rax is invalid because it is used as return
>> address), put instruction codec in alt_insn and call func with
>> alt_insn_length. Get results in outregs.
>>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 81 insertions(+)
>>
>> diff --git a/x86/emulator.c b/x86/emulator.c
>> index 96576e5..8ab9904 100644
>> --- a/x86/emulator.c
>> +++ b/x86/emulator.c
>> @@ -11,6 +11,14 @@ int fails, tests;
>>
>>  static int exceptions;
>>
>> +struct regs {
>> +     u64 rax, rbx, rcx, rdx;
>> +     u64 rsi, rdi, rsp, rbp;
>> +     u64 rip, rflags;
>> +};
>> +
>> +static struct regs inregs, outregs;
>> +
>>  void report(const char *name, int result)
>>  {
>>       ++tests;
>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>  }
>>
>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> +                          uint8_t *alt_insn, int alt_insn_length)
>> +{
>> +     ulong *cr3 = (ulong *)read_cr3();
>> +     int i;
>> +
>> +     // Pad with RET instructions
>> +     memset(insn_page, 0xc3, 4096);
>> +     memset(alt_insn_page, 0xc3, 4096);
>> +
>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> +     insn_page[1] = 0x00;
>> +     insn_page[2] = 0x90; // nop
>> +     insn_page[3] = 0xc3; // ret
>> +
>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> +     for (i=0; i<alt_insn_length; i++)
>> +             alt_insn_page[i] = alt_insn[i];
>> +
>> +     // Save general registers
>> +     asm volatile(
>> +             "push %rax\n\r"
>> +             "push %rbx\n\r"
>> +             "push %rcx\n\r"
>> +             "push %rdx\n\r"
>> +             "push %rsi\n\r"
>> +             "push %rdi\n\r"
>> +             );
>
> This will not work if GCC is using rsp-relative addresses to access
> local variables.  You need to use mov instructions to load from inregs,
> and put the push/pop sequences inside the "main" asm that does the "call
> *%1".
Is there any way to let gcc use absolute address to access variables?
I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
and it seems that addressing for "save" is wrong.

Arthur
>
> Paolo
>
>> +     // Load the code TLB with insn_page, but point the page tables at
>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> +     // This will make the CPU trap on the insn_page instruction but the
>> +     // hypervisor will see alt_insn_page.
>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> +     invlpg(insn_ram);
>> +     // Load code TLB
>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> +     // Trap, let hypervisor emulate at alt_insn_page
>> +     asm volatile(
>> +             "call *%1\n\r"
>> +
>> +             "mov %%rax, 0+%[outregs] \n\t"
>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> +
>> +             /* Save RFLAGS in outregs*/
>> +             "pushf \n\t"
>> +             "popq 72+%[outregs] \n\t"
>> +             : [outregs]"+m"(outregs)
>> +             : "r"(insn_ram),
>> +                     "a"(mem), "b"(inregs.rbx),
>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> +             : "memory", "cc"
>> +             );
>> +     // Restore general registers
>> +     asm volatile(
>> +             "pop %rax\n\r"
>> +             "pop %rbx\n\r"
>> +             "pop %rcx\n\r"
>> +             "pop %rdx\n\r"
>> +             "pop %rsi\n\r"
>> +             "pop %rdi\n\r"
>> +             );
>> +}
>> +
>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>  {
>>      ++exceptions;
>>
>



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 13, 2013, 9:30 a.m. UTC | #4
Hi Gleb,
I'm trying to solve these problems in the past days and meet many
difficulties. You want to save all the general registers in calling
insn_page, so registers should be saved to (save) in insn_page.
Because all the instructions should be generated outside and copy to
insn_page, and the instructions generated outside is RIP-relative, so
inside insn_page (save) will be wrong pointed with RIP-relative code.

I have tried to move (save) into insn_page. But when calling
insn_page, data in it can only be read and any instructions like "xchg
%%rax, 0+%[save]" may cause error, because at this time read is from
TLB but write will cause inconsistent.

Another way is disabling RIP-relative code, but I failed when using
"-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
Is there any way to totally disable RIP-relative code? Besides, using
this feature may specified to some newer C compiler. This may not be a
good solution.

If we don't set %rsp and %rbp when executing emulator code, we can
just use “push/pop" to save other general registers.

If you have any better solutions, please let me know.

Thanks,
Arthur

On Thu, Jun 13, 2013 at 12:50 PM, ??? <Arthur Chunqi Li>
<yzt356@gmail.com> wrote:
> On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>>> Add a function trap_emulator to run an instruction in emulator.
>>> Set inregs first (%rax is invalid because it is used as return
>>> address), put instruction codec in alt_insn and call func with
>>> alt_insn_length. Get results in outregs.
>>>
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 81 insertions(+)
>>>
>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>> index 96576e5..8ab9904 100644
>>> --- a/x86/emulator.c
>>> +++ b/x86/emulator.c
>>> @@ -11,6 +11,14 @@ int fails, tests;
>>>
>>>  static int exceptions;
>>>
>>> +struct regs {
>>> +     u64 rax, rbx, rcx, rdx;
>>> +     u64 rsi, rdi, rsp, rbp;
>>> +     u64 rip, rflags;
>>> +};
>>> +
>>> +static struct regs inregs, outregs;
>>> +
>>>  void report(const char *name, int result)
>>>  {
>>>       ++tests;
>>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>>  }
>>>
>>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>>> +                          uint8_t *alt_insn_page, void *insn_ram,
>>> +                          uint8_t *alt_insn, int alt_insn_length)
>>> +{
>>> +     ulong *cr3 = (ulong *)read_cr3();
>>> +     int i;
>>> +
>>> +     // Pad with RET instructions
>>> +     memset(insn_page, 0xc3, 4096);
>>> +     memset(alt_insn_page, 0xc3, 4096);
>>> +
>>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>>> +     insn_page[1] = 0x00;
>>> +     insn_page[2] = 0x90; // nop
>>> +     insn_page[3] = 0xc3; // ret
>>> +
>>> +     // Place the instruction we want the hypervisor to see in the alternate page
>>> +     for (i=0; i<alt_insn_length; i++)
>>> +             alt_insn_page[i] = alt_insn[i];
>>> +
>>> +     // Save general registers
>>> +     asm volatile(
>>> +             "push %rax\n\r"
>>> +             "push %rbx\n\r"
>>> +             "push %rcx\n\r"
>>> +             "push %rdx\n\r"
>>> +             "push %rsi\n\r"
>>> +             "push %rdi\n\r"
>>> +             );
>>
>> This will not work if GCC is using rsp-relative addresses to access
>> local variables.  You need to use mov instructions to load from inregs,
>> and put the push/pop sequences inside the "main" asm that does the "call
>> *%1".
> Is there any way to let gcc use absolute address to access variables?
> I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
> and it seems that addressing for "save" is wrong.
>
> Arthur
>>
>> Paolo
>>
>>> +     // Load the code TLB with insn_page, but point the page tables at
>>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>> +     // This will make the CPU trap on the insn_page instruction but the
>>> +     // hypervisor will see alt_insn_page.
>>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>> +     invlpg(insn_ram);
>>> +     // Load code TLB
>>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>> +     // Trap, let hypervisor emulate at alt_insn_page
>>> +     asm volatile(
>>> +             "call *%1\n\r"
>>> +
>>> +             "mov %%rax, 0+%[outregs] \n\t"
>>> +             "mov %%rbx, 8+%[outregs] \n\t"
>>> +             "mov %%rcx, 16+%[outregs] \n\t"
>>> +             "mov %%rdx, 24+%[outregs] \n\t"
>>> +             "mov %%rsi, 32+%[outregs] \n\t"
>>> +             "mov %%rdi, 40+%[outregs] \n\t"
>>> +             "mov %%rsp,48+ %[outregs] \n\t"
>>> +             "mov %%rbp, 56+%[outregs] \n\t"
>>> +
>>> +             /* Save RFLAGS in outregs*/
>>> +             "pushf \n\t"
>>> +             "popq 72+%[outregs] \n\t"
>>> +             : [outregs]"+m"(outregs)
>>> +             : "r"(insn_ram),
>>> +                     "a"(mem), "b"(inregs.rbx),
>>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>>> +             : "memory", "cc"
>>> +             );
>>> +     // Restore general registers
>>> +     asm volatile(
>>> +             "pop %rax\n\r"
>>> +             "pop %rbx\n\r"
>>> +             "pop %rcx\n\r"
>>> +             "pop %rdx\n\r"
>>> +             "pop %rsi\n\r"
>>> +             "pop %rdi\n\r"
>>> +             );
>>> +}
>>> +
>>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>>  {
>>>      ++exceptions;
>>>
>>
>
>
>
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China
Paolo Bonzini June 13, 2013, 1:12 p.m. UTC | #5
Il 13/06/2013 05:30, ??? <Arthur Chunqi Li> ha scritto:
> Hi Gleb,
> I'm trying to solve these problems in the past days and meet many
> difficulties. You want to save all the general registers in calling
> insn_page, so registers should be saved to (save) in insn_page.
> Because all the instructions should be generated outside and copy to
> insn_page, and the instructions generated outside is RIP-relative, so
> inside insn_page (save) will be wrong pointed with RIP-relative code.
> 
> I have tried to move (save) into insn_page. But when calling
> insn_page, data in it can only be read and any instructions like "xchg
> %%rax, 0+%[save]" may cause error, because at this time read is from
> TLB but write will cause inconsistent.
> 
> Another way is disabling RIP-relative code, but I failed when using
> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
> Is there any way to totally disable RIP-relative code? Besides, using
> this feature may specified to some newer C compiler. This may not be a
> good solution.
> 
> If we don't set %rsp and %rbp when executing emulator code, we can
> just use “push/pop" to save other general registers.

%rbp should not be a problem, on the other hand it's okay not to include
%rsp in the registers struct (and assume insn_page/alt_insn_page do not
touch it).  Interestingly, both VMX and SVM put the guest RSP in the VM
control information so that the switch occurs atomically with the start
of the guest.

Paolo

> If you have any better solutions, please let me know.

> Thanks,
> Arthur
> 
> On Thu, Jun 13, 2013 at 12:50 PM, ??? <Arthur Chunqi Li>
> <yzt356@gmail.com> wrote:
>> On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>>>> Add a function trap_emulator to run an instruction in emulator.
>>>> Set inregs first (%rax is invalid because it is used as return
>>>> address), put instruction codec in alt_insn and call func with
>>>> alt_insn_length. Get results in outregs.
>>>>
>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>>> ---
>>>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 81 insertions(+)
>>>>
>>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>>> index 96576e5..8ab9904 100644
>>>> --- a/x86/emulator.c
>>>> +++ b/x86/emulator.c
>>>> @@ -11,6 +11,14 @@ int fails, tests;
>>>>
>>>>  static int exceptions;
>>>>
>>>> +struct regs {
>>>> +     u64 rax, rbx, rcx, rdx;
>>>> +     u64 rsi, rdi, rsp, rbp;
>>>> +     u64 rip, rflags;
>>>> +};
>>>> +
>>>> +static struct regs inregs, outregs;
>>>> +
>>>>  void report(const char *name, int result)
>>>>  {
>>>>       ++tests;
>>>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>>>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>>>  }
>>>>
>>>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>>>> +                          uint8_t *alt_insn_page, void *insn_ram,
>>>> +                          uint8_t *alt_insn, int alt_insn_length)
>>>> +{
>>>> +     ulong *cr3 = (ulong *)read_cr3();
>>>> +     int i;
>>>> +
>>>> +     // Pad with RET instructions
>>>> +     memset(insn_page, 0xc3, 4096);
>>>> +     memset(alt_insn_page, 0xc3, 4096);
>>>> +
>>>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>>>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>>>> +     insn_page[1] = 0x00;
>>>> +     insn_page[2] = 0x90; // nop
>>>> +     insn_page[3] = 0xc3; // ret
>>>> +
>>>> +     // Place the instruction we want the hypervisor to see in the alternate page
>>>> +     for (i=0; i<alt_insn_length; i++)
>>>> +             alt_insn_page[i] = alt_insn[i];
>>>> +
>>>> +     // Save general registers
>>>> +     asm volatile(
>>>> +             "push %rax\n\r"
>>>> +             "push %rbx\n\r"
>>>> +             "push %rcx\n\r"
>>>> +             "push %rdx\n\r"
>>>> +             "push %rsi\n\r"
>>>> +             "push %rdi\n\r"
>>>> +             );
>>>
>>> This will not work if GCC is using rsp-relative addresses to access
>>> local variables.  You need to use mov instructions to load from inregs,
>>> and put the push/pop sequences inside the "main" asm that does the "call
>>> *%1".
>> Is there any way to let gcc use absolute address to access variables?
>> I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> and it seems that addressing for "save" is wrong.
>>
>> Arthur
>>>
>>> Paolo
>>>
>>>> +     // Load the code TLB with insn_page, but point the page tables at
>>>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>>> +     // This will make the CPU trap on the insn_page instruction but the
>>>> +     // hypervisor will see alt_insn_page.
>>>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>>> +     invlpg(insn_ram);
>>>> +     // Load code TLB
>>>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>>>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>>> +     // Trap, let hypervisor emulate at alt_insn_page
>>>> +     asm volatile(
>>>> +             "call *%1\n\r"
>>>> +
>>>> +             "mov %%rax, 0+%[outregs] \n\t"
>>>> +             "mov %%rbx, 8+%[outregs] \n\t"
>>>> +             "mov %%rcx, 16+%[outregs] \n\t"
>>>> +             "mov %%rdx, 24+%[outregs] \n\t"
>>>> +             "mov %%rsi, 32+%[outregs] \n\t"
>>>> +             "mov %%rdi, 40+%[outregs] \n\t"
>>>> +             "mov %%rsp,48+ %[outregs] \n\t"
>>>> +             "mov %%rbp, 56+%[outregs] \n\t"
>>>> +
>>>> +             /* Save RFLAGS in outregs*/
>>>> +             "pushf \n\t"
>>>> +             "popq 72+%[outregs] \n\t"
>>>> +             : [outregs]"+m"(outregs)
>>>> +             : "r"(insn_ram),
>>>> +                     "a"(mem), "b"(inregs.rbx),
>>>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>>>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>>>> +             : "memory", "cc"
>>>> +             );
>>>> +     // Restore general registers
>>>> +     asm volatile(
>>>> +             "pop %rax\n\r"
>>>> +             "pop %rbx\n\r"
>>>> +             "pop %rcx\n\r"
>>>> +             "pop %rdx\n\r"
>>>> +             "pop %rsi\n\r"
>>>> +             "pop %rdi\n\r"
>>>> +             );
>>>> +}
>>>> +
>>>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>>>  {
>>>>      ++exceptions;
>>>>
>>>
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 18, 2013, 12:45 p.m. UTC | #6
On Thu, Jun 13, 2013 at 05:30:03PM +0800, ??? <Arthur Chunqi Li> wrote:
> Hi Gleb,
> I'm trying to solve these problems in the past days and meet many
> difficulties. You want to save all the general registers in calling
> insn_page, so registers should be saved to (save) in insn_page.
> Because all the instructions should be generated outside and copy to
> insn_page, and the instructions generated outside is RIP-relative, so
> inside insn_page (save) will be wrong pointed with RIP-relative code.
> 
They do not have to be generated outside. You can write code into
insn_page directly. Something like this outside of any functions:

asm(".align 4096\n\t"
    ".global insn_page\n\t"
    ".global insn_page_end\n\t"
    ".global test_insn\n\t"
    ".global test_insn_end\n\t"
    "insn_page:"
    "mov %%rax, outregs \n\t"
    ...
    "test_insn:\n\t" 
    "in (%ds), %al\n\t"
    ". = . + 31\n\t"
    "test_insn_end:\n\t"
    "mov outregs, %%rax\n\t"
    ...
    "ret\n\t"
    ".align 4096\n\t"
    "insn_page_end:\n\t");

Now you copy that into alt_insn_page, put instruction you want to test
into test_insn offset and remap alt_insn_page into "insn_page" virtual address.

> I have tried to move (save) into insn_page. But when calling
> insn_page, data in it can only be read and any instructions like "xchg
> %%rax, 0+%[save]" may cause error, because at this time read is from
> TLB but write will cause inconsistent.
> 
> Another way is disabling RIP-relative code, but I failed when using
> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
> Is there any way to totally disable RIP-relative code? Besides, using
> this feature may specified to some newer C compiler. This may not be a
> good solution.
> 
> If we don't set %rsp and %rbp when executing emulator code, we can
> just use “push/pop" to save other general registers.
> 
> If you have any better solutions, please let me know.
> 
> Thanks,
> Arthur
> 
> On Thu, Jun 13, 2013 at 12:50 PM, ??? <Arthur Chunqi Li>
> <yzt356@gmail.com> wrote:
> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
> >>> Add a function trap_emulator to run an instruction in emulator.
> >>> Set inregs first (%rax is invalid because it is used as return
> >>> address), put instruction codec in alt_insn and call func with
> >>> alt_insn_length. Get results in outregs.
> >>>
> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >>> ---
> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  1 file changed, 81 insertions(+)
> >>>
> >>> diff --git a/x86/emulator.c b/x86/emulator.c
> >>> index 96576e5..8ab9904 100644
> >>> --- a/x86/emulator.c
> >>> +++ b/x86/emulator.c
> >>> @@ -11,6 +11,14 @@ int fails, tests;
> >>>
> >>>  static int exceptions;
> >>>
> >>> +struct regs {
> >>> +     u64 rax, rbx, rcx, rdx;
> >>> +     u64 rsi, rdi, rsp, rbp;
> >>> +     u64 rip, rflags;
> >>> +};
> >>> +
> >>> +static struct regs inregs, outregs;
> >>> +
> >>>  void report(const char *name, int result)
> >>>  {
> >>>       ++tests;
> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >>>  }
> >>>
> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
> >>> +                          uint8_t *alt_insn, int alt_insn_length)
> >>> +{
> >>> +     ulong *cr3 = (ulong *)read_cr3();
> >>> +     int i;
> >>> +
> >>> +     // Pad with RET instructions
> >>> +     memset(insn_page, 0xc3, 4096);
> >>> +     memset(alt_insn_page, 0xc3, 4096);
> >>> +
> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
> >>> +     insn_page[1] = 0x00;
> >>> +     insn_page[2] = 0x90; // nop
> >>> +     insn_page[3] = 0xc3; // ret
> >>> +
> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
> >>> +     for (i=0; i<alt_insn_length; i++)
> >>> +             alt_insn_page[i] = alt_insn[i];
> >>> +
> >>> +     // Save general registers
> >>> +     asm volatile(
> >>> +             "push %rax\n\r"
> >>> +             "push %rbx\n\r"
> >>> +             "push %rcx\n\r"
> >>> +             "push %rdx\n\r"
> >>> +             "push %rsi\n\r"
> >>> +             "push %rdi\n\r"
> >>> +             );
> >>
> >> This will not work if GCC is using rsp-relative addresses to access
> >> local variables.  You need to use mov instructions to load from inregs,
> >> and put the push/pop sequences inside the "main" asm that does the "call
> >> *%1".
> > Is there any way to let gcc use absolute address to access variables?
> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
> > and it seems that addressing for "save" is wrong.
> >
> > Arthur
> >>
> >> Paolo
> >>
> >>> +     // Load the code TLB with insn_page, but point the page tables at
> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >>> +     // This will make the CPU trap on the insn_page instruction but the
> >>> +     // hypervisor will see alt_insn_page.
> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >>> +     invlpg(insn_ram);
> >>> +     // Load code TLB
> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >>> +     // Trap, let hypervisor emulate at alt_insn_page
> >>> +     asm volatile(
> >>> +             "call *%1\n\r"
> >>> +
> >>> +             "mov %%rax, 0+%[outregs] \n\t"
> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
> >>> +
> >>> +             /* Save RFLAGS in outregs*/
> >>> +             "pushf \n\t"
> >>> +             "popq 72+%[outregs] \n\t"
> >>> +             : [outregs]"+m"(outregs)
> >>> +             : "r"(insn_ram),
> >>> +                     "a"(mem), "b"(inregs.rbx),
> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
> >>> +             : "memory", "cc"
> >>> +             );
> >>> +     // Restore general registers
> >>> +     asm volatile(
> >>> +             "pop %rax\n\r"
> >>> +             "pop %rbx\n\r"
> >>> +             "pop %rcx\n\r"
> >>> +             "pop %rdx\n\r"
> >>> +             "pop %rsi\n\r"
> >>> +             "pop %rdi\n\r"
> >>> +             );
> >>> +}
> >>> +
> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >>>  {
> >>>      ++exceptions;
> >>>
> >>
> >
> >
> >
> > --
> > Arthur Chunqi Li
> > Department of Computer Science
> > School of EECS
> > Peking University
> > Beijing, China
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 18, 2013, 1:40 p.m. UTC | #7
On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Thu, Jun 13, 2013 at 05:30:03PM +0800, ??? <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> I'm trying to solve these problems in the past days and meet many
>> difficulties. You want to save all the general registers in calling
>> insn_page, so registers should be saved to (save) in insn_page.
>> Because all the instructions should be generated outside and copy to
>> insn_page, and the instructions generated outside is RIP-relative, so
>> inside insn_page (save) will be wrong pointed with RIP-relative code.
>>
> They do not have to be generated outside. You can write code into
> insn_page directly. Something like this outside of any functions:
>
> asm(".align 4096\n\t"
>     ".global insn_page\n\t"
>     ".global insn_page_end\n\t"
>     ".global test_insn\n\t"
>     ".global test_insn_end\n\t"
>     "insn_page:"
>     "mov %%rax, outregs \n\t"
>     ...
>     "test_insn:\n\t"
>     "in (%ds), %al\n\t"
>     ". = . + 31\n\t"
>     "test_insn_end:\n\t"
>     "mov outregs, %%rax\n\t"
>     ...
>     "ret\n\t"
>     ".align 4096\n\t"
>     "insn_page_end:\n\t");
>
> Now you copy that into alt_insn_page, put instruction you want to test
> into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
Which function can be used to remap alt_insn_page into "insn_page"
virtual address?

Arthur
>
>> I have tried to move (save) into insn_page. But when calling
>> insn_page, data in it can only be read and any instructions like "xchg
>> %%rax, 0+%[save]" may cause error, because at this time read is from
>> TLB but write will cause inconsistent.
>>
>> Another way is disabling RIP-relative code, but I failed when using
>> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
>> Is there any way to totally disable RIP-relative code? Besides, using
>> this feature may specified to some newer C compiler. This may not be a
>> good solution.
>>
>> If we don't set %rsp and %rbp when executing emulator code, we can
>> just use “push/pop" to save other general registers.
>>
>> If you have any better solutions, please let me know.
>>
>> Thanks,
>> Arthur
>>
>> On Thu, Jun 13, 2013 at 12:50 PM, ??? <Arthur Chunqi Li>
>> <yzt356@gmail.com> wrote:
>> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> >>> Add a function trap_emulator to run an instruction in emulator.
>> >>> Set inregs first (%rax is invalid because it is used as return
>> >>> address), put instruction codec in alt_insn and call func with
>> >>> alt_insn_length. Get results in outregs.
>> >>>
>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >>> ---
>> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>>  1 file changed, 81 insertions(+)
>> >>>
>> >>> diff --git a/x86/emulator.c b/x86/emulator.c
>> >>> index 96576e5..8ab9904 100644
>> >>> --- a/x86/emulator.c
>> >>> +++ b/x86/emulator.c
>> >>> @@ -11,6 +11,14 @@ int fails, tests;
>> >>>
>> >>>  static int exceptions;
>> >>>
>> >>> +struct regs {
>> >>> +     u64 rax, rbx, rcx, rdx;
>> >>> +     u64 rsi, rdi, rsp, rbp;
>> >>> +     u64 rip, rflags;
>> >>> +};
>> >>> +
>> >>> +static struct regs inregs, outregs;
>> >>> +
>> >>>  void report(const char *name, int result)
>> >>>  {
>> >>>       ++tests;
>> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >>>  }
>> >>>
>> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >>> +                          uint8_t *alt_insn, int alt_insn_length)
>> >>> +{
>> >>> +     ulong *cr3 = (ulong *)read_cr3();
>> >>> +     int i;
>> >>> +
>> >>> +     // Pad with RET instructions
>> >>> +     memset(insn_page, 0xc3, 4096);
>> >>> +     memset(alt_insn_page, 0xc3, 4096);
>> >>> +
>> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >>> +     insn_page[1] = 0x00;
>> >>> +     insn_page[2] = 0x90; // nop
>> >>> +     insn_page[3] = 0xc3; // ret
>> >>> +
>> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >>> +     for (i=0; i<alt_insn_length; i++)
>> >>> +             alt_insn_page[i] = alt_insn[i];
>> >>> +
>> >>> +     // Save general registers
>> >>> +     asm volatile(
>> >>> +             "push %rax\n\r"
>> >>> +             "push %rbx\n\r"
>> >>> +             "push %rcx\n\r"
>> >>> +             "push %rdx\n\r"
>> >>> +             "push %rsi\n\r"
>> >>> +             "push %rdi\n\r"
>> >>> +             );
>> >>
>> >> This will not work if GCC is using rsp-relative addresses to access
>> >> local variables.  You need to use mov instructions to load from inregs,
>> >> and put the push/pop sequences inside the "main" asm that does the "call
>> >> *%1".
>> > Is there any way to let gcc use absolute address to access variables?
>> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> > and it seems that addressing for "save" is wrong.
>> >
>> > Arthur
>> >>
>> >> Paolo
>> >>
>> >>> +     // Load the code TLB with insn_page, but point the page tables at
>> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >>> +     // This will make the CPU trap on the insn_page instruction but the
>> >>> +     // hypervisor will see alt_insn_page.
>> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >>> +     invlpg(insn_ram);
>> >>> +     // Load code TLB
>> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >>> +     // Trap, let hypervisor emulate at alt_insn_page
>> >>> +     asm volatile(
>> >>> +             "call *%1\n\r"
>> >>> +
>> >>> +             "mov %%rax, 0+%[outregs] \n\t"
>> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> >>> +
>> >>> +             /* Save RFLAGS in outregs*/
>> >>> +             "pushf \n\t"
>> >>> +             "popq 72+%[outregs] \n\t"
>> >>> +             : [outregs]"+m"(outregs)
>> >>> +             : "r"(insn_ram),
>> >>> +                     "a"(mem), "b"(inregs.rbx),
>> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> >>> +             : "memory", "cc"
>> >>> +             );
>> >>> +     // Restore general registers
>> >>> +     asm volatile(
>> >>> +             "pop %rax\n\r"
>> >>> +             "pop %rbx\n\r"
>> >>> +             "pop %rcx\n\r"
>> >>> +             "pop %rdx\n\r"
>> >>> +             "pop %rsi\n\r"
>> >>> +             "pop %rdi\n\r"
>> >>> +             );
>> >>> +}
>> >>> +
>> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >>>  {
>> >>>      ++exceptions;
>> >>>
>> >>
>> >
>> >
>> >
>> > --
>> > Arthur Chunqi Li
>> > Department of Computer Science
>> > School of EECS
>> > Peking University
>> > Beijing, China
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 18, 2013, 2:28 p.m. UTC | #8
On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Thu, Jun 13, 2013 at 05:30:03PM +0800, ??? <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> I'm trying to solve these problems in the past days and meet many
>> difficulties. You want to save all the general registers in calling
>> insn_page, so registers should be saved to (save) in insn_page.
>> Because all the instructions should be generated outside and copy to
>> insn_page, and the instructions generated outside is RIP-relative, so
>> inside insn_page (save) will be wrong pointed with RIP-relative code.
>>
> They do not have to be generated outside. You can write code into
> insn_page directly. Something like this outside of any functions:
>
> asm(".align 4096\n\t"
>     ".global insn_page\n\t"
>     ".global insn_page_end\n\t"
>     ".global test_insn\n\t"
>     ".global test_insn_end\n\t"
>     "insn_page:"
>     "mov %%rax, outregs \n\t"
>     ...
>     "test_insn:\n\t"
>     "in (%ds), %al\n\t"
>     ". = . + 31\n\t"
>     "test_insn_end:\n\t"
>     "mov outregs, %%rax\n\t"
>     ...
>     "ret\n\t"
>     ".align 4096\n\t"
>     "insn_page_end:\n\t");
>
> Now you copy that into alt_insn_page, put instruction you want to test
> into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
I used such codes:

invlpg((void *)virt_to_phys(insn_page));
asm volatile("call *%0" : : "r"(insn_page));
install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
asm volatile("call *%0": : "r"(insn_page+1));

But it seems that alt_insn_page are not remapped to insn_page. Here
insn_page and alt_insn_page are all declared statically with
"asm(...)".

Arthur
>
>> I have tried to move (save) into insn_page. But when calling
>> insn_page, data in it can only be read and any instructions like "xchg
>> %%rax, 0+%[save]" may cause error, because at this time read is from
>> TLB but write will cause inconsistent.
>>
>> Another way is disabling RIP-relative code, but I failed when using
>> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
>> Is there any way to totally disable RIP-relative code? Besides, using
>> this feature may specified to some newer C compiler. This may not be a
>> good solution.
>>
>> If we don't set %rsp and %rbp when executing emulator code, we can
>> just use “push/pop" to save other general registers.
>>
>> If you have any better solutions, please let me know.
>>
>> Thanks,
>> Arthur
>>
>> On Thu, Jun 13, 2013 at 12:50 PM, ??? <Arthur Chunqi Li>
>> <yzt356@gmail.com> wrote:
>> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> >>> Add a function trap_emulator to run an instruction in emulator.
>> >>> Set inregs first (%rax is invalid because it is used as return
>> >>> address), put instruction codec in alt_insn and call func with
>> >>> alt_insn_length. Get results in outregs.
>> >>>
>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >>> ---
>> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>>  1 file changed, 81 insertions(+)
>> >>>
>> >>> diff --git a/x86/emulator.c b/x86/emulator.c
>> >>> index 96576e5..8ab9904 100644
>> >>> --- a/x86/emulator.c
>> >>> +++ b/x86/emulator.c
>> >>> @@ -11,6 +11,14 @@ int fails, tests;
>> >>>
>> >>>  static int exceptions;
>> >>>
>> >>> +struct regs {
>> >>> +     u64 rax, rbx, rcx, rdx;
>> >>> +     u64 rsi, rdi, rsp, rbp;
>> >>> +     u64 rip, rflags;
>> >>> +};
>> >>> +
>> >>> +static struct regs inregs, outregs;
>> >>> +
>> >>>  void report(const char *name, int result)
>> >>>  {
>> >>>       ++tests;
>> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >>>  }
>> >>>
>> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >>> +                          uint8_t *alt_insn, int alt_insn_length)
>> >>> +{
>> >>> +     ulong *cr3 = (ulong *)read_cr3();
>> >>> +     int i;
>> >>> +
>> >>> +     // Pad with RET instructions
>> >>> +     memset(insn_page, 0xc3, 4096);
>> >>> +     memset(alt_insn_page, 0xc3, 4096);
>> >>> +
>> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >>> +     insn_page[1] = 0x00;
>> >>> +     insn_page[2] = 0x90; // nop
>> >>> +     insn_page[3] = 0xc3; // ret
>> >>> +
>> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >>> +     for (i=0; i<alt_insn_length; i++)
>> >>> +             alt_insn_page[i] = alt_insn[i];
>> >>> +
>> >>> +     // Save general registers
>> >>> +     asm volatile(
>> >>> +             "push %rax\n\r"
>> >>> +             "push %rbx\n\r"
>> >>> +             "push %rcx\n\r"
>> >>> +             "push %rdx\n\r"
>> >>> +             "push %rsi\n\r"
>> >>> +             "push %rdi\n\r"
>> >>> +             );
>> >>
>> >> This will not work if GCC is using rsp-relative addresses to access
>> >> local variables.  You need to use mov instructions to load from inregs,
>> >> and put the push/pop sequences inside the "main" asm that does the "call
>> >> *%1".
>> > Is there any way to let gcc use absolute address to access variables?
>> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> > and it seems that addressing for "save" is wrong.
>> >
>> > Arthur
>> >>
>> >> Paolo
>> >>
>> >>> +     // Load the code TLB with insn_page, but point the page tables at
>> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >>> +     // This will make the CPU trap on the insn_page instruction but the
>> >>> +     // hypervisor will see alt_insn_page.
>> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >>> +     invlpg(insn_ram);
>> >>> +     // Load code TLB
>> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >>> +     // Trap, let hypervisor emulate at alt_insn_page
>> >>> +     asm volatile(
>> >>> +             "call *%1\n\r"
>> >>> +
>> >>> +             "mov %%rax, 0+%[outregs] \n\t"
>> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> >>> +
>> >>> +             /* Save RFLAGS in outregs*/
>> >>> +             "pushf \n\t"
>> >>> +             "popq 72+%[outregs] \n\t"
>> >>> +             : [outregs]"+m"(outregs)
>> >>> +             : "r"(insn_ram),
>> >>> +                     "a"(mem), "b"(inregs.rbx),
>> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> >>> +             : "memory", "cc"
>> >>> +             );
>> >>> +     // Restore general registers
>> >>> +     asm volatile(
>> >>> +             "pop %rax\n\r"
>> >>> +             "pop %rbx\n\r"
>> >>> +             "pop %rcx\n\r"
>> >>> +             "pop %rdx\n\r"
>> >>> +             "pop %rsi\n\r"
>> >>> +             "pop %rdi\n\r"
>> >>> +             );
>> >>> +}
>> >>> +
>> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >>>  {
>> >>>      ++exceptions;
>> >>>
>> >>
>> >
>> >
>> >
>> > --
>> > Arthur Chunqi Li
>> > Department of Computer Science
>> > School of EECS
>> > Peking University
>> > Beijing, China
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 18, 2013, 3:56 p.m. UTC | #9
On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
>> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> Hi Gleb,
>> >> I'm trying to solve these problems in the past days and meet many
>> >> difficulties. You want to save all the general registers in calling
>> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> Because all the instructions should be generated outside and copy to
>> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >>
>> > They do not have to be generated outside. You can write code into
>> > insn_page directly. Something like this outside of any functions:
>> >
>> > asm(".align 4096\n\t"
>> >     ".global insn_page\n\t"
>> >     ".global insn_page_end\n\t"
>> >     ".global test_insn\n\t"
>> >     ".global test_insn_end\n\t"
>> >     "insn_page:"
>> >     "mov %%rax, outregs \n\t"
>> >     ...
>> >     "test_insn:\n\t"
>> >     "in (%ds), %al\n\t"
>> >     ". = . + 31\n\t"
>> >     "test_insn_end:\n\t"
>> >     "mov outregs, %%rax\n\t"
>> >     ...
>> >     "ret\n\t"
>> >     ".align 4096\n\t"
>> >     "insn_page_end:\n\t");
>> >
>> > Now you copy that into alt_insn_page, put instruction you want to test
>> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> I used such codes:
>>
>> invlpg((void *)virt_to_phys(insn_page));
> virt_to_phys?
This is a mistake, I changed it to "invlpg(insn_page)" but the result
is the same.
>
>> asm volatile("call *%0" : : "r"(insn_page));
>> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> asm volatile("call *%0": : "r"(insn_page+1));
> +1?
Here I put "ret" on the first byte of insn_page, so the first call of
"insn_page" can just return, and the second call of "insn_page+1“ will
directly call the second byte, which is the real content of insn_page.
>
>>
>> But it seems that alt_insn_page are not remapped to insn_page. Here
>> insn_page and alt_insn_page are all declared statically with
>> "asm(...)".
>>
>> Arthur
>> >
>> >> I have tried to move (save) into insn_page. But when calling
>> >> insn_page, data in it can only be read and any instructions like "xchg
>> >> %%rax, 0+%[save]" may cause error, because at this time read is from
>> >> TLB but write will cause inconsistent.
>> >>
>> >> Another way is disabling RIP-relative code, but I failed when using
>> >> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
>> >> Is there any way to totally disable RIP-relative code? Besides, using
>> >> this feature may specified to some newer C compiler. This may not be a
>> >> good solution.
>> >>
>> >> If we don't set %rsp and %rbp when executing emulator code, we can
>> >> just use “push/pop" to save other general registers.
>> >>
>> >> If you have any better solutions, please let me know.
>> >>
>> >> Thanks,
>> >> Arthur
>> >>
>> >> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
>> >> <yzt356@gmail.com> wrote:
>> >> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> >> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> >> >>> Add a function trap_emulator to run an instruction in emulator.
>> >> >>> Set inregs first (%rax is invalid because it is used as return
>> >> >>> address), put instruction codec in alt_insn and call func with
>> >> >>> alt_insn_length. Get results in outregs.
>> >> >>>
>> >> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >> >>> ---
>> >> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >> >>>  1 file changed, 81 insertions(+)
>> >> >>>
>> >> >>> diff --git a/x86/emulator.c b/x86/emulator.c
>> >> >>> index 96576e5..8ab9904 100644
>> >> >>> --- a/x86/emulator.c
>> >> >>> +++ b/x86/emulator.c
>> >> >>> @@ -11,6 +11,14 @@ int fails, tests;
>> >> >>>
>> >> >>>  static int exceptions;
>> >> >>>
>> >> >>> +struct regs {
>> >> >>> +     u64 rax, rbx, rcx, rdx;
>> >> >>> +     u64 rsi, rdi, rsp, rbp;
>> >> >>> +     u64 rip, rflags;
>> >> >>> +};
>> >> >>> +
>> >> >>> +static struct regs inregs, outregs;
>> >> >>> +
>> >> >>>  void report(const char *name, int result)
>> >> >>>  {
>> >> >>>       ++tests;
>> >> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>> >> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >> >>>  }
>> >> >>>
>> >> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >> >>> +                          uint8_t *alt_insn, int alt_insn_length)
>> >> >>> +{
>> >> >>> +     ulong *cr3 = (ulong *)read_cr3();
>> >> >>> +     int i;
>> >> >>> +
>> >> >>> +     // Pad with RET instructions
>> >> >>> +     memset(insn_page, 0xc3, 4096);
>> >> >>> +     memset(alt_insn_page, 0xc3, 4096);
>> >> >>> +
>> >> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >> >>> +     insn_page[1] = 0x00;
>> >> >>> +     insn_page[2] = 0x90; // nop
>> >> >>> +     insn_page[3] = 0xc3; // ret
>> >> >>> +
>> >> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >> >>> +     for (i=0; i<alt_insn_length; i++)
>> >> >>> +             alt_insn_page[i] = alt_insn[i];
>> >> >>> +
>> >> >>> +     // Save general registers
>> >> >>> +     asm volatile(
>> >> >>> +             "push %rax\n\r"
>> >> >>> +             "push %rbx\n\r"
>> >> >>> +             "push %rcx\n\r"
>> >> >>> +             "push %rdx\n\r"
>> >> >>> +             "push %rsi\n\r"
>> >> >>> +             "push %rdi\n\r"
>> >> >>> +             );
>> >> >>
>> >> >> This will not work if GCC is using rsp-relative addresses to access
>> >> >> local variables.  You need to use mov instructions to load from inregs,
>> >> >> and put the push/pop sequences inside the "main" asm that does the "call
>> >> >> *%1".
>> >> > Is there any way to let gcc use absolute address to access variables?
>> >> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> >> > and it seems that addressing for "save" is wrong.
>> >> >
>> >> > Arthur
>> >> >>
>> >> >> Paolo
>> >> >>
>> >> >>> +     // Load the code TLB with insn_page, but point the page tables at
>> >> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> >>> +     // This will make the CPU trap on the insn_page instruction but the
>> >> >>> +     // hypervisor will see alt_insn_page.
>> >> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >> >>> +     invlpg(insn_ram);
>> >> >>> +     // Load code TLB
>> >> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >> >>> +     // Trap, let hypervisor emulate at alt_insn_page
>> >> >>> +     asm volatile(
>> >> >>> +             "call *%1\n\r"
>> >> >>> +
>> >> >>> +             "mov %%rax, 0+%[outregs] \n\t"
>> >> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> >> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> >> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> >> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> >> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> >> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> >> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> >> >>> +
>> >> >>> +             /* Save RFLAGS in outregs*/
>> >> >>> +             "pushf \n\t"
>> >> >>> +             "popq 72+%[outregs] \n\t"
>> >> >>> +             : [outregs]"+m"(outregs)
>> >> >>> +             : "r"(insn_ram),
>> >> >>> +                     "a"(mem), "b"(inregs.rbx),
>> >> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> >> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> >> >>> +             : "memory", "cc"
>> >> >>> +             );
>> >> >>> +     // Restore general registers
>> >> >>> +     asm volatile(
>> >> >>> +             "pop %rax\n\r"
>> >> >>> +             "pop %rbx\n\r"
>> >> >>> +             "pop %rcx\n\r"
>> >> >>> +             "pop %rdx\n\r"
>> >> >>> +             "pop %rsi\n\r"
>> >> >>> +             "pop %rdi\n\r"
>> >> >>> +             );
>> >> >>> +}
>> >> >>> +
>> >> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >> >>>  {
>> >> >>>      ++exceptions;
>> >> >>>
>> >> >>
>> >> >
>> >> >
>> >> >
>> >> > --
>> >> > Arthur Chunqi Li
>> >> > Department of Computer Science
>> >> > School of EECS
>> >> > Peking University
>> >> > Beijing, China
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 18, 2013, 4:09 p.m. UTC | #10
On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> Hi Gleb,
> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> difficulties. You want to save all the general registers in calling
> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> Because all the instructions should be generated outside and copy to
> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >>
> >> > They do not have to be generated outside. You can write code into
> >> > insn_page directly. Something like this outside of any functions:
> >> >
> >> > asm(".align 4096\n\t"
> >> >     ".global insn_page\n\t"
> >> >     ".global insn_page_end\n\t"
> >> >     ".global test_insn\n\t"
> >> >     ".global test_insn_end\n\t"
> >> >     "insn_page:"
> >> >     "mov %%rax, outregs \n\t"
> >> >     ...
> >> >     "test_insn:\n\t"
> >> >     "in (%ds), %al\n\t"
> >> >     ". = . + 31\n\t"
> >> >     "test_insn_end:\n\t"
> >> >     "mov outregs, %%rax\n\t"
> >> >     ...
> >> >     "ret\n\t"
> >> >     ".align 4096\n\t"
> >> >     "insn_page_end:\n\t");
> >> >
> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> I used such codes:
> >>
> >> invlpg((void *)virt_to_phys(insn_page));
> > virt_to_phys?
> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> is the same.
> >
> >> asm volatile("call *%0" : : "r"(insn_page));
> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> asm volatile("call *%0": : "r"(insn_page+1));
> > +1?
> Here I put "ret" on the first byte of insn_page, so the first call of
> "insn_page" can just return, and the second call of "insn_page+1“ will
> directly call the second byte, which is the real content of insn_page.
Send the code.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 18, 2013, 4:14 p.m. UTC | #11
extern u8 insn_page[], insn_page_end[];
extern u8 test_insn[], test_insn_end[];
extern u8 alt_insn_page[];

asm(
".align 4096\n\t"
".global insn_page\n\t"
".global insn_page_end\n\t"
".global test_insn\n\t"
".global test_insn_end\n\t"
"insn_page:\n\t"

"ret \n\t"

"push %rax; push %rbx\n\t"
"push %rcx; push %rdx\n\t"
"push %rsi; push %rdi\n\t"
"push %rbp\n\t"
"push %r8; push %r9\n\t"
"push %r10; push %r11\n\t"
"push %r12; push %r13\n\t"
"push %r14; push %r15\n\t"
"pushf\n\t"

"push 136+save \n\t"
"popf \n\t"
"mov 0+save, %rax \n\t"
"mov 8+save, %rbx \n\t"
"mov 16+save, %rcx \n\t"
"mov 24+save, %rdx \n\t"
"mov 32+save, %rsi \n\t"
"mov 40+save, %rdi \n\t"
"mov 56+save, %rbp \n\t"
"mov 64+save, %r8 \n\t"
"mov 72+save, %r9 \n\t"
"mov 80+save, %r10  \n\t"
"mov 88+save, %r11 \n\t"
"mov 96+save, %r12 \n\t"
"mov 104+save, %r13 \n\t"
"mov 112+save, %r14 \n\t"
"mov 120+save, %r15 \n\t"

"test_insn:\n\t"
"in  (%dx),%al\n\t"
". = . + 31\n\t"
"test_insn_end:\n\t"

"pushf \n\t"
"pop 136+save \n\t"
"mov %rax, 0+save \n\t"
"mov %rbx, 8+save \n\t"
"mov %rcx, 16+save \n\t"
"mov %rdx, 24+save \n\t"
"mov %rsi, 32+save \n\t"
"mov %rdi, 40+save \n\t"
"mov %rbp, 56+save \n\t"
"mov %r8, 64+save \n\t"
"mov %r9, 72+save \n\t"
"mov %r10, 80+save \n\t"
"mov %r11, 88+save \n\t"
"mov %r12, 96+save \n\t"
"mov %r13, 104+save \n\t"
"mov %r14, 112+save \n\t"
"mov %r15, 120+save \n\t"
"popf \n\t"
"pop %r15; pop %r14 \n\t"
"pop %r13; pop %r12 \n\t"
"pop %r11; pop %r10 \n\t"
"pop %r9; pop %r8 \n\t"
"pop %rbp \n\t"
"pop %rdi; pop %rsi \n\t"
"pop %rdx; pop %rcx \n\t"
"pop %rbx; pop %rax \n\t"

"ret\n\t"
"save:\n\t"
". = . + 256\n\t"
".align 4096\n\t"
"alt_insn_page:\n\t"
". = . + 4096\n\t"
);


static void mk_insn_page(uint8_t *alt_insn_page,
uint8_t *alt_insn, int alt_insn_length)
{
    int i, emul_offset;
    for (i=1; i<test_insn_end - test_insn; i++)
        test_insn[i] = 0x90; // nop
    emul_offset = test_insn - insn_page;
    for (i=0; i<alt_insn_length; i++)
        alt_insn_page[i+emul_offset] = alt_insn[i];
}

static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
{
    ulong *cr3 = (ulong *)read_cr3();
    int save_offset = (u8 *)(&save) - insn_page;

    memset(alt_insn_page, 0x90, 4096);
    save = inregs;
    mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
    // Load the code TLB with insn_page, but point the page tables at
    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
    // This will make the CPU trap on the insn_page instruction but the
    // hypervisor will see alt_insn_page.
    //install_page(cr3, virt_to_phys(insn_page), insn_page);
    invlpg(insn_page);
    // Load code TLB
    asm volatile("call *%0" : : "r"(insn_page));
    install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
    // Trap, let hypervisor emulate at alt_insn_page
    asm volatile("call *%0": : "r"(insn_page+1));

    outregs = *((struct regs *)(&alt_insn_page[save_offset]));
}

static void test_movabs(uint64_t *mem)
{
    // mov $0xc3c3c3c3c3c3c3c3, %rcx
    uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
                                0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
    inregs = (struct regs){ 0 };
    trap_emulator(mem, alt_insn, 10);
    report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
}

On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
>> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
>> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> Hi Gleb,
>> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> difficulties. You want to save all the general registers in calling
>> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> Because all the instructions should be generated outside and copy to
>> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >>
>> >> > They do not have to be generated outside. You can write code into
>> >> > insn_page directly. Something like this outside of any functions:
>> >> >
>> >> > asm(".align 4096\n\t"
>> >> >     ".global insn_page\n\t"
>> >> >     ".global insn_page_end\n\t"
>> >> >     ".global test_insn\n\t"
>> >> >     ".global test_insn_end\n\t"
>> >> >     "insn_page:"
>> >> >     "mov %%rax, outregs \n\t"
>> >> >     ...
>> >> >     "test_insn:\n\t"
>> >> >     "in (%ds), %al\n\t"
>> >> >     ". = . + 31\n\t"
>> >> >     "test_insn_end:\n\t"
>> >> >     "mov outregs, %%rax\n\t"
>> >> >     ...
>> >> >     "ret\n\t"
>> >> >     ".align 4096\n\t"
>> >> >     "insn_page_end:\n\t");
>> >> >
>> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> I used such codes:
>> >>
>> >> invlpg((void *)virt_to_phys(insn_page));
>> > virt_to_phys?
>> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> is the same.
>> >
>> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> asm volatile("call *%0": : "r"(insn_page+1));
>> > +1?
>> Here I put "ret" on the first byte of insn_page, so the first call of
>> "insn_page" can just return, and the second call of "insn_page+1“ will
>> directly call the second byte, which is the real content of insn_page.
> Send the code.
>
> --
>                         Gleb.
Gleb Natapov June 18, 2013, 4:44 p.m. UTC | #12
Send code in a form of a patch.

On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
> extern u8 insn_page[], insn_page_end[];
> extern u8 test_insn[], test_insn_end[];
> extern u8 alt_insn_page[];
> 
> asm(
> ".align 4096\n\t"
> ".global insn_page\n\t"
> ".global insn_page_end\n\t"
> ".global test_insn\n\t"
> ".global test_insn_end\n\t"
> "insn_page:\n\t"
> 
> "ret \n\t"
> 
> "push %rax; push %rbx\n\t"
> "push %rcx; push %rdx\n\t"
> "push %rsi; push %rdi\n\t"
> "push %rbp\n\t"
> "push %r8; push %r9\n\t"
> "push %r10; push %r11\n\t"
> "push %r12; push %r13\n\t"
> "push %r14; push %r15\n\t"
> "pushf\n\t"
> 
> "push 136+save \n\t"
> "popf \n\t"
> "mov 0+save, %rax \n\t"
> "mov 8+save, %rbx \n\t"
> "mov 16+save, %rcx \n\t"
> "mov 24+save, %rdx \n\t"
> "mov 32+save, %rsi \n\t"
> "mov 40+save, %rdi \n\t"
> "mov 56+save, %rbp \n\t"
> "mov 64+save, %r8 \n\t"
> "mov 72+save, %r9 \n\t"
> "mov 80+save, %r10  \n\t"
> "mov 88+save, %r11 \n\t"
> "mov 96+save, %r12 \n\t"
> "mov 104+save, %r13 \n\t"
> "mov 112+save, %r14 \n\t"
> "mov 120+save, %r15 \n\t"
> 
> "test_insn:\n\t"
> "in  (%dx),%al\n\t"
> ". = . + 31\n\t"
> "test_insn_end:\n\t"
> 
> "pushf \n\t"
> "pop 136+save \n\t"
> "mov %rax, 0+save \n\t"
> "mov %rbx, 8+save \n\t"
> "mov %rcx, 16+save \n\t"
> "mov %rdx, 24+save \n\t"
> "mov %rsi, 32+save \n\t"
> "mov %rdi, 40+save \n\t"
> "mov %rbp, 56+save \n\t"
> "mov %r8, 64+save \n\t"
> "mov %r9, 72+save \n\t"
> "mov %r10, 80+save \n\t"
> "mov %r11, 88+save \n\t"
> "mov %r12, 96+save \n\t"
> "mov %r13, 104+save \n\t"
> "mov %r14, 112+save \n\t"
> "mov %r15, 120+save \n\t"
> "popf \n\t"
> "pop %r15; pop %r14 \n\t"
> "pop %r13; pop %r12 \n\t"
> "pop %r11; pop %r10 \n\t"
> "pop %r9; pop %r8 \n\t"
> "pop %rbp \n\t"
> "pop %rdi; pop %rsi \n\t"
> "pop %rdx; pop %rcx \n\t"
> "pop %rbx; pop %rax \n\t"
> 
> "ret\n\t"
> "save:\n\t"
> ". = . + 256\n\t"
> ".align 4096\n\t"
> "alt_insn_page:\n\t"
> ". = . + 4096\n\t"
> );
> 
> 
> static void mk_insn_page(uint8_t *alt_insn_page,
> uint8_t *alt_insn, int alt_insn_length)
> {
>     int i, emul_offset;
>     for (i=1; i<test_insn_end - test_insn; i++)
>         test_insn[i] = 0x90; // nop
Why? Gcc should pad it with nops.

>     emul_offset = test_insn - insn_page;
>     for (i=0; i<alt_insn_length; i++)
>         alt_insn_page[i+emul_offset] = alt_insn[i];
> }
> 
> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> {
>     ulong *cr3 = (ulong *)read_cr3();
>     int save_offset = (u8 *)(&save) - insn_page;
> 
>     memset(alt_insn_page, 0x90, 4096);
alt_insn_page should contains the same instruction as insn_page except
between test_insn and test_insn_end. I do not know how you expect it to
work otherwise.

>     save = inregs;
>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>     // Load the code TLB with insn_page, but point the page tables at
>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>     // This will make the CPU trap on the insn_page instruction but the
>     // hypervisor will see alt_insn_page.
>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>     invlpg(insn_page);
>     // Load code TLB
>     asm volatile("call *%0" : : "r"(insn_page));
>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>     // Trap, let hypervisor emulate at alt_insn_page
>     asm volatile("call *%0": : "r"(insn_page+1));
> 
>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
> }
> 
> static void test_movabs(uint64_t *mem)
> {
>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>     inregs = (struct regs){ 0 };
>     trap_emulator(mem, alt_insn, 10);
>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
> }
> 
> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> Hi Gleb,
> >> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> >> difficulties. You want to save all the general registers in calling
> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> >> Because all the instructions should be generated outside and copy to
> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >> >>
> >> >> > They do not have to be generated outside. You can write code into
> >> >> > insn_page directly. Something like this outside of any functions:
> >> >> >
> >> >> > asm(".align 4096\n\t"
> >> >> >     ".global insn_page\n\t"
> >> >> >     ".global insn_page_end\n\t"
> >> >> >     ".global test_insn\n\t"
> >> >> >     ".global test_insn_end\n\t"
> >> >> >     "insn_page:"
> >> >> >     "mov %%rax, outregs \n\t"
> >> >> >     ...
> >> >> >     "test_insn:\n\t"
> >> >> >     "in (%ds), %al\n\t"
> >> >> >     ". = . + 31\n\t"
> >> >> >     "test_insn_end:\n\t"
> >> >> >     "mov outregs, %%rax\n\t"
> >> >> >     ...
> >> >> >     "ret\n\t"
> >> >> >     ".align 4096\n\t"
> >> >> >     "insn_page_end:\n\t");
> >> >> >
> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> >> I used such codes:
> >> >>
> >> >> invlpg((void *)virt_to_phys(insn_page));
> >> > virt_to_phys?
> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> >> is the same.
> >> >
> >> >> asm volatile("call *%0" : : "r"(insn_page));
> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >> asm volatile("call *%0": : "r"(insn_page+1));
> >> > +1?
> >> Here I put "ret" on the first byte of insn_page, so the first call of
> >> "insn_page" can just return, and the second call of "insn_page+1“ will
> >> directly call the second byte, which is the real content of insn_page.
> > Send the code.
> >
> > --
> >                         Gleb.
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 19, 2013, 1:26 a.m. UTC | #13
On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> Send code in a form of a patch.
>
> On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
>> extern u8 insn_page[], insn_page_end[];
>> extern u8 test_insn[], test_insn_end[];
>> extern u8 alt_insn_page[];
>>
>> asm(
>> ".align 4096\n\t"
>> ".global insn_page\n\t"
>> ".global insn_page_end\n\t"
>> ".global test_insn\n\t"
>> ".global test_insn_end\n\t"
>> "insn_page:\n\t"
>>
>> "ret \n\t"
>>
>> "push %rax; push %rbx\n\t"
>> "push %rcx; push %rdx\n\t"
>> "push %rsi; push %rdi\n\t"
>> "push %rbp\n\t"
>> "push %r8; push %r9\n\t"
>> "push %r10; push %r11\n\t"
>> "push %r12; push %r13\n\t"
>> "push %r14; push %r15\n\t"
>> "pushf\n\t"
>>
>> "push 136+save \n\t"
>> "popf \n\t"
>> "mov 0+save, %rax \n\t"
>> "mov 8+save, %rbx \n\t"
>> "mov 16+save, %rcx \n\t"
>> "mov 24+save, %rdx \n\t"
>> "mov 32+save, %rsi \n\t"
>> "mov 40+save, %rdi \n\t"
>> "mov 56+save, %rbp \n\t"
>> "mov 64+save, %r8 \n\t"
>> "mov 72+save, %r9 \n\t"
>> "mov 80+save, %r10  \n\t"
>> "mov 88+save, %r11 \n\t"
>> "mov 96+save, %r12 \n\t"
>> "mov 104+save, %r13 \n\t"
>> "mov 112+save, %r14 \n\t"
>> "mov 120+save, %r15 \n\t"
>>
>> "test_insn:\n\t"
>> "in  (%dx),%al\n\t"
>> ". = . + 31\n\t"
>> "test_insn_end:\n\t"
>>
>> "pushf \n\t"
>> "pop 136+save \n\t"
>> "mov %rax, 0+save \n\t"
>> "mov %rbx, 8+save \n\t"
>> "mov %rcx, 16+save \n\t"
>> "mov %rdx, 24+save \n\t"
>> "mov %rsi, 32+save \n\t"
>> "mov %rdi, 40+save \n\t"
>> "mov %rbp, 56+save \n\t"
>> "mov %r8, 64+save \n\t"
>> "mov %r9, 72+save \n\t"
>> "mov %r10, 80+save \n\t"
>> "mov %r11, 88+save \n\t"
>> "mov %r12, 96+save \n\t"
>> "mov %r13, 104+save \n\t"
>> "mov %r14, 112+save \n\t"
>> "mov %r15, 120+save \n\t"
>> "popf \n\t"
>> "pop %r15; pop %r14 \n\t"
>> "pop %r13; pop %r12 \n\t"
>> "pop %r11; pop %r10 \n\t"
>> "pop %r9; pop %r8 \n\t"
>> "pop %rbp \n\t"
>> "pop %rdi; pop %rsi \n\t"
>> "pop %rdx; pop %rcx \n\t"
>> "pop %rbx; pop %rax \n\t"
>>
>> "ret\n\t"
>> "save:\n\t"
>> ". = . + 256\n\t"
>> ".align 4096\n\t"
>> "alt_insn_page:\n\t"
>> ". = . + 4096\n\t"
>> );
>>
>>
>> static void mk_insn_page(uint8_t *alt_insn_page,
>> uint8_t *alt_insn, int alt_insn_length)
>> {
>>     int i, emul_offset;
>>     for (i=1; i<test_insn_end - test_insn; i++)
>>         test_insn[i] = 0x90; // nop
> Why? Gcc should pad it with nops.
>
>>     emul_offset = test_insn - insn_page;
>>     for (i=0; i<alt_insn_length; i++)
>>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> }
>>
>> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> {
>>     ulong *cr3 = (ulong *)read_cr3();
>>     int save_offset = (u8 *)(&save) - insn_page;
>>
>>     memset(alt_insn_page, 0x90, 4096);
> alt_insn_page should contains the same instruction as insn_page except
> between test_insn and test_insn_end. I do not know how you expect it to
> work otherwise.
In my oponion, only codes between test_insn and test_insn_end in
alt_insn_page need to be set, insn_page will be executed in the guest,
and when trapping into emulator OS will load alt_insn_page (because of
invlpg(insn_page)), then return to guest with executing insn_page
(from TLB). I don't know if this is right, but I use this trick in my
previous patch and it runs well. I use "trace-cmd record -e kvm" to
trace it and found instructions in alt_insn_page are not executed, so
I suppose that alt_insn_page is not loaded to the right place.

Arthur
>
>>     save = inregs;
>>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>>     // Load the code TLB with insn_page, but point the page tables at
>>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>     // This will make the CPU trap on the insn_page instruction but the
>>     // hypervisor will see alt_insn_page.
>>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>>     invlpg(insn_page);
>>     // Load code TLB
>>     asm volatile("call *%0" : : "r"(insn_page));
>>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>>     // Trap, let hypervisor emulate at alt_insn_page
>>     asm volatile("call *%0": : "r"(insn_page+1));
>>
>>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
>> }
>>
>> static void test_movabs(uint64_t *mem)
>> {
>>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>>     inregs = (struct regs){ 0 };
>>     trap_emulator(mem, alt_insn, 10);
>>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>> }
>>
>> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
>> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> Hi Gleb,
>> >> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> >> difficulties. You want to save all the general registers in calling
>> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> >> Because all the instructions should be generated outside and copy to
>> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >> >>
>> >> >> > They do not have to be generated outside. You can write code into
>> >> >> > insn_page directly. Something like this outside of any functions:
>> >> >> >
>> >> >> > asm(".align 4096\n\t"
>> >> >> >     ".global insn_page\n\t"
>> >> >> >     ".global insn_page_end\n\t"
>> >> >> >     ".global test_insn\n\t"
>> >> >> >     ".global test_insn_end\n\t"
>> >> >> >     "insn_page:"
>> >> >> >     "mov %%rax, outregs \n\t"
>> >> >> >     ...
>> >> >> >     "test_insn:\n\t"
>> >> >> >     "in (%ds), %al\n\t"
>> >> >> >     ". = . + 31\n\t"
>> >> >> >     "test_insn_end:\n\t"
>> >> >> >     "mov outregs, %%rax\n\t"
>> >> >> >     ...
>> >> >> >     "ret\n\t"
>> >> >> >     ".align 4096\n\t"
>> >> >> >     "insn_page_end:\n\t");
>> >> >> >
>> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> >> I used such codes:
>> >> >>
>> >> >> invlpg((void *)virt_to_phys(insn_page));
>> >> > virt_to_phys?
>> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> >> is the same.
>> >> >
>> >> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >> asm volatile("call *%0": : "r"(insn_page+1));
>> >> > +1?
>> >> Here I put "ret" on the first byte of insn_page, so the first call of
>> >> "insn_page" can just return, and the second call of "insn_page+1“ will
>> >> directly call the second byte, which is the real content of insn_page.
>> > Send the code.
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 19, 2013, 9:31 a.m. UTC | #14
On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > Send code in a form of a patch.
> >
> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> extern u8 insn_page[], insn_page_end[];
> >> extern u8 test_insn[], test_insn_end[];
> >> extern u8 alt_insn_page[];
> >>
> >> asm(
> >> ".align 4096\n\t"
> >> ".global insn_page\n\t"
> >> ".global insn_page_end\n\t"
> >> ".global test_insn\n\t"
> >> ".global test_insn_end\n\t"
> >> "insn_page:\n\t"
> >>
> >> "ret \n\t"
> >>
> >> "push %rax; push %rbx\n\t"
> >> "push %rcx; push %rdx\n\t"
> >> "push %rsi; push %rdi\n\t"
> >> "push %rbp\n\t"
> >> "push %r8; push %r9\n\t"
> >> "push %r10; push %r11\n\t"
> >> "push %r12; push %r13\n\t"
> >> "push %r14; push %r15\n\t"
> >> "pushf\n\t"
> >>
> >> "push 136+save \n\t"
> >> "popf \n\t"
> >> "mov 0+save, %rax \n\t"
> >> "mov 8+save, %rbx \n\t"
> >> "mov 16+save, %rcx \n\t"
> >> "mov 24+save, %rdx \n\t"
> >> "mov 32+save, %rsi \n\t"
> >> "mov 40+save, %rdi \n\t"
> >> "mov 56+save, %rbp \n\t"
> >> "mov 64+save, %r8 \n\t"
> >> "mov 72+save, %r9 \n\t"
> >> "mov 80+save, %r10  \n\t"
> >> "mov 88+save, %r11 \n\t"
> >> "mov 96+save, %r12 \n\t"
> >> "mov 104+save, %r13 \n\t"
> >> "mov 112+save, %r14 \n\t"
> >> "mov 120+save, %r15 \n\t"
> >>
> >> "test_insn:\n\t"
> >> "in  (%dx),%al\n\t"
> >> ". = . + 31\n\t"
> >> "test_insn_end:\n\t"
> >>
> >> "pushf \n\t"
> >> "pop 136+save \n\t"
> >> "mov %rax, 0+save \n\t"
> >> "mov %rbx, 8+save \n\t"
> >> "mov %rcx, 16+save \n\t"
> >> "mov %rdx, 24+save \n\t"
> >> "mov %rsi, 32+save \n\t"
> >> "mov %rdi, 40+save \n\t"
> >> "mov %rbp, 56+save \n\t"
> >> "mov %r8, 64+save \n\t"
> >> "mov %r9, 72+save \n\t"
> >> "mov %r10, 80+save \n\t"
> >> "mov %r11, 88+save \n\t"
> >> "mov %r12, 96+save \n\t"
> >> "mov %r13, 104+save \n\t"
> >> "mov %r14, 112+save \n\t"
> >> "mov %r15, 120+save \n\t"
> >> "popf \n\t"
> >> "pop %r15; pop %r14 \n\t"
> >> "pop %r13; pop %r12 \n\t"
> >> "pop %r11; pop %r10 \n\t"
> >> "pop %r9; pop %r8 \n\t"
> >> "pop %rbp \n\t"
> >> "pop %rdi; pop %rsi \n\t"
> >> "pop %rdx; pop %rcx \n\t"
> >> "pop %rbx; pop %rax \n\t"
> >>
> >> "ret\n\t"
> >> "save:\n\t"
> >> ". = . + 256\n\t"
> >> ".align 4096\n\t"
> >> "alt_insn_page:\n\t"
> >> ". = . + 4096\n\t"
> >> );
> >>
> >>
> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> uint8_t *alt_insn, int alt_insn_length)
> >> {
> >>     int i, emul_offset;
> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >>         test_insn[i] = 0x90; // nop
> > Why? Gcc should pad it with nops.
> >
> >>     emul_offset = test_insn - insn_page;
> >>     for (i=0; i<alt_insn_length; i++)
> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> }
> >>
> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> {
> >>     ulong *cr3 = (ulong *)read_cr3();
> >>     int save_offset = (u8 *)(&save) - insn_page;
> >>
> >>     memset(alt_insn_page, 0x90, 4096);
> > alt_insn_page should contains the same instruction as insn_page except
> > between test_insn and test_insn_end. I do not know how you expect it to
> > work otherwise.
> In my oponion, only codes between test_insn and test_insn_end in
> alt_insn_page need to be set, insn_page will be executed in the guest,
> and when trapping into emulator OS will load alt_insn_page (because of
> invlpg(insn_page)), then return to guest with executing insn_page
> (from TLB).
While before trap the code will likely be executed from insn_page,
but after the trap it is very optimistic to assume that tlb cache
will still contain this virtual address since host will execute quite a
lot of code and can even schedule in the middle, so the TLB will not
contain the address and your test will crash. Even the code before test
instruction can be executed from alt_insn_page if guest is scheduled out
after invlpg() and before it executes every instruction until trapping
one. In your case the test will crash too instead of yielding false positive.

> I don't know if this is right, but I use this trick in my
> previous patch and it runs well.
Your previous patches always had c3 (ret) after tested instruction on
alt_insn_page.

>                                  I use "trace-cmd record -e kvm" to
> trace it and found instructions in alt_insn_page are not executed, so
> I suppose that alt_insn_page is not loaded to the right place.
Do you see "in" instruction emulated? Anyway current code is incorrect
since current install_page() implementation cannot handle large pages
and the code is backed up by large pages. You can fix install_page() to
check for that and break large page into small one before installing a
page.

> 
> Arthur
> >
> >>     save = inregs;
> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
> >>     // Load the code TLB with insn_page, but point the page tables at
> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >>     // This will make the CPU trap on the insn_page instruction but the
> >>     // hypervisor will see alt_insn_page.
> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
> >>     invlpg(insn_page);
> >>     // Load code TLB
> >>     asm volatile("call *%0" : : "r"(insn_page));
> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >>     // Trap, let hypervisor emulate at alt_insn_page
> >>     asm volatile("call *%0": : "r"(insn_page+1));
> >>
> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
> >> }
> >>
> >> static void test_movabs(uint64_t *mem)
> >> {
> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
> >>     inregs = (struct regs){ 0 };
> >>     trap_emulator(mem, alt_insn, 10);
> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
> >> }
> >>
> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> Hi Gleb,
> >> >> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> >> >> difficulties. You want to save all the general registers in calling
> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> >> >> Because all the instructions should be generated outside and copy to
> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >> >> >>
> >> >> >> > They do not have to be generated outside. You can write code into
> >> >> >> > insn_page directly. Something like this outside of any functions:
> >> >> >> >
> >> >> >> > asm(".align 4096\n\t"
> >> >> >> >     ".global insn_page\n\t"
> >> >> >> >     ".global insn_page_end\n\t"
> >> >> >> >     ".global test_insn\n\t"
> >> >> >> >     ".global test_insn_end\n\t"
> >> >> >> >     "insn_page:"
> >> >> >> >     "mov %%rax, outregs \n\t"
> >> >> >> >     ...
> >> >> >> >     "test_insn:\n\t"
> >> >> >> >     "in (%ds), %al\n\t"
> >> >> >> >     ". = . + 31\n\t"
> >> >> >> >     "test_insn_end:\n\t"
> >> >> >> >     "mov outregs, %%rax\n\t"
> >> >> >> >     ...
> >> >> >> >     "ret\n\t"
> >> >> >> >     ".align 4096\n\t"
> >> >> >> >     "insn_page_end:\n\t");
> >> >> >> >
> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> >> >> I used such codes:
> >> >> >>
> >> >> >> invlpg((void *)virt_to_phys(insn_page));
> >> >> > virt_to_phys?
> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> >> >> is the same.
> >> >> >
> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
> >> >> > +1?
> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
> >> >> directly call the second byte, which is the real content of insn_page.
> >> > Send the code.
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 19, 2013, 12:18 p.m. UTC | #15
On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
>> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> > Send code in a form of a patch.
>> >
>> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> extern u8 insn_page[], insn_page_end[];
>> >> extern u8 test_insn[], test_insn_end[];
>> >> extern u8 alt_insn_page[];
>> >>
>> >> asm(
>> >> ".align 4096\n\t"
>> >> ".global insn_page\n\t"
>> >> ".global insn_page_end\n\t"
>> >> ".global test_insn\n\t"
>> >> ".global test_insn_end\n\t"
>> >> "insn_page:\n\t"
>> >>
>> >> "ret \n\t"
>> >>
>> >> "push %rax; push %rbx\n\t"
>> >> "push %rcx; push %rdx\n\t"
>> >> "push %rsi; push %rdi\n\t"
>> >> "push %rbp\n\t"
>> >> "push %r8; push %r9\n\t"
>> >> "push %r10; push %r11\n\t"
>> >> "push %r12; push %r13\n\t"
>> >> "push %r14; push %r15\n\t"
>> >> "pushf\n\t"
>> >>
>> >> "push 136+save \n\t"
>> >> "popf \n\t"
>> >> "mov 0+save, %rax \n\t"
>> >> "mov 8+save, %rbx \n\t"
>> >> "mov 16+save, %rcx \n\t"
>> >> "mov 24+save, %rdx \n\t"
>> >> "mov 32+save, %rsi \n\t"
>> >> "mov 40+save, %rdi \n\t"
>> >> "mov 56+save, %rbp \n\t"
>> >> "mov 64+save, %r8 \n\t"
>> >> "mov 72+save, %r9 \n\t"
>> >> "mov 80+save, %r10  \n\t"
>> >> "mov 88+save, %r11 \n\t"
>> >> "mov 96+save, %r12 \n\t"
>> >> "mov 104+save, %r13 \n\t"
>> >> "mov 112+save, %r14 \n\t"
>> >> "mov 120+save, %r15 \n\t"
>> >>
>> >> "test_insn:\n\t"
>> >> "in  (%dx),%al\n\t"
>> >> ". = . + 31\n\t"
>> >> "test_insn_end:\n\t"
>> >>
>> >> "pushf \n\t"
>> >> "pop 136+save \n\t"
>> >> "mov %rax, 0+save \n\t"
>> >> "mov %rbx, 8+save \n\t"
>> >> "mov %rcx, 16+save \n\t"
>> >> "mov %rdx, 24+save \n\t"
>> >> "mov %rsi, 32+save \n\t"
>> >> "mov %rdi, 40+save \n\t"
>> >> "mov %rbp, 56+save \n\t"
>> >> "mov %r8, 64+save \n\t"
>> >> "mov %r9, 72+save \n\t"
>> >> "mov %r10, 80+save \n\t"
>> >> "mov %r11, 88+save \n\t"
>> >> "mov %r12, 96+save \n\t"
>> >> "mov %r13, 104+save \n\t"
>> >> "mov %r14, 112+save \n\t"
>> >> "mov %r15, 120+save \n\t"
>> >> "popf \n\t"
>> >> "pop %r15; pop %r14 \n\t"
>> >> "pop %r13; pop %r12 \n\t"
>> >> "pop %r11; pop %r10 \n\t"
>> >> "pop %r9; pop %r8 \n\t"
>> >> "pop %rbp \n\t"
>> >> "pop %rdi; pop %rsi \n\t"
>> >> "pop %rdx; pop %rcx \n\t"
>> >> "pop %rbx; pop %rax \n\t"
>> >>
>> >> "ret\n\t"
>> >> "save:\n\t"
>> >> ". = . + 256\n\t"
>> >> ".align 4096\n\t"
>> >> "alt_insn_page:\n\t"
>> >> ". = . + 4096\n\t"
>> >> );
>> >>
>> >>
>> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> uint8_t *alt_insn, int alt_insn_length)
>> >> {
>> >>     int i, emul_offset;
>> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >>         test_insn[i] = 0x90; // nop
>> > Why? Gcc should pad it with nops.
>> >
>> >>     emul_offset = test_insn - insn_page;
>> >>     for (i=0; i<alt_insn_length; i++)
>> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> }
>> >>
>> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> {
>> >>     ulong *cr3 = (ulong *)read_cr3();
>> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >>
>> >>     memset(alt_insn_page, 0x90, 4096);
>> > alt_insn_page should contains the same instruction as insn_page except
>> > between test_insn and test_insn_end. I do not know how you expect it to
>> > work otherwise.
>> In my oponion, only codes between test_insn and test_insn_end in
>> alt_insn_page need to be set, insn_page will be executed in the guest,
>> and when trapping into emulator OS will load alt_insn_page (because of
>> invlpg(insn_page)), then return to guest with executing insn_page
>> (from TLB).
> While before trap the code will likely be executed from insn_page,
> but after the trap it is very optimistic to assume that tlb cache
> will still contain this virtual address since host will execute quite a
> lot of code and can even schedule in the middle, so the TLB will not
> contain the address and your test will crash. Even the code before test
> instruction can be executed from alt_insn_page if guest is scheduled out
> after invlpg() and before it executes every instruction until trapping
> one. In your case the test will crash too instead of yielding false positive.
>
>> I don't know if this is right, but I use this trick in my
>> previous patch and it runs well.
> Your previous patches always had c3 (ret) after tested instruction on
> alt_insn_page.
>
>>                                  I use "trace-cmd record -e kvm" to
>> trace it and found instructions in alt_insn_page are not executed, so
>> I suppose that alt_insn_page is not loaded to the right place.
> Do you see "in" instruction emulated? Anyway current code is incorrect
> since current install_page() implementation cannot handle large pages
> and the code is backed up by large pages. You can fix install_page() to
> check for that and break large page into small one before installing a
> page.
Here I have two questions.
1. There's another function called "install_large_page", can it be
used to our occasion? I found that this function is not used at all.
2. Why will current version runs well? Do pages allocated dynamically
are automatically aligned to 2MB (large page size)?

Arthur
>
>>
>> Arthur
>> >
>> >>     save = inregs;
>> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>> >>     // Load the code TLB with insn_page, but point the page tables at
>> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >>     // This will make the CPU trap on the insn_page instruction but the
>> >>     // hypervisor will see alt_insn_page.
>> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>> >>     invlpg(insn_page);
>> >>     // Load code TLB
>> >>     asm volatile("call *%0" : : "r"(insn_page));
>> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >>     // Trap, let hypervisor emulate at alt_insn_page
>> >>     asm volatile("call *%0": : "r"(insn_page+1));
>> >>
>> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
>> >> }
>> >>
>> >> static void test_movabs(uint64_t *mem)
>> >> {
>> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>> >>     inregs = (struct regs){ 0 };
>> >>     trap_emulator(mem, alt_insn, 10);
>> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>> >> }
>> >>
>> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
>> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> >> Hi Gleb,
>> >> >> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> >> >> difficulties. You want to save all the general registers in calling
>> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> >> >> Because all the instructions should be generated outside and copy to
>> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >> >> >>
>> >> >> >> > They do not have to be generated outside. You can write code into
>> >> >> >> > insn_page directly. Something like this outside of any functions:
>> >> >> >> >
>> >> >> >> > asm(".align 4096\n\t"
>> >> >> >> >     ".global insn_page\n\t"
>> >> >> >> >     ".global insn_page_end\n\t"
>> >> >> >> >     ".global test_insn\n\t"
>> >> >> >> >     ".global test_insn_end\n\t"
>> >> >> >> >     "insn_page:"
>> >> >> >> >     "mov %%rax, outregs \n\t"
>> >> >> >> >     ...
>> >> >> >> >     "test_insn:\n\t"
>> >> >> >> >     "in (%ds), %al\n\t"
>> >> >> >> >     ". = . + 31\n\t"
>> >> >> >> >     "test_insn_end:\n\t"
>> >> >> >> >     "mov outregs, %%rax\n\t"
>> >> >> >> >     ...
>> >> >> >> >     "ret\n\t"
>> >> >> >> >     ".align 4096\n\t"
>> >> >> >> >     "insn_page_end:\n\t");
>> >> >> >> >
>> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> >> >> I used such codes:
>> >> >> >>
>> >> >> >> invlpg((void *)virt_to_phys(insn_page));
>> >> >> > virt_to_phys?
>> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> >> >> is the same.
>> >> >> >
>> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
>> >> >> > +1?
>> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
>> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
>> >> >> directly call the second byte, which is the real content of insn_page.
>> >> > Send the code.
>> >> >
>> >> > --
>> >> >                         Gleb.
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 19, 2013, 12:26 p.m. UTC | #16
On Wed, Jun 19, 2013 at 08:18:29PM +0800, ??? <Arthur Chunqi Li> wrote:
> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > Send code in a form of a patch.
> >> >
> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> extern u8 insn_page[], insn_page_end[];
> >> >> extern u8 test_insn[], test_insn_end[];
> >> >> extern u8 alt_insn_page[];
> >> >>
> >> >> asm(
> >> >> ".align 4096\n\t"
> >> >> ".global insn_page\n\t"
> >> >> ".global insn_page_end\n\t"
> >> >> ".global test_insn\n\t"
> >> >> ".global test_insn_end\n\t"
> >> >> "insn_page:\n\t"
> >> >>
> >> >> "ret \n\t"
> >> >>
> >> >> "push %rax; push %rbx\n\t"
> >> >> "push %rcx; push %rdx\n\t"
> >> >> "push %rsi; push %rdi\n\t"
> >> >> "push %rbp\n\t"
> >> >> "push %r8; push %r9\n\t"
> >> >> "push %r10; push %r11\n\t"
> >> >> "push %r12; push %r13\n\t"
> >> >> "push %r14; push %r15\n\t"
> >> >> "pushf\n\t"
> >> >>
> >> >> "push 136+save \n\t"
> >> >> "popf \n\t"
> >> >> "mov 0+save, %rax \n\t"
> >> >> "mov 8+save, %rbx \n\t"
> >> >> "mov 16+save, %rcx \n\t"
> >> >> "mov 24+save, %rdx \n\t"
> >> >> "mov 32+save, %rsi \n\t"
> >> >> "mov 40+save, %rdi \n\t"
> >> >> "mov 56+save, %rbp \n\t"
> >> >> "mov 64+save, %r8 \n\t"
> >> >> "mov 72+save, %r9 \n\t"
> >> >> "mov 80+save, %r10  \n\t"
> >> >> "mov 88+save, %r11 \n\t"
> >> >> "mov 96+save, %r12 \n\t"
> >> >> "mov 104+save, %r13 \n\t"
> >> >> "mov 112+save, %r14 \n\t"
> >> >> "mov 120+save, %r15 \n\t"
> >> >>
> >> >> "test_insn:\n\t"
> >> >> "in  (%dx),%al\n\t"
> >> >> ". = . + 31\n\t"
> >> >> "test_insn_end:\n\t"
> >> >>
> >> >> "pushf \n\t"
> >> >> "pop 136+save \n\t"
> >> >> "mov %rax, 0+save \n\t"
> >> >> "mov %rbx, 8+save \n\t"
> >> >> "mov %rcx, 16+save \n\t"
> >> >> "mov %rdx, 24+save \n\t"
> >> >> "mov %rsi, 32+save \n\t"
> >> >> "mov %rdi, 40+save \n\t"
> >> >> "mov %rbp, 56+save \n\t"
> >> >> "mov %r8, 64+save \n\t"
> >> >> "mov %r9, 72+save \n\t"
> >> >> "mov %r10, 80+save \n\t"
> >> >> "mov %r11, 88+save \n\t"
> >> >> "mov %r12, 96+save \n\t"
> >> >> "mov %r13, 104+save \n\t"
> >> >> "mov %r14, 112+save \n\t"
> >> >> "mov %r15, 120+save \n\t"
> >> >> "popf \n\t"
> >> >> "pop %r15; pop %r14 \n\t"
> >> >> "pop %r13; pop %r12 \n\t"
> >> >> "pop %r11; pop %r10 \n\t"
> >> >> "pop %r9; pop %r8 \n\t"
> >> >> "pop %rbp \n\t"
> >> >> "pop %rdi; pop %rsi \n\t"
> >> >> "pop %rdx; pop %rcx \n\t"
> >> >> "pop %rbx; pop %rax \n\t"
> >> >>
> >> >> "ret\n\t"
> >> >> "save:\n\t"
> >> >> ". = . + 256\n\t"
> >> >> ".align 4096\n\t"
> >> >> "alt_insn_page:\n\t"
> >> >> ". = . + 4096\n\t"
> >> >> );
> >> >>
> >> >>
> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> >> uint8_t *alt_insn, int alt_insn_length)
> >> >> {
> >> >>     int i, emul_offset;
> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >> >>         test_insn[i] = 0x90; // nop
> >> > Why? Gcc should pad it with nops.
> >> >
> >> >>     emul_offset = test_insn - insn_page;
> >> >>     for (i=0; i<alt_insn_length; i++)
> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> >> }
> >> >>
> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> >> {
> >> >>     ulong *cr3 = (ulong *)read_cr3();
> >> >>     int save_offset = (u8 *)(&save) - insn_page;
> >> >>
> >> >>     memset(alt_insn_page, 0x90, 4096);
> >> > alt_insn_page should contains the same instruction as insn_page except
> >> > between test_insn and test_insn_end. I do not know how you expect it to
> >> > work otherwise.
> >> In my oponion, only codes between test_insn and test_insn_end in
> >> alt_insn_page need to be set, insn_page will be executed in the guest,
> >> and when trapping into emulator OS will load alt_insn_page (because of
> >> invlpg(insn_page)), then return to guest with executing insn_page
> >> (from TLB).
> > While before trap the code will likely be executed from insn_page,
> > but after the trap it is very optimistic to assume that tlb cache
> > will still contain this virtual address since host will execute quite a
> > lot of code and can even schedule in the middle, so the TLB will not
> > contain the address and your test will crash. Even the code before test
> > instruction can be executed from alt_insn_page if guest is scheduled out
> > after invlpg() and before it executes every instruction until trapping
> > one. In your case the test will crash too instead of yielding false positive.
> >
> >> I don't know if this is right, but I use this trick in my
> >> previous patch and it runs well.
> > Your previous patches always had c3 (ret) after tested instruction on
> > alt_insn_page.
> >
> >>                                  I use "trace-cmd record -e kvm" to
> >> trace it and found instructions in alt_insn_page are not executed, so
> >> I suppose that alt_insn_page is not loaded to the right place.
> > Do you see "in" instruction emulated? Anyway current code is incorrect
> > since current install_page() implementation cannot handle large pages
> > and the code is backed up by large pages. You can fix install_page() to
> > check for that and break large page into small one before installing a
> > page.
> Here I have two questions.
> 1. There's another function called "install_large_page", can it be
> used to our occasion? I found that this function is not used at all.
It is used when initial page tables are created.
See lib/x86/vm.c:setup_mmu_range()

> 2. Why will current version runs well? Do pages allocated dynamically
> are automatically aligned to 2MB (large page size)?
> 
No, they are 4K pages.

> Arthur
> >
> >>
> >> Arthur
> >> >
> >> >>     save = inregs;
> >> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
> >> >>     // Load the code TLB with insn_page, but point the page tables at
> >> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >> >>     // This will make the CPU trap on the insn_page instruction but the
> >> >>     // hypervisor will see alt_insn_page.
> >> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
> >> >>     invlpg(insn_page);
> >> >>     // Load code TLB
> >> >>     asm volatile("call *%0" : : "r"(insn_page));
> >> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >>     // Trap, let hypervisor emulate at alt_insn_page
> >> >>     asm volatile("call *%0": : "r"(insn_page+1));
> >> >>
> >> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
> >> >> }
> >> >>
> >> >> static void test_movabs(uint64_t *mem)
> >> >> {
> >> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
> >> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
> >> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
> >> >>     inregs = (struct regs){ 0 };
> >> >>     trap_emulator(mem, alt_insn, 10);
> >> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
> >> >> }
> >> >>
> >> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
> >> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> >> Hi Gleb,
> >> >> >> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> >> >> >> difficulties. You want to save all the general registers in calling
> >> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> >> >> >> Because all the instructions should be generated outside and copy to
> >> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >> >> >> >>
> >> >> >> >> > They do not have to be generated outside. You can write code into
> >> >> >> >> > insn_page directly. Something like this outside of any functions:
> >> >> >> >> >
> >> >> >> >> > asm(".align 4096\n\t"
> >> >> >> >> >     ".global insn_page\n\t"
> >> >> >> >> >     ".global insn_page_end\n\t"
> >> >> >> >> >     ".global test_insn\n\t"
> >> >> >> >> >     ".global test_insn_end\n\t"
> >> >> >> >> >     "insn_page:"
> >> >> >> >> >     "mov %%rax, outregs \n\t"
> >> >> >> >> >     ...
> >> >> >> >> >     "test_insn:\n\t"
> >> >> >> >> >     "in (%ds), %al\n\t"
> >> >> >> >> >     ". = . + 31\n\t"
> >> >> >> >> >     "test_insn_end:\n\t"
> >> >> >> >> >     "mov outregs, %%rax\n\t"
> >> >> >> >> >     ...
> >> >> >> >> >     "ret\n\t"
> >> >> >> >> >     ".align 4096\n\t"
> >> >> >> >> >     "insn_page_end:\n\t");
> >> >> >> >> >
> >> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> >> >> >> I used such codes:
> >> >> >> >>
> >> >> >> >> invlpg((void *)virt_to_phys(insn_page));
> >> >> >> > virt_to_phys?
> >> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> >> >> >> is the same.
> >> >> >> >
> >> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
> >> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
> >> >> >> > +1?
> >> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
> >> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
> >> >> >> directly call the second byte, which is the real content of insn_page.
> >> >> > Send the code.
> >> >> >
> >> >> > --
> >> >> >                         Gleb.
> >> >>
> >> >>
> >> >>
> >> >> --
> >> >> Arthur Chunqi Li
> >> >> Department of Computer Science
> >> >> School of EECS
> >> >> Peking University
> >> >> Beijing, China
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 19, 2013, 12:30 p.m. UTC | #17
On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 08:18:29PM +0800, ??? <Arthur Chunqi Li> wrote:
>> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > Send code in a form of a patch.
>> >> >
>> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> extern u8 insn_page[], insn_page_end[];
>> >> >> extern u8 test_insn[], test_insn_end[];
>> >> >> extern u8 alt_insn_page[];
>> >> >>
>> >> >> asm(
>> >> >> ".align 4096\n\t"
>> >> >> ".global insn_page\n\t"
>> >> >> ".global insn_page_end\n\t"
>> >> >> ".global test_insn\n\t"
>> >> >> ".global test_insn_end\n\t"
>> >> >> "insn_page:\n\t"
>> >> >>
>> >> >> "ret \n\t"
>> >> >>
>> >> >> "push %rax; push %rbx\n\t"
>> >> >> "push %rcx; push %rdx\n\t"
>> >> >> "push %rsi; push %rdi\n\t"
>> >> >> "push %rbp\n\t"
>> >> >> "push %r8; push %r9\n\t"
>> >> >> "push %r10; push %r11\n\t"
>> >> >> "push %r12; push %r13\n\t"
>> >> >> "push %r14; push %r15\n\t"
>> >> >> "pushf\n\t"
>> >> >>
>> >> >> "push 136+save \n\t"
>> >> >> "popf \n\t"
>> >> >> "mov 0+save, %rax \n\t"
>> >> >> "mov 8+save, %rbx \n\t"
>> >> >> "mov 16+save, %rcx \n\t"
>> >> >> "mov 24+save, %rdx \n\t"
>> >> >> "mov 32+save, %rsi \n\t"
>> >> >> "mov 40+save, %rdi \n\t"
>> >> >> "mov 56+save, %rbp \n\t"
>> >> >> "mov 64+save, %r8 \n\t"
>> >> >> "mov 72+save, %r9 \n\t"
>> >> >> "mov 80+save, %r10  \n\t"
>> >> >> "mov 88+save, %r11 \n\t"
>> >> >> "mov 96+save, %r12 \n\t"
>> >> >> "mov 104+save, %r13 \n\t"
>> >> >> "mov 112+save, %r14 \n\t"
>> >> >> "mov 120+save, %r15 \n\t"
>> >> >>
>> >> >> "test_insn:\n\t"
>> >> >> "in  (%dx),%al\n\t"
>> >> >> ". = . + 31\n\t"
>> >> >> "test_insn_end:\n\t"
>> >> >>
>> >> >> "pushf \n\t"
>> >> >> "pop 136+save \n\t"
>> >> >> "mov %rax, 0+save \n\t"
>> >> >> "mov %rbx, 8+save \n\t"
>> >> >> "mov %rcx, 16+save \n\t"
>> >> >> "mov %rdx, 24+save \n\t"
>> >> >> "mov %rsi, 32+save \n\t"
>> >> >> "mov %rdi, 40+save \n\t"
>> >> >> "mov %rbp, 56+save \n\t"
>> >> >> "mov %r8, 64+save \n\t"
>> >> >> "mov %r9, 72+save \n\t"
>> >> >> "mov %r10, 80+save \n\t"
>> >> >> "mov %r11, 88+save \n\t"
>> >> >> "mov %r12, 96+save \n\t"
>> >> >> "mov %r13, 104+save \n\t"
>> >> >> "mov %r14, 112+save \n\t"
>> >> >> "mov %r15, 120+save \n\t"
>> >> >> "popf \n\t"
>> >> >> "pop %r15; pop %r14 \n\t"
>> >> >> "pop %r13; pop %r12 \n\t"
>> >> >> "pop %r11; pop %r10 \n\t"
>> >> >> "pop %r9; pop %r8 \n\t"
>> >> >> "pop %rbp \n\t"
>> >> >> "pop %rdi; pop %rsi \n\t"
>> >> >> "pop %rdx; pop %rcx \n\t"
>> >> >> "pop %rbx; pop %rax \n\t"
>> >> >>
>> >> >> "ret\n\t"
>> >> >> "save:\n\t"
>> >> >> ". = . + 256\n\t"
>> >> >> ".align 4096\n\t"
>> >> >> "alt_insn_page:\n\t"
>> >> >> ". = . + 4096\n\t"
>> >> >> );
>> >> >>
>> >> >>
>> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> >> uint8_t *alt_insn, int alt_insn_length)
>> >> >> {
>> >> >>     int i, emul_offset;
>> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >> >>         test_insn[i] = 0x90; // nop
>> >> > Why? Gcc should pad it with nops.
>> >> >
>> >> >>     emul_offset = test_insn - insn_page;
>> >> >>     for (i=0; i<alt_insn_length; i++)
>> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> >> }
>> >> >>
>> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> >> {
>> >> >>     ulong *cr3 = (ulong *)read_cr3();
>> >> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >> >>
>> >> >>     memset(alt_insn_page, 0x90, 4096);
>> >> > alt_insn_page should contains the same instruction as insn_page except
>> >> > between test_insn and test_insn_end. I do not know how you expect it to
>> >> > work otherwise.
>> >> In my oponion, only codes between test_insn and test_insn_end in
>> >> alt_insn_page need to be set, insn_page will be executed in the guest,
>> >> and when trapping into emulator OS will load alt_insn_page (because of
>> >> invlpg(insn_page)), then return to guest with executing insn_page
>> >> (from TLB).
>> > While before trap the code will likely be executed from insn_page,
>> > but after the trap it is very optimistic to assume that tlb cache
>> > will still contain this virtual address since host will execute quite a
>> > lot of code and can even schedule in the middle, so the TLB will not
>> > contain the address and your test will crash. Even the code before test
>> > instruction can be executed from alt_insn_page if guest is scheduled out
>> > after invlpg() and before it executes every instruction until trapping
>> > one. In your case the test will crash too instead of yielding false positive.
>> >
>> >> I don't know if this is right, but I use this trick in my
>> >> previous patch and it runs well.
>> > Your previous patches always had c3 (ret) after tested instruction on
>> > alt_insn_page.
>> >
>> >>                                  I use "trace-cmd record -e kvm" to
>> >> trace it and found instructions in alt_insn_page are not executed, so
>> >> I suppose that alt_insn_page is not loaded to the right place.
>> > Do you see "in" instruction emulated? Anyway current code is incorrect
>> > since current install_page() implementation cannot handle large pages
>> > and the code is backed up by large pages. You can fix install_page() to
>> > check for that and break large page into small one before installing a
>> > page.
>> Here I have two questions.
>> 1. There's another function called "install_large_page", can it be
>> used to our occasion? I found that this function is not used at all.
> It is used when initial page tables are created.
> See lib/x86/vm.c:setup_mmu_range()
>
>> 2. Why will current version runs well? Do pages allocated dynamically
>> are automatically aligned to 2MB (large page size)?
>>
> No, they are 4K pages.
Thus why dynamically creating insn_page and alt_insn_page with
alloc_page() can get the right result?
>
>> Arthur
>> >
>> >>
>> >> Arthur
>> >> >
>> >> >>     save = inregs;
>> >> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>> >> >>     // Load the code TLB with insn_page, but point the page tables at
>> >> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> >>     // This will make the CPU trap on the insn_page instruction but the
>> >> >>     // hypervisor will see alt_insn_page.
>> >> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>> >> >>     invlpg(insn_page);
>> >> >>     // Load code TLB
>> >> >>     asm volatile("call *%0" : : "r"(insn_page));
>> >> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >>     // Trap, let hypervisor emulate at alt_insn_page
>> >> >>     asm volatile("call *%0": : "r"(insn_page+1));
>> >> >>
>> >> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
>> >> >> }
>> >> >>
>> >> >> static void test_movabs(uint64_t *mem)
>> >> >> {
>> >> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>> >> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>> >> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>> >> >>     inregs = (struct regs){ 0 };
>> >> >>     trap_emulator(mem, alt_insn, 10);
>> >> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>> >> >> }
>> >> >>
>> >> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, Ê??Ê?•Â•? <Arthur Chunqi Li> wrote:
>> >> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> >> >> Hi Gleb,
>> >> >> >> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> >> >> >> difficulties. You want to save all the general registers in calling
>> >> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> >> >> >> Because all the instructions should be generated outside and copy to
>> >> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >> >> >> >>
>> >> >> >> >> > They do not have to be generated outside. You can write code into
>> >> >> >> >> > insn_page directly. Something like this outside of any functions:
>> >> >> >> >> >
>> >> >> >> >> > asm(".align 4096\n\t"
>> >> >> >> >> >     ".global insn_page\n\t"
>> >> >> >> >> >     ".global insn_page_end\n\t"
>> >> >> >> >> >     ".global test_insn\n\t"
>> >> >> >> >> >     ".global test_insn_end\n\t"
>> >> >> >> >> >     "insn_page:"
>> >> >> >> >> >     "mov %%rax, outregs \n\t"
>> >> >> >> >> >     ...
>> >> >> >> >> >     "test_insn:\n\t"
>> >> >> >> >> >     "in (%ds), %al\n\t"
>> >> >> >> >> >     ". = . + 31\n\t"
>> >> >> >> >> >     "test_insn_end:\n\t"
>> >> >> >> >> >     "mov outregs, %%rax\n\t"
>> >> >> >> >> >     ...
>> >> >> >> >> >     "ret\n\t"
>> >> >> >> >> >     ".align 4096\n\t"
>> >> >> >> >> >     "insn_page_end:\n\t");
>> >> >> >> >> >
>> >> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> >> >> >> I used such codes:
>> >> >> >> >>
>> >> >> >> >> invlpg((void *)virt_to_phys(insn_page));
>> >> >> >> > virt_to_phys?
>> >> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> >> >> >> is the same.
>> >> >> >> >
>> >> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
>> >> >> >> > +1?
>> >> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
>> >> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
>> >> >> >> directly call the second byte, which is the real content of insn_page.
>> >> >> > Send the code.
>> >> >> >
>> >> >> > --
>> >> >> >                         Gleb.
>> >> >>
>> >> >>
>> >> >>
>> >> >> --
>> >> >> Arthur Chunqi Li
>> >> >> Department of Computer Science
>> >> >> School of EECS
>> >> >> Peking University
>> >> >> Beijing, China
>> >> >
>> >> > --
>> >> >                         Gleb.
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 19, 2013, 12:32 p.m. UTC | #18
On Wed, Jun 19, 2013 at 08:30:33PM +0800, ??? <Arthur Chunqi Li> wrote:
> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > Send code in a form of a patch.
> >> >> >
> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> >> extern u8 insn_page[], insn_page_end[];
> >> >> >> extern u8 test_insn[], test_insn_end[];
> >> >> >> extern u8 alt_insn_page[];
> >> >> >>
> >> >> >> asm(
> >> >> >> ".align 4096\n\t"
> >> >> >> ".global insn_page\n\t"
> >> >> >> ".global insn_page_end\n\t"
> >> >> >> ".global test_insn\n\t"
> >> >> >> ".global test_insn_end\n\t"
> >> >> >> "insn_page:\n\t"
> >> >> >>
> >> >> >> "ret \n\t"
> >> >> >>
> >> >> >> "push %rax; push %rbx\n\t"
> >> >> >> "push %rcx; push %rdx\n\t"
> >> >> >> "push %rsi; push %rdi\n\t"
> >> >> >> "push %rbp\n\t"
> >> >> >> "push %r8; push %r9\n\t"
> >> >> >> "push %r10; push %r11\n\t"
> >> >> >> "push %r12; push %r13\n\t"
> >> >> >> "push %r14; push %r15\n\t"
> >> >> >> "pushf\n\t"
> >> >> >>
> >> >> >> "push 136+save \n\t"
> >> >> >> "popf \n\t"
> >> >> >> "mov 0+save, %rax \n\t"
> >> >> >> "mov 8+save, %rbx \n\t"
> >> >> >> "mov 16+save, %rcx \n\t"
> >> >> >> "mov 24+save, %rdx \n\t"
> >> >> >> "mov 32+save, %rsi \n\t"
> >> >> >> "mov 40+save, %rdi \n\t"
> >> >> >> "mov 56+save, %rbp \n\t"
> >> >> >> "mov 64+save, %r8 \n\t"
> >> >> >> "mov 72+save, %r9 \n\t"
> >> >> >> "mov 80+save, %r10  \n\t"
> >> >> >> "mov 88+save, %r11 \n\t"
> >> >> >> "mov 96+save, %r12 \n\t"
> >> >> >> "mov 104+save, %r13 \n\t"
> >> >> >> "mov 112+save, %r14 \n\t"
> >> >> >> "mov 120+save, %r15 \n\t"
> >> >> >>
> >> >> >> "test_insn:\n\t"
> >> >> >> "in  (%dx),%al\n\t"
> >> >> >> ". = . + 31\n\t"
> >> >> >> "test_insn_end:\n\t"
> >> >> >>
> >> >> >> "pushf \n\t"
> >> >> >> "pop 136+save \n\t"
> >> >> >> "mov %rax, 0+save \n\t"
> >> >> >> "mov %rbx, 8+save \n\t"
> >> >> >> "mov %rcx, 16+save \n\t"
> >> >> >> "mov %rdx, 24+save \n\t"
> >> >> >> "mov %rsi, 32+save \n\t"
> >> >> >> "mov %rdi, 40+save \n\t"
> >> >> >> "mov %rbp, 56+save \n\t"
> >> >> >> "mov %r8, 64+save \n\t"
> >> >> >> "mov %r9, 72+save \n\t"
> >> >> >> "mov %r10, 80+save \n\t"
> >> >> >> "mov %r11, 88+save \n\t"
> >> >> >> "mov %r12, 96+save \n\t"
> >> >> >> "mov %r13, 104+save \n\t"
> >> >> >> "mov %r14, 112+save \n\t"
> >> >> >> "mov %r15, 120+save \n\t"
> >> >> >> "popf \n\t"
> >> >> >> "pop %r15; pop %r14 \n\t"
> >> >> >> "pop %r13; pop %r12 \n\t"
> >> >> >> "pop %r11; pop %r10 \n\t"
> >> >> >> "pop %r9; pop %r8 \n\t"
> >> >> >> "pop %rbp \n\t"
> >> >> >> "pop %rdi; pop %rsi \n\t"
> >> >> >> "pop %rdx; pop %rcx \n\t"
> >> >> >> "pop %rbx; pop %rax \n\t"
> >> >> >>
> >> >> >> "ret\n\t"
> >> >> >> "save:\n\t"
> >> >> >> ". = . + 256\n\t"
> >> >> >> ".align 4096\n\t"
> >> >> >> "alt_insn_page:\n\t"
> >> >> >> ". = . + 4096\n\t"
> >> >> >> );
> >> >> >>
> >> >> >>
> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> >> >> uint8_t *alt_insn, int alt_insn_length)
> >> >> >> {
> >> >> >>     int i, emul_offset;
> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >> >> >>         test_insn[i] = 0x90; // nop
> >> >> > Why? Gcc should pad it with nops.
> >> >> >
> >> >> >>     emul_offset = test_insn - insn_page;
> >> >> >>     for (i=0; i<alt_insn_length; i++)
> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> >> >> }
> >> >> >>
> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> >> >> {
> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
> >> >> >>
> >> >> >>     memset(alt_insn_page, 0x90, 4096);
> >> >> > alt_insn_page should contains the same instruction as insn_page except
> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
> >> >> > work otherwise.
> >> >> In my oponion, only codes between test_insn and test_insn_end in
> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
> >> >> and when trapping into emulator OS will load alt_insn_page (because of
> >> >> invlpg(insn_page)), then return to guest with executing insn_page
> >> >> (from TLB).
> >> > While before trap the code will likely be executed from insn_page,
> >> > but after the trap it is very optimistic to assume that tlb cache
> >> > will still contain this virtual address since host will execute quite a
> >> > lot of code and can even schedule in the middle, so the TLB will not
> >> > contain the address and your test will crash. Even the code before test
> >> > instruction can be executed from alt_insn_page if guest is scheduled out
> >> > after invlpg() and before it executes every instruction until trapping
> >> > one. In your case the test will crash too instead of yielding false positive.
> >> >
> >> >> I don't know if this is right, but I use this trick in my
> >> >> previous patch and it runs well.
> >> > Your previous patches always had c3 (ret) after tested instruction on
> >> > alt_insn_page.
> >> >
> >> >>                                  I use "trace-cmd record -e kvm" to
> >> >> trace it and found instructions in alt_insn_page are not executed, so
> >> >> I suppose that alt_insn_page is not loaded to the right place.
> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
> >> > since current install_page() implementation cannot handle large pages
> >> > and the code is backed up by large pages. You can fix install_page() to
> >> > check for that and break large page into small one before installing a
> >> > page.
> >> Here I have two questions.
> >> 1. There's another function called "install_large_page", can it be
> >> used to our occasion? I found that this function is not used at all.
> > It is used when initial page tables are created.
> > See lib/x86/vm.c:setup_mmu_range()
> >
> >> 2. Why will current version runs well? Do pages allocated dynamically
> >> are automatically aligned to 2MB (large page size)?
> >>
> > No, they are 4K pages.
> Thus why dynamically creating insn_page and alt_insn_page with
> alloc_page() can get the right result?
Probably.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 19, 2013, 2:01 p.m. UTC | #19
I found the final reason! The initial use of init_ram is also used by
test_rip_relative(), which will cause conflict. I changed it and
everything runs well.

On Wed, Jun 19, 2013 at 8:32 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 08:30:33PM +0800, ??? <Arthur Chunqi Li> wrote:
>> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > Send code in a form of a patch.
>> >> >> >
>> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> >> extern u8 insn_page[], insn_page_end[];
>> >> >> >> extern u8 test_insn[], test_insn_end[];
>> >> >> >> extern u8 alt_insn_page[];
>> >> >> >>
>> >> >> >> asm(
>> >> >> >> ".align 4096\n\t"
>> >> >> >> ".global insn_page\n\t"
>> >> >> >> ".global insn_page_end\n\t"
>> >> >> >> ".global test_insn\n\t"
>> >> >> >> ".global test_insn_end\n\t"
>> >> >> >> "insn_page:\n\t"
>> >> >> >>
>> >> >> >> "ret \n\t"
>> >> >> >>
>> >> >> >> "push %rax; push %rbx\n\t"
>> >> >> >> "push %rcx; push %rdx\n\t"
>> >> >> >> "push %rsi; push %rdi\n\t"
>> >> >> >> "push %rbp\n\t"
>> >> >> >> "push %r8; push %r9\n\t"
>> >> >> >> "push %r10; push %r11\n\t"
>> >> >> >> "push %r12; push %r13\n\t"
>> >> >> >> "push %r14; push %r15\n\t"
>> >> >> >> "pushf\n\t"
>> >> >> >>
>> >> >> >> "push 136+save \n\t"
>> >> >> >> "popf \n\t"
>> >> >> >> "mov 0+save, %rax \n\t"
>> >> >> >> "mov 8+save, %rbx \n\t"
>> >> >> >> "mov 16+save, %rcx \n\t"
>> >> >> >> "mov 24+save, %rdx \n\t"
>> >> >> >> "mov 32+save, %rsi \n\t"
>> >> >> >> "mov 40+save, %rdi \n\t"
>> >> >> >> "mov 56+save, %rbp \n\t"
>> >> >> >> "mov 64+save, %r8 \n\t"
>> >> >> >> "mov 72+save, %r9 \n\t"
>> >> >> >> "mov 80+save, %r10  \n\t"
>> >> >> >> "mov 88+save, %r11 \n\t"
>> >> >> >> "mov 96+save, %r12 \n\t"
>> >> >> >> "mov 104+save, %r13 \n\t"
>> >> >> >> "mov 112+save, %r14 \n\t"
>> >> >> >> "mov 120+save, %r15 \n\t"
>> >> >> >>
>> >> >> >> "test_insn:\n\t"
>> >> >> >> "in  (%dx),%al\n\t"
>> >> >> >> ". = . + 31\n\t"
>> >> >> >> "test_insn_end:\n\t"
>> >> >> >>
>> >> >> >> "pushf \n\t"
>> >> >> >> "pop 136+save \n\t"
>> >> >> >> "mov %rax, 0+save \n\t"
>> >> >> >> "mov %rbx, 8+save \n\t"
>> >> >> >> "mov %rcx, 16+save \n\t"
>> >> >> >> "mov %rdx, 24+save \n\t"
>> >> >> >> "mov %rsi, 32+save \n\t"
>> >> >> >> "mov %rdi, 40+save \n\t"
>> >> >> >> "mov %rbp, 56+save \n\t"
>> >> >> >> "mov %r8, 64+save \n\t"
>> >> >> >> "mov %r9, 72+save \n\t"
>> >> >> >> "mov %r10, 80+save \n\t"
>> >> >> >> "mov %r11, 88+save \n\t"
>> >> >> >> "mov %r12, 96+save \n\t"
>> >> >> >> "mov %r13, 104+save \n\t"
>> >> >> >> "mov %r14, 112+save \n\t"
>> >> >> >> "mov %r15, 120+save \n\t"
>> >> >> >> "popf \n\t"
>> >> >> >> "pop %r15; pop %r14 \n\t"
>> >> >> >> "pop %r13; pop %r12 \n\t"
>> >> >> >> "pop %r11; pop %r10 \n\t"
>> >> >> >> "pop %r9; pop %r8 \n\t"
>> >> >> >> "pop %rbp \n\t"
>> >> >> >> "pop %rdi; pop %rsi \n\t"
>> >> >> >> "pop %rdx; pop %rcx \n\t"
>> >> >> >> "pop %rbx; pop %rax \n\t"
>> >> >> >>
>> >> >> >> "ret\n\t"
>> >> >> >> "save:\n\t"
>> >> >> >> ". = . + 256\n\t"
>> >> >> >> ".align 4096\n\t"
>> >> >> >> "alt_insn_page:\n\t"
>> >> >> >> ". = . + 4096\n\t"
>> >> >> >> );
>> >> >> >>
>> >> >> >>
>> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> >> >> uint8_t *alt_insn, int alt_insn_length)
>> >> >> >> {
>> >> >> >>     int i, emul_offset;
>> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >> >> >>         test_insn[i] = 0x90; // nop
>> >> >> > Why? Gcc should pad it with nops.
>> >> >> >
>> >> >> >>     emul_offset = test_insn - insn_page;
>> >> >> >>     for (i=0; i<alt_insn_length; i++)
>> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> >> >> }
>> >> >> >>
>> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> >> >> {
>> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
>> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >> >> >>
>> >> >> >>     memset(alt_insn_page, 0x90, 4096);
>> >> >> > alt_insn_page should contains the same instruction as insn_page except
>> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
>> >> >> > work otherwise.
>> >> >> In my oponion, only codes between test_insn and test_insn_end in
>> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
>> >> >> and when trapping into emulator OS will load alt_insn_page (because of
>> >> >> invlpg(insn_page)), then return to guest with executing insn_page
>> >> >> (from TLB).
>> >> > While before trap the code will likely be executed from insn_page,
>> >> > but after the trap it is very optimistic to assume that tlb cache
>> >> > will still contain this virtual address since host will execute quite a
>> >> > lot of code and can even schedule in the middle, so the TLB will not
>> >> > contain the address and your test will crash. Even the code before test
>> >> > instruction can be executed from alt_insn_page if guest is scheduled out
>> >> > after invlpg() and before it executes every instruction until trapping
>> >> > one. In your case the test will crash too instead of yielding false positive.
>> >> >
>> >> >> I don't know if this is right, but I use this trick in my
>> >> >> previous patch and it runs well.
>> >> > Your previous patches always had c3 (ret) after tested instruction on
>> >> > alt_insn_page.
>> >> >
>> >> >>                                  I use "trace-cmd record -e kvm" to
>> >> >> trace it and found instructions in alt_insn_page are not executed, so
>> >> >> I suppose that alt_insn_page is not loaded to the right place.
>> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
>> >> > since current install_page() implementation cannot handle large pages
>> >> > and the code is backed up by large pages. You can fix install_page() to
>> >> > check for that and break large page into small one before installing a
>> >> > page.
>> >> Here I have two questions.
>> >> 1. There's another function called "install_large_page", can it be
>> >> used to our occasion? I found that this function is not used at all.
>> > It is used when initial page tables are created.
>> > See lib/x86/vm.c:setup_mmu_range()
>> >
>> >> 2. Why will current version runs well? Do pages allocated dynamically
>> >> are automatically aligned to 2MB (large page size)?
>> >>
>> > No, they are 4K pages.
>> Thus why dynamically creating insn_page and alt_insn_page with
>> alloc_page() can get the right result?
> Probably.
>
> --
>                         Gleb.
Gleb Natapov June 19, 2013, 2:13 p.m. UTC | #20
On Wed, Jun 19, 2013 at 10:01:40PM +0800, ??? <Arthur Chunqi Li> wrote:
> I found the final reason! The initial use of init_ram is also used by
> test_rip_relative(), which will cause conflict. I changed it and
> everything runs well.
> 
Not sure what you mean. Your version of test_movabs does not use insn_ram.

> On Wed, Jun 19, 2013 at 8:32 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Wed, Jun 19, 2013 at 08:30:33PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > Send code in a form of a patch.
> >> >> >> >
> >> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
> >> >> >> >> extern u8 insn_page[], insn_page_end[];
> >> >> >> >> extern u8 test_insn[], test_insn_end[];
> >> >> >> >> extern u8 alt_insn_page[];
> >> >> >> >>
> >> >> >> >> asm(
> >> >> >> >> ".align 4096\n\t"
> >> >> >> >> ".global insn_page\n\t"
> >> >> >> >> ".global insn_page_end\n\t"
> >> >> >> >> ".global test_insn\n\t"
> >> >> >> >> ".global test_insn_end\n\t"
> >> >> >> >> "insn_page:\n\t"
> >> >> >> >>
> >> >> >> >> "ret \n\t"
> >> >> >> >>
> >> >> >> >> "push %rax; push %rbx\n\t"
> >> >> >> >> "push %rcx; push %rdx\n\t"
> >> >> >> >> "push %rsi; push %rdi\n\t"
> >> >> >> >> "push %rbp\n\t"
> >> >> >> >> "push %r8; push %r9\n\t"
> >> >> >> >> "push %r10; push %r11\n\t"
> >> >> >> >> "push %r12; push %r13\n\t"
> >> >> >> >> "push %r14; push %r15\n\t"
> >> >> >> >> "pushf\n\t"
> >> >> >> >>
> >> >> >> >> "push 136+save \n\t"
> >> >> >> >> "popf \n\t"
> >> >> >> >> "mov 0+save, %rax \n\t"
> >> >> >> >> "mov 8+save, %rbx \n\t"
> >> >> >> >> "mov 16+save, %rcx \n\t"
> >> >> >> >> "mov 24+save, %rdx \n\t"
> >> >> >> >> "mov 32+save, %rsi \n\t"
> >> >> >> >> "mov 40+save, %rdi \n\t"
> >> >> >> >> "mov 56+save, %rbp \n\t"
> >> >> >> >> "mov 64+save, %r8 \n\t"
> >> >> >> >> "mov 72+save, %r9 \n\t"
> >> >> >> >> "mov 80+save, %r10  \n\t"
> >> >> >> >> "mov 88+save, %r11 \n\t"
> >> >> >> >> "mov 96+save, %r12 \n\t"
> >> >> >> >> "mov 104+save, %r13 \n\t"
> >> >> >> >> "mov 112+save, %r14 \n\t"
> >> >> >> >> "mov 120+save, %r15 \n\t"
> >> >> >> >>
> >> >> >> >> "test_insn:\n\t"
> >> >> >> >> "in  (%dx),%al\n\t"
> >> >> >> >> ". = . + 31\n\t"
> >> >> >> >> "test_insn_end:\n\t"
> >> >> >> >>
> >> >> >> >> "pushf \n\t"
> >> >> >> >> "pop 136+save \n\t"
> >> >> >> >> "mov %rax, 0+save \n\t"
> >> >> >> >> "mov %rbx, 8+save \n\t"
> >> >> >> >> "mov %rcx, 16+save \n\t"
> >> >> >> >> "mov %rdx, 24+save \n\t"
> >> >> >> >> "mov %rsi, 32+save \n\t"
> >> >> >> >> "mov %rdi, 40+save \n\t"
> >> >> >> >> "mov %rbp, 56+save \n\t"
> >> >> >> >> "mov %r8, 64+save \n\t"
> >> >> >> >> "mov %r9, 72+save \n\t"
> >> >> >> >> "mov %r10, 80+save \n\t"
> >> >> >> >> "mov %r11, 88+save \n\t"
> >> >> >> >> "mov %r12, 96+save \n\t"
> >> >> >> >> "mov %r13, 104+save \n\t"
> >> >> >> >> "mov %r14, 112+save \n\t"
> >> >> >> >> "mov %r15, 120+save \n\t"
> >> >> >> >> "popf \n\t"
> >> >> >> >> "pop %r15; pop %r14 \n\t"
> >> >> >> >> "pop %r13; pop %r12 \n\t"
> >> >> >> >> "pop %r11; pop %r10 \n\t"
> >> >> >> >> "pop %r9; pop %r8 \n\t"
> >> >> >> >> "pop %rbp \n\t"
> >> >> >> >> "pop %rdi; pop %rsi \n\t"
> >> >> >> >> "pop %rdx; pop %rcx \n\t"
> >> >> >> >> "pop %rbx; pop %rax \n\t"
> >> >> >> >>
> >> >> >> >> "ret\n\t"
> >> >> >> >> "save:\n\t"
> >> >> >> >> ". = . + 256\n\t"
> >> >> >> >> ".align 4096\n\t"
> >> >> >> >> "alt_insn_page:\n\t"
> >> >> >> >> ". = . + 4096\n\t"
> >> >> >> >> );
> >> >> >> >>
> >> >> >> >>
> >> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> >> >> >> uint8_t *alt_insn, int alt_insn_length)
> >> >> >> >> {
> >> >> >> >>     int i, emul_offset;
> >> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >> >> >> >>         test_insn[i] = 0x90; // nop
> >> >> >> > Why? Gcc should pad it with nops.
> >> >> >> >
> >> >> >> >>     emul_offset = test_insn - insn_page;
> >> >> >> >>     for (i=0; i<alt_insn_length; i++)
> >> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> >> >> >> }
> >> >> >> >>
> >> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> >> >> >> {
> >> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
> >> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
> >> >> >> >>
> >> >> >> >>     memset(alt_insn_page, 0x90, 4096);
> >> >> >> > alt_insn_page should contains the same instruction as insn_page except
> >> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
> >> >> >> > work otherwise.
> >> >> >> In my oponion, only codes between test_insn and test_insn_end in
> >> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
> >> >> >> and when trapping into emulator OS will load alt_insn_page (because of
> >> >> >> invlpg(insn_page)), then return to guest with executing insn_page
> >> >> >> (from TLB).
> >> >> > While before trap the code will likely be executed from insn_page,
> >> >> > but after the trap it is very optimistic to assume that tlb cache
> >> >> > will still contain this virtual address since host will execute quite a
> >> >> > lot of code and can even schedule in the middle, so the TLB will not
> >> >> > contain the address and your test will crash. Even the code before test
> >> >> > instruction can be executed from alt_insn_page if guest is scheduled out
> >> >> > after invlpg() and before it executes every instruction until trapping
> >> >> > one. In your case the test will crash too instead of yielding false positive.
> >> >> >
> >> >> >> I don't know if this is right, but I use this trick in my
> >> >> >> previous patch and it runs well.
> >> >> > Your previous patches always had c3 (ret) after tested instruction on
> >> >> > alt_insn_page.
> >> >> >
> >> >> >>                                  I use "trace-cmd record -e kvm" to
> >> >> >> trace it and found instructions in alt_insn_page are not executed, so
> >> >> >> I suppose that alt_insn_page is not loaded to the right place.
> >> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
> >> >> > since current install_page() implementation cannot handle large pages
> >> >> > and the code is backed up by large pages. You can fix install_page() to
> >> >> > check for that and break large page into small one before installing a
> >> >> > page.
> >> >> Here I have two questions.
> >> >> 1. There's another function called "install_large_page", can it be
> >> >> used to our occasion? I found that this function is not used at all.
> >> > It is used when initial page tables are created.
> >> > See lib/x86/vm.c:setup_mmu_range()
> >> >
> >> >> 2. Why will current version runs well? Do pages allocated dynamically
> >> >> are automatically aligned to 2MB (large page size)?
> >> >>
> >> > No, they are 4K pages.
> >> Thus why dynamically creating insn_page and alt_insn_page with
> >> alloc_page() can get the right result?
> > Probably.
> >
> > --
> >                         Gleb.
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
??? June 19, 2013, 2:20 p.m. UTC | #21
I use insn_ram as what the origin/master done before. I don't know how
to describe it clearly, I will commit a patch later and you can get to
know from my codes.

Arthur

On Wed, Jun 19, 2013 at 10:13 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 10:01:40PM +0800, ??? <Arthur Chunqi Li> wrote:
>> I found the final reason! The initial use of init_ram is also used by
>> test_rip_relative(), which will cause conflict. I changed it and
>> everything runs well.
>>
> Not sure what you mean. Your version of test_movabs does not use insn_ram.
>
>> On Wed, Jun 19, 2013 at 8:32 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Wed, Jun 19, 2013 at 08:30:33PM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > Send code in a form of a patch.
>> >> >> >> >
>> >> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, ??? <Arthur Chunqi Li> wrote:
>> >> >> >> >> extern u8 insn_page[], insn_page_end[];
>> >> >> >> >> extern u8 test_insn[], test_insn_end[];
>> >> >> >> >> extern u8 alt_insn_page[];
>> >> >> >> >>
>> >> >> >> >> asm(
>> >> >> >> >> ".align 4096\n\t"
>> >> >> >> >> ".global insn_page\n\t"
>> >> >> >> >> ".global insn_page_end\n\t"
>> >> >> >> >> ".global test_insn\n\t"
>> >> >> >> >> ".global test_insn_end\n\t"
>> >> >> >> >> "insn_page:\n\t"
>> >> >> >> >>
>> >> >> >> >> "ret \n\t"
>> >> >> >> >>
>> >> >> >> >> "push %rax; push %rbx\n\t"
>> >> >> >> >> "push %rcx; push %rdx\n\t"
>> >> >> >> >> "push %rsi; push %rdi\n\t"
>> >> >> >> >> "push %rbp\n\t"
>> >> >> >> >> "push %r8; push %r9\n\t"
>> >> >> >> >> "push %r10; push %r11\n\t"
>> >> >> >> >> "push %r12; push %r13\n\t"
>> >> >> >> >> "push %r14; push %r15\n\t"
>> >> >> >> >> "pushf\n\t"
>> >> >> >> >>
>> >> >> >> >> "push 136+save \n\t"
>> >> >> >> >> "popf \n\t"
>> >> >> >> >> "mov 0+save, %rax \n\t"
>> >> >> >> >> "mov 8+save, %rbx \n\t"
>> >> >> >> >> "mov 16+save, %rcx \n\t"
>> >> >> >> >> "mov 24+save, %rdx \n\t"
>> >> >> >> >> "mov 32+save, %rsi \n\t"
>> >> >> >> >> "mov 40+save, %rdi \n\t"
>> >> >> >> >> "mov 56+save, %rbp \n\t"
>> >> >> >> >> "mov 64+save, %r8 \n\t"
>> >> >> >> >> "mov 72+save, %r9 \n\t"
>> >> >> >> >> "mov 80+save, %r10  \n\t"
>> >> >> >> >> "mov 88+save, %r11 \n\t"
>> >> >> >> >> "mov 96+save, %r12 \n\t"
>> >> >> >> >> "mov 104+save, %r13 \n\t"
>> >> >> >> >> "mov 112+save, %r14 \n\t"
>> >> >> >> >> "mov 120+save, %r15 \n\t"
>> >> >> >> >>
>> >> >> >> >> "test_insn:\n\t"
>> >> >> >> >> "in  (%dx),%al\n\t"
>> >> >> >> >> ". = . + 31\n\t"
>> >> >> >> >> "test_insn_end:\n\t"
>> >> >> >> >>
>> >> >> >> >> "pushf \n\t"
>> >> >> >> >> "pop 136+save \n\t"
>> >> >> >> >> "mov %rax, 0+save \n\t"
>> >> >> >> >> "mov %rbx, 8+save \n\t"
>> >> >> >> >> "mov %rcx, 16+save \n\t"
>> >> >> >> >> "mov %rdx, 24+save \n\t"
>> >> >> >> >> "mov %rsi, 32+save \n\t"
>> >> >> >> >> "mov %rdi, 40+save \n\t"
>> >> >> >> >> "mov %rbp, 56+save \n\t"
>> >> >> >> >> "mov %r8, 64+save \n\t"
>> >> >> >> >> "mov %r9, 72+save \n\t"
>> >> >> >> >> "mov %r10, 80+save \n\t"
>> >> >> >> >> "mov %r11, 88+save \n\t"
>> >> >> >> >> "mov %r12, 96+save \n\t"
>> >> >> >> >> "mov %r13, 104+save \n\t"
>> >> >> >> >> "mov %r14, 112+save \n\t"
>> >> >> >> >> "mov %r15, 120+save \n\t"
>> >> >> >> >> "popf \n\t"
>> >> >> >> >> "pop %r15; pop %r14 \n\t"
>> >> >> >> >> "pop %r13; pop %r12 \n\t"
>> >> >> >> >> "pop %r11; pop %r10 \n\t"
>> >> >> >> >> "pop %r9; pop %r8 \n\t"
>> >> >> >> >> "pop %rbp \n\t"
>> >> >> >> >> "pop %rdi; pop %rsi \n\t"
>> >> >> >> >> "pop %rdx; pop %rcx \n\t"
>> >> >> >> >> "pop %rbx; pop %rax \n\t"
>> >> >> >> >>
>> >> >> >> >> "ret\n\t"
>> >> >> >> >> "save:\n\t"
>> >> >> >> >> ". = . + 256\n\t"
>> >> >> >> >> ".align 4096\n\t"
>> >> >> >> >> "alt_insn_page:\n\t"
>> >> >> >> >> ". = . + 4096\n\t"
>> >> >> >> >> );
>> >> >> >> >>
>> >> >> >> >>
>> >> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> >> >> >> uint8_t *alt_insn, int alt_insn_length)
>> >> >> >> >> {
>> >> >> >> >>     int i, emul_offset;
>> >> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >> >> >> >>         test_insn[i] = 0x90; // nop
>> >> >> >> > Why? Gcc should pad it with nops.
>> >> >> >> >
>> >> >> >> >>     emul_offset = test_insn - insn_page;
>> >> >> >> >>     for (i=0; i<alt_insn_length; i++)
>> >> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> >> >> >> }
>> >> >> >> >>
>> >> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> >> >> >> {
>> >> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
>> >> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >> >> >> >>
>> >> >> >> >>     memset(alt_insn_page, 0x90, 4096);
>> >> >> >> > alt_insn_page should contains the same instruction as insn_page except
>> >> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
>> >> >> >> > work otherwise.
>> >> >> >> In my oponion, only codes between test_insn and test_insn_end in
>> >> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
>> >> >> >> and when trapping into emulator OS will load alt_insn_page (because of
>> >> >> >> invlpg(insn_page)), then return to guest with executing insn_page
>> >> >> >> (from TLB).
>> >> >> > While before trap the code will likely be executed from insn_page,
>> >> >> > but after the trap it is very optimistic to assume that tlb cache
>> >> >> > will still contain this virtual address since host will execute quite a
>> >> >> > lot of code and can even schedule in the middle, so the TLB will not
>> >> >> > contain the address and your test will crash. Even the code before test
>> >> >> > instruction can be executed from alt_insn_page if guest is scheduled out
>> >> >> > after invlpg() and before it executes every instruction until trapping
>> >> >> > one. In your case the test will crash too instead of yielding false positive.
>> >> >> >
>> >> >> >> I don't know if this is right, but I use this trick in my
>> >> >> >> previous patch and it runs well.
>> >> >> > Your previous patches always had c3 (ret) after tested instruction on
>> >> >> > alt_insn_page.
>> >> >> >
>> >> >> >>                                  I use "trace-cmd record -e kvm" to
>> >> >> >> trace it and found instructions in alt_insn_page are not executed, so
>> >> >> >> I suppose that alt_insn_page is not loaded to the right place.
>> >> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
>> >> >> > since current install_page() implementation cannot handle large pages
>> >> >> > and the code is backed up by large pages. You can fix install_page() to
>> >> >> > check for that and break large page into small one before installing a
>> >> >> > page.
>> >> >> Here I have two questions.
>> >> >> 1. There's another function called "install_large_page", can it be
>> >> >> used to our occasion? I found that this function is not used at all.
>> >> > It is used when initial page tables are created.
>> >> > See lib/x86/vm.c:setup_mmu_range()
>> >> >
>> >> >> 2. Why will current version runs well? Do pages allocated dynamically
>> >> >> are automatically aligned to 2MB (large page size)?
>> >> >>
>> >> > No, they are 4K pages.
>> >> Thus why dynamically creating insn_page and alt_insn_page with
>> >> alloc_page() can get the right result?
>> > Probably.
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.
diff mbox

Patch

diff --git a/x86/emulator.c b/x86/emulator.c
index 96576e5..8ab9904 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,14 @@  int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 rip, rflags;
+};
+
+static struct regs inregs, outregs;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +693,79 @@  static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
+			     uint8_t *alt_insn_page, void *insn_ram,
+			     uint8_t *alt_insn, int alt_insn_length)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	int i;
+
+	// Pad with RET instructions
+	memset(insn_page, 0xc3, 4096);
+	memset(alt_insn_page, 0xc3, 4096);
+
+	// Place a trapping instruction in the page to trigger a VMEXIT
+	insn_page[0] = 0x89; // mov %eax, (%rax)
+	insn_page[1] = 0x00;
+	insn_page[2] = 0x90; // nop
+	insn_page[3] = 0xc3; // ret
+
+	// Place the instruction we want the hypervisor to see in the alternate page
+	for (i=0; i<alt_insn_length; i++)
+		alt_insn_page[i] = alt_insn[i];
+
+	// Save general registers
+	asm volatile(
+		"push %rax\n\r"
+		"push %rbx\n\r"
+		"push %rcx\n\r"
+		"push %rdx\n\r"
+		"push %rsi\n\r"
+		"push %rdi\n\r"
+		);
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram + 3));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile(
+		"call *%1\n\r"
+
+		"mov %%rax, 0+%[outregs] \n\t"
+		"mov %%rbx, 8+%[outregs] \n\t"
+		"mov %%rcx, 16+%[outregs] \n\t"
+		"mov %%rdx, 24+%[outregs] \n\t"
+		"mov %%rsi, 32+%[outregs] \n\t"
+		"mov %%rdi, 40+%[outregs] \n\t"
+		"mov %%rsp,48+ %[outregs] \n\t"
+		"mov %%rbp, 56+%[outregs] \n\t"
+
+		/* Save RFLAGS in outregs*/
+		"pushf \n\t"
+		"popq 72+%[outregs] \n\t"
+		: [outregs]"+m"(outregs)
+		: "r"(insn_ram),
+			"a"(mem), "b"(inregs.rbx),
+			"c"(inregs.rcx), "d"(inregs.rdx),
+			"S"(inregs.rsi), "D"(inregs.rdi)
+		: "memory", "cc"
+		);
+	// Restore general registers
+	asm volatile(
+		"pop %rax\n\r"
+		"pop %rbx\n\r"
+		"pop %rcx\n\r"
+		"pop %rdx\n\r"
+		"pop %rsi\n\r"
+		"pop %rdi\n\r"
+		);
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;