
[v4,2/2] x86/emulate: Send vm_event from emulate

Message ID 20190520125454.14805-2-aisaila@bitdefender.com (mailing list archive)
State New, archived
Series [v4,1/2] x86/emulate: Move hvmemul_linear_to_phys

Commit Message

Alexandru Stefan ISAILA May 20, 2019, 12:55 p.m. UTC
This patch aims to have mem access vm events sent from the emulator.
This is useful in the case of emulated instructions that cause
page-walks on access protected pages.

We use hvmemul_map_linear_addr() ro intercept r/w access and
hvmemul_insn_fetch() to intercept exec access.

First we try to send a vm event and if the event is sent then emulation
returns X86EMUL_ACCESS_EXCEPTION. If the event is not sent then the
emulation goes on as expected.

Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>

---
Changes since V3:
	- Calculate gpa in hvmemul_send_vm_event()
	- Move hvmemul_linear_to_phys() call inside
	hvmemul_send_vm_event()
	- Check only if hvmemul_virtual_to_linear() returns X86EMUL_OKAY
	- Add comment for X86EMUL_ACCESS_EXCEPTION.
---
 xen/arch/x86/hvm/emulate.c             | 89 +++++++++++++++++++++++++-
 xen/arch/x86/hvm/vm_event.c            |  2 +-
 xen/arch/x86/mm/mem_access.c           |  3 +-
 xen/arch/x86/x86_emulate/x86_emulate.h |  2 +
 xen/include/asm-x86/hvm/emulate.h      |  4 +-
 5 files changed, 95 insertions(+), 5 deletions(-)
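
In condensed form (the full hunks are in the patch at the bottom of this
page), both interception points make the decision described in the commit
message: try to send the event first, and only perform the actual access
when no event went out.

    /* r/w accesses, in hvmemul_map_linear_addr(): */
    if ( pfec & PFEC_write_access )
    {
        if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
        {
            err = ERR_PTR(~X86EMUL_ACCESS_EXCEPTION);
            goto out;
        }
        ...
    }

    /* exec accesses, in hvmemul_insn_fetch(): */
    if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
        return X86EMUL_ACCESS_EXCEPTION;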

Comments

Jan Beulich May 22, 2019, 9:56 a.m. UTC | #1
>>> On 20.05.19 at 14:55, <aisaila@bitdefender.com> wrote:
> This patch aims to have mem access vm events sent from the emulator.
> This is useful in the case of emulated instructions that cause
> page-walks on access protected pages.
> 
> We use hvmemul_map_linear_addr() ro intercept r/w access and
> hvmemul_insn_fetch() to intercept exec access.

I'm afraid I don't understand this sentence. Or wait - is this a
simple typo, and you mean "to" instead of "ro"?

> First we try to send a vm event and if the event is sent then emulation
> returns X86EMUL_ACCESS_EXCEPTION. If the event is not sent then the
> emulation goes on as expected.

Perhaps it's obvious for a vm-event person why successful sending
of an event is to result in X86EMUL_ACCESS_EXCEPTION, but it's not
to me, despite having looked at prior versions. Can this (odd at the
first glance) behavior please be briefly explained here?

> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -15,6 +15,7 @@
>  #include <xen/paging.h>
>  #include <xen/trace.h>
>  #include <xen/vm_event.h>
> +#include <xen/monitor.h>
>  #include <asm/event.h>
>  #include <asm/i387.h>
>  #include <asm/xstate.h>
> @@ -26,6 +27,7 @@
>  #include <asm/hvm/support.h>
>  #include <asm/hvm/svm/svm.h>
>  #include <asm/vm_event.h>
> +#include <asm/altp2m.h>

In both cases please try to insert at least half way alphabetically
(I didn't check if the directives are fully sorted already), rather
than blindly at the end.

> @@ -619,6 +621,68 @@ static int hvmemul_linear_to_phys(
>      return X86EMUL_OKAY;
>  }
>  
> +static bool hvmemul_send_vm_event(unsigned long gla,
> +                                  uint32_t pfec, unsigned int bytes,
> +                                  struct hvm_emulate_ctxt ctxt)
> +{
> +    xenmem_access_t access;
> +    vm_event_request_t req = {};
> +    gfn_t gfn;
> +    paddr_t gpa;
> +    unsigned long reps = 1;
> +    int rc;
> +
> +    if ( !ctxt.send_event || !pfec )

Why the !pfec part of the condition?

> +        return false;
> +
> +    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, &ctxt);

As said before - I don't think it's a good idea to do the page walk
twice: This and the pre-existing one can easily return different
results.

Additionally, as also said before (I think), the function may raise
#PF, which you don't seem to deal with despite discarding the
X86EMUL_EXCEPTION return value ...

> +    if ( rc != X86EMUL_OKAY )
> +        return false;

... here.

> +    gfn = gaddr_to_gfn(gpa);
> +
> +    if ( p2m_get_mem_access(current->domain, gfn, &access,
> +                            altp2m_vcpu_idx(current)) != 0 )
> +        return false;
> +
> +    switch ( access ) {
> +    case XENMEM_access_x:
> +    case XENMEM_access_rx:
> +        if ( pfec & PFEC_write_access )
> +            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
> +        break;
> +
> +    case XENMEM_access_w:
> +    case XENMEM_access_rw:
> +        if ( pfec & PFEC_insn_fetch )
> +            req.u.mem_access.flags = MEM_ACCESS_X;
> +        break;
> +
> +    case XENMEM_access_r:
> +    case XENMEM_access_n:
> +        if ( pfec & PFEC_write_access )
> +            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
> +        if ( pfec & PFEC_insn_fetch )
> +            req.u.mem_access.flags |= MEM_ACCESS_X;
> +        break;
> +
> +    default:
> +        return false;
> +    }

Aren't you looking at the leaf page here, rather than at any of the
involved page tables? Or am I misunderstanding the description
saying "page-walks on access protected pages"?

> @@ -636,6 +700,7 @@ static void *hvmemul_map_linear_addr(
>      unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
>          (linear >> PAGE_SHIFT) + 1;
>      unsigned int i;
> +    gfn_t gfn;
>  
>      /*
>       * mfn points to the next free slot.  All used slots have a page reference
> @@ -674,7 +739,7 @@ static void *hvmemul_map_linear_addr(
>          ASSERT(mfn_x(*mfn) == 0);
>  
>          res = hvm_translate_get_page(curr, addr, true, pfec,
> -                                     &pfinfo, &page, NULL, &p2mt);
> +                                     &pfinfo, &page, &gfn, &p2mt);
>  
>          switch ( res )
>          {

Are these two hunks leftovers? You don't use "gfn" anywhere.

> @@ -1248,7 +1318,21 @@ int hvmemul_insn_fetch(
>          container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
>      /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
>      uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
> +    uint32_t pfec = PFEC_page_present | PFEC_insn_fetch;
> +    unsigned long addr, reps = 1;
> +    int rc = 0;
> +
> +    rc = hvmemul_virtual_to_linear(
> +        seg, offset, bytes, &reps, hvm_access_insn_fetch, hvmemul_ctxt, &addr);
> +
> +    if ( rc != X86EMUL_OKAY || !bytes )
> +        return rc;
> +
> +    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
> +        pfec |= PFEC_user_mode;
>  
> +    if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
> +        return X86EMUL_ACCESS_EXCEPTION;
>      /*
>       * Fall back if requested bytes are not in the prefetch cache.
>       * But always perform the (fake) read when bytes == 0.

Despite what was said before you're still doing things a 2nd time
here just because of hvmemul_send_vm_event()'s needs, even
if that function ends up bailing right away.

Also please don't lose the blank line ahead of the comment you
add code ahead of.

> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -162,6 +162,8 @@ struct x86_emul_fpu_aux {
>  #define X86EMUL_UNRECOGNIZED   X86EMUL_UNIMPLEMENTED
>   /* (cmpxchg accessor): CMPXCHG failed. */
>  #define X86EMUL_CMPXCHG_FAILED 7
> +/* Emulator tried to access a protected page. */
> +#define X86EMUL_ACCESS_EXCEPTION 8

This still doesn't make clear what the difference is to
X86EMUL_EXCEPTION.

Jan
Alexandru Stefan ISAILA May 22, 2019, 12:59 p.m. UTC | #2
On 22.05.2019 12:56, Jan Beulich wrote:
>>>> On 20.05.19 at 14:55, <aisaila@bitdefender.com> wrote:
>> This patch aims to have mem access vm events sent from the emulator.
>> This is useful in the case of emulated instructions that cause
>> page-walks on access protected pages.
>>
>> We use hvmemul_map_linear_addr() ro intercept r/w access and
>> hvmemul_insn_fetch() to intercept exec access.
> 
> I'm afraid I don't understand this sentence. Or wait - is this a
> simple typo, and you mean "to" instead of "ro"?

Yes, that is a typo; it was meant to be "to".

> 
>> First we try to send a vm event and if the event is sent then emulation
>> returns X86EMUL_ACCESS_EXCEPTION. If the event is not sent then the
>> emulation goes on as expected.
> 
> Perhaps it's obvious for a vm-event person why successful sending
> of an event is to result in X86EMUL_ACCESS_EXCEPTION, but it's not
> to me, despite having looked at prior versions. Can this (odd at the
> first glance) behavior please be briefly explained here?

If the event was successfully sent then the emulation has to stop and 
return.

> 
>> --- a/xen/arch/x86/hvm/emulate.c
>> +++ b/xen/arch/x86/hvm/emulate.c
>> @@ -15,6 +15,7 @@
>>   #include <xen/paging.h>
>>   #include <xen/trace.h>
>>   #include <xen/vm_event.h>
>> +#include <xen/monitor.h>
>>   #include <asm/event.h>
>>   #include <asm/i387.h>
>>   #include <asm/xstate.h>
>> @@ -26,6 +27,7 @@
>>   #include <asm/hvm/support.h>
>>   #include <asm/hvm/svm/svm.h>
>>   #include <asm/vm_event.h>
>> +#include <asm/altp2m.h>
> 
> In both cases please try to insert at least half way alphabetically
> (I didn't check if the directives are fully sorted already), rather
> than blindly at the end.

Ok, I will correct that.

> 
>> @@ -619,6 +621,68 @@ static int hvmemul_linear_to_phys(
>>       return X86EMUL_OKAY;
>>   }
>>   
>> +static bool hvmemul_send_vm_event(unsigned long gla,
>> +                                  uint32_t pfec, unsigned int bytes,
>> +                                  struct hvm_emulate_ctxt ctxt)
>> +{
>> +    xenmem_access_t access;
>> +    vm_event_request_t req = {};
>> +    gfn_t gfn;
>> +    paddr_t gpa;
>> +    unsigned long reps = 1;
>> +    int rc;
>> +
>> +    if ( !ctxt.send_event || !pfec )
> 
> Why the !pfec part of the condition?

Because it is used to check the type of access violation. If it is 0 
then we do not want to call get_mem_access or get the gpa; it is clear 
that there will be no violation.

> 
>> +        return false;
>> +
>> +    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, &ctxt);
> 
> As said before - I don't think it's a good idea to do the page walk
> twice: This and the pre-existing one can easily return different
> results.

I do this just to get the gpa. If there is another way I will gladly use it.

> 
> Additionally, as also said before (I think), the function may raise
> #PF, which you don't seem to deal with despite discarding the
> X86EMUL_EXCEPTION return value ...
>> +    if ( rc != X86EMUL_OKAY )
>> +        return false;
> 
> ... here.
> 
>> +    gfn = gaddr_to_gfn(gpa);
>> +
>> +    if ( p2m_get_mem_access(current->domain, gfn, &access,
>> +                            altp2m_vcpu_idx(current)) != 0 )
>> +        return false;
>> +
>> +    switch ( access ) {
>> +    case XENMEM_access_x:
>> +    case XENMEM_access_rx:
>> +        if ( pfec & PFEC_write_access )
>> +            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
>> +        break;
>> +
>> +    case XENMEM_access_w:
>> +    case XENMEM_access_rw:
>> +        if ( pfec & PFEC_insn_fetch )
>> +            req.u.mem_access.flags = MEM_ACCESS_X;
>> +        break;
>> +
>> +    case XENMEM_access_r:
>> +    case XENMEM_access_n:
>> +        if ( pfec & PFEC_write_access )
>> +            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
>> +        if ( pfec & PFEC_insn_fetch )
>> +            req.u.mem_access.flags |= MEM_ACCESS_X;
>> +        break;
>> +
>> +    default:
>> +        return false;
>> +    }
> 
> Aren't you looking at the leaf page here, rather than at any of the
> involved page tables? Or am I misunderstanding the description
> saying "page-walks on access protected pages"?

We want to ignore write accesses to the page tables and only fire a 
vm_event for "regular" pages possibly hit by the actual instruction that 
has also happened to trigger the A/D write(s). So we don't want to send 
out vm_events for written-to page tables at all.

> 
>> @@ -636,6 +700,7 @@ static void *hvmemul_map_linear_addr(
>>       unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
>>           (linear >> PAGE_SHIFT) + 1;
>>       unsigned int i;
>> +    gfn_t gfn;
>>   
>>       /*
>>        * mfn points to the next free slot.  All used slots have a page reference
>> @@ -674,7 +739,7 @@ static void *hvmemul_map_linear_addr(
>>           ASSERT(mfn_x(*mfn) == 0);
>>   
>>           res = hvm_translate_get_page(curr, addr, true, pfec,
>> -                                     &pfinfo, &page, NULL, &p2mt);
>> +                                     &pfinfo, &page, &gfn, &p2mt);
>>   
>>           switch ( res )
>>           {
> 
> Are these two hunks leftovers? You don't use "gfn" anywhere.

Yes, there is no need for the gfn any more.

> 
>> @@ -1248,7 +1318,21 @@ int hvmemul_insn_fetch(
>>           container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
>>       /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
>>       uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
>> +    uint32_t pfec = PFEC_page_present | PFEC_insn_fetch;
>> +    unsigned long addr, reps = 1;
>> +    int rc = 0;
>> +
>> +    rc = hvmemul_virtual_to_linear(
>> +        seg, offset, bytes, &reps, hvm_access_insn_fetch, hvmemul_ctxt, &addr);
>> +
>> +    if ( rc != X86EMUL_OKAY || !bytes )
>> +        return rc;
>> +
>> +    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
>> +        pfec |= PFEC_user_mode;
>>   
>> +    if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
>> +        return X86EMUL_ACCESS_EXCEPTION;
>>       /*
>>        * Fall back if requested bytes are not in the prefetch cache.
>>        * But always perform the (fake) read when bytes == 0.
> 
> Despite what was said before you're still doing things a 2nd time
> here just because of hvmemul_send_vm_event()'s needs, even
> if that function ends up bailing right away.

I don't understand what things are done 2 times. Can you please explain?

> 
> Also please don't lose the blank line ahead of the comment you
> add code ahead of.
> 
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
>> @@ -162,6 +162,8 @@ struct x86_emul_fpu_aux {
>>   #define X86EMUL_UNRECOGNIZED   X86EMUL_UNIMPLEMENTED
>>    /* (cmpxchg accessor): CMPXCHG failed. */
>>   #define X86EMUL_CMPXCHG_FAILED 7
>> +/* Emulator tried to access a protected page. */
>> +#define X86EMUL_ACCESS_EXCEPTION 8
> 
> This still doesn't make clear what the difference is to
> X86EMUL_EXCEPTION.

We need a return that has no side effects.

Alex
Jan Beulich May 22, 2019, 1:34 p.m. UTC | #3
>>> On 22.05.19 at 14:59, <aisaila@bitdefender.com> wrote:
> On 22.05.2019 12:56, Jan Beulich wrote:
>>>>> On 20.05.19 at 14:55, <aisaila@bitdefender.com> wrote:
>>> First we try to send a vm event and if the event is sent then emulation
>>> returns X86EMUL_ACCESS_EXCEPTION. If the event is not sent then the
>>> emulation goes on as expected.
>> 
>> Perhaps it's obvious for a vm-event person why successful sending
>> of an event is to result in X86EMUL_ACCESS_EXCEPTION, but it's not
>> to me, despite having looked at prior versions. Can this (odd at the
>> first glance) behavior please be briefly explained here?
> 
> If the event was successfully sent then the emulation has to stop and 
> return.

Which is where we commonly use X86EMUL_RETRY. I've explained to
you before that introduction of new return values needs careful
inspection that it'll work for all involved pieces of code (in particular
ones specially treating some of the values).
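
For comparison, reusing the existing value at the fetch-side call site
would simply be (a sketch of the suggestion, which would still need the
careful inspection mentioned above):

    if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
        return X86EMUL_RETRY; /* stop this round of emulation; no new value */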

>>> @@ -619,6 +621,68 @@ static int hvmemul_linear_to_phys(
>>>       return X86EMUL_OKAY;
>>>   }
>>>   
>>> +static bool hvmemul_send_vm_event(unsigned long gla,
>>> +                                  uint32_t pfec, unsigned int bytes,
>>> +                                  struct hvm_emulate_ctxt ctxt)
>>> +{
>>> +    xenmem_access_t access;
>>> +    vm_event_request_t req = {};
>>> +    gfn_t gfn;
>>> +    paddr_t gpa;
>>> +    unsigned long reps = 1;
>>> +    int rc;
>>> +
>>> +    if ( !ctxt.send_event || !pfec )
>> 
>> Why the !pfec part of the condition?
> 
> Because it is used to check the type of access violation. If it is 0 
> then we do not want to call get_mem_access or get the gpa; it is clear 
> that there will be no violation.

So what about, as an example, the case of just PFEC_implicit set?
And do you really care about accesses with PFEC_page_present
clear?
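
Put differently, if only write and execute violations can ever lead to an
event being sent (which is what the switch quoted further down implements),
the entry check could be narrowed to exactly those bits instead of testing
pfec for being non-zero - a sketch:

    if ( !ctxt.send_event ||
         !(pfec & (PFEC_write_access | PFEC_insn_fetch)) )
        return false;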

>>> +        return false;
>>> +
>>> +    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, &ctxt);
>> 
>> As said before - I don't think it's a good idea to do the page walk
>> twice: This and the pre-existing one can easily return different
>> results.
> 
> I do this just to get the gpa. If there is another way I will gladly use it.

To get the gpa you need to do a page walk. But you shouldn't do
two page walks. Which as a result tells me that the question is
not about "another way", but that things need to be structured
differently.

>>> +    switch ( access ) {

Btw, I'm noticing this style issue only now.

>>> +    case XENMEM_access_x:
>>> +    case XENMEM_access_rx:
>>> +        if ( pfec & PFEC_write_access )
>>> +            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
>>> +        break;
>>> +
>>> +    case XENMEM_access_w:
>>> +    case XENMEM_access_rw:
>>> +        if ( pfec & PFEC_insn_fetch )
>>> +            req.u.mem_access.flags = MEM_ACCESS_X;
>>> +        break;
>>> +
>>> +    case XENMEM_access_r:
>>> +    case XENMEM_access_n:
>>> +        if ( pfec & PFEC_write_access )
>>> +            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
>>> +        if ( pfec & PFEC_insn_fetch )
>>> +            req.u.mem_access.flags |= MEM_ACCESS_X;
>>> +        break;
>>> +
>>> +    default:
>>> +        return false;
>>> +    }
>> 
>> Aren't you looking at the leaf page here, rather than at any of the
>> involved page tables? Or am I misunderstanding the description
>> saying "page-walks on access protected pages"?
> 
> We want to ignore write accesses to the page tables and only fire a 
> vm_event for "regular" pages possibly hit by the actual instruction that 
> has also happened to trigger the A/D write(s). So we don't want to send 
> out vm_events for written-to page tables at all.

In which case may I ask for the description to be worded to make
this unambiguous?

>>> @@ -1248,7 +1318,21 @@ int hvmemul_insn_fetch(
>>>           container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
>>>       /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
>>>       uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
>>> +    uint32_t pfec = PFEC_page_present | PFEC_insn_fetch;
>>> +    unsigned long addr, reps = 1;
>>> +    int rc = 0;
>>> +
>>> +    rc = hvmemul_virtual_to_linear(
>>> +        seg, offset, bytes, &reps, hvm_access_insn_fetch, hvmemul_ctxt, &addr);
>>> +
>>> +    if ( rc != X86EMUL_OKAY || !bytes )
>>> +        return rc;
>>> +
>>> +    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
>>> +        pfec |= PFEC_user_mode;
>>>   
>>> +    if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
>>> +        return X86EMUL_ACCESS_EXCEPTION;
>>>       /*
>>>        * Fall back if requested bytes are not in the prefetch cache.
>>>        * But always perform the (fake) read when bytes == 0.
>> 
>> Despite what was said before you're still doing things a 2nd time
>> here just because of hvmemul_send_vm_event()'s needs, even
>> if that function ends up bailing right away.
> 
> I don't understand what things are done 2 times. Can you please explain?

You add code above that exists already in __hvmemul_read().
Even worse, __hvmemul_read() may not need calling at all, in
which case there's no (emulated) memory access and hence
imo there shouldn't be any event. Plus, just like in the
hvmemul_linear_to_phys() case there may be an exception
raised by the function, yet just like there you also discard the
return value saying so without also zapping the exception.
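
To illustrate the exception point: the translation result would need to be
distinguished rather than folded into a generic failure - a minimal sketch,
assuming ctxt were passed by pointer (so that clearing the pending event
actually takes effect) and that x86_emul_reset_event() is the helper used
to drop it:

    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, ctxt);
    switch ( rc )
    {
    case X86EMUL_OKAY:
        break;

    case X86EMUL_EXCEPTION:
        /* The walk queued a #PF; don't leave it pending when we are
         * only probing whether a vm_event should be sent. */
        x86_emul_reset_event(&ctxt->ctxt);
        /* fall through */
    default:
        return false;
    }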

>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
>>> @@ -162,6 +162,8 @@ struct x86_emul_fpu_aux {
>>>   #define X86EMUL_UNRECOGNIZED   X86EMUL_UNIMPLEMENTED
>>>    /* (cmpxchg accessor): CMPXCHG failed. */
>>>   #define X86EMUL_CMPXCHG_FAILED 7
>>> +/* Emulator tried to access a protected page. */
>>> +#define X86EMUL_ACCESS_EXCEPTION 8
>> 
>> This still doesn't make clear what the difference is to
>> X86EMUL_EXCEPTION.
> 
> We need a return that has no side effects.

So besides saying so you also need to make sure there actually
are no side effects.

Jan
Alexandru Stefan ISAILA May 22, 2019, 1:50 p.m. UTC | #4
>>> Despite what was said before you're still doing things a 2nd time
>>> here just because of hvmemul_send_vm_event()'s needs, even
>>> if that function ends up bailing right away.
>>
>> I don't understand what things are done 2 times. Can you please explain?
> 
> You add code above that exists already in __hvmemul_read().
> Even worse, __hvmemul_read() may not need calling at all, in
> which case there's no (emulated) memory access and hence
> imo there shouldn't be any event. Plus, just like in the
> hvmemul_linear_to_phys() case there may be an exception
> raised by the function, yet just like there you also discard the
> return value saying so without also zapping the exception.
> 

Isn't it safer to move the hvmemul_send_vm_event() from 
hvmemul_insn_fetch() to __hvmemul_read()?

Alex
Jan Beulich May 22, 2019, 1:57 p.m. UTC | #5
>>> On 22.05.19 at 15:50, <aisaila@bitdefender.com> wrote:
> Isn't it safer to move the hvmemul_send_vm_event() from 
> hvmemul_insn_fetch() to __hvmemul_read()?

Possibly - I can't tell whether that'll fit all your needs. I also
don't recall whether this was proposed before and there
were reasons speaking against doing so.

Jan
Alexandru Stefan ISAILA May 30, 2019, 8:59 a.m. UTC | #6
> 
>> +        return false;
>> +
>> +    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, &ctxt);
> 
> As said before - I don't think it's a good idea to do the page walk
> twice: This and the pre-existing one can easily return different
> results.

What preexisting page walk are you talking about here? I don't think 
there is a way to get the gpa by passing it from somewhere.

Alex

> 
> Additionally, as also said before (I think), the function may raise
> #PF, which you don't seem to deal with despite discarding the
> X86EMUL_EXCEPTION return value ...
> 
>> +    if ( rc != X86EMUL_OKAY )
>> +        return false;
> 
> ... here.
Jan Beulich May 31, 2019, 9:16 a.m. UTC | #7
>>> On 30.05.19 at 10:59, <aisaila@bitdefender.com> wrote:
>>  
>>> +        return false;
>>> +
>>> +    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, &ctxt);
>> 
>> As said before - I don't think it's a good idea to do the page walk
>> twice: This and the pre-existing one can easily return different
>> results.
> 
> What preexisting page walk are you talking about here?

Well, I'm afraid I don't know what to say (in a polite way). I'm sure
you understand the code you are trying to add to, so it would seem
natural to me that you can answer this question all by yourself:
Since you don't remove any linear->phys translations in your patch,
and since there necessarily is one prior to your patch, you adding
(another) one means there are now two of them.

So to answer your question directly: hvmemul_map_linear_addr()
calls hvm_translate_get_page(), and hvmemul_insn_fetch() ->
__hvmemul_read() -> linear_read() -> hvm_copy_from_guest_linear()
-> __hvm_copy() calls hvm_translate_get_page() as well.
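
One way to picture such a restructuring on the map side (a sketch only,
using a hypothetical hvmemul_send_vm_event() variant that takes the gfn
the existing translation already produces, so no second walk is needed):

    res = hvm_translate_get_page(curr, addr, true, pfec,
                                 &pfinfo, &page, &gfn, &p2mt);
    ...
    if ( (pfec & PFEC_write_access) &&
         hvmemul_send_vm_event(addr, gfn, pfec, hvmemul_ctxt) )
    {
        err = ERR_PTR(~X86EMUL_RETRY);
        goto out;
    }

The fetch side would similarly have to hook the translation that already
happens inside linear_read()/__hvm_copy(), rather than adding another
hvmemul_virtual_to_linear() call in hvmemul_insn_fetch().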

As an aside - while I'm advocating the stripping of reply quoting that's
not needed as context, you surely went too far here: You've left in
place neither an indication of when the mails whose context is still
present above were sent, nor enough context to reconstruct which
function your additions go into. IOW I had to search the earlier parts
of this thread to gather enough context to actually be able to reply.

> I don't think 
> there is a way to get the gpa by passing it from somewhere.

Possibly, but that's also not what I did suggest. Instead what I
think you need to look into is how to restructure things, perhaps
just as to the place where you insert your new code.

Jan

Patch

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 254ff6515d..75403ebc9b 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -15,6 +15,7 @@ 
 #include <xen/paging.h>
 #include <xen/trace.h>
 #include <xen/vm_event.h>
+#include <xen/monitor.h>
 #include <asm/event.h>
 #include <asm/i387.h>
 #include <asm/xstate.h>
@@ -26,6 +27,7 @@ 
 #include <asm/hvm/support.h>
 #include <asm/hvm/svm/svm.h>
 #include <asm/vm_event.h>
+#include <asm/altp2m.h>
 
 static void hvmtrace_io_assist(const ioreq_t *p)
 {
@@ -619,6 +621,68 @@  static int hvmemul_linear_to_phys(
     return X86EMUL_OKAY;
 }
 
+static bool hvmemul_send_vm_event(unsigned long gla,
+                                  uint32_t pfec, unsigned int bytes,
+                                  struct hvm_emulate_ctxt ctxt)
+{
+    xenmem_access_t access;
+    vm_event_request_t req = {};
+    gfn_t gfn;
+    paddr_t gpa;
+    unsigned long reps = 1;
+    int rc;
+
+    if ( !ctxt.send_event || !pfec )
+        return false;
+
+    rc = hvmemul_linear_to_phys(gla, &gpa, bytes, &reps, pfec, &ctxt);
+
+    if ( rc != X86EMUL_OKAY )
+        return false;
+
+    gfn = gaddr_to_gfn(gpa);
+
+    if ( p2m_get_mem_access(current->domain, gfn, &access,
+                            altp2m_vcpu_idx(current)) != 0 )
+        return false;
+
+    switch ( access ) {
+    case XENMEM_access_x:
+    case XENMEM_access_rx:
+        if ( pfec & PFEC_write_access )
+            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
+        break;
+
+    case XENMEM_access_w:
+    case XENMEM_access_rw:
+        if ( pfec & PFEC_insn_fetch )
+            req.u.mem_access.flags = MEM_ACCESS_X;
+        break;
+
+    case XENMEM_access_r:
+    case XENMEM_access_n:
+        if ( pfec & PFEC_write_access )
+            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
+        if ( pfec & PFEC_insn_fetch )
+            req.u.mem_access.flags |= MEM_ACCESS_X;
+        break;
+
+    default:
+        return false;
+    }
+
+    if ( !req.u.mem_access.flags )
+        return false; /* no violation */
+
+    req.reason = VM_EVENT_REASON_MEM_ACCESS;
+    req.u.mem_access.gfn = gfn_x(gfn);
+    req.u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA | MEM_ACCESS_GLA_VALID;
+    req.u.mem_access.gla = gla;
+    req.u.mem_access.offset = gpa & ((1 << PAGE_SHIFT) - 1);
+
+    return monitor_traps(current, true, &req) >= 0;
+}
+
 /*
  * Map the frame(s) covering an individual linear access, for writeable
  * access.  May return NULL for MMIO, or ERR_PTR(~X86EMUL_*) for other errors
@@ -636,6 +700,7 @@  static void *hvmemul_map_linear_addr(
     unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
         (linear >> PAGE_SHIFT) + 1;
     unsigned int i;
+    gfn_t gfn;
 
     /*
      * mfn points to the next free slot.  All used slots have a page reference
@@ -674,7 +739,7 @@  static void *hvmemul_map_linear_addr(
         ASSERT(mfn_x(*mfn) == 0);
 
         res = hvm_translate_get_page(curr, addr, true, pfec,
-                                     &pfinfo, &page, NULL, &p2mt);
+                                     &pfinfo, &page, &gfn, &p2mt);
 
         switch ( res )
         {
@@ -704,6 +769,11 @@  static void *hvmemul_map_linear_addr(
 
         if ( pfec & PFEC_write_access )
         {
+            if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
+            {
+                err = ERR_PTR(~X86EMUL_ACCESS_EXCEPTION);
+                goto out;
+            }
             if ( p2m_is_discard_write(p2mt) )
             {
                 err = ERR_PTR(~X86EMUL_OKAY);
@@ -1248,7 +1318,21 @@  int hvmemul_insn_fetch(
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
     /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
     uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
+    uint32_t pfec = PFEC_page_present | PFEC_insn_fetch;
+    unsigned long addr, reps = 1;
+    int rc = 0;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_insn_fetch, hvmemul_ctxt, &addr);
+
+    if ( rc != X86EMUL_OKAY || !bytes )
+        return rc;
+
+    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
+        pfec |= PFEC_user_mode;
 
+    if ( hvmemul_send_vm_event(addr, pfec, bytes, *hvmemul_ctxt) )
+        return X86EMUL_ACCESS_EXCEPTION;
     /*
      * Fall back if requested bytes are not in the prefetch cache.
      * But always perform the (fake) read when bytes == 0.
@@ -2508,12 +2592,13 @@  int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
 }
 
 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
-    unsigned int errcode)
+    unsigned int errcode, bool send_event)
 {
     struct hvm_emulate_ctxt ctx = {{ 0 }};
     int rc;
 
     hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
+    ctx.send_event = send_event;
 
     switch ( kind )
     {
diff --git a/xen/arch/x86/hvm/vm_event.c b/xen/arch/x86/hvm/vm_event.c
index 121de23071..6d203e8db5 100644
--- a/xen/arch/x86/hvm/vm_event.c
+++ b/xen/arch/x86/hvm/vm_event.c
@@ -87,7 +87,7 @@  void hvm_vm_event_do_resume(struct vcpu *v)
             kind = EMUL_KIND_SET_CONTEXT_INSN;
 
         hvm_emulate_one_vm_event(kind, TRAP_invalid_op,
-                                 X86_EVENT_NO_EC);
+                                 X86_EVENT_NO_EC, false);
 
         v->arch.vm_event->emulate_flags = 0;
     }
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 0144f92b98..c9972bab8c 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -214,7 +214,8 @@  bool p2m_mem_access_check(paddr_t gpa, unsigned long gla,
          d->arch.monitor.inguest_pagefault_disabled &&
          npfec.kind != npfec_kind_with_gla ) /* don't send a mem_event */
     {
-        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op, X86_EVENT_NO_EC);
+        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
+                                 X86_EVENT_NO_EC, true);
 
         return true;
     }
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index 08645762cc..8a20e733fa 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -162,6 +162,8 @@  struct x86_emul_fpu_aux {
 #define X86EMUL_UNRECOGNIZED   X86EMUL_UNIMPLEMENTED
  /* (cmpxchg accessor): CMPXCHG failed. */
 #define X86EMUL_CMPXCHG_FAILED 7
+/* Emulator tried to access a protected page. */
+#define X86EMUL_ACCESS_EXCEPTION 8
 
 /* FPU sub-types which may be requested via ->get_fpu(). */
 enum x86_emulate_fpu_type {
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index b39a1a0331..ed22ed0baf 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -47,6 +47,7 @@  struct hvm_emulate_ctxt {
     uint32_t intr_shadow;
 
     bool_t set_context;
+    bool send_event;
 };
 
 enum emul_kind {
@@ -63,7 +64,8 @@  int hvm_emulate_one(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 void hvm_emulate_one_vm_event(enum emul_kind kind,
     unsigned int trapnr,
-    unsigned int errcode);
+    unsigned int errcode,
+    bool send_event);
 /* Must be called once to set up hvmemul state. */
 void hvm_emulate_init_once(
     struct hvm_emulate_ctxt *hvmemul_ctxt,