diff mbox series

[v5,06/11] x86/hvm: processor trace interface in HVM

Message ID a4833c8168e287f0caf1dc6f16ec5c054bd88b0a.1593974333.git.michal.leszczynski@cert.pl (mailing list archive)
State Superseded
Headers show
Series Implement support for external IPT monitoring | expand

Commit Message

Michał Leszczyński July 5, 2020, 6:54 p.m. UTC
From: Michal Leszczynski <michal.leszczynski@cert.pl>

Implement necessary changes in common code/HVM to support
processor trace features. Define vmtrace_pt_* API and
implement trace buffer allocation/deallocation in common
code.

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 xen/arch/x86/domain.c         | 19 +++++++++++++++++++
 xen/common/domain.c           | 19 +++++++++++++++++++
 xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
 xen/include/xen/sched.h       |  4 ++++
 4 files changed, 62 insertions(+)

Comments

Michał Leszczyński July 5, 2020, 7:11 p.m. UTC | #1
----- 5 lip 2020 o 20:54, Michał Leszczyński michal.leszczynski@cert.pl napisał(a):

> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Implement necessary changes in common code/HVM to support
> processor trace features. Define vmtrace_pt_* API and
> implement trace buffer allocation/deallocation in common
> code.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
> xen/arch/x86/domain.c         | 19 +++++++++++++++++++
> xen/common/domain.c           | 19 +++++++++++++++++++
> xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
> xen/include/xen/sched.h       |  4 ++++
> 4 files changed, 62 insertions(+)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index fee6c3931a..79c9794408 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2199,6 +2199,25 @@ int domain_relinquish_resources(struct domain *d)
>                 altp2m_vcpu_disable_ve(v);
>         }
> 
> +        for_each_vcpu ( d, v )
> +        {
> +            unsigned int i;
> +
> +            if ( !v->vmtrace.pt_buf )
> +                continue;
> +
> +            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
> +            {
> +                struct page_info *pg = mfn_to_page(
> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
> +                if ( (pg->count_info & PGC_count_mask) != 1 )
> +                    return -EBUSY;
> +            }
> +
> +            free_domheap_pages(v->vmtrace.pt_buf,
> +                get_order_from_bytes(v->domain->vmtrace_pt_size));


While this works, I don't feel that this is a good solution with this loop
returning -EBUSY here. I would like to kindly ask for suggestions regarding
this topic.


Best regards,
Michał Leszczyński
CERT Polska
Jan Beulich July 6, 2020, 8:31 a.m. UTC | #2
On 05.07.2020 21:11, Michał Leszczyński wrote:
> ----- 5 lip 2020 o 20:54, Michał Leszczyński michal.leszczynski@cert.pl napisał(a):
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -2199,6 +2199,25 @@ int domain_relinquish_resources(struct domain *d)
>>                 altp2m_vcpu_disable_ve(v);
>>         }
>>
>> +        for_each_vcpu ( d, v )
>> +        {
>> +            unsigned int i;
>> +
>> +            if ( !v->vmtrace.pt_buf )
>> +                continue;
>> +
>> +            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
>> +            {
>> +                struct page_info *pg = mfn_to_page(
>> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
>> +                if ( (pg->count_info & PGC_count_mask) != 1 )
>> +                    return -EBUSY;
>> +            }
>> +
>> +            free_domheap_pages(v->vmtrace.pt_buf,
>> +                get_order_from_bytes(v->domain->vmtrace_pt_size));
> 
> 
> While this works, I don't feel that this is a good solution with this loop
> returning -EBUSY here. I would like to kindly ask for suggestions regarding
> this topic.

I'm sorry to ask, but with the previously give suggestions to mirror
existing code, why do you still need to play with this function? You
really shouldn't have a need to, just like e.g. the ioreq server page
handling code didn't.

Jan
Roger Pau Monné July 6, 2020, 8:42 a.m. UTC | #3
On Sun, Jul 05, 2020 at 08:54:59PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Implement necessary changes in common code/HVM to support
> processor trace features. Define vmtrace_pt_* API and
> implement trace buffer allocation/deallocation in common
> code.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  xen/arch/x86/domain.c         | 19 +++++++++++++++++++
>  xen/common/domain.c           | 19 +++++++++++++++++++
>  xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
>  xen/include/xen/sched.h       |  4 ++++
>  4 files changed, 62 insertions(+)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index fee6c3931a..79c9794408 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2199,6 +2199,25 @@ int domain_relinquish_resources(struct domain *d)
>                  altp2m_vcpu_disable_ve(v);
>          }
>  
> +        for_each_vcpu ( d, v )
> +        {
> +            unsigned int i;
> +
> +            if ( !v->vmtrace.pt_buf )
> +                continue;
> +
> +            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
> +            {
> +                struct page_info *pg = mfn_to_page(
> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
> +                if ( (pg->count_info & PGC_count_mask) != 1 )
> +                    return -EBUSY;
> +            }
> +
> +            free_domheap_pages(v->vmtrace.pt_buf,
> +                get_order_from_bytes(v->domain->vmtrace_pt_size));

This is racy as a control domain could take a reference between the
check and the freeing.

> +        }
> +
>          if ( is_pv_domain(d) )
>          {
>              for_each_vcpu ( d, v )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 25d3359c5b..f480c4e033 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -137,6 +137,21 @@ static void vcpu_destroy(struct vcpu *v)
>      free_vcpu_struct(v);
>  }
>  
> +static int vmtrace_alloc_buffers(struct vcpu *v)
> +{
> +    struct page_info *pg;
> +    uint64_t size = v->domain->vmtrace_pt_size;
> +
> +    pg = alloc_domheap_pages(v->domain, get_order_from_bytes(size),
> +                             MEMF_no_refcount);
> +
> +    if ( !pg )
> +        return -ENOMEM;
> +
> +    v->vmtrace.pt_buf = pg;
> +    return 0;
> +}

I think we already agreed that you would use the same model as ioreq
servers, where a reference is taken on allocation and then the pages
are not explicitly freed on domain destruction and put_page_and_type
is used. Is there some reason why that model doesn't work in this
case?

If not, please see hvm_alloc_ioreq_mfn and hvm_free_ioreq_mfn.

Roger.
Michał Leszczyński July 6, 2020, 10:09 a.m. UTC | #4
----- 6 lip 2020 o 10:42, Roger Pau Monné roger.pau@citrix.com napisał(a):

> On Sun, Jul 05, 2020 at 08:54:59PM +0200, Michał Leszczyński wrote:
>> From: Michal Leszczynski <michal.leszczynski@cert.pl>
>> 
>> Implement necessary changes in common code/HVM to support
>> processor trace features. Define vmtrace_pt_* API and
>> implement trace buffer allocation/deallocation in common
>> code.
>> 
>> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
>> ---
>>  xen/arch/x86/domain.c         | 19 +++++++++++++++++++
>>  xen/common/domain.c           | 19 +++++++++++++++++++
>>  xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
>>  xen/include/xen/sched.h       |  4 ++++
>>  4 files changed, 62 insertions(+)
>> 
>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>> index fee6c3931a..79c9794408 100644
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -2199,6 +2199,25 @@ int domain_relinquish_resources(struct domain *d)
>>                  altp2m_vcpu_disable_ve(v);
>>          }
>>  
>> +        for_each_vcpu ( d, v )
>> +        {
>> +            unsigned int i;
>> +
>> +            if ( !v->vmtrace.pt_buf )
>> +                continue;
>> +
>> +            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
>> +            {
>> +                struct page_info *pg = mfn_to_page(
>> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
>> +                if ( (pg->count_info & PGC_count_mask) != 1 )
>> +                    return -EBUSY;
>> +            }
>> +
>> +            free_domheap_pages(v->vmtrace.pt_buf,
>> +                get_order_from_bytes(v->domain->vmtrace_pt_size));
> 
> This is racy as a control domain could take a reference between the
> check and the freeing.
> 
>> +        }
>> +
>>          if ( is_pv_domain(d) )
>>          {
>>              for_each_vcpu ( d, v )
>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>> index 25d3359c5b..f480c4e033 100644
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -137,6 +137,21 @@ static void vcpu_destroy(struct vcpu *v)
>>      free_vcpu_struct(v);
>>  }
>>  
>> +static int vmtrace_alloc_buffers(struct vcpu *v)
>> +{
>> +    struct page_info *pg;
>> +    uint64_t size = v->domain->vmtrace_pt_size;
>> +
>> +    pg = alloc_domheap_pages(v->domain, get_order_from_bytes(size),
>> +                             MEMF_no_refcount);
>> +
>> +    if ( !pg )
>> +        return -ENOMEM;
>> +
>> +    v->vmtrace.pt_buf = pg;
>> +    return 0;
>> +}
> 
> I think we already agreed that you would use the same model as ioreq
> servers, where a reference is taken on allocation and then the pages
> are not explicitly freed on domain destruction and put_page_and_type
> is used. Is there some reason why that model doesn't work in this
> case?
> 
> If not, please see hvm_alloc_ioreq_mfn and hvm_free_ioreq_mfn.
> 
> Roger.


Ok, I've got it, will do. Thanks for pointing out the examples.


One thing that is confusing to me is that I don't get what is
the meaning of MEMF_no_refcount flag.

In the hvm_{alloc,free}_ioreq_mfn the memory is allocated
explicitly but freed just by putting out the reference, so
I guess it's automatically detected that the refcount dropped to 0
and the page should be freed? If so, why the flag is named "no refcount"?


Best regards,
Michał Leszczyński
CERT Polska
Michał Leszczyński July 6, 2020, 10:31 a.m. UTC | #5
----- 6 lip 2020 o 10:31, Jan Beulich jbeulich@suse.com napisał(a):

> On 05.07.2020 21:11, Michał Leszczyński wrote:
>> ----- 5 lip 2020 o 20:54, Michał Leszczyński michal.leszczynski@cert.pl
>> napisał(a):
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -2199,6 +2199,25 @@ int domain_relinquish_resources(struct domain *d)
>>>                 altp2m_vcpu_disable_ve(v);
>>>         }
>>>
>>> +        for_each_vcpu ( d, v )
>>> +        {
>>> +            unsigned int i;
>>> +
>>> +            if ( !v->vmtrace.pt_buf )
>>> +                continue;
>>> +
>>> +            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
>>> +            {
>>> +                struct page_info *pg = mfn_to_page(
>>> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
>>> +                if ( (pg->count_info & PGC_count_mask) != 1 )
>>> +                    return -EBUSY;
>>> +            }
>>> +
>>> +            free_domheap_pages(v->vmtrace.pt_buf,
>>> +                get_order_from_bytes(v->domain->vmtrace_pt_size));
>> 
>> 
>> While this works, I don't feel that this is a good solution with this loop
>> returning -EBUSY here. I would like to kindly ask for suggestions regarding
>> this topic.
> 
> I'm sorry to ask, but with the previously give suggestions to mirror
> existing code, why do you still need to play with this function? You
> really shouldn't have a need to, just like e.g. the ioreq server page
> handling code didn't.
> 
> Jan


Ok, sorry. I think I've finally got it after latest Roger's suggestions :P

This will be fixed in the next version.


Best regards,
Michał Leszczyński
CERT Polska
Roger Pau Monné July 6, 2020, 2:38 p.m. UTC | #6
On Mon, Jul 06, 2020 at 12:09:02PM +0200, Michał Leszczyński wrote:
> ----- 6 lip 2020 o 10:42, Roger Pau Monné roger.pau@citrix.com napisał(a):
> 
> > On Sun, Jul 05, 2020 at 08:54:59PM +0200, Michał Leszczyński wrote:
> >> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> >> 
> >> Implement necessary changes in common code/HVM to support
> >> processor trace features. Define vmtrace_pt_* API and
> >> implement trace buffer allocation/deallocation in common
> >> code.
> >> 
> >> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> >> ---
> >>  xen/arch/x86/domain.c         | 19 +++++++++++++++++++
> >>  xen/common/domain.c           | 19 +++++++++++++++++++
> >>  xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
> >>  xen/include/xen/sched.h       |  4 ++++
> >>  4 files changed, 62 insertions(+)
> >> 
> >> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> >> index fee6c3931a..79c9794408 100644
> >> --- a/xen/arch/x86/domain.c
> >> +++ b/xen/arch/x86/domain.c
> >> @@ -2199,6 +2199,25 @@ int domain_relinquish_resources(struct domain *d)
> >>                  altp2m_vcpu_disable_ve(v);
> >>          }
> >>  
> >> +        for_each_vcpu ( d, v )
> >> +        {
> >> +            unsigned int i;
> >> +
> >> +            if ( !v->vmtrace.pt_buf )
> >> +                continue;
> >> +
> >> +            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
> >> +            {
> >> +                struct page_info *pg = mfn_to_page(
> >> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
> >> +                if ( (pg->count_info & PGC_count_mask) != 1 )
> >> +                    return -EBUSY;
> >> +            }
> >> +
> >> +            free_domheap_pages(v->vmtrace.pt_buf,
> >> +                get_order_from_bytes(v->domain->vmtrace_pt_size));
> > 
> > This is racy as a control domain could take a reference between the
> > check and the freeing.
> > 
> >> +        }
> >> +
> >>          if ( is_pv_domain(d) )
> >>          {
> >>              for_each_vcpu ( d, v )
> >> diff --git a/xen/common/domain.c b/xen/common/domain.c
> >> index 25d3359c5b..f480c4e033 100644
> >> --- a/xen/common/domain.c
> >> +++ b/xen/common/domain.c
> >> @@ -137,6 +137,21 @@ static void vcpu_destroy(struct vcpu *v)
> >>      free_vcpu_struct(v);
> >>  }
> >>  
> >> +static int vmtrace_alloc_buffers(struct vcpu *v)
> >> +{
> >> +    struct page_info *pg;
> >> +    uint64_t size = v->domain->vmtrace_pt_size;
> >> +
> >> +    pg = alloc_domheap_pages(v->domain, get_order_from_bytes(size),
> >> +                             MEMF_no_refcount);
> >> +
> >> +    if ( !pg )
> >> +        return -ENOMEM;
> >> +
> >> +    v->vmtrace.pt_buf = pg;
> >> +    return 0;
> >> +}
> > 
> > I think we already agreed that you would use the same model as ioreq
> > servers, where a reference is taken on allocation and then the pages
> > are not explicitly freed on domain destruction and put_page_and_type
> > is used. Is there some reason why that model doesn't work in this
> > case?
> > 
> > If not, please see hvm_alloc_ioreq_mfn and hvm_free_ioreq_mfn.
> > 
> > Roger.
> 
> 
> Ok, I've got it, will do. Thanks for pointing out the examples.
> 
> 
> One thing that is confusing to me is that I don't get what is
> the meaning of MEMF_no_refcount flag.

That flag prevents the memory from being counted towards the amount of
memory assigned to the domain. You want it that way so that trace
buffers are not accounted as part of the memory assigned to the
domain.

You then need to get a (extra) reference to the pages (there's always
the 'allocated' reference AFAICT) so that when the last reference is
dropped (either by the domain being destroyed or the memory being
unmapped from the control domain) it will be freed.

> In the hvm_{alloc,free}_ioreq_mfn the memory is allocated
> explicitly but freed just by putting out the reference, so
> I guess it's automatically detected that the refcount dropped to 0
> and the page should be freed?

Yes, put_page_alloc_ref will remove the allocated flag and then when
the last reference is dropped the page will be freed.

> If so, why the flag is named "no refcount"?

I'm not sure about the naming, but you can get references to pages
allocated as MEMF_no_refcount, and change their types. They are
however not accounted towards the memory usage of a domain.

Roger.
diff mbox series

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index fee6c3931a..79c9794408 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2199,6 +2199,25 @@  int domain_relinquish_resources(struct domain *d)
                 altp2m_vcpu_disable_ve(v);
         }
 
+        for_each_vcpu ( d, v )
+        {
+            unsigned int i;
+
+            if ( !v->vmtrace.pt_buf )
+                continue;
+
+            for ( i = 0; i < (v->domain->vmtrace_pt_size >> PAGE_SHIFT); i++ )
+            {
+                struct page_info *pg = mfn_to_page(
+                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
+                if ( (pg->count_info & PGC_count_mask) != 1 )
+                    return -EBUSY;
+            }
+
+            free_domheap_pages(v->vmtrace.pt_buf,
+                get_order_from_bytes(v->domain->vmtrace_pt_size));
+        }
+
         if ( is_pv_domain(d) )
         {
             for_each_vcpu ( d, v )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 25d3359c5b..f480c4e033 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -137,6 +137,21 @@  static void vcpu_destroy(struct vcpu *v)
     free_vcpu_struct(v);
 }
 
+static int vmtrace_alloc_buffers(struct vcpu *v)
+{
+    struct page_info *pg;
+    uint64_t size = v->domain->vmtrace_pt_size;
+
+    pg = alloc_domheap_pages(v->domain, get_order_from_bytes(size),
+                             MEMF_no_refcount);
+
+    if ( !pg )
+        return -ENOMEM;
+
+    v->vmtrace.pt_buf = pg;
+    return 0;
+}
+
 struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
 {
     struct vcpu *v;
@@ -162,6 +177,9 @@  struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
     v->vcpu_id = vcpu_id;
     v->dirty_cpu = VCPU_CPU_CLEAN;
 
+    if ( d->vmtrace_pt_size && vmtrace_alloc_buffers(v) != 0 )
+        return NULL;
+
     spin_lock_init(&v->virq_lock);
 
     tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
@@ -422,6 +440,7 @@  struct domain *domain_create(domid_t domid,
     d->shutdown_code = SHUTDOWN_CODE_INVALID;
 
     spin_lock_init(&d->pbuf_lock);
+    spin_lock_init(&d->vmtrace_lock);
 
     rwlock_init(&d->vnuma_rwlock);
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 1eb377dd82..2d474a4c50 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -214,6 +214,10 @@  struct hvm_function_table {
     bool_t (*altp2m_vcpu_emulate_ve)(struct vcpu *v);
     int (*altp2m_vcpu_emulate_vmfunc)(const struct cpu_user_regs *regs);
 
+    /* vmtrace */
+    int (*vmtrace_control_pt)(struct vcpu *v, bool enable);
+    int (*vmtrace_get_pt_offset)(struct vcpu *v, uint64_t *offset);
+
     /*
      * Parameters and callbacks for hardware-assisted TSC scaling,
      * which are valid only when the hardware feature is available.
@@ -655,6 +659,22 @@  static inline bool altp2m_vcpu_emulate_ve(struct vcpu *v)
     return false;
 }
 
+static inline int vmtrace_control_pt(struct vcpu *v, bool enable)
+{
+    if ( hvm_funcs.vmtrace_control_pt )
+        return hvm_funcs.vmtrace_control_pt(v, enable);
+
+    return -EOPNOTSUPP;
+}
+
+static inline int vmtrace_get_pt_offset(struct vcpu *v, uint64_t *offset)
+{
+    if ( hvm_funcs.vmtrace_get_pt_offset )
+        return hvm_funcs.vmtrace_get_pt_offset(v, offset);
+
+    return -EOPNOTSUPP;
+}
+
 /*
  * This must be defined as a macro instead of an inline function,
  * because it uses 'struct vcpu' and 'struct domain' which have
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 48f0a61bbd..95ebab0d30 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -253,6 +253,10 @@  struct vcpu
     /* vPCI per-vCPU area, used to store data for long running operations. */
     struct vpci_vcpu vpci;
 
+    struct {
+        struct page_info *pt_buf;
+    } vmtrace;
+
     struct arch_vcpu arch;
 };