diff mbox series

[v7,02/10] xen/domain: Add vmtrace_frames domain creation parameter

Message ID 20210121212718.2441-3-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series Implement support for external IPT monitoring | expand

Commit Message

Andrew Cooper Jan. 21, 2021, 9:27 p.m. UTC
From: Michał Leszczyński <michal.leszczynski@cert.pl>

To use vmtrace, buffers of a suitable size need allocating, and different
tasks will want different sizes.

Add a domain creation parameter, and audit it appropriately in the
{arch_,}sanitise_domain_config() functions.

For now, the x86 specific auditing is tuned to Processor Trace running in
Single Output mode, which requires a single contiguous range of memory.

The size is given an arbitrary limit of 64M which is expected to be enough for
anticipated usecases, but not large enough to get into long-running-hypercall
problems.

Signed-off-by: Michał Leszczyński <michal.leszczynski@cert.pl>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Anthony PERARD <anthony.perard@citrix.com>
CC: Michał Leszczyński <michal.leszczynski@cert.pl>
CC: Tamas K Lengyel <tamas@tklengyel.com>

When support for later generations of IPT get added, we can in principle start
to use ToTP which is a scatter list of smaller trace regions to use, if we
need to massively up the buffer size available.

v7:
 * Major chop&change within the series.
 * Use the name 'vmtrace' consistently.
 * Use the (new) common vcpu_teardown() functionality, rather than leaving a
   latent memory leak on ARM.
---
 xen/arch/x86/domain.c       | 23 +++++++++++++++++++
 xen/common/domain.c         | 56 +++++++++++++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h |  1 +
 xen/include/xen/sched.h     |  7 ++++++
 4 files changed, 87 insertions(+)

Comments

Jan Beulich Jan. 25, 2021, 3:08 p.m. UTC | #1
On 21.01.2021 22:27, Andrew Cooper wrote:
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -132,6 +132,48 @@ static void vcpu_info_reset(struct vcpu *v)
>      v->vcpu_info_mfn = INVALID_MFN;
>  }
>  
> +static void vmtrace_free_buffer(struct vcpu *v)
> +{
> +    const struct domain *d = v->domain;
> +    struct page_info *pg = v->vmtrace.buf;
> +    unsigned int i;
> +
> +    if ( !pg )
> +        return;
> +
> +    for ( i = 0; i < d->vmtrace_frames; i++ )
> +    {
> +        put_page_alloc_ref(&pg[i]);
> +        put_page_and_type(&pg[i]);
> +    }
> +
> +    v->vmtrace.buf = NULL;

To set a good precedent, maybe this wants moving up ahead of
the loop and ...

> +}
> +
> +static int vmtrace_alloc_buffer(struct vcpu *v)
> +{
> +    struct domain *d = v->domain;
> +    struct page_info *pg;
> +    unsigned int i;
> +
> +    if ( !d->vmtrace_frames )
> +        return 0;
> +
> +    pg = alloc_domheap_pages(d, get_order_from_pages(d->vmtrace_frames),
> +                             MEMF_no_refcount);
> +    if ( !pg )
> +        return -ENOMEM;
> +
> +    v->vmtrace.buf = pg;

... this wants moving down past the loop, to avoid
globally announcing something that isn't fully initialized
yet / anymore?

> +    for ( i = 0; i < d->vmtrace_frames; i++ )
> +        /* Domain can't know about this page yet - something fishy going on. */
> +        if ( !get_page_and_type(&pg[i], d, PGT_writable_page) )
> +            BUG();

Whatever the final verdict to the other similar places
that one of your patch changes should be applied here,
too.

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -94,6 +94,7 @@ struct xen_domctl_createdomain {
>      uint32_t max_evtchn_port;
>      int32_t max_grant_frames;
>      int32_t max_maptrack_frames;
> +    uint32_t vmtrace_frames;

Considering page size related irritations elsewhere in the
public interface, could you have a comment clarify the unit
of this value (Xen's page size according to the rest of the
patch), and that space will be allocated once per-vCPU
rather than per-domain (to stand a chance of recognizing
the ultimate memory footprint resulting from this)?

> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -257,6 +257,10 @@ struct vcpu
>      /* vPCI per-vCPU area, used to store data for long running operations. */
>      struct vpci_vcpu vpci;
>  
> +    struct {
> +        struct page_info *buf;
> +    } vmtrace;

While perhaps minor, I'm unconvinced "buf" is a good name
for a field of this type.

> @@ -470,6 +474,9 @@ struct domain
>      unsigned    pbuf_idx;
>      spinlock_t  pbuf_lock;
>  
> +    /* Used by vmtrace features */
> +    uint32_t    vmtrace_frames;

unsigned int? Also could you move this to an existing 32-bit
hole, like immediately after "monitor"?

Jan
Andrew Cooper Jan. 25, 2021, 5:17 p.m. UTC | #2
On 25/01/2021 15:08, Jan Beulich wrote:
> On 21.01.2021 22:27, Andrew Cooper wrote:
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -132,6 +132,48 @@ static void vcpu_info_reset(struct vcpu *v)
>>      v->vcpu_info_mfn = INVALID_MFN;
>>  }
>>  
>> +static void vmtrace_free_buffer(struct vcpu *v)
>> +{
>> +    const struct domain *d = v->domain;
>> +    struct page_info *pg = v->vmtrace.buf;
>> +    unsigned int i;
>> +
>> +    if ( !pg )
>> +        return;
>> +
>> +    for ( i = 0; i < d->vmtrace_frames; i++ )
>> +    {
>> +        put_page_alloc_ref(&pg[i]);
>> +        put_page_and_type(&pg[i]);
>> +    }
>> +
>> +    v->vmtrace.buf = NULL;
> To set a good precedent, maybe this wants moving up ahead of
> the loop and ...
>
>> +}
>> +
>> +static int vmtrace_alloc_buffer(struct vcpu *v)
>> +{
>> +    struct domain *d = v->domain;
>> +    struct page_info *pg;
>> +    unsigned int i;
>> +
>> +    if ( !d->vmtrace_frames )
>> +        return 0;
>> +
>> +    pg = alloc_domheap_pages(d, get_order_from_pages(d->vmtrace_frames),
>> +                             MEMF_no_refcount);
>> +    if ( !pg )
>> +        return -ENOMEM;
>> +
>> +    v->vmtrace.buf = pg;
> ... this wants moving down past the loop, to avoid
> globally announcing something that isn't fully initialized
> yet / anymore?

Fine.

>
>> +    for ( i = 0; i < d->vmtrace_frames; i++ )
>> +        /* Domain can't know about this page yet - something fishy going on. */
>> +        if ( !get_page_and_type(&pg[i], d, PGT_writable_page) )
>> +            BUG();
> Whatever the final verdict to the other similar places
> that one of your patch changes should be applied here,
> too.

Obviously, except there's 0 room for manoeuvring on that patch, so this
hunk is correct.

>
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -94,6 +94,7 @@ struct xen_domctl_createdomain {
>>      uint32_t max_evtchn_port;
>>      int32_t max_grant_frames;
>>      int32_t max_maptrack_frames;
>> +    uint32_t vmtrace_frames;
> Considering page size related irritations elsewhere in the
> public interface, could you have a comment clarify the unit
> of this value (Xen's page size according to the rest of the
> patch), and that space will be allocated once per-vCPU
> rather than per-domain (to stand a chance of recognizing
> the ultimate memory footprint resulting from this)?

Well - its hopefully obvious that it shares the same units as the other
*_frames parameters.

But yes - the future ABI fixes, it will be forbidden to use anything in
units of frames, to fix the multitude of interface bugs pertaining to
non-4k page sizes.

I'll switch to using vmtrace_size, in units of bytes, and the per-arch
filtering can enforce being a multiple of 4k.

>
>> --- a/xen/include/xen/sched.h
>> +++ b/xen/include/xen/sched.h
>> @@ -257,6 +257,10 @@ struct vcpu
>>      /* vPCI per-vCPU area, used to store data for long running operations. */
>>      struct vpci_vcpu vpci;
>>  
>> +    struct {
>> +        struct page_info *buf;
>> +    } vmtrace;
> While perhaps minor, I'm unconvinced "buf" is a good name
> for a field of this type.

Please suggest a better one then.  This one is properly namespaced as
v->vmtrace.buf which is the least bad option I could come up with.

>
>> @@ -470,6 +474,9 @@ struct domain
>>      unsigned    pbuf_idx;
>>      spinlock_t  pbuf_lock;
>>  
>> +    /* Used by vmtrace features */
>> +    uint32_t    vmtrace_frames;
> unsigned int? Also could you move this to an existing 32-bit
> hole, like immediately after "monitor"?

Ok.

~Andrew
Jan Beulich Jan. 26, 2021, 10:51 a.m. UTC | #3
On 25.01.2021 18:17, Andrew Cooper wrote:
> On 25/01/2021 15:08, Jan Beulich wrote:
>> On 21.01.2021 22:27, Andrew Cooper wrote:
>>> --- a/xen/include/xen/sched.h
>>> +++ b/xen/include/xen/sched.h
>>> @@ -257,6 +257,10 @@ struct vcpu
>>>      /* vPCI per-vCPU area, used to store data for long running operations. */
>>>      struct vpci_vcpu vpci;
>>>  
>>> +    struct {
>>> +        struct page_info *buf;
>>> +    } vmtrace;
>> While perhaps minor, I'm unconvinced "buf" is a good name
>> for a field of this type.
> 
> Please suggest a better one then.  This one is properly namespaced as
> v->vmtrace.buf which is the least bad option I could come up with.

"pg", "page", or "pages" are all names we use for variables and
fields of this type. I don't see at all how the namespacing
helps it - to me "v->vmtrace.buf" is still a pointer to the
actual buffer, not the struct page_info describing the 1st page
of it.

Jan
Jan Beulich Jan. 29, 2021, 4:37 p.m. UTC | #4
On 25.01.2021 16:08, Jan Beulich wrote:
> On 21.01.2021 22:27, Andrew Cooper wrote:
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -132,6 +132,48 @@ static void vcpu_info_reset(struct vcpu *v)
>>      v->vcpu_info_mfn = INVALID_MFN;
>>  }
>>  
>> +static void vmtrace_free_buffer(struct vcpu *v)
>> +{
>> +    const struct domain *d = v->domain;
>> +    struct page_info *pg = v->vmtrace.buf;
>> +    unsigned int i;
>> +
>> +    if ( !pg )
>> +        return;
>> +
>> +    for ( i = 0; i < d->vmtrace_frames; i++ )
>> +    {
>> +        put_page_alloc_ref(&pg[i]);
>> +        put_page_and_type(&pg[i]);
>> +    }
>> +
>> +    v->vmtrace.buf = NULL;
> 
> To set a good precedent, maybe this wants moving up ahead of
> the loop and ...
> 
>> +}
>> +
>> +static int vmtrace_alloc_buffer(struct vcpu *v)
>> +{
>> +    struct domain *d = v->domain;
>> +    struct page_info *pg;
>> +    unsigned int i;
>> +
>> +    if ( !d->vmtrace_frames )
>> +        return 0;
>> +
>> +    pg = alloc_domheap_pages(d, get_order_from_pages(d->vmtrace_frames),
>> +                             MEMF_no_refcount);
>> +    if ( !pg )
>> +        return -ENOMEM;
>> +
>> +    v->vmtrace.buf = pg;
> 
> ... this wants moving down past the loop, to avoid
> globally announcing something that isn't fully initialized
> yet / anymore?

So having replied on the other thread I looked back here: The
suggested change actually is not just to set a good precedent.
By recording the page(s) in v->vmtrace_buf, ...

>> +    for ( i = 0; i < d->vmtrace_frames; i++ )
>> +        /* Domain can't know about this page yet - something fishy going on. */
>> +        if ( !get_page_and_type(&pg[i], d, PGT_writable_page) )
>> +            BUG();

... failure here (if handled better than by BUG()) will lead
vmtrace_free_buffer() to believe it needs to put references
(besides freeing the page(s), which is fine). So your claimed
bug really is just here, not everywhere else, and there is no
reason to go backwards in terms of error handling "quality",
as per the bottom of ./CODING_STYLE.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index b9ba04633e..3f12d68e9e 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -660,6 +660,29 @@  int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
         return -EINVAL;
     }
 
+    if ( config->vmtrace_frames )
+    {
+        unsigned int frames = config->vmtrace_frames;
+
+        ASSERT(vmtrace_available); /* Checked by common code. */
+
+        /*
+         * For now, vmtrace is restricted to HVM guests, and using a
+         * power-of-2 buffer up to 64M in size.
+         */
+        if ( !hvm )
+        {
+            dprintk(XENLOG_INFO, "vmtrace not supported for PV\n");
+            return -EINVAL;
+        }
+
+        if ( frames > (MB(64) >> PAGE_SHIFT) || (frames & (frames - 1)) )
+        {
+            dprintk(XENLOG_INFO, "Unsupported vmtrace frames: %u\n", frames);
+            return -EINVAL;
+        }
+    }
+
     return 0;
 }
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index d1e94d88cf..a844bc7b78 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -132,6 +132,48 @@  static void vcpu_info_reset(struct vcpu *v)
     v->vcpu_info_mfn = INVALID_MFN;
 }
 
+static void vmtrace_free_buffer(struct vcpu *v)
+{
+    const struct domain *d = v->domain;
+    struct page_info *pg = v->vmtrace.buf;
+    unsigned int i;
+
+    if ( !pg )
+        return;
+
+    for ( i = 0; i < d->vmtrace_frames; i++ )
+    {
+        put_page_alloc_ref(&pg[i]);
+        put_page_and_type(&pg[i]);
+    }
+
+    v->vmtrace.buf = NULL;
+}
+
+static int vmtrace_alloc_buffer(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct page_info *pg;
+    unsigned int i;
+
+    if ( !d->vmtrace_frames )
+        return 0;
+
+    pg = alloc_domheap_pages(d, get_order_from_pages(d->vmtrace_frames),
+                             MEMF_no_refcount);
+    if ( !pg )
+        return -ENOMEM;
+
+    v->vmtrace.buf = pg;
+
+    for ( i = 0; i < d->vmtrace_frames; i++ )
+        /* Domain can't know about this page yet - something fishy going on. */
+        if ( !get_page_and_type(&pg[i], d, PGT_writable_page) )
+            BUG();
+
+    return 0;
+}
+
 /*
  * Release resources held by a vcpu.  There may or may not be live references
  * to the vcpu, and it may or may not be fully constructed.
@@ -140,6 +182,8 @@  static void vcpu_info_reset(struct vcpu *v)
  */
 static int vcpu_teardown(struct vcpu *v)
 {
+    vmtrace_free_buffer(v);
+
     return 0;
 }
 
@@ -201,6 +245,9 @@  struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
     if ( sched_init_vcpu(v) != 0 )
         goto fail_wq;
 
+    if ( vmtrace_alloc_buffer(v) != 0 )
+        goto fail_wq;
+
     if ( arch_vcpu_create(v) != 0 )
         goto fail_sched;
 
@@ -449,6 +496,12 @@  static int sanitise_domain_config(struct xen_domctl_createdomain *config)
         }
     }
 
+    if ( config->vmtrace_frames && !vmtrace_available )
+    {
+        dprintk(XENLOG_INFO, "vmtrace requested but not available\n");
+        return -EINVAL;
+    }
+
     return arch_sanitise_domain_config(config);
 }
 
@@ -474,7 +527,10 @@  struct domain *domain_create(domid_t domid,
     ASSERT(is_system_domain(d) ? config == NULL : config != NULL);
 
     if ( config )
+    {
         d->options = config->flags;
+        d->vmtrace_frames = config->vmtrace_frames;
+    }
 
     /* Sort out our idea of is_control_domain(). */
     d->is_privileged = is_priv;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 666aeb71bf..1585678d50 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -94,6 +94,7 @@  struct xen_domctl_createdomain {
     uint32_t max_evtchn_port;
     int32_t max_grant_frames;
     int32_t max_maptrack_frames;
+    uint32_t vmtrace_frames;
 
     struct xen_arch_domainconfig arch;
 };
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index da19f4e9f6..03905f6246 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -257,6 +257,10 @@  struct vcpu
     /* vPCI per-vCPU area, used to store data for long running operations. */
     struct vpci_vcpu vpci;
 
+    struct {
+        struct page_info *buf;
+    } vmtrace;
+
     struct arch_vcpu arch;
 };
 
@@ -470,6 +474,9 @@  struct domain
     unsigned    pbuf_idx;
     spinlock_t  pbuf_lock;
 
+    /* Used by vmtrace features */
+    uint32_t    vmtrace_frames;
+
     /* OProfile support. */
     struct xenoprof *xenoprof;