[v4,03/10] tools/libxl: add vmtrace_pt_size parameter
diff mbox series

Message ID 5f4f4b1afa432258daff43f2dc8119b6a441fff4.1593519420.git.michal.leszczynski@cert.pl
State Superseded
Headers show
Series
  • Implement support for external IPT monitoring
Related show

Commit Message

Michał Leszczyński June 30, 2020, 12:33 p.m. UTC
From: Michal Leszczynski <michal.leszczynski@cert.pl>

Allow to specify the size of per-vCPU trace buffer upon
domain creation. This is zero by default (meaning: not enabled).

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 docs/man/xl.cfg.5.pod.in             | 10 ++++++++++
 tools/golang/xenlight/helpers.gen.go |  2 ++
 tools/golang/xenlight/types.gen.go   |  1 +
 tools/libxl/libxl.h                  |  8 ++++++++
 tools/libxl/libxl_create.c           |  1 +
 tools/libxl/libxl_types.idl          |  2 ++
 tools/xl/xl_parse.c                  | 20 ++++++++++++++++++++
 xen/common/domain.c                  | 12 ++++++++++++
 xen/include/public/domctl.h          |  1 +
 9 files changed, 57 insertions(+)

Comments

Roger Pau Monné July 1, 2020, 10:05 a.m. UTC | #1
On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Allow to specify the size of per-vCPU trace buffer upon
> domain creation. This is zero by default (meaning: not enabled).
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  docs/man/xl.cfg.5.pod.in             | 10 ++++++++++
>  tools/golang/xenlight/helpers.gen.go |  2 ++
>  tools/golang/xenlight/types.gen.go   |  1 +
>  tools/libxl/libxl.h                  |  8 ++++++++
>  tools/libxl/libxl_create.c           |  1 +
>  tools/libxl/libxl_types.idl          |  2 ++
>  tools/xl/xl_parse.c                  | 20 ++++++++++++++++++++
>  xen/common/domain.c                  | 12 ++++++++++++
>  xen/include/public/domctl.h          |  1 +
>  9 files changed, 57 insertions(+)
> 
> diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
> index 0532739c1f..78f434b722 100644
> --- a/docs/man/xl.cfg.5.pod.in
> +++ b/docs/man/xl.cfg.5.pod.in
> @@ -278,6 +278,16 @@ memory=8096 will report significantly less memory available for use
>  than a system with maxmem=8096 memory=8096 due to the memory overhead
>  of having to track the unused pages.
>  
> +=item B<vmtrace_pt_size=BYTES>
> +
> +Specifies the size of processor trace buffer that would be allocated
> +for each vCPU belonging to this domain. Disabled (i.e. B<vmtrace_pt_size=0>
> +by default. This must be set to non-zero value in order to be able to
> +use processor tracing features with this domain.
> +
> +B<NOTE>: The size value must be between 4 kB and 4 GB and it must

I think the minimum value is 8kB, since 4kB would be order 0, which
is used to signal that the feature is disabled?

> diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
> index 61b4ef7b7e..4eba224590 100644
> --- a/tools/xl/xl_parse.c
> +++ b/tools/xl/xl_parse.c
> @@ -1861,6 +1861,26 @@ void parse_config_data(const char *config_source,
>          }
>      }
>  
> +    if (!xlu_cfg_get_long(config, "vmtrace_pt_size", &l, 1) && l) {
> +        int32_t shift = 0;

unsigned int? I don't think there's a reason for this to be a fixed
width signed integer.

> +
> +        if (l & (l - 1))
> +        {
> +            fprintf(stderr, "ERROR: pt buffer size must be a power of 2\n");
> +            exit(1);
> +        }
> +
> +        while (l >>= 1) ++shift;
> +
> +        if (shift <= XEN_PAGE_SHIFT)
> +        {
> +            fprintf(stderr, "ERROR: too small pt buffer\n");
> +            exit(1);
> +        }
> +
> +        b_info->vmtrace_pt_order = shift - XEN_PAGE_SHIFT;
> +    }
> +
>      if (!xlu_cfg_get_list(config, "ioports", &ioports, &num_ioports, 0)) {
>          b_info->num_ioports = num_ioports;
>          b_info->ioports = calloc(num_ioports, sizeof(*b_info->ioports));
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 0a33e0dfd6..27dcfbac8c 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -338,6 +338,12 @@ static int sanitise_domain_config(struct xen_domctl_createdomain *config)
>          return -EINVAL;
>      }
>  
> +    if ( config->vmtrace_pt_order && !vmtrace_supported )
> +    {
> +        dprintk(XENLOG_INFO, "Processor tracing is not supported\n");
> +        return -EINVAL;
> +    }
> +
>      return arch_sanitise_domain_config(config);
>  }
>  
> @@ -443,6 +449,12 @@ struct domain *domain_create(domid_t domid,
>          d->nr_pirqs = min(d->nr_pirqs, nr_irqs);
>  
>          radix_tree_init(&d->pirq_tree);
> +
> +        if ( config->vmtrace_pt_order )
> +        {
> +            uint32_t shift_val = config->vmtrace_pt_order + PAGE_SHIFT;
> +            d->vmtrace_pt_size = (1ULL << shift_val);

I don't think the vmtrace_pt_size domain field has been introduced
yet?

Please check that each patch builds on its own, or else we would
break bisectability of the tree.

Also I would consider just storing this directly as an order, there's
no reason to convert it back to a size?

Thanks, Roger.
Roger Pau Monné July 2, 2020, 9 a.m. UTC | #2
On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 59bdc28c89..7b8289d436 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>      uint32_t max_evtchn_port;
>      int32_t max_grant_frames;
>      int32_t max_maptrack_frames;
> +    uint8_t vmtrace_pt_order;

I've been thinking about this, and even though this is a domctl (so
not a stable interface) we might want to consider using a size (or a
number of pages) here rather than an order. IPT also supports
TOPA mode (kind of a linked list of buffers) that would allow for
sizes not rounded to order boundaries to be used, since then only each
item in the linked list needs to be rounded to an order boundary, so
you could for example use three 4K pages in TOPA mode AFAICT.

Roger.
Anthony PERARD July 2, 2020, 10:24 a.m. UTC | #3
Hi Michał,

On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Allow to specify the size of per-vCPU trace buffer upon
> domain creation. This is zero by default (meaning: not enabled).
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
> 
> diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
> index 0532739c1f..78f434b722 100644
> --- a/docs/man/xl.cfg.5.pod.in
> +++ b/docs/man/xl.cfg.5.pod.in
> @@ -278,6 +278,16 @@ memory=8096 will report significantly less memory available for use
>  than a system with maxmem=8096 memory=8096 due to the memory overhead
>  of having to track the unused pages.
>  
> +=item B<vmtrace_pt_size=BYTES>

I don't like much this new configuration name. To me, "pt" sound like
passthrough, as in pci passthrough. But it seems to be for "processor
trace" (or tracing), isn't it? So if it is, then we have "trace" twice
in the name and I don't think that configuration is about tracing the
processor tracing feature. (Also I don't think we need to state "vm" in
the name either, as every configuration option should be about a vm.)

How about a name that is easier to understand without having to know all
the possible abbreviations? Maybe "processor_trace_buffer_size" or
similar?

> +
> +Specifies the size of processor trace buffer that would be allocated
> +for each vCPU belonging to this domain. Disabled (i.e. B<vmtrace_pt_size=0>
> +by default. This must be set to non-zero value in order to be able to
> +use processor tracing features with this domain.
> +
> +B<NOTE>: The size value must be between 4 kB and 4 GB and it must
> +be also a power of 2.

Maybe the configuration variable could take KBYTES for kilo-bytes
instead of just BYTES since the min is 4kB?

Also that item seems to be in the "Memory Allocation" section, but I
don't think that's a good place as the other options are for the size of
guest RAM. I don't know in which section this would be better but maybe
"Other Options" would be OK.

>  =back
>  
>  =head3 Guest Virtual NUMA Configuration
> diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
> index 61b4ef7b7e..4eba224590 100644
> --- a/tools/xl/xl_parse.c
> +++ b/tools/xl/xl_parse.c
> @@ -1861,6 +1861,26 @@ void parse_config_data(const char *config_source,
>          }
>      }
>  
> +    if (!xlu_cfg_get_long(config, "vmtrace_pt_size", &l, 1) && l) {
> +        int32_t shift = 0;
> +
> +        if (l & (l - 1))
> +        {
> +            fprintf(stderr, "ERROR: pt buffer size must be a power of 2\n");

It would be better to state the option name in the error message.

> +            exit(1);
> +        }
> +
> +        while (l >>= 1) ++shift;
> +
> +        if (shift <= XEN_PAGE_SHIFT)
> +        {
> +            fprintf(stderr, "ERROR: too small pt buffer\n");
> +            exit(1);
> +        }
> +
> +        b_info->vmtrace_pt_order = shift - XEN_PAGE_SHIFT;
> +    }
> +
>      if (!xlu_cfg_get_list(config, "ioports", &ioports, &num_ioports, 0)) {
>          b_info->num_ioports = num_ioports;
>          b_info->ioports = calloc(num_ioports, sizeof(*b_info->ioports));
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 0a33e0dfd6..27dcfbac8c 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 59bdc28c89..7b8289d436 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h

I don't think it's wise to modify the toolstack, the hypervisor, and the
hypercall ABI in the same patch. Can you change this last two files in a
separate patch?

Thank you,
Michał Leszczyński July 2, 2020, 4:23 p.m. UTC | #4
----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):

> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>> index 59bdc28c89..7b8289d436 100644
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>      uint32_t max_evtchn_port;
>>      int32_t max_grant_frames;
>>      int32_t max_maptrack_frames;
>> +    uint8_t vmtrace_pt_order;
> 
> I've been thinking about this, and even though this is a domctl (so
> not a stable interface) we might want to consider using a size (or a
> number of pages) here rather than an order. IPT also supports
> TOPA mode (kind of a linked list of buffers) that would allow for
> sizes not rounded to order boundaries to be used, since then only each
> item in the linked list needs to be rounded to an order boundary, so
> you could for example use three 4K pages in TOPA mode AFAICT.
> 
> Roger.

In previous versions it was "size" but it was requested to change it
to "order" in order to shrink the variable size from uint64_t to
uint8_t, because there is limited space for xen_domctl_createdomain
structure.

How should I proceed?

Best regards,
Michał Leszczyński
CERT Polska
Roger Pau Monné July 3, 2020, 9:44 a.m. UTC | #5
On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
> 
> > On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
> >> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> >> index 59bdc28c89..7b8289d436 100644
> >> --- a/xen/include/public/domctl.h
> >> +++ b/xen/include/public/domctl.h
> >> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
> >>      uint32_t max_evtchn_port;
> >>      int32_t max_grant_frames;
> >>      int32_t max_maptrack_frames;
> >> +    uint8_t vmtrace_pt_order;
> > 
> > I've been thinking about this, and even though this is a domctl (so
> > not a stable interface) we might want to consider using a size (or a
> > number of pages) here rather than an order. IPT also supports
> > TOPA mode (kind of a linked list of buffers) that would allow for
> > sizes not rounded to order boundaries to be used, since then only each
> > item in the linked list needs to be rounded to an order boundary, so
> > you could for example use three 4K pages in TOPA mode AFAICT.
> > 
> > Roger.
> 
> In previous versions it was "size" but it was requested to change it
> to "order" in order to shrink the variable size from uint64_t to
> uint8_t, because there is limited space for xen_domctl_createdomain
> structure.

It's likely I'm missing something here, but I wasn't aware
xen_domctl_createdomain had any constraints regarding its size. It's
currently 48 bytes, which seems fairly small.

There might be constrains on struct domain (the hypervisor internal
domain tracking structure), but I think you are already using a size
field there IIRC.

> 
> How should I proceed?

This is an unstable interface, so we could always change it. It seems
like we might want to use a size parameter at some point to take
advantage of non physically contiguous buffers, but if there are other
blockers that prevent such field from being wider ATM I'm fine with
it.

Thanks, Roger.
Jan Beulich July 3, 2020, 9:56 a.m. UTC | #6
On 03.07.2020 11:44, Roger Pau Monné wrote:
> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
>>
>>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>>> index 59bdc28c89..7b8289d436 100644
>>>> --- a/xen/include/public/domctl.h
>>>> +++ b/xen/include/public/domctl.h
>>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>>>      uint32_t max_evtchn_port;
>>>>      int32_t max_grant_frames;
>>>>      int32_t max_maptrack_frames;
>>>> +    uint8_t vmtrace_pt_order;
>>>
>>> I've been thinking about this, and even though this is a domctl (so
>>> not a stable interface) we might want to consider using a size (or a
>>> number of pages) here rather than an order. IPT also supports
>>> TOPA mode (kind of a linked list of buffers) that would allow for
>>> sizes not rounded to order boundaries to be used, since then only each
>>> item in the linked list needs to be rounded to an order boundary, so
>>> you could for example use three 4K pages in TOPA mode AFAICT.
>>>
>>> Roger.
>>
>> In previous versions it was "size" but it was requested to change it
>> to "order" in order to shrink the variable size from uint64_t to
>> uint8_t, because there is limited space for xen_domctl_createdomain
>> structure.
> 
> It's likely I'm missing something here, but I wasn't aware
> xen_domctl_createdomain had any constrains regarding it's size. It's
> currently 48bytes which seems fairly small.

Additionally I would guess a uint32_t could do here, if the value
passed was "number of pages" rather than "number of bytes"?

Jan
Roger Pau Monné July 3, 2020, 10:11 a.m. UTC | #7
On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
> On 03.07.2020 11:44, Roger Pau Monné wrote:
> > On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
> >> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
> >>
> >>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
> >>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> >>>> index 59bdc28c89..7b8289d436 100644
> >>>> --- a/xen/include/public/domctl.h
> >>>> +++ b/xen/include/public/domctl.h
> >>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
> >>>>      uint32_t max_evtchn_port;
> >>>>      int32_t max_grant_frames;
> >>>>      int32_t max_maptrack_frames;
> >>>> +    uint8_t vmtrace_pt_order;
> >>>
> >>> I've been thinking about this, and even though this is a domctl (so
> >>> not a stable interface) we might want to consider using a size (or a
> >>> number of pages) here rather than an order. IPT also supports
> >>> TOPA mode (kind of a linked list of buffers) that would allow for
> >>> sizes not rounded to order boundaries to be used, since then only each
> >>> item in the linked list needs to be rounded to an order boundary, so
> >>> you could for example use three 4K pages in TOPA mode AFAICT.
> >>>
> >>> Roger.
> >>
> >> In previous versions it was "size" but it was requested to change it
> >> to "order" in order to shrink the variable size from uint64_t to
> >> uint8_t, because there is limited space for xen_domctl_createdomain
> >> structure.
> > 
> > It's likely I'm missing something here, but I wasn't aware
> > xen_domctl_createdomain had any constrains regarding it's size. It's
> > currently 48bytes which seems fairly small.
> 
> Additionally I would guess a uint32_t could do here, if the value
> passed was "number of pages" rather than "number of bytes"?

That could work, not sure if it needs to state however that those will
be 4K pages, since Arm can have a different minimum page size IIRC?
(or that's already the assumption for all number of frames fields)
vmtrace_nr_frames seems fine to me.

Roger.
Julien Grall July 4, 2020, 5:23 p.m. UTC | #8
Hi,

On 03/07/2020 11:11, Roger Pau Monné wrote:
> On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
>> On 03.07.2020 11:44, Roger Pau Monné wrote:
>>> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>>>> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
>>>>
>>>>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>>>>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>>>>> index 59bdc28c89..7b8289d436 100644
>>>>>> --- a/xen/include/public/domctl.h
>>>>>> +++ b/xen/include/public/domctl.h
>>>>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>>>>>       uint32_t max_evtchn_port;
>>>>>>       int32_t max_grant_frames;
>>>>>>       int32_t max_maptrack_frames;
>>>>>> +    uint8_t vmtrace_pt_order;
>>>>>
>>>>> I've been thinking about this, and even though this is a domctl (so
>>>>> not a stable interface) we might want to consider using a size (or a
>>>>> number of pages) here rather than an order. IPT also supports
>>>>> TOPA mode (kind of a linked list of buffers) that would allow for
>>>>> sizes not rounded to order boundaries to be used, since then only each
>>>>> item in the linked list needs to be rounded to an order boundary, so
>>>>> you could for example use three 4K pages in TOPA mode AFAICT.
>>>>>
>>>>> Roger.
>>>>
>>>> In previous versions it was "size" but it was requested to change it
>>>> to "order" in order to shrink the variable size from uint64_t to
>>>> uint8_t, because there is limited space for xen_domctl_createdomain
>>>> structure.
>>>
>>> It's likely I'm missing something here, but I wasn't aware
>>> xen_domctl_createdomain had any constrains regarding it's size. It's
>>> currently 48bytes which seems fairly small.
>>
>> Additionally I would guess a uint32_t could do here, if the value
>> passed was "number of pages" rather than "number of bytes"?
Looking at the rest of the code, the toolstack accepts a 64-bit value. 
So this would lead to truncation of the buffer if it is bigger than 2^44 
bytes.

I agree such a buffer is unlikely, yet I still think we want to harden the 
code whenever we can. So the solution is to either check for 
truncation in libxl or directly use 64-bit in the domctl.

My preference is the latter.

> 
> That could work, not sure if it needs to state however that those will
> be 4K pages, since Arm can have a different minimum page size IIRC?
> (or that's already the assumption for all number of frames fields)
> vmtrace_nr_frames seems fine to me.

The hypercalls interface is using the same page granularity as the 
hypervisor (i.e 4KB).

While we already support guest using 64KB page granularity, it is 
impossible to have a 64KB Arm hypervisor in the current state. You are 
going to either break existing guests (if you switch to 64KB page 
granularity for the hypercall ABI) or render them insecure (the minimum 
mapping in the P2M would be 64KB).

DOMCTLs are not stable yet, so using a number of pages is OK. However, I 
would strongly suggest using a number of bytes for any xl/libxl/stable 
library interfaces, as this avoids confusion and also makes them more 
futureproof.

Cheers,
Julien Grall July 4, 2020, 5:48 p.m. UTC | #9
Hi,

On 30/06/2020 13:33, Michał Leszczyński wrote:
> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index 71709dc585..891e8e28d6 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h
> @@ -438,6 +438,14 @@
>    */
>   #define LIBXL_HAVE_CREATEINFO_PASSTHROUGH 1
>   
> +/*
> + * LIBXL_HAVE_VMTRACE_PT_ORDER indicates that
> + * libxl_domain_create_info has a vmtrace_pt_order parameter, which
> + * allows to enable pre-allocation of processor tracing buffers
> + * with the given order of size.
> + */
> +#define LIBXL_HAVE_VMTRACE_PT_ORDER 1
> +
>   /*
>    * libxl ABI compatibility
>    *
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index 75862dc6ed..651d1f4c0f 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -608,6 +608,7 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
>               .max_evtchn_port = b_info->event_channels,
>               .max_grant_frames = b_info->max_grant_frames,
>               .max_maptrack_frames = b_info->max_maptrack_frames,
> +            .vmtrace_pt_order = b_info->vmtrace_pt_order,
>           };
>   
>           if (info->type != LIBXL_DOMAIN_TYPE_PV) {
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index 9d3f05f399..1c5dd43e4d 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -645,6 +645,8 @@ libxl_domain_build_info = Struct("domain_build_info",[
>       # supported by x86 HVM and ARM support is planned.
>       ("altp2m", libxl_altp2m_mode),
>   
> +    ("vmtrace_pt_order", integer),

libxl can be used by external projects (such as libvirt) for implementing 
their own toolstack.

While on x86 you always have the same granularity, on Arm the hypervisor 
and each guest may have a different page granularity (e.g 4KB, 16KB, 
64KB). So it is unclear what order one would have to use.

I think it would be best if the external user only specify the number of 
bytes. You can then sanity check the value and convert to an order (or 
number of pages) in libxl before passing the value to the hypervisor.

Cheers,
Jan Beulich July 6, 2020, 8:46 a.m. UTC | #10
On 04.07.2020 19:23, Julien Grall wrote:
> Hi,
> 
> On 03/07/2020 11:11, Roger Pau Monné wrote:
>> On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
>>> On 03.07.2020 11:44, Roger Pau Monné wrote:
>>>> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>>>>> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
>>>>>
>>>>>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>>>>>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>>>>>> index 59bdc28c89..7b8289d436 100644
>>>>>>> --- a/xen/include/public/domctl.h
>>>>>>> +++ b/xen/include/public/domctl.h
>>>>>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>>>>>>       uint32_t max_evtchn_port;
>>>>>>>       int32_t max_grant_frames;
>>>>>>>       int32_t max_maptrack_frames;
>>>>>>> +    uint8_t vmtrace_pt_order;
>>>>>>
>>>>>> I've been thinking about this, and even though this is a domctl (so
>>>>>> not a stable interface) we might want to consider using a size (or a
>>>>>> number of pages) here rather than an order. IPT also supports
>>>>>> TOPA mode (kind of a linked list of buffers) that would allow for
>>>>>> sizes not rounded to order boundaries to be used, since then only each
>>>>>> item in the linked list needs to be rounded to an order boundary, so
>>>>>> you could for example use three 4K pages in TOPA mode AFAICT.
>>>>>>
>>>>>> Roger.
>>>>>
>>>>> In previous versions it was "size" but it was requested to change it
>>>>> to "order" in order to shrink the variable size from uint64_t to
>>>>> uint8_t, because there is limited space for xen_domctl_createdomain
>>>>> structure.
>>>>
>>>> It's likely I'm missing something here, but I wasn't aware
>>>> xen_domctl_createdomain had any constrains regarding it's size. It's
>>>> currently 48bytes which seems fairly small.
>>>
>>> Additionally I would guess a uint32_t could do here, if the value
>>> passed was "number of pages" rather than "number of bytes"?
> Looking at the rest of the code, the toolstack accepts a 64-bit value. 
> So this would lead to truncation of the buffer if it is bigger than 2^44 
> bytes.
> 
> I agree such buffer is unlikely, yet I still think we want to harden the 
> code whenever we can. So the solution is to either prevent check 
> truncation in libxl or directly use 64-bit in the domctl.
> 
> My preference is the latter.
> 
>>
>> That could work, not sure if it needs to state however that those will
>> be 4K pages, since Arm can have a different minimum page size IIRC?
>> (or that's already the assumption for all number of frames fields)
>> vmtrace_nr_frames seems fine to me.
> 
> The hypercalls interface is using the same page granularity as the 
> hypervisor (i.e 4KB).
> 
> While we already support guest using 64KB page granularity, it is 
> impossible to have a 64KB Arm hypervisor in the current state. You are 
> going to either break existing guest (if you switch to 64KB page 
> granularity for the hypercall ABI) or render them insecure (the mimimum 
> mapping in the P2M would be 64KB).
> 
> DOMCTLs are not stable yet, so using a number of pages is OK. However, I 
> would strongly suggest to use a number of bytes for any xl/libxl/stable 
> libraries interfaces as this avoids confusion and also make more 
> futureproof.

If we can't settle on what "page size" means in the public interface
(which imo is embarrassing), then how about going with number of kb,
like other memory libxl controls do? (I guess using Mb, in line with
other config file controls, may end up being too coarse here.) This
would likely still allow for a 32-bit field to be wide enough.

Jan
Julien Grall July 7, 2020, 8:44 a.m. UTC | #11
Hi,

On 06/07/2020 09:46, Jan Beulich wrote:
> On 04.07.2020 19:23, Julien Grall wrote:
>> Hi,
>>
>> On 03/07/2020 11:11, Roger Pau Monné wrote:
>>> On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
>>>> On 03.07.2020 11:44, Roger Pau Monné wrote:
>>>>> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>>>>>> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
>>>>>>
>>>>>>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>>>>>>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>>>>>>> index 59bdc28c89..7b8289d436 100644
>>>>>>>> --- a/xen/include/public/domctl.h
>>>>>>>> +++ b/xen/include/public/domctl.h
>>>>>>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>>>>>>>        uint32_t max_evtchn_port;
>>>>>>>>        int32_t max_grant_frames;
>>>>>>>>        int32_t max_maptrack_frames;
>>>>>>>> +    uint8_t vmtrace_pt_order;
>>>>>>>
>>>>>>> I've been thinking about this, and even though this is a domctl (so
>>>>>>> not a stable interface) we might want to consider using a size (or a
>>>>>>> number of pages) here rather than an order. IPT also supports
>>>>>>> TOPA mode (kind of a linked list of buffers) that would allow for
>>>>>>> sizes not rounded to order boundaries to be used, since then only each
>>>>>>> item in the linked list needs to be rounded to an order boundary, so
>>>>>>> you could for example use three 4K pages in TOPA mode AFAICT.
>>>>>>>
>>>>>>> Roger.
>>>>>>
>>>>>> In previous versions it was "size" but it was requested to change it
>>>>>> to "order" in order to shrink the variable size from uint64_t to
>>>>>> uint8_t, because there is limited space for xen_domctl_createdomain
>>>>>> structure.
>>>>>
>>>>> It's likely I'm missing something here, but I wasn't aware
>>>>> xen_domctl_createdomain had any constrains regarding it's size. It's
>>>>> currently 48bytes which seems fairly small.
>>>>
>>>> Additionally I would guess a uint32_t could do here, if the value
>>>> passed was "number of pages" rather than "number of bytes"?
>> Looking at the rest of the code, the toolstack accepts a 64-bit value.
>> So this would lead to truncation of the buffer if it is bigger than 2^44
>> bytes.
>>
>> I agree such buffer is unlikely, yet I still think we want to harden the
>> code whenever we can. So the solution is to either prevent check
>> truncation in libxl or directly use 64-bit in the domctl.
>>
>> My preference is the latter.
>>
>>>
>>> That could work, not sure if it needs to state however that those will
>>> be 4K pages, since Arm can have a different minimum page size IIRC?
>>> (or that's already the assumption for all number of frames fields)
>>> vmtrace_nr_frames seems fine to me.
>>
>> The hypercalls interface is using the same page granularity as the
>> hypervisor (i.e 4KB).
>>
>> While we already support guest using 64KB page granularity, it is
>> impossible to have a 64KB Arm hypervisor in the current state. You are
>> going to either break existing guest (if you switch to 64KB page
>> granularity for the hypercall ABI) or render them insecure (the mimimum
>> mapping in the P2M would be 64KB).
>>
>> DOMCTLs are not stable yet, so using a number of pages is OK. However, I
>> would strongly suggest to use a number of bytes for any xl/libxl/stable
>> libraries interfaces as this avoids confusion and also make more
>> futureproof.
> 
> If we can't settle on what "page size" means in the public interface
> (which imo is embarrassing), then how about going with number of kb,
> like other memory libxl controls do? (I guess using Mb, in line with
> other config file controls, may end up being too coarse here.) This
> would likely still allow for a 32-bit field to be wide enough.

A 32-bit field would definitely not be able to cover a full address 
space. So would you mind explaining what upper bound you expect here?

Cheers,
Jan Beulich July 7, 2020, 9:10 a.m. UTC | #12
On 07.07.2020 10:44, Julien Grall wrote:
> Hi,
> 
> On 06/07/2020 09:46, Jan Beulich wrote:
>> On 04.07.2020 19:23, Julien Grall wrote:
>>> Hi,
>>>
>>> On 03/07/2020 11:11, Roger Pau Monné wrote:
>>>> On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
>>>>> On 03.07.2020 11:44, Roger Pau Monné wrote:
>>>>>> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>>>>>>> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
>>>>>>>
>>>>>>>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>>>>>>>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>>>>>>>> index 59bdc28c89..7b8289d436 100644
>>>>>>>>> --- a/xen/include/public/domctl.h
>>>>>>>>> +++ b/xen/include/public/domctl.h
>>>>>>>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>>>>>>>>        uint32_t max_evtchn_port;
>>>>>>>>>        int32_t max_grant_frames;
>>>>>>>>>        int32_t max_maptrack_frames;
>>>>>>>>> +    uint8_t vmtrace_pt_order;
>>>>>>>>
>>>>>>>> I've been thinking about this, and even though this is a domctl (so
>>>>>>>> not a stable interface) we might want to consider using a size (or a
>>>>>>>> number of pages) here rather than an order. IPT also supports
>>>>>>>> TOPA mode (kind of a linked list of buffers) that would allow for
>>>>>>>> sizes not rounded to order boundaries to be used, since then only each
>>>>>>>> item in the linked list needs to be rounded to an order boundary, so
>>>>>>>> you could for example use three 4K pages in TOPA mode AFAICT.
>>>>>>>>
>>>>>>>> Roger.
>>>>>>>
>>>>>>> In previous versions it was "size" but it was requested to change it
>>>>>>> to "order" in order to shrink the variable size from uint64_t to
>>>>>>> uint8_t, because there is limited space for xen_domctl_createdomain
>>>>>>> structure.
>>>>>>
>>>>>> It's likely I'm missing something here, but I wasn't aware
>>>>>> xen_domctl_createdomain had any constrains regarding it's size. It's
>>>>>> currently 48bytes which seems fairly small.
>>>>>
>>>>> Additionally I would guess a uint32_t could do here, if the value
>>>>> passed was "number of pages" rather than "number of bytes"?
>>> Looking at the rest of the code, the toolstack accepts a 64-bit value.
>>> So this would lead to truncation of the buffer if it is bigger than 2^44
>>> bytes.
>>>
>>> I agree such buffer is unlikely, yet I still think we want to harden the
>>> code whenever we can. So the solution is to either prevent check
>>> truncation in libxl or directly use 64-bit in the domctl.
>>>
>>> My preference is the latter.
>>>
>>>>
>>>> That could work, not sure if it needs to state however that those will
>>>> be 4K pages, since Arm can have a different minimum page size IIRC?
>>>> (or that's already the assumption for all number of frames fields)
>>>> vmtrace_nr_frames seems fine to me.
>>>
>>> The hypercalls interface is using the same page granularity as the
>>> hypervisor (i.e 4KB).
>>>
>>> While we already support guest using 64KB page granularity, it is
>>> impossible to have a 64KB Arm hypervisor in the current state. You are
>>> going to either break existing guest (if you switch to 64KB page
>>> granularity for the hypercall ABI) or render them insecure (the minimum
>>> mapping in the P2M would be 64KB).
>>>
>>> DOMCTLs are not stable yet, so using a number of pages is OK. However, I
>>> would strongly suggest to use a number of bytes for any xl/libxl/stable
>>> libraries interfaces as this avoids confusion and also make more
>>> futureproof.
>>
>> If we can't settle on what "page size" means in the public interface
>> (which imo is embarrassing), then how about going with number of kb,
>> like other memory libxl controls do? (I guess using Mb, in line with
>> other config file controls, may end up being too coarse here.) This
>> would likely still allow for a 32-bit field to be wide enough.
> 
> A 32-bit field would definitely not be able to cover a full address 
> space. So do you mind to explain what is the upper bound you expect here?

Do you foresee a need for buffer sizes of 4Tb and up?

Jan
Julien Grall July 7, 2020, 9:16 a.m. UTC | #13
On 07/07/2020 10:10, Jan Beulich wrote:
> On 07.07.2020 10:44, Julien Grall wrote:
>> Hi,
>>
>> On 06/07/2020 09:46, Jan Beulich wrote:
>>> On 04.07.2020 19:23, Julien Grall wrote:
>>>> Hi,
>>>>
>>>> On 03/07/2020 11:11, Roger Pau Monné wrote:
>>>>> On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
>>>>>> On 03.07.2020 11:44, Roger Pau Monné wrote:
>>>>>>> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>>>>>>>> ----- 2 lip 2020 o 11:00, Roger Pau Monné roger.pau@citrix.com napisał(a):
>>>>>>>>
>>>>>>>>> On Tue, Jun 30, 2020 at 02:33:46PM +0200, Michał Leszczyński wrote:
>>>>>>>>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>>>>>>>>> index 59bdc28c89..7b8289d436 100644
>>>>>>>>>> --- a/xen/include/public/domctl.h
>>>>>>>>>> +++ b/xen/include/public/domctl.h
>>>>>>>>>> @@ -92,6 +92,7 @@ struct xen_domctl_createdomain {
>>>>>>>>>>         uint32_t max_evtchn_port;
>>>>>>>>>>         int32_t max_grant_frames;
>>>>>>>>>>         int32_t max_maptrack_frames;
>>>>>>>>>> +    uint8_t vmtrace_pt_order;
>>>>>>>>>
>>>>>>>>> I've been thinking about this, and even though this is a domctl (so
>>>>>>>>> not a stable interface) we might want to consider using a size (or a
>>>>>>>>> number of pages) here rather than an order. IPT also supports
>>>>>>>>> TOPA mode (kind of a linked list of buffers) that would allow for
>>>>>>>>> sizes not rounded to order boundaries to be used, since then only each
>>>>>>>>> item in the linked list needs to be rounded to an order boundary, so
>>>>>>>>> you could for example use three 4K pages in TOPA mode AFAICT.
>>>>>>>>>
>>>>>>>>> Roger.
>>>>>>>>
>>>>>>>> In previous versions it was "size" but it was requested to change it
>>>>>>>> to "order" in order to shrink the variable size from uint64_t to
>>>>>>>> uint8_t, because there is limited space for xen_domctl_createdomain
>>>>>>>> structure.
>>>>>>>
>>>>>>> It's likely I'm missing something here, but I wasn't aware
>>>>>>> xen_domctl_createdomain had any constrains regarding it's size. It's
>>>>>>> currently 48bytes which seems fairly small.
>>>>>>
>>>>>> Additionally I would guess a uint32_t could do here, if the value
>>>>>> passed was "number of pages" rather than "number of bytes"?
>>>> Looking at the rest of the code, the toolstack accepts a 64-bit value.
>>>> So this would lead to truncation of the buffer if it is bigger than 2^44
>>>> bytes.
>>>>
>>>> I agree such buffer is unlikely, yet I still think we want to harden the
>>>> code whenever we can. So the solution is to either prevent check
>>>> truncation in libxl or directly use 64-bit in the domctl.
>>>>
>>>> My preference is the latter.
>>>>
>>>>>
>>>>> That could work, not sure if it needs to state however that those will
>>>>> be 4K pages, since Arm can have a different minimum page size IIRC?
>>>>> (or that's already the assumption for all number of frames fields)
>>>>> vmtrace_nr_frames seems fine to me.
>>>>
>>>> The hypercalls interface is using the same page granularity as the
>>>> hypervisor (i.e 4KB).
>>>>
>>>> While we already support guest using 64KB page granularity, it is
>>>> impossible to have a 64KB Arm hypervisor in the current state. You are
>>>> going to either break existing guest (if you switch to 64KB page
>>>> granularity for the hypercall ABI) or render them insecure (the minimum
>>>> mapping in the P2M would be 64KB).
>>>>
>>>> DOMCTLs are not stable yet, so using a number of pages is OK. However, I
>>>> would strongly suggest to use a number of bytes for any xl/libxl/stable
>>>> libraries interfaces as this avoids confusion and also make more
>>>> futureproof.
>>>
>>> If we can't settle on what "page size" means in the public interface
>>> (which imo is embarrassing), then how about going with number of kb,
>>> like other memory libxl controls do? (I guess using Mb, in line with
>>> other config file controls, may end up being too coarse here.) This
>>> would likely still allow for a 32-bit field to be wide enough.
>>
>> A 32-bit field would definitely not be able to cover a full address
>> space. So do you mind to explain what is the upper bound you expect here?
> 
> Do you foresee a need for buffer sizes of 4Tb and up?

Not that I am aware of... However, I think the question was worth it given 
that "wide enough" can mean anything.

Cheers,
Michał Leszczyński July 7, 2020, 11:17 a.m. UTC | #14
----- 7 lip 2020 o 11:16, Julien Grall julien@xen.org napisał(a):

> On 07/07/2020 10:10, Jan Beulich wrote:
>> On 07.07.2020 10:44, Julien Grall wrote:
>>> Hi,
>>>
>>> On 06/07/2020 09:46, Jan Beulich wrote:
>>>> On 04.07.2020 19:23, Julien Grall wrote:
>>>>> Hi,
>>>>>
>>>>> On 03/07/2020 11:11, Roger Pau Monné wrote:
>>>>>> On Fri, Jul 03, 2020 at 11:56:38AM +0200, Jan Beulich wrote:
>>>>>>> On 03.07.2020 11:44, Roger Pau Monné wrote:
>>>>>>>> On Thu, Jul 02, 2020 at 06:23:28PM +0200, Michał Leszczyński wrote:
>>>>>>>>> In previous versions it was "size" but it was requested to change it
>>>>>>>>> to "order" in order to shrink the variable size from uint64_t to
>>>>>>>>> uint8_t, because there is limited space for xen_domctl_createdomain
>>>>>>>>> structure.
>>>>>>>>
>>>>>>>> It's likely I'm missing something here, but I wasn't aware
>>>>>>>> xen_domctl_createdomain had any constrains regarding it's size. It's
>>>>>>>> currently 48bytes which seems fairly small.
>>>>>>>
>>>>>>> Additionally I would guess a uint32_t could do here, if the value
>>>>>>> passed was "number of pages" rather than "number of bytes"?
>>>>> Looking at the rest of the code, the toolstack accepts a 64-bit value.
>>>>> So this would lead to truncation of the buffer if it is bigger than 2^44
>>>>> bytes.
>>>>>
>>>>> I agree such buffer is unlikely, yet I still think we want to harden the
>>>>> code whenever we can. So the solution is to either prevent check
>>>>> truncation in libxl or directly use 64-bit in the domctl.
>>>>>
>>>>> My preference is the latter.
>>>>>
>>>>>>
>>>>>> That could work, not sure if it needs to state however that those will
>>>>>> be 4K pages, since Arm can have a different minimum page size IIRC?
>>>>>> (or that's already the assumption for all number of frames fields)
>>>>>> vmtrace_nr_frames seems fine to me.
>>>>>
>>>>> The hypercalls interface is using the same page granularity as the
>>>>> hypervisor (i.e 4KB).
>>>>>
>>>>> While we already support guest using 64KB page granularity, it is
>>>>> impossible to have a 64KB Arm hypervisor in the current state. You are
>>>>> going to either break existing guest (if you switch to 64KB page
>>>>> granularity for the hypercall ABI) or render them insecure (the minimum
>>>>> mapping in the P2M would be 64KB).
>>>>>
>>>>> DOMCTLs are not stable yet, so using a number of pages is OK. However, I
>>>>> would strongly suggest to use a number of bytes for any xl/libxl/stable
>>>>> libraries interfaces as this avoids confusion and also make more
>>>>> futureproof.
>>>>
>>>> If we can't settle on what "page size" means in the public interface
>>>> (which imo is embarrassing), then how about going with number of kb,
>>>> like other memory libxl controls do? (I guess using Mb, in line with
>>>> other config file controls, may end up being too coarse here.) This
>>>> would likely still allow for a 32-bit field to be wide enough.
>>>
>>> A 32-bit field would definitely not be able to cover a full address
>>> space. So do you mind to explain what is the upper bound you expect here?
>> 
>> Do you foresee a need for buffer sizes of 4Tb and up?
> 
> Not that I am aware of... However, I think the question was worth it given
> that "wide enough" can mean anything.
> 
> Cheers,
> 
> --
> Julien Grall


So would it be OK to use uint32_t everywhere and to store the trace buffer
size as number of kB? I think this is the most straightforward option.

I would also stick with the name "processor_trace_buf_size"
everywhere, both in the hypervisor, ABI and the toolstack, with the
respective comments that the size is in kB.


Best regards,
Michał Leszczyński
CERT Polska
Jan Beulich July 7, 2020, 11:21 a.m. UTC | #15
On 07.07.2020 13:17, Michał Leszczyński wrote:
> So would it be OK to use uint32_t everywhere and to store the trace buffer
> size as number of kB? I think this is the most straightforward option.
> 
> I would also stick with the name "processor_trace_buf_size"
> everywhere, both in the hypervisor, ABI and the toolstack, with the
> respective comments that the size is in kB.

Perhaps even more clearly "processor_trace_buf_kb" then?

Jan
Michał Leszczyński July 7, 2020, 11:35 a.m. UTC | #16
----- 7 lip 2020 o 13:21, Jan Beulich jbeulich@suse.com napisał(a):

> On 07.07.2020 13:17, Michał Leszczyński wrote:
>> So would it be OK to use uint32_t everywhere and to store the trace buffer
>> size as number of kB? I think this is the most straightforward option.
>> 
>> I would also stick with the name "processor_trace_buf_size"
>> everywhere, both in the hypervisor, ABI and the toolstack, with the
>> respective comments that the size is in kB.
> 
> Perhaps even more clearly "processor_trace_buf_kb" then?
> 
> Jan


Ok.

Best regards,
Michał Leszczyński
CERT Polska

Patch
diff mbox series

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index 0532739c1f..78f434b722 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -278,6 +278,16 @@  memory=8096 will report significantly less memory available for use
 than a system with maxmem=8096 memory=8096 due to the memory overhead
 of having to track the unused pages.
 
+=item B<vmtrace_pt_size=BYTES>
+
+Specifies the size of processor trace buffer that would be allocated
+for each vCPU belonging to this domain. Disabled (i.e. B<vmtrace_pt_size=0>)
+by default. This must be set to a non-zero value in order to be able to
+use processor tracing features with this domain.
+
+B<NOTE>: The size value must be between 4 kB and 4 GB and it must
+be also a power of 2.
+
 =back
 
 =head3 Guest Virtual NUMA Configuration
diff --git a/tools/golang/xenlight/helpers.gen.go b/tools/golang/xenlight/helpers.gen.go
index 935d3bc50a..ecace9634e 100644
--- a/tools/golang/xenlight/helpers.gen.go
+++ b/tools/golang/xenlight/helpers.gen.go
@@ -1117,6 +1117,7 @@  return fmt.Errorf("invalid union key '%v'", x.Type)}
 x.ArchArm.GicVersion = GicVersion(xc.arch_arm.gic_version)
 x.ArchArm.Vuart = VuartType(xc.arch_arm.vuart)
 x.Altp2M = Altp2MMode(xc.altp2m)
+x.VmtracePtOrder = int(xc.vmtrace_pt_order)
 
  return nil}
 
@@ -1592,6 +1593,7 @@  return fmt.Errorf("invalid union key '%v'", x.Type)}
 xc.arch_arm.gic_version = C.libxl_gic_version(x.ArchArm.GicVersion)
 xc.arch_arm.vuart = C.libxl_vuart_type(x.ArchArm.Vuart)
 xc.altp2m = C.libxl_altp2m_mode(x.Altp2M)
+xc.vmtrace_pt_order = C.int(x.VmtracePtOrder)
 
  return nil
  }
diff --git a/tools/golang/xenlight/types.gen.go b/tools/golang/xenlight/types.gen.go
index 663c1e86b4..f9b07ac862 100644
--- a/tools/golang/xenlight/types.gen.go
+++ b/tools/golang/xenlight/types.gen.go
@@ -516,6 +516,7 @@  GicVersion GicVersion
 Vuart VuartType
 }
 Altp2M Altp2MMode
+VmtracePtOrder int
 }
 
 type domainBuildInfoTypeUnion interface {
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 71709dc585..891e8e28d6 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -438,6 +438,14 @@ 
  */
 #define LIBXL_HAVE_CREATEINFO_PASSTHROUGH 1
 
+/*
+ * LIBXL_HAVE_VMTRACE_PT_ORDER indicates that
+ * libxl_domain_create_info has a vmtrace_pt_order parameter, which
+ * allows to enable pre-allocation of processor tracing buffers
+ * with the given order of size.
+ */
+#define LIBXL_HAVE_VMTRACE_PT_ORDER 1
+
 /*
  * libxl ABI compatibility
  *
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 75862dc6ed..651d1f4c0f 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -608,6 +608,7 @@  int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
             .max_evtchn_port = b_info->event_channels,
             .max_grant_frames = b_info->max_grant_frames,
             .max_maptrack_frames = b_info->max_maptrack_frames,
+            .vmtrace_pt_order = b_info->vmtrace_pt_order,
         };
 
         if (info->type != LIBXL_DOMAIN_TYPE_PV) {
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 9d3f05f399..1c5dd43e4d 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -645,6 +645,8 @@  libxl_domain_build_info = Struct("domain_build_info",[
     # supported by x86 HVM and ARM support is planned.
     ("altp2m", libxl_altp2m_mode),
 
+    ("vmtrace_pt_order", integer),
+
     ], dir=DIR_IN,
        copy_deprecated_fn="libxl__domain_build_info_copy_deprecated",
 )
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 61b4ef7b7e..4eba224590 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -1861,6 +1861,26 @@  void parse_config_data(const char *config_source,
         }
     }
 
+    if (!xlu_cfg_get_long(config, "vmtrace_pt_size", &l, 1) && l) {
+        int32_t shift = 0;
+
+        if (l & (l - 1))
+        {
+            fprintf(stderr, "ERROR: pt buffer size must be a power of 2\n");
+            exit(1);
+        }
+
+        while (l >>= 1) ++shift;
+
+        if (shift <= XEN_PAGE_SHIFT)
+        {
+            fprintf(stderr, "ERROR: too small pt buffer\n");
+            exit(1);
+        }
+
+        b_info->vmtrace_pt_order = shift - XEN_PAGE_SHIFT;
+    }
+
     if (!xlu_cfg_get_list(config, "ioports", &ioports, &num_ioports, 0)) {
         b_info->num_ioports = num_ioports;
         b_info->ioports = calloc(num_ioports, sizeof(*b_info->ioports));
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 0a33e0dfd6..27dcfbac8c 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -338,6 +338,12 @@  static int sanitise_domain_config(struct xen_domctl_createdomain *config)
         return -EINVAL;
     }
 
+    if ( config->vmtrace_pt_order && !vmtrace_supported )
+    {
+        dprintk(XENLOG_INFO, "Processor tracing is not supported\n");
+        return -EINVAL;
+    }
+
     return arch_sanitise_domain_config(config);
 }
 
@@ -443,6 +449,12 @@  struct domain *domain_create(domid_t domid,
         d->nr_pirqs = min(d->nr_pirqs, nr_irqs);
 
         radix_tree_init(&d->pirq_tree);
+
+        if ( config->vmtrace_pt_order )
+        {
+            uint32_t shift_val = config->vmtrace_pt_order + PAGE_SHIFT;
+            d->vmtrace_pt_size = (1ULL << shift_val);
+        }
     }
 
     if ( (err = arch_domain_create(d, config)) != 0 )
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 59bdc28c89..7b8289d436 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -92,6 +92,7 @@  struct xen_domctl_createdomain {
     uint32_t max_evtchn_port;
     int32_t max_grant_frames;
     int32_t max_maptrack_frames;
+    uint8_t vmtrace_pt_order;
 
     struct xen_arch_domainconfig arch;
 };