diff mbox series

[RFC,1/2] xen/memory : Add stats_table resource type

Message ID d0afb6657b1e78df4857ad7bcc875982e9c022b4.1652797713.git.matias.vara@vates.fr (mailing list archive)
State New, archived
Headers show
Series Add a new acquire resource to query vcpu statistics | expand

Commit Message

Matias Ezequiel Vara Larsen May 17, 2022, 2:33 p.m. UTC
Allow to map vcpu stats using acquire_resource().

Signed-off-by: Matias Ezequiel Vara Larsen <matias.vara@vates.fr>
---
 xen/common/domain.c         | 42 +++++++++++++++++++++++++++++++++++++
 xen/common/memory.c         | 29 +++++++++++++++++++++++++
 xen/common/sched/core.c     |  5 +++++
 xen/include/public/memory.h |  1 +
 xen/include/xen/sched.h     |  5 +++++
 5 files changed, 82 insertions(+)

Comments

George Dunlap June 17, 2022, 8:54 p.m. UTC | #1
Preface: It looks like this may be one of your first hypervisor patches.  So thank you!  FYI there’s a lot that needs fixing up here; please don’t read a tone of annoyance into it, I’m just trying to tell you what needs fixing in the most efficient manner. :-)

> On 17 May 2022, at 15:33, Matias Ezequiel Vara Larsen <matiasevara@gmail.com> wrote:
> 
> Allow to map vcpu stats using acquire_resource().

This needs a lot more expansion in terms of what it is that you’re doing in this patch and why.

> 
> Signed-off-by: Matias Ezequiel Vara Larsen <matias.vara@vates.fr>
> ---
> xen/common/domain.c         | 42 +++++++++++++++++++++++++++++++++++++
> xen/common/memory.c         | 29 +++++++++++++++++++++++++
> xen/common/sched/core.c     |  5 +++++
> xen/include/public/memory.h |  1 +
> xen/include/xen/sched.h     |  5 +++++
> 5 files changed, 82 insertions(+)
> 
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 17cc32fde3..ddd9f88874 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -132,6 +132,42 @@ static void vcpu_info_reset(struct vcpu *v)
>     v->vcpu_info_mfn = INVALID_MFN;
> }
> 
> +static void stats_free_buffer(struct vcpu * v)
> +{
> +    struct page_info *pg = v->stats.pg;
> +
> +    if ( !pg )
> +        return;
> +
> +    v->stats.va = NULL;
> +
> +    if ( v->stats.va )
> +        unmap_domain_page_global(v->stats.va);
> +
> +    v->stats.va = NULL;

Looks like you meant to delete the first `va->stats.va = NULL` but forgot?

> +
> +    free_domheap_page(pg);

Pretty sure this will crash.

Unfortunately page allocation and freeing is somewhat complicated and requires a bit of boilerplate.  You can look at the vmtrace_alloc_buffer() code for a template, but the general sequence you want is as follows:

* On the allocate side:

1. Allocate the page

   pg = alloc_domheap_page(d, MEMF_no_refcount);

This will allocate a page with the PGC_allocated bit set and a single reference count.  (If you pass a page with PGC_allocated set to free_domheap_page(), it will crash; which is why I said the above.)

2.  Grab a general reference count for the vcpu struct’s reference to it; as well as a writable type count, so that it doesn’t get used as a special page

if (get_page_and_type(pg, d, PGT_writable_page)) {
  put_page_alloc_ref(p);
  /* failure path */
}

* On the free side, don’t call free_domheap_pages() directly.  Rather, drop the allocation, then drop your own type count, thus:

v->stats.va <http://stats.va/> = NULL;

put_page_alloc_ref(pg);
put_page_and_type(pg);

The issue here is that we can’t free the page until all references have dropped; and the whole point of this exercise is to allow guest userspace in dom0 to gain a reference to the page.  We can’t actually free the page until *all* references are gone, including the userspace one in dom0.  The put_page() which brings the reference count to 0 will automatically free the page.


> +}
> +
> +static int stats_alloc_buffer(struct vcpu *v)
> +{
> +    struct domain *d = v->domain;
> +    struct page_info *pg;
> +
> +    pg = alloc_domheap_page(d, MEMF_no_refcount);
> +
> +    if ( !pg )
> +        return -ENOMEM;
> +
> +    v->stats.va = __map_domain_page_global(pg);
> +    if ( !v->stats.va )
> +        return -ENOMEM;
> +
> +    v->stats.pg = pg;
> +    clear_page(v->stats.va);
> +    return 0;
> +}

The other thing to say about this is that the memory is being allocated unconditionally, even if nobody is planning to read it.  The vast majority of Xen users will not be running xcp-rrd, so it will be pointless overheard.

At a basic level, you want to follow suit with the vmtrace buffers, which are only allocated if the proper domctl flag is set on domain creation.  (We could consider instead, or in addition, having a global Xen command-line parameter which would enable this feature for all domains.)

> +
> static void vmtrace_free_buffer(struct vcpu *v)
> {
>     const struct domain *d = v->domain;
> @@ -203,6 +239,9 @@ static int vmtrace_alloc_buffer(struct vcpu *v)
>  */
> static int vcpu_teardown(struct vcpu *v)
> {
> +
> +    stats_free_buffer(v);
> +
>     vmtrace_free_buffer(v);
> 
>     return 0;
> @@ -269,6 +308,9 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
>     if ( vmtrace_alloc_buffer(v) != 0 )
>         goto fail_wq;
> 
> +    if ( stats_alloc_buffer(v) != 0 )
> +        goto fail_wq;
> +
>     if ( arch_vcpu_create(v) != 0 )
>         goto fail_sched;
> 
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 297b98a562..39de6d9d05 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1099,6 +1099,10 @@ static unsigned int resource_max_frames(const struct domain *d,
>     case XENMEM_resource_vmtrace_buf:
>         return d->vmtrace_size >> PAGE_SHIFT;
> 
> +    // WIP: to figure out the correct size of the resource
> +    case XENMEM_resource_stats_table:
> +        return 1;
> +
>     default:
>         return -EOPNOTSUPP;
>     }
> @@ -1162,6 +1166,28 @@ static int acquire_vmtrace_buf(
>     return nr_frames;
> }
> 
> +static int acquire_stats_table(struct domain *d,
> +                                unsigned int id,
> +                                unsigned int frame,
> +                                unsigned int nr_frames,
> +                                xen_pfn_t mfn_list[])
> +{
> +    const struct vcpu *v = domain_vcpu(d, id);
> +    mfn_t mfn;
> +

Maybe I’m paranoid, but I might add an “ASSERT(nr_frames == 1)” here

> +    if ( !v )
> +        return -ENOENT;
> +
> +    if ( !v->stats.pg )
> +        return -EINVAL;
> +
> +    mfn = page_to_mfn(v->stats.pg);
> +    mfn_list[0] = mfn_x(mfn);
> +
> +    printk("acquire_perf_table: id: %d, nr_frames: %d, %p, domainid: %d\n", id, nr_frames, v->stats.pg, d->domain_id);
> +    return 1;
> +}
> +
> /*
>  * Returns -errno on error, or positive in the range [1, nr_frames] on
>  * success.  Returning less than nr_frames contitutes a request for a
> @@ -1182,6 +1208,9 @@ static int _acquire_resource(
>     case XENMEM_resource_vmtrace_buf:
>         return acquire_vmtrace_buf(d, id, frame, nr_frames, mfn_list);
> 
> +    case XENMEM_resource_stats_table:
> +        return acquire_stats_table(d, id, frame, nr_frames, mfn_list);
> +
>     default:
>         return -EOPNOTSUPP;
>     }
> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> index 8f4b1ca10d..2a8b534977 100644
> --- a/xen/common/sched/core.c
> +++ b/xen/common/sched/core.c
> @@ -264,6 +264,7 @@ static inline void vcpu_runstate_change(
> {
>     s_time_t delta;
>     struct sched_unit *unit = v->sched_unit;
> +    uint64_t * runstate;
> 
>     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
>     if ( v->runstate.state == new_state )
> @@ -287,6 +288,10 @@ static inline void vcpu_runstate_change(
>     }
> 
>     v->runstate.state = new_state;
> +
> +    // WIP: use a different interface
> +    runstate = (uint64_t*)v->stats.va;

I think you should look at xen.git/xen/include/public/hvm/ioreq.h:shared_iopage_t for inspiration.  Basically, you cast the void pointer to that type, and then just access `iopage->vcpu_ioreq[vcpuid]`.   Put it in a public header, and then both the userspace consumer and Xen can use the same structure.

As I said in my response to the cover letter, I think vcpu_runstate_info_t would be something to look at and gain inspiration from.

The other thing to say here is that this is a hot path; we don’t want to be copying lots of information around if it’s not going to be used.  So you should only allocate the buffers if specifically enabled, and you should only copy the information over if v->stats.va <http://stats.va/> != NULL.

I think that should be enough to start with; we can nail down more when you send v1.

Peace,
 -George
Jan Beulich June 22, 2022, 8:59 a.m. UTC | #2
On 17.05.2022 16:33, Matias Ezequiel Vara Larsen wrote:
> @@ -287,6 +288,10 @@ static inline void vcpu_runstate_change(
>      }
>  
>      v->runstate.state = new_state;
> +
> +    // WIP: use a different interface
> +    runstate = (uint64_t*)v->stats.va;
> +    memcpy(runstate, &v->runstate.time[0], sizeof(v->runstate.time[0]));
>  }

One remark on top of what George has said: By exposing this information the
way you do, you allow updating and reading of it to be fully asynchronous.
That way a consumer may fetch inconsistent (partially updated) state (and
this would be even more so when further fields would be added). For the
data to be useful, you need to add a mechanism for consumers to know when
an update is in progress, so they can wait and then retry. You'll find a
number of instances of such in the code base.

In general I also have to admit that I'm not sure the exposed data really
qualifies as a "resource", and hence I'm not really convinced of your use
of the resource mapping interface as a vehicle.

Jan
George Dunlap June 22, 2022, 10:05 a.m. UTC | #3
> On 22 Jun 2022, at 09:59, Jan Beulich <jbeulich@suse.com> wrote:
> 
[snip]
> In general I also have to admit that I'm not sure the exposed data really
> qualifies as a "resource", and hence I'm not really convinced of your use
> of the resource mapping interface as a vehicle.

I’m not sure if I’d call any of the things currently mappable via that interface (ioreq pages, vmcall buffers, etc) “resources”.  I’m not sure why the name was chosen, except perhaps that it was meant to be a more generalized form of “page” or “pages".

The alternate is to try to plumb through a new ad-hoc hypercall.  Andy suggested Matias use this interface specifically to avoid having to do that; and it sounds like he believes the interface was designed specifically for this kind of thing.

 -George
Jan Beulich June 22, 2022, 10:09 a.m. UTC | #4
On 22.06.2022 12:05, George Dunlap wrote:
>> On 22 Jun 2022, at 09:59, Jan Beulich <jbeulich@suse.com> wrote:
>>
> [snip]
>> In general I also have to admit that I'm not sure the exposed data really
>> qualifies as a "resource", and hence I'm not really convinced of your use
>> of the resource mapping interface as a vehicle.
> 
> I’m not sure if I’d call any of the things currently mappable via that interface (ioreq pages, vmcall buffers, etc) “resources”.  I’m not sure why the name was chosen, except perhaps that it was meant to be a more generalized form of “page” or “pages".
> 
> The alternate is to try to plumb through a new ad-hoc hypercall.  Andy suggested Matias use this interface specifically to avoid having to do that; and it sounds like he believes the interface was designed specifically for this kind of thing.

Okay. I guess I wasn't aware of that suggestion.

Jan
Matias Ezequiel Vara Larsen June 29, 2022, 9:44 a.m. UTC | #5
On Fri, Jun 17, 2022 at 08:54:34PM +0000, George Dunlap wrote:
> Preface: It looks like this may be one of your first hypervisor patches.  So thank you!  FYI there’s a lot that needs fixing up here; please don’t read a tone of annoyance into it, I’m just trying to tell you what needs fixing in the most efficient manner. :-)
> 

Thanks for the comments :) I have addressed some of them already in the response to the
cover letter.

> > On 17 May 2022, at 15:33, Matias Ezequiel Vara Larsen <matiasevara@gmail.com> wrote:
> > 
> > Allow to map vcpu stats using acquire_resource().
> 
> This needs a lot more expansion in terms of what it is that you’re doing in this patch and why.
> 

Got it. I'll improve the commit message in v1.

> > 
> > Signed-off-by: Matias Ezequiel Vara Larsen <matias.vara@vates.fr>
> > ---
> > xen/common/domain.c         | 42 +++++++++++++++++++++++++++++++++++++
> > xen/common/memory.c         | 29 +++++++++++++++++++++++++
> > xen/common/sched/core.c     |  5 +++++
> > xen/include/public/memory.h |  1 +
> > xen/include/xen/sched.h     |  5 +++++
> > 5 files changed, 82 insertions(+)
> > 
> > diff --git a/xen/common/domain.c b/xen/common/domain.c
> > index 17cc32fde3..ddd9f88874 100644
> > --- a/xen/common/domain.c
> > +++ b/xen/common/domain.c
> > @@ -132,6 +132,42 @@ static void vcpu_info_reset(struct vcpu *v)
> >     v->vcpu_info_mfn = INVALID_MFN;
> > }
> > 
> > +static void stats_free_buffer(struct vcpu * v)
> > +{
> > +    struct page_info *pg = v->stats.pg;
> > +
> > +    if ( !pg )
> > +        return;
> > +
> > +    v->stats.va = NULL;
> > +
> > +    if ( v->stats.va )
> > +        unmap_domain_page_global(v->stats.va);
> > +
> > +    v->stats.va = NULL;
> 
> Looks like you meant to delete the first `va->stats.va = NULL` but forgot?
> 

Apologies for this, I completely missed.

> > +
> > +    free_domheap_page(pg);
> 
> Pretty sure this will crash.
> 
> Unfortunately page allocation and freeing is somewhat complicated and requires a bit of boilerplate.  You can look at the vmtrace_alloc_buffer() code for a template, but the general sequence you want is as follows:
> 
> * On the allocate side:
> 
> 1. Allocate the page
> 
>    pg = alloc_domheap_page(d, MEMF_no_refcount);
> 
> This will allocate a page with the PGC_allocated bit set and a single reference count.  (If you pass a page with PGC_allocated set to free_domheap_page(), it will crash; which is why I said the above.)
> 
> 2.  Grab a general reference count for the vcpu struct’s reference to it; as well as a writable type count, so that it doesn’t get used as a special page
> 
> if (get_page_and_type(pg, d, PGT_writable_page)) {
>   put_page_alloc_ref(p);
>   /* failure path */
> }
> 
> * On the free side, don’t call free_domheap_pages() directly.  Rather, drop the allocation, then drop your own type count, thus:
> 
> v->stats.va <http://stats.va/> = NULL;
> 
> put_page_alloc_ref(pg);
> put_page_and_type(pg);
> 
> The issue here is that we can’t free the page until all references have dropped; and the whole point of this exercise is to allow guest userspace in dom0 to gain a reference to the page.  We can’t actually free the page until *all* references are gone, including the userspace one in dom0.  The put_page() which brings the reference count to 0 will automatically free the page.
> 

Thanks for the explanation. For some reason, this is not crashing. I will
reimplement the allocation, releasing, and then update the documentation that I
proposed at
https://lists.xenproject.org/archives/html/xen-devel/2022-05/msg01963.html. The
idea of this reference document is to guide someone that would like to export a new
resource by relying on the acquire-resource interface. 

> 
> > +}
> > +
> > +static int stats_alloc_buffer(struct vcpu *v)
> > +{
> > +    struct domain *d = v->domain;
> > +    struct page_info *pg;
> > +
> > +    pg = alloc_domheap_page(d, MEMF_no_refcount);
> > +
> > +    if ( !pg )
> > +        return -ENOMEM;
> > +
> > +    v->stats.va = __map_domain_page_global(pg);
> > +    if ( !v->stats.va )
> > +        return -ENOMEM;
> > +
> > +    v->stats.pg = pg;
> > +    clear_page(v->stats.va);
> > +    return 0;
> > +}
> 
> The other thing to say about this is that the memory is being allocated unconditionally, even if nobody is planning to read it.  The vast majority of Xen users will not be running xcp-rrd, so it will be pointless overheard.
> 
> At a basic level, you want to follow suit with the vmtrace buffers, which are only allocated if the proper domctl flag is set on domain creation.  (We could consider instead, or in addition, having a global Xen command-line parameter which would enable this feature for all domains.)
>

I agree. I will add a domctl flag on domain creation to enable the allocation of
these buffers.
 
> > +
> > static void vmtrace_free_buffer(struct vcpu *v)
> > {
> >     const struct domain *d = v->domain;
> > @@ -203,6 +239,9 @@ static int vmtrace_alloc_buffer(struct vcpu *v)
> >  */
> > static int vcpu_teardown(struct vcpu *v)
> > {
> > +
> > +    stats_free_buffer(v);
> > +
> >     vmtrace_free_buffer(v);
> > 
> >     return 0;
> > @@ -269,6 +308,9 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
> >     if ( vmtrace_alloc_buffer(v) != 0 )
> >         goto fail_wq;
> > 
> > +    if ( stats_alloc_buffer(v) != 0 )
> > +        goto fail_wq;
> > +
> >     if ( arch_vcpu_create(v) != 0 )
> >         goto fail_sched;
> > 
> > diff --git a/xen/common/memory.c b/xen/common/memory.c
> > index 297b98a562..39de6d9d05 100644
> > --- a/xen/common/memory.c
> > +++ b/xen/common/memory.c
> > @@ -1099,6 +1099,10 @@ static unsigned int resource_max_frames(const struct domain *d,
> >     case XENMEM_resource_vmtrace_buf:
> >         return d->vmtrace_size >> PAGE_SHIFT;
> > 
> > +    // WIP: to figure out the correct size of the resource
> > +    case XENMEM_resource_stats_table:
> > +        return 1;
> > +
> >     default:
> >         return -EOPNOTSUPP;
> >     }
> > @@ -1162,6 +1166,28 @@ static int acquire_vmtrace_buf(
> >     return nr_frames;
> > }
> > 
> > +static int acquire_stats_table(struct domain *d,
> > +                                unsigned int id,
> > +                                unsigned int frame,
> > +                                unsigned int nr_frames,
> > +                                xen_pfn_t mfn_list[])
> > +{
> > +    const struct vcpu *v = domain_vcpu(d, id);
> > +    mfn_t mfn;
> > +
> 
> Maybe I’m paranoid, but I might add an “ASSERT(nr_frames == 1)” here
>

Thanks, I will have this in mind in v1.
 
> > +    if ( !v )
> > +        return -ENOENT;
> > +
> > +    if ( !v->stats.pg )
> > +        return -EINVAL;
> > +
> > +    mfn = page_to_mfn(v->stats.pg);
> > +    mfn_list[0] = mfn_x(mfn);
> > +
> > +    printk("acquire_perf_table: id: %d, nr_frames: %d, %p, domainid: %d\n", id, nr_frames, v->stats.pg, d->domain_id);
> > +    return 1;
> > +}
> > +
> > /*
> >  * Returns -errno on error, or positive in the range [1, nr_frames] on
> >  * success.  Returning less than nr_frames contitutes a request for a
> > @@ -1182,6 +1208,9 @@ static int _acquire_resource(
> >     case XENMEM_resource_vmtrace_buf:
> >         return acquire_vmtrace_buf(d, id, frame, nr_frames, mfn_list);
> > 
> > +    case XENMEM_resource_stats_table:
> > +        return acquire_stats_table(d, id, frame, nr_frames, mfn_list);
> > +
> >     default:
> >         return -EOPNOTSUPP;
> >     }
> > diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> > index 8f4b1ca10d..2a8b534977 100644
> > --- a/xen/common/sched/core.c
> > +++ b/xen/common/sched/core.c
> > @@ -264,6 +264,7 @@ static inline void vcpu_runstate_change(
> > {
> >     s_time_t delta;
> >     struct sched_unit *unit = v->sched_unit;
> > +    uint64_t * runstate;
> > 
> >     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
> >     if ( v->runstate.state == new_state )
> > @@ -287,6 +288,10 @@ static inline void vcpu_runstate_change(
> >     }
> > 
> >     v->runstate.state = new_state;
> > +
> > +    // WIP: use a different interface
> > +    runstate = (uint64_t*)v->stats.va;
> 
> I think you should look at xen.git/xen/include/public/hvm/ioreq.h:shared_iopage_t for inspiration.  Basically, you cast the void pointer to that type, and then just access `iopage->vcpu_ioreq[vcpuid]`.   Put it in a public header, and then both the userspace consumer and Xen can use the same structure.
> 

Thanks for pointing it out. I've addresed this comment in the response to the cover
letter.

> As I said in my response to the cover letter, I think vcpu_runstate_info_t would be something to look at and gain inspiration from.
> 
> The other thing to say here is that this is a hot path; we don’t want to be copying lots of information around if it’s not going to be used.  So you should only allocate the buffers if specifically enabled, and you should only copy the information over if v->stats.va <http://stats.va/> != NULL.
> 
> I think that should be enough to start with; we can nail down more when you send v1.
> 

Thanks for the comments, I will tackle them in v1.

Matias

> Peace,
>  -George
>
diff mbox series

Patch

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 17cc32fde3..ddd9f88874 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -132,6 +132,42 @@  static void vcpu_info_reset(struct vcpu *v)
     v->vcpu_info_mfn = INVALID_MFN;
 }
 
+static void stats_free_buffer(struct vcpu * v)
+{
+    struct page_info *pg = v->stats.pg;
+
+    if ( !pg )
+        return;
+
+    v->stats.va = NULL;
+
+    if ( v->stats.va )
+        unmap_domain_page_global(v->stats.va);
+
+    v->stats.va = NULL;
+
+    free_domheap_page(pg);
+}
+
+static int stats_alloc_buffer(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct page_info *pg;
+
+    pg = alloc_domheap_page(d, MEMF_no_refcount);
+
+    if ( !pg )
+        return -ENOMEM;
+
+    v->stats.va = __map_domain_page_global(pg);
+    if ( !v->stats.va )
+        return -ENOMEM;
+
+    v->stats.pg = pg;
+    clear_page(v->stats.va);
+    return 0;
+}
+
 static void vmtrace_free_buffer(struct vcpu *v)
 {
     const struct domain *d = v->domain;
@@ -203,6 +239,9 @@  static int vmtrace_alloc_buffer(struct vcpu *v)
  */
 static int vcpu_teardown(struct vcpu *v)
 {
+
+    stats_free_buffer(v);
+
     vmtrace_free_buffer(v);
 
     return 0;
@@ -269,6 +308,9 @@  struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
     if ( vmtrace_alloc_buffer(v) != 0 )
         goto fail_wq;
 
+    if ( stats_alloc_buffer(v) != 0 )
+        goto fail_wq;
+
     if ( arch_vcpu_create(v) != 0 )
         goto fail_sched;
 
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 297b98a562..39de6d9d05 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1099,6 +1099,10 @@  static unsigned int resource_max_frames(const struct domain *d,
     case XENMEM_resource_vmtrace_buf:
         return d->vmtrace_size >> PAGE_SHIFT;
 
+    // WIP: to figure out the correct size of the resource
+    case XENMEM_resource_stats_table:
+        return 1;
+
     default:
         return -EOPNOTSUPP;
     }
@@ -1162,6 +1166,28 @@  static int acquire_vmtrace_buf(
     return nr_frames;
 }
 
+static int acquire_stats_table(struct domain *d,
+                                unsigned int id,
+                                unsigned int frame,
+                                unsigned int nr_frames,
+                                xen_pfn_t mfn_list[])
+{
+    const struct vcpu *v = domain_vcpu(d, id);
+    mfn_t mfn;
+
+    if ( !v )
+        return -ENOENT;
+
+    if ( !v->stats.pg )
+        return -EINVAL;
+
+    mfn = page_to_mfn(v->stats.pg);
+    mfn_list[0] = mfn_x(mfn);
+
+    printk("acquire_perf_table: id: %d, nr_frames: %d, %p, domainid: %d\n", id, nr_frames, v->stats.pg, d->domain_id);
+    return 1;
+}
+
 /*
  * Returns -errno on error, or positive in the range [1, nr_frames] on
  * success.  Returning less than nr_frames contitutes a request for a
@@ -1182,6 +1208,9 @@  static int _acquire_resource(
     case XENMEM_resource_vmtrace_buf:
         return acquire_vmtrace_buf(d, id, frame, nr_frames, mfn_list);
 
+    case XENMEM_resource_stats_table:
+        return acquire_stats_table(d, id, frame, nr_frames, mfn_list);
+
     default:
         return -EOPNOTSUPP;
     }
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 8f4b1ca10d..2a8b534977 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -264,6 +264,7 @@  static inline void vcpu_runstate_change(
 {
     s_time_t delta;
     struct sched_unit *unit = v->sched_unit;
+    uint64_t * runstate;
 
     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
     if ( v->runstate.state == new_state )
@@ -287,6 +288,10 @@  static inline void vcpu_runstate_change(
     }
 
     v->runstate.state = new_state;
+
+    // WIP: use a different interface
+    runstate = (uint64_t*)v->stats.va;
+    memcpy(runstate, &v->runstate.time[0], sizeof(v->runstate.time[0]));
 }
 
 void sched_guest_idle(void (*idle) (void), unsigned int cpu)
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 50e73eef98..752fd0be0f 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -626,6 +626,7 @@  struct xen_mem_acquire_resource {
 #define XENMEM_resource_ioreq_server 0
 #define XENMEM_resource_grant_table 1
 #define XENMEM_resource_vmtrace_buf 2
+#define XENMEM_resource_stats_table 3
 
     /*
      * IN - a type-specific resource identifier, which must be zero
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5485d08afb..bc99adea7e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -284,6 +284,11 @@  struct vcpu
         struct page_info *pg; /* One contiguous allocation of d->vmtrace_size */
     } vmtrace;
 
+    struct {
+        struct page_info *pg;
+        void * va;
+    } stats;
+
     struct arch_vcpu arch;
 
 #ifdef CONFIG_IOREQ_SERVER