[2/2] xen/arm: Support runstate crossing pages

Message ID b4843bd234d4ece4f843bc636071106746abb3b5.1591806713.git.bertrand.marquis@arm.com (mailing list archive)
State Superseded
Series xen/arm: Convert runstate address during hypcall

Commit Message

Bertrand Marquis June 11, 2020, 11:58 a.m. UTC
Add support for registering a runstate area which crosses a page boundary.
The code stores references to up to 2 pages during the hypercall.
During a context switch, the code computes where state_entry_time
lives and splits the memcpy in 2 parts when required.

Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>
---
 xen/arch/arm/domain.c        | 101 +++++++++++++++++++++++++----------
 xen/include/asm-arm/domain.h |   5 +-
 2 files changed, 76 insertions(+), 30 deletions(-)
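
For illustration, a minimal sketch of the two-part copy described above
(the helper name is hypothetical and error handling is omitted; the real
code is in update_runstate_area() in the patch below):

    /* Copy 'runstate' to a guest area starting at 'offset' in page 0,
       spilling into page 1 when the structure straddles the boundary. */
    static void copy_runstate(void *p[2], unsigned int offset,
                              const struct vcpu_runstate_info *runstate)
    {
        size_t first = PAGE_SIZE - offset;   /* bytes that fit in page 0 */

        if ( p[1] && first < sizeof(*runstate) )
        {
            memcpy(p[0] + offset, runstate, first);
            memcpy(p[1], (const char *)runstate + first,
                   sizeof(*runstate) - first);
        }
        else
            memcpy(p[0] + offset, runstate, sizeof(*runstate));
    }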

Comments

Stefano Stabellini June 12, 2020, 1:10 a.m. UTC | #1
On Thu, 11 Jun 2020, Bertrand Marquis wrote:
> Add support for registering a runstate area which crosses a page boundary.
> The code stores references to up to 2 pages during the hypercall.
> During a context switch, the code computes where state_entry_time
> lives and splits the memcpy in 2 parts when required.
> 
> Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>

Clearly a lot of effort went into this patch, thank you Bertrand.

The change is complex for the feature it adds. I wonder if we could use
ioremap (or maybe something similar but using a different virtual address
space) to simplify it. Julien, do you have any good ideas?

Otherwise I don't think there is much we can do to make this patch
simpler.

As ugly as it is (sorry Bertrand, it is not your fault!), the patch looks
correct.


> ---
>  xen/arch/arm/domain.c        | 101 +++++++++++++++++++++++++----------
>  xen/include/asm-arm/domain.h |   5 +-
>  2 files changed, 76 insertions(+), 30 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 739059234f..d847cb00f2 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -280,11 +280,16 @@ void arch_cleanup_runstate_guest(struct vcpu *v)
>  {
>      spin_lock(&v->arch.runstate_guest.lock);
>  
> -    /* cleanup previous page if any */
> -    if ( v->arch.runstate_guest.page )
> +    /* cleanup previous pages if any */
> +    if ( v->arch.runstate_guest.page[0] )
>      {
> -        put_page_and_type(v->arch.runstate_guest.page);
> -        v->arch.runstate_guest.page = NULL;
> +        put_page_and_type(v->arch.runstate_guest.page[0]);
> +        v->arch.runstate_guest.page[0] = NULL;
> +        if ( v->arch.runstate_guest.page[1] )
> +        {
> +            put_page_and_type(v->arch.runstate_guest.page[1]);
> +            v->arch.runstate_guest.page[1] = NULL;
> +        }
>          v->arch.runstate_guest.offset = 0;
>      }
>  
> @@ -294,26 +299,25 @@ void arch_cleanup_runstate_guest(struct vcpu *v)
>  int arch_setup_runstate_guest(struct vcpu *v,
>                              struct vcpu_register_runstate_memory_area area)
>  {
> -    struct page_info *page;
> +    struct page_info *page[2] = {NULL,NULL};
>      unsigned offset;
>  
>      spin_lock(&v->arch.runstate_guest.lock);
>  
> -    /* cleanup previous page if any */
> -    if ( v->arch.runstate_guest.page )
> +    /* cleanup previous pages if any */
> +    if ( v->arch.runstate_guest.page[0] )
>      {
> -        put_page_and_type(v->arch.runstate_guest.page);
> -        v->arch.runstate_guest.page = NULL;
> +        put_page_and_type(v->arch.runstate_guest.page[0]);
> +        v->arch.runstate_guest.page[0] = NULL;
> +        if ( v->arch.runstate_guest.page[1] )
> +        {
> +            put_page_and_type(v->arch.runstate_guest.page[1]);
> +            v->arch.runstate_guest.page[1] = NULL;
> +        }
>          v->arch.runstate_guest.offset = 0;
>      }
>  
>      offset = ((vaddr_t)area.addr.v) & ~PAGE_MASK;
> -    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
> -    {
> -        spin_unlock(&v->arch.runstate_guest.lock);
> -        gprintk(XENLOG_DEBUG, "Runstate is crossing pages\n");
> -        return -EINVAL;
> -    }
>  
>      /* provided address must be aligned to a 64bit */
>      if ( offset % alignof(struct vcpu_runstate_info) )
> @@ -323,15 +327,30 @@ int arch_setup_runstate_guest(struct vcpu *v,
>          return -EINVAL;
>      }
>  
> -    page = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
> -    if ( !page )
> +    page[0] = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
> +    if ( !page[0] )
>      {
>          spin_unlock(&v->arch.runstate_guest.lock);
>          gprintk(XENLOG_DEBUG, "Runstate pointer is not mapped\n");
>          return -EINVAL;
>      }
>  
> -    v->arch.runstate_guest.page = page;
> +    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
> +    {
> +        /* guest area is crossing pages */
> +        page[1] = get_page_from_gva(v, (vaddr_t)area.addr.v + PAGE_SIZE,
> +                                        GV2M_WRITE);
> +        if ( !page[1] )
> +        {
> +            put_page_and_type(v->arch.runstate_guest.page[0]);
> +            spin_unlock(&v->arch.runstate_guest.lock);
> +            gprintk(XENLOG_DEBUG, "Runstate pointer is not fully mapped\n");
> +            return -EINVAL;
> +        }
> +    }
> +
> +    v->arch.runstate_guest.page[0] = page[0];
> +    v->arch.runstate_guest.page[1] = page[1];
>      v->arch.runstate_guest.offset = offset;
>  
>      spin_unlock(&v->arch.runstate_guest.lock);
> @@ -343,34 +362,60 @@ int arch_setup_runstate_guest(struct vcpu *v,
>  /* Update per-VCPU guest runstate shared memory area (if registered). */
>  static void update_runstate_area(struct vcpu *v)
>  {
> -    struct vcpu_runstate_info *guest_runstate;
> -    void *p;
> +    void *p[2];
> +    unsigned offset, time_offset;
> +    uint64_t *state_entry_time;
>  
>      spin_lock(&v->arch.runstate_guest.lock);
>  
> -    if ( v->arch.runstate_guest.page )
> +    if ( v->arch.runstate_guest.page[0] )
>      {
> -        p = __map_domain_page(v->arch.runstate_guest.page);
> -        guest_runstate = p + v->arch.runstate_guest.offset;
> +        p[0] = __map_domain_page(v->arch.runstate_guest.page[0]);
> +        if ( v->arch.runstate_guest.page[1] )
> +            p[1] = __map_domain_page(v->arch.runstate_guest.page[1]);
> +        else
> +            p[1] = NULL;
> +        offset = v->arch.runstate_guest.offset;
>  
>          if ( VM_ASSIST(v->domain, runstate_update_flag) )
>          {
> +            time_offset = offset +
> +                    offsetof(struct vcpu_runstate_info, state_entry_time);
> +
> +            if ( time_offset < PAGE_SIZE )
> +                state_entry_time = p[0] + time_offset;
> +            else
> +                state_entry_time = p[1] + (time_offset - PAGE_SIZE);
> +
>              v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> -            guest_runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
> +            *state_entry_time |= XEN_RUNSTATE_UPDATE;
>              smp_wmb();
>          }
> +        else
> +            state_entry_time = NULL;
> +
> +        if ( p[1] )
> +        {
> +            memcpy(p[0] + offset, &v->runstate, PAGE_SIZE - offset);
> +            memcpy(p[1], &v->runstate + (PAGE_SIZE - offset),
> +                    sizeof(v->runstate) - (PAGE_SIZE - offset));
> +        }
> +        else
> +            memcpy(p[0] + offset, &v->runstate, sizeof(v->runstate));
>  
> -        memcpy(guest_runstate, &v->runstate, sizeof(v->runstate));
> +        /* copy must be done before switching the bit */
>          smp_wmb();
>  
> -        if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +        if ( state_entry_time )
>          {
>              v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> -            guest_runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +            *state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>              smp_wmb();
>          }
>  
> -        unmap_domain_page(p);
> +        unmap_domain_page(p[0]);
> +        if ( p[1] )
> +            unmap_domain_page(p[1]);
>      }
>  
>      spin_unlock(&v->arch.runstate_guest.lock);
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 3a7f53e13d..61e32f1ed5 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -209,8 +209,9 @@ struct arch_vcpu
>  
>      /* runstate guest info */
>      struct {
> -        struct page_info *page;  /* guest page */
> -        unsigned         offset; /* offset in page */
> +        /* we need 2 pages in case the guest area is crossing pages */
> +        struct page_info *page[2];  /* guest pages */
> +        unsigned         offset; /* offset in first page */
>          spinlock_t       lock;   /* lock to access page */
>      } runstate_guest;
Julien Grall June 12, 2020, 11:37 a.m. UTC | #2
Hi,

On 12/06/2020 02:10, Stefano Stabellini wrote:
> On Thu, 11 Jun 2020, Bertrand Marquis wrote:
>> Add support for registering a runstate area which crosses a page boundary.
>> The code stores references to up to 2 pages during the hypercall.
>> During a context switch, the code computes where state_entry_time
>> lives and splits the memcpy in 2 parts when required.
>>
>> Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>
> 
> Clearly a lot of effort went into this patch, thank you Bertrand.
> 
> The change is complex for the feature it adds. I wonder if we could use
> ioremap (or maybe something similar but using a different virtual address
> space) to simplify it. Julien, do you have any good ideas?

ioremap() is for MMIO regions, so you would want to use vmap(). Note that 
the former is just a wrapper around the latter.

vmap() is probably a good start for now, but not ideal for the long term 
because the vmap() area is fairly small (768MB), and if we want to go 
towards a secret-free hypervisor on Arm, we would want to restrict the 
visibility of the mapping from the other CPUs.

I think it would be good to have some per-CPU/per-domain mapping to 
limit the waste of address space. But this will require deduplicating 
the page-tables for arm64 (I was actually looking at it over the 
past few weekends).

For the time being, I would suggest using vmap(). The memory is always 
direct-mapped on arm64, so it should make no difference security-wise.

Cheers,
Julien
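
As a rough illustration of the vmap() approach (a sketch only, assuming
the two pages were already obtained with get_page_from_gva(); the
'runstate' field is a hypothetical replacement for the page[]/offset
pair, and error handling is omitted):

    mfn_t mfn[2];
    unsigned int nr = page[1] ? 2 : 1;
    void *va;

    mfn[0] = page_to_mfn(page[0]);
    if ( page[1] )
        mfn[1] = page_to_mfn(page[1]);

    /* map the one or two pages into a contiguous virtual range */
    va = vmap(mfn, nr);
    if ( va )
        v->arch.runstate_guest.runstate = va + offset;

    /* on cleanup: vunmap(va), then put_page_and_type() on each page */

With the area contiguous in the hypervisor's address space, the context
switch path would shrink back to a single memcpy().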
Julien Grall June 12, 2020, 12:14 p.m. UTC | #3
Hi,

On 11/06/2020 12:58, Bertrand Marquis wrote:
> Add support for registering a runstate area which crosses a page boundary.

Well, this has always been supported, until your previous patch. In 
general, we try not to break things in the middle of a series so we can 
still bisect it.

I think this patch can be simplified a lot by mapping the two pages 
contiguously (see my previous answer). With that, it would be feasible to 
fold this patch into #1.

> The code stores references to up to 2 pages during the hypercall.
> During a context switch, the code computes where state_entry_time
> lives and splits the memcpy in 2 parts when required.
> 
> Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>
> ---
>   xen/arch/arm/domain.c        | 101 +++++++++++++++++++++++++----------
>   xen/include/asm-arm/domain.h |   5 +-
>   2 files changed, 76 insertions(+), 30 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 739059234f..d847cb00f2 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -280,11 +280,16 @@ void arch_cleanup_runstate_guest(struct vcpu *v)
>   {
>       spin_lock(&v->arch.runstate_guest.lock);
>   
> -    /* cleanup previous page if any */
> -    if ( v->arch.runstate_guest.page )
> +    /* cleanup previous pages if any */
> +    if ( v->arch.runstate_guest.page[0] )
>       {
> -        put_page_and_type(v->arch.runstate_guest.page);
> -        v->arch.runstate_guest.page = NULL;
> +        put_page_and_type(v->arch.runstate_guest.page[0]);
> +        v->arch.runstate_guest.page[0] = NULL;
> +        if ( v->arch.runstate_guest.page[1] )
> +        {
> +            put_page_and_type(v->arch.runstate_guest.page[1]);
> +            v->arch.runstate_guest.page[1] = NULL;
> +        }

I think this can be moved outside of the first if.

>           v->arch.runstate_guest.offset = 0;
>       }
>   
> @@ -294,26 +299,25 @@ void arch_cleanup_runstate_guest(struct vcpu *v)
>   int arch_setup_runstate_guest(struct vcpu *v,
>                               struct vcpu_register_runstate_memory_area area)
>   {
> -    struct page_info *page;
> +    struct page_info *page[2] = {NULL,NULL};

I don't think you need the temporary variable. You can directly update 
page[0] as it is protected by the lock. The nice benefit is that you could 
take advantage of a common cleanup helper and reduce the complexity 
of the code.
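
One possible shape for such a helper (hypothetical name; to be called
with runstate_guest.lock held):

    static void cleanup_runstate_vcpu_locked(struct vcpu *v)
    {
        unsigned int i;

        for ( i = 0; i < ARRAY_SIZE(v->arch.runstate_guest.page); i++ )
        {
            if ( v->arch.runstate_guest.page[i] )
            {
                put_page_and_type(v->arch.runstate_guest.page[i]);
                v->arch.runstate_guest.page[i] = NULL;
            }
        }
        v->arch.runstate_guest.offset = 0;
    }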

>       unsigned offset;
>   
>       spin_lock(&v->arch.runstate_guest.lock);
>   
> -    /* cleanup previous page if any */
> -    if ( v->arch.runstate_guest.page )
> +    /* cleanup previous pages if any */
> +    if ( v->arch.runstate_guest.page[0] )
>       {
> -        put_page_and_type(v->arch.runstate_guest.page);
> -        v->arch.runstate_guest.page = NULL;
> +        put_page_and_type(v->arch.runstate_guest.page[0]);
> +        v->arch.runstate_guest.page[0] = NULL;
> +        if ( v->arch.runstate_guest.page[1] )
> +        {
> +            put_page_and_type(v->arch.runstate_guest.page[1]);
> +            v->arch.runstate_guest.page[1] = NULL;
> +        }
>           v->arch.runstate_guest.offset = 0;
>       }
>   
>       offset = ((vaddr_t)area.addr.v) & ~PAGE_MASK;
> -    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
> -    {
> -        spin_unlock(&v->arch.runstate_guest.lock);
> -        gprintk(XENLOG_DEBUG, "Runstate is crossing pages\n");
> -        return -EINVAL;
> -    }
>   
>       /* provided address must be aligned to a 64bit */
>       if ( offset % alignof(struct vcpu_runstate_info) )
> @@ -323,15 +327,30 @@ int arch_setup_runstate_guest(struct vcpu *v,
>           return -EINVAL;
>       }
>   
> -    page = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
> -    if ( !page )
> +    page[0] = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
> +    if ( !page[0] )
>       {
>           spin_unlock(&v->arch.runstate_guest.lock);
>           gprintk(XENLOG_DEBUG, "Runstate pointer is not mapped\n");
>           return -EINVAL;
>       }
>   
> -    v->arch.runstate_guest.page = page;
> +    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
> +    {
> +        /* guest area is crossing pages */
> +        page[1] = get_page_from_gva(v, (vaddr_t)area.addr.v + PAGE_SIZE,
> +                                        GV2M_WRITE);
> +        if ( !page[1] )
> +        {
> +            put_page_and_type(v->arch.runstate_guest.page[0]);

v->arch.runstate_guest.page[0] would be NULL as you set it afterwards. 
So you want to set v->arch.runstate_guest.page[0] beforehand.

Cheers,
Julien
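
For reference, a sketch of the corrected error path, releasing the
reference through the local variable (which is what was actually taken)
rather than the still-NULL field:

    if ( !page[1] )
    {
        put_page_and_type(page[0]);
        spin_unlock(&v->arch.runstate_guest.lock);
        gprintk(XENLOG_DEBUG, "Runstate pointer is not fully mapped\n");
        return -EINVAL;
    }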
Bertrand Marquis June 12, 2020, 4:13 p.m. UTC | #4
Hi,

> On 12 Jun 2020, at 13:14, Julien Grall <julien@xen.org> wrote:
> 
> Hi,
> 
> On 11/06/2020 12:58, Bertrand Marquis wrote:
>> Add support for registering a runstate area which crosses a page boundary.
> 
> Well, this has always been supported, until your previous patch. In general, we try not to break things in the middle of a series so we can still bisect it.
> 
> I think this patch can be simplified a lot by mapping the two pages contiguously (see my previous answer). With that, it would be feasible to fold this patch into #1.

I will dig into switching to something using vmap.
Worst case we would consume 8k * 128 vCPUs, which is still 1M, but that should be acceptable as it could greatly simplify the code.

> 
>> The code stores references to up to 2 pages during the hypercall.
>> During a context switch, the code computes where state_entry_time
>> lives and splits the memcpy in 2 parts when required.
>> Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>
>> ---
>>  xen/arch/arm/domain.c        | 101 +++++++++++++++++++++++++----------
>>  xen/include/asm-arm/domain.h |   5 +-
>>  2 files changed, 76 insertions(+), 30 deletions(-)
>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
>> index 739059234f..d847cb00f2 100644
>> --- a/xen/arch/arm/domain.c
>> +++ b/xen/arch/arm/domain.c
>> @@ -280,11 +280,16 @@ void arch_cleanup_runstate_guest(struct vcpu *v)
>>  {
>>      spin_lock(&v->arch.runstate_guest.lock);
>>  -    /* cleanup previous page if any */
>> -    if ( v->arch.runstate_guest.page )
>> +    /* cleanup previous pages if any */
>> +    if ( v->arch.runstate_guest.page[0] )
>>      {
>> -        put_page_and_type(v->arch.runstate_guest.page);
>> -        v->arch.runstate_guest.page = NULL;
>> +        put_page_and_type(v->arch.runstate_guest.page[0]);
>> +        v->arch.runstate_guest.page[0] = NULL;
>> +        if ( v->arch.runstate_guest.page[1] )
>> +        {
>> +            put_page_and_type(v->arch.runstate_guest.page[1]);
>> +            v->arch.runstate_guest.page[1] = NULL;
>> +        }
> 
> I think this can be moved outside of the first if.
Agreed.

> 
>>          v->arch.runstate_guest.offset = 0;
>>      }
>>  @@ -294,26 +299,25 @@ void arch_cleanup_runstate_guest(struct vcpu *v)
>>  int arch_setup_runstate_guest(struct vcpu *v,
>>                              struct vcpu_register_runstate_memory_area area)
>>  {
>> -    struct page_info *page;
>> +    struct page_info *page[2] = {NULL,NULL};
> 
> I don't think you need the temporary variable. You can directly update page[0] as it is protected by the lock. The nice benefit is that you could take advantage of a common cleanup helper and reduce the complexity of the code.

That would make sense, yes (and would remove the unlock everywhere).
I will try that, thanks for the hint.

> 
>>      unsigned offset;
>>        spin_lock(&v->arch.runstate_guest.lock);
>>  -    /* cleanup previous page if any */
>> -    if ( v->arch.runstate_guest.page )
>> +    /* cleanup previous pages if any */
>> +    if ( v->arch.runstate_guest.page[0] )
>>      {
>> -        put_page_and_type(v->arch.runstate_guest.page);
>> -        v->arch.runstate_guest.page = NULL;
>> +        put_page_and_type(v->arch.runstate_guest.page[0]);
>> +        v->arch.runstate_guest.page[0] = NULL;
>> +        if ( v->arch.runstate_guest.page[1] )
>> +        {
>> +            put_page_and_type(v->arch.runstate_guest.page[1]);
>> +            v->arch.runstate_guest.page[1] = NULL;
>> +        }
>>          v->arch.runstate_guest.offset = 0;
>>      }
>>        offset = ((vaddr_t)area.addr.v) & ~PAGE_MASK;
>> -    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
>> -    {
>> -        spin_unlock(&v->arch.runstate_guest.lock);
>> -        gprintk(XENLOG_DEBUG, "Runstate is crossing pages\n");
>> -        return -EINVAL;
>> -    }
>>        /* provided address must be aligned to a 64bit */
>>      if ( offset % alignof(struct vcpu_runstate_info) )
>> @@ -323,15 +327,30 @@ int arch_setup_runstate_guest(struct vcpu *v,
>>          return -EINVAL;
>>      }
>>  -    page = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
>> -    if ( !page )
>> +    page[0] = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
>> +    if ( !page[0] )
>>      {
>>          spin_unlock(&v->arch.runstate_guest.lock);
>>          gprintk(XENLOG_DEBUG, "Runstate pointer is not mapped\n");
>>          return -EINVAL;
>>      }
>>  -    v->arch.runstate_guest.page = page;
>> +    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
>> +    {
>> +        /* guest area is crossing pages */
>> +        page[1] = get_page_from_gva(v, (vaddr_t)area.addr.v + PAGE_SIZE,
>> +                                        GV2M_WRITE);
>> +        if ( !page[1] )
>> +        {
>> +            put_page_and_type(v->arch.runstate_guest.page[0]);
> v->arch.runstate_guest.page[0] would be NULL as you set it afterwards. So you want to set v->arch.runstate_guest.page[0] beforehand.

Nice catch, should have been page[0] alone.

Thanks
Bertrand

Patch

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 739059234f..d847cb00f2 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -280,11 +280,16 @@  void arch_cleanup_runstate_guest(struct vcpu *v)
 {
     spin_lock(&v->arch.runstate_guest.lock);
 
-    /* cleanup previous page if any */
-    if ( v->arch.runstate_guest.page )
+    /* cleanup previous pages if any */
+    if ( v->arch.runstate_guest.page[0] )
     {
-        put_page_and_type(v->arch.runstate_guest.page);
-        v->arch.runstate_guest.page = NULL;
+        put_page_and_type(v->arch.runstate_guest.page[0]);
+        v->arch.runstate_guest.page[0] = NULL;
+        if ( v->arch.runstate_guest.page[1] )
+        {
+            put_page_and_type(v->arch.runstate_guest.page[1]);
+            v->arch.runstate_guest.page[1] = NULL;
+        }
         v->arch.runstate_guest.offset = 0;
     }
 
@@ -294,26 +299,25 @@  void arch_cleanup_runstate_guest(struct vcpu *v)
 int arch_setup_runstate_guest(struct vcpu *v,
                             struct vcpu_register_runstate_memory_area area)
 {
-    struct page_info *page;
+    struct page_info *page[2] = {NULL,NULL};
     unsigned offset;
 
     spin_lock(&v->arch.runstate_guest.lock);
 
-    /* cleanup previous page if any */
-    if ( v->arch.runstate_guest.page )
+    /* cleanup previous pages if any */
+    if ( v->arch.runstate_guest.page[0] )
     {
-        put_page_and_type(v->arch.runstate_guest.page);
-        v->arch.runstate_guest.page = NULL;
+        put_page_and_type(v->arch.runstate_guest.page[0]);
+        v->arch.runstate_guest.page[0] = NULL;
+        if ( v->arch.runstate_guest.page[1] )
+        {
+            put_page_and_type(v->arch.runstate_guest.page[1]);
+            v->arch.runstate_guest.page[1] = NULL;
+        }
         v->arch.runstate_guest.offset = 0;
     }
 
     offset = ((vaddr_t)area.addr.v) & ~PAGE_MASK;
-    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
-    {
-        spin_unlock(&v->arch.runstate_guest.lock);
-        gprintk(XENLOG_DEBUG, "Runstate is crossing pages\n");
-        return -EINVAL;
-    }
 
     /* provided address must be aligned to a 64bit */
     if ( offset % alignof(struct vcpu_runstate_info) )
@@ -323,15 +327,30 @@  int arch_setup_runstate_guest(struct vcpu *v,
         return -EINVAL;
     }
 
-    page = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
-    if ( !page )
+    page[0] = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
+    if ( !page[0] )
     {
         spin_unlock(&v->arch.runstate_guest.lock);
         gprintk(XENLOG_DEBUG, "Runstate pointer is not mapped\n");
         return -EINVAL;
     }
 
-    v->arch.runstate_guest.page = page;
+    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
+    {
+        /* guest area is crossing pages */
+        page[1] = get_page_from_gva(v, (vaddr_t)area.addr.v + PAGE_SIZE,
+                                        GV2M_WRITE);
+        if ( !page[1] )
+        {
+            put_page_and_type(v->arch.runstate_guest.page[0]);
+            spin_unlock(&v->arch.runstate_guest.lock);
+            gprintk(XENLOG_DEBUG, "Runstate pointer is not fully mapped\n");
+            return -EINVAL;
+        }
+    }
+
+    v->arch.runstate_guest.page[0] = page[0];
+    v->arch.runstate_guest.page[1] = page[1];
     v->arch.runstate_guest.offset = offset;
 
     spin_unlock(&v->arch.runstate_guest.lock);
@@ -343,34 +362,60 @@  int arch_setup_runstate_guest(struct vcpu *v,
 /* Update per-VCPU guest runstate shared memory area (if registered). */
 static void update_runstate_area(struct vcpu *v)
 {
-    struct vcpu_runstate_info *guest_runstate;
-    void *p;
+    void *p[2];
+    unsigned offset, time_offset;
+    uint64_t *state_entry_time;
 
     spin_lock(&v->arch.runstate_guest.lock);
 
-    if ( v->arch.runstate_guest.page )
+    if ( v->arch.runstate_guest.page[0] )
     {
-        p = __map_domain_page(v->arch.runstate_guest.page);
-        guest_runstate = p + v->arch.runstate_guest.offset;
+        p[0] = __map_domain_page(v->arch.runstate_guest.page[0]);
+        if ( v->arch.runstate_guest.page[1] )
+            p[1] = __map_domain_page(v->arch.runstate_guest.page[1]);
+        else
+            p[1] = NULL;
+        offset = v->arch.runstate_guest.offset;
 
         if ( VM_ASSIST(v->domain, runstate_update_flag) )
         {
+            time_offset = offset +
+                    offsetof(struct vcpu_runstate_info, state_entry_time);
+
+            if ( time_offset < PAGE_SIZE )
+                state_entry_time = p[0] + time_offset;
+            else
+                state_entry_time = p[1] + (time_offset - PAGE_SIZE);
+
             v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
-            guest_runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+            *state_entry_time |= XEN_RUNSTATE_UPDATE;
             smp_wmb();
         }
+        else
+            state_entry_time = NULL;
+
+        if ( p[1] )
+        {
+            memcpy(p[0] + offset, &v->runstate, PAGE_SIZE - offset);
+            memcpy(p[1], &v->runstate + (PAGE_SIZE - offset),
+                    sizeof(v->runstate) - (PAGE_SIZE - offset));
+        }
+        else
+            memcpy(p[0] + offset, &v->runstate, sizeof(v->runstate));
 
-        memcpy(guest_runstate, &v->runstate, sizeof(v->runstate));
+        /* copy must be done before switching the bit */
         smp_wmb();
 
-        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        if ( state_entry_time )
         {
             v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
-            guest_runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+            *state_entry_time &= ~XEN_RUNSTATE_UPDATE;
             smp_wmb();
         }
 
-        unmap_domain_page(p);
+        unmap_domain_page(p[0]);
+        if ( p[1] )
+            unmap_domain_page(p[1]);
     }
 
     spin_unlock(&v->arch.runstate_guest.lock);
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 3a7f53e13d..61e32f1ed5 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -209,8 +209,9 @@  struct arch_vcpu
 
     /* runstate guest info */
     struct {
-        struct page_info *page;  /* guest page */
-        unsigned         offset; /* offset in page */
+        /* we need 2 pages in case the guest area is crossing pages */
+        struct page_info *page[2];  /* guest pages */
+        unsigned         offset; /* offset in first page */
         spinlock_t       lock;   /* lock to access page */
     } runstate_guest;