
common/vm_event: Initialize vm_event lists on domain creation

Message ID 1498470497-20595-1-git-send-email-rcojocaru@bitdefender.com (mailing list archive)
State New, archived

Commit Message

Razvan Cojocaru June 26, 2017, 9:48 a.m. UTC
Pending livepatch code wants to check if the vm_event wait queues
are active, and this is made harder by the fact that they were
previously only initialized some time after the domain was created,
in vm_event_enable(). This patch initializes the lists immediately
after xzalloc()ating the vm_event memory, in domain_create(), in
the newly added init_domain_vm_event() function.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/common/domain.c        |  5 ++---
 xen/common/vm_event.c      | 23 ++++++++++++++++++++---
 xen/include/xen/vm_event.h |  2 ++
 3 files changed, 24 insertions(+), 6 deletions(-)

Comments

Konrad Rzeszutek Wilk June 26, 2017, 11:39 a.m. UTC | #1
On June 26, 2017 5:48:17 AM EDT, Razvan Cojocaru <rcojocaru@bitdefender.com> wrote:
>Pending livepatch code wants to check if the vm_event wait queues
>are active, and this is made harder by the fact that they were


Hmm, it wants to? Is there a missing patch that hasn't been posted for this?

If you mean to post this _before_ the other one please adjust the commit description.

>previously only initialized some time after the domain was created,
>in vm_event_enable(). This patch initializes the lists immediately
>after xzalloc()ating the vm_event memory, in domain_create(), in
>the newly added init_domain_vm_event() function.
>
>Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
>---
> xen/common/domain.c        |  5 ++---
> xen/common/vm_event.c      | 23 ++++++++++++++++++++---
> xen/include/xen/vm_event.h |  2 ++
> 3 files changed, 24 insertions(+), 6 deletions(-)
>
>diff --git a/xen/common/domain.c b/xen/common/domain.c
>index b22aacc..89a8f1d 100644
>--- a/xen/common/domain.c
>+++ b/xen/common/domain.c
>@@ -362,11 +362,10 @@ struct domain *domain_create(domid_t domid,
>unsigned int domcr_flags,
> 
>         poolid = 0;
> 
>-        err = -ENOMEM;
>-        d->vm_event = xzalloc(struct vm_event_per_domain);
>-        if ( !d->vm_event )
>+        if ( (err = init_domain_vm_event(d)) != 0 )
>             goto fail;
> 
>+        err = -ENOMEM;
>         d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
>         if ( !d->pbuf )
>             goto fail;
>diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
>index 9291db6..294ddd7 100644
>--- a/xen/common/vm_event.c
>+++ b/xen/common/vm_event.c
>@@ -39,6 +39,26 @@
> #define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
> #define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
> 
>+int init_domain_vm_event(struct domain *d)
>+{
>+    d->vm_event = xzalloc(struct vm_event_per_domain);
>+
>+    if ( !d->vm_event )
>+        return -ENOMEM;
>+
>+#ifdef CONFIG_HAS_MEM_PAGING
>+    init_waitqueue_head(&d->vm_event->paging.wq);
>+#endif
>+
>+    init_waitqueue_head(&d->vm_event->monitor.wq);
>+
>+#ifdef CONFIG_HAS_MEM_SHARING
>+    init_waitqueue_head(&d->vm_event->share.wq);
>+#endif
>+
>+    return 0;
>+}
>+
> static int vm_event_enable(
>     struct domain *d,
>     xen_domctl_vm_event_op_t *vec,
>@@ -93,9 +113,6 @@ static int vm_event_enable(
>     /* Save the pause flag for this particular ring. */
>     ved->pause_flag = pause_flag;
> 
>-    /* Initialize the last-chance wait queue. */
>-    init_waitqueue_head(&ved->wq);
>-
>     vm_event_ring_unlock(ved);
>     return 0;
> 
>diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
>index 2fb3951..482243e 100644
>--- a/xen/include/xen/vm_event.h
>+++ b/xen/include/xen/vm_event.h
>@@ -80,6 +80,8 @@ void vm_event_set_registers(struct vcpu *v,
>vm_event_response_t *rsp);
> 
> void vm_event_monitor_next_interrupt(struct vcpu *v);
> 
>+int init_domain_vm_event(struct domain *d);
>+
> #endif /* __VM_EVENT_H__ */
> 
> /*
>-- 
>1.9.1
>
>


Thanks!
Razvan Cojocaru June 26, 2017, 11:44 a.m. UTC | #2
On 06/26/2017 02:39 PM, Konrad Rzeszutek Wilk wrote:
> On June 26, 2017 5:48:17 AM EDT, Razvan Cojocaru <rcojocaru@bitdefender.com> wrote:
>> Pending livepatch code wants to check if the vm_event wait queues
>> are active, and this is made harder by the fact that they were
> 
> 
> Hmm, it wants to? Is there a missing patch that hasn't been posted for this?
> 
> If you mean to post this _before_ the other one please adjust the commit description.

Yes, I think that patch hasn't been posted yet (Andrew is / was working
on it). I thought "pending" was appropriate in the description, but I
can change the text (after seeing if there are more comments on the code).


Thanks,
Razvan
Andrew Cooper June 26, 2017, 12:14 p.m. UTC | #3
On 26/06/17 12:39, Konrad Rzeszutek Wilk wrote:
> On June 26, 2017 5:48:17 AM EDT, Razvan Cojocaru <rcojocaru@bitdefender.com> wrote:
>> Pending livepatch code wants to check if the vm_event wait queues
>> are active, and this is made harder by the fact that they were
>
> Hmm, it wants to? Is there a missing patch that hasn't been posted for this?
>
> If you mean to post this _before_ the other one please adjust the commit description.

The patch I was intending is only for XenServer.  It's a stopgap
solution to prevent livepatching from crashing the hypervisor if there
are active waitqueues.

The long-term solution is to get multi-page rings working (so there are
guaranteed to be sufficient slots not to block) and drop waitqueues
entirely.  In the short term, however, I suppose a variant of it might
be useful, depending on how supported vm_event actually is.


Razvan: I'd reword this to not mention livepatching.  Simply having
list_empty() working is a good enough reason alone.
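
As an illustration only (not something in this patch, and assuming
Xen's struct waitqueue_head keeps its waiters on a ->list member), the
kind of check that early initialisation makes safe is roughly:

/*
 * Illustrative sketch only: safe to call at any time once the wait
 * queue heads are initialised at domain creation.
 */
static bool vm_event_has_waiters(const struct domain *d)
{
    return d->vm_event &&
           ( !list_empty(&d->vm_event->monitor.wq.list)
#ifdef CONFIG_HAS_MEM_PAGING
             || !list_empty(&d->vm_event->paging.wq.list)
#endif
#ifdef CONFIG_HAS_MEM_SHARING
             || !list_empty(&d->vm_event->share.wq.list)
#endif
           );
}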

~Andrew
Razvan Cojocaru June 26, 2017, 12:17 p.m. UTC | #4
On 06/26/2017 03:14 PM, Andrew Cooper wrote:
> Razvan: I'd reword this to not mention livepatching.  Simply having
> list_empty() working is a good enough reason alone.

Fair enough, I'll change the patch description as soon as we hear from
Tamas, so that I might address as many comments as possible.


Thanks,
Razvan
Tamas K Lengyel June 26, 2017, 2:52 p.m. UTC | #5
On Mon, Jun 26, 2017 at 3:48 AM, Razvan Cojocaru
<rcojocaru@bitdefender.com> wrote:
> Pending livepatch code wants to check if the vm_event wait queues
> are active, and this is made harder by the fact that they were
> previously only initialized some time after the domain was created,
> in vm_event_enable(). This patch initializes the lists immediately
> after xzalloc()ating the vm_event memory, in domain_create(), in
> the newly added init_domain_vm_event() function.
>
> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
> ---
>  xen/common/domain.c        |  5 ++---
>  xen/common/vm_event.c      | 23 ++++++++++++++++++++---
>  xen/include/xen/vm_event.h |  2 ++
>  3 files changed, 24 insertions(+), 6 deletions(-)
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index b22aacc..89a8f1d 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -362,11 +362,10 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>
>          poolid = 0;
>
> -        err = -ENOMEM;
> -        d->vm_event = xzalloc(struct vm_event_per_domain);
> -        if ( !d->vm_event )
> +        if ( (err = init_domain_vm_event(d)) != 0 )
>              goto fail;
>
> +        err = -ENOMEM;
>          d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
>          if ( !d->pbuf )
>              goto fail;
> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> index 9291db6..294ddd7 100644
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -39,6 +39,26 @@
>  #define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
>  #define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
>
> +int init_domain_vm_event(struct domain *d)

We already have a vm_event_init_domain function, so the naming of this
one here is not a particularly good one. It also looks to me like
these two functions could simply be merged.

> +{
> +    d->vm_event = xzalloc(struct vm_event_per_domain);
> +
> +    if ( !d->vm_event )
> +        return -ENOMEM;
> +
> +#ifdef CONFIG_HAS_MEM_PAGING
> +    init_waitqueue_head(&d->vm_event->paging.wq);
> +#endif
> +
> +    init_waitqueue_head(&d->vm_event->monitor.wq);

Move this one up before the #ifdef block for MEM_PAGING.

> +
> +#ifdef CONFIG_HAS_MEM_SHARING
> +    init_waitqueue_head(&d->vm_event->share.wq);
> +#endif
> +
> +    return 0;
> +}
> +
>  static int vm_event_enable(
>      struct domain *d,
>      xen_domctl_vm_event_op_t *vec,
> @@ -93,9 +113,6 @@ static int vm_event_enable(
>      /* Save the pause flag for this particular ring. */
>      ved->pause_flag = pause_flag;
>
> -    /* Initialize the last-chance wait queue. */
> -    init_waitqueue_head(&ved->wq);
> -
>      vm_event_ring_unlock(ved);
>      return 0;
>
> diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
> index 2fb3951..482243e 100644
> --- a/xen/include/xen/vm_event.h
> +++ b/xen/include/xen/vm_event.h
> @@ -80,6 +80,8 @@ void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp);
>
>  void vm_event_monitor_next_interrupt(struct vcpu *v);
>
> +int init_domain_vm_event(struct domain *d);
> +
>  #endif /* __VM_EVENT_H__ */
>
>  /*
> --
> 1.9.1

Tamas
Andrew Cooper June 26, 2017, 3:09 p.m. UTC | #6
On 26/06/17 15:52, Tamas K Lengyel wrote:
> On Mon, Jun 26, 2017 at 3:48 AM, Razvan Cojocaru
> <rcojocaru@bitdefender.com> wrote:
>> Pending livepatch code wants to check if the vm_event wait queues
>> are active, and this is made harder by the fact that they were
>> previously only initialized some time after the domain was created,
>> in vm_event_enable(). This patch initializes the lists immediately
>> after xzalloc()ating the vm_event memory, in domain_create(), in
>> the newly added init_domain_vm_event() function.
>>
>> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
>> ---
>>  xen/common/domain.c        |  5 ++---
>>  xen/common/vm_event.c      | 23 ++++++++++++++++++++---
>>  xen/include/xen/vm_event.h |  2 ++
>>  3 files changed, 24 insertions(+), 6 deletions(-)
>>
>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>> index b22aacc..89a8f1d 100644
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -362,11 +362,10 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>>
>>          poolid = 0;
>>
>> -        err = -ENOMEM;
>> -        d->vm_event = xzalloc(struct vm_event_per_domain);
>> -        if ( !d->vm_event )
>> +        if ( (err = init_domain_vm_event(d)) != 0 )
>>              goto fail;
>>
>> +        err = -ENOMEM;
>>          d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
>>          if ( !d->pbuf )
>>              goto fail;
>> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
>> index 9291db6..294ddd7 100644
>> --- a/xen/common/vm_event.c
>> +++ b/xen/common/vm_event.c
>> @@ -39,6 +39,26 @@
>>  #define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
>>  #define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
>>
>> +int init_domain_vm_event(struct domain *d)
> We already have a vm_event_init_domain function, so the naming of this
> one here is not a particularly good one. It also looks to me like
> these two functions could simply be merged.

They shouldn't be merged.

The current vm_event_init_domain() should probably be
init_domain_arch_vm_event(), as it deals with constructing
d->arch.vm_event, whereas this function deals with d->vm_event.

There is also a difference in timing; vm_event_init_domain() is called
when vm_event is started on the domain, not when the domain is
constructed.  IMO, the two should happen at the same time to be
consistent.  I'm not fussed at which point, as it would be fine (in
principle) for d->vm_event to be NULL in most cases.

~Andrew
Tamas K Lengyel June 26, 2017, 4:06 p.m. UTC | #7
On Mon, Jun 26, 2017 at 9:09 AM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 26/06/17 15:52, Tamas K Lengyel wrote:
>> On Mon, Jun 26, 2017 at 3:48 AM, Razvan Cojocaru
>> <rcojocaru@bitdefender.com> wrote:
>>> Pending livepatch code wants to check if the vm_event wait queues
>>> are active, and this is made harder by the fact that they were
>>> previously only initialized some time after the domain was created,
>>> in vm_event_enable(). This patch initializes the lists immediately
>>> after xzalloc()ating the vm_event memory, in domain_create(), in
>>> the newly added init_domain_vm_event() function.
>>>
>>> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
>>> ---
>>>  xen/common/domain.c        |  5 ++---
>>>  xen/common/vm_event.c      | 23 ++++++++++++++++++++---
>>>  xen/include/xen/vm_event.h |  2 ++
>>>  3 files changed, 24 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>> index b22aacc..89a8f1d 100644
>>> --- a/xen/common/domain.c
>>> +++ b/xen/common/domain.c
>>> @@ -362,11 +362,10 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>>>
>>>          poolid = 0;
>>>
>>> -        err = -ENOMEM;
>>> -        d->vm_event = xzalloc(struct vm_event_per_domain);
>>> -        if ( !d->vm_event )
>>> +        if ( (err = init_domain_vm_event(d)) != 0 )
>>>              goto fail;
>>>
>>> +        err = -ENOMEM;
>>>          d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
>>>          if ( !d->pbuf )
>>>              goto fail;
>>> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
>>> index 9291db6..294ddd7 100644
>>> --- a/xen/common/vm_event.c
>>> +++ b/xen/common/vm_event.c
>>> @@ -39,6 +39,26 @@
>>>  #define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
>>>  #define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
>>>
>>> +int init_domain_vm_event(struct domain *d)
>> We already have a vm_event_init_domain function, so the naming of this
>> one here is not a particularly good one. It also looks to me like
>> these two functions could simply be merged.
>
> They shouldn't be merged.
>
> The current vm_event_init_domain() should probably be
> init_domain_arch_vm_event(), as it deals with constructing
> d->arch.vm_event, whereas this function deals with d->vm_event.

That would be fine for me; it's really the naming that I had a problem with.

Thanks,
Tamas
Jan Beulich June 27, 2017, 6:21 a.m. UTC | #8
>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>There is also a difference in timing; vm_event_init_domain() is called
>when vm_event is started on the domain, not when the domain is
>constructed.  IMO, the two should happen at the same time to be
>consistent.  I'm not fussed at which point, as it would be fine (in
>principle) for d->vm_event to be NULL in most cases.

I very much support that last aspect - there shouldn't be any memory
allocated here if the domain isn't going to make use of VM events.

Jan
Razvan Cojocaru June 27, 2017, 8:32 a.m. UTC | #9
On 06/27/2017 09:21 AM, Jan Beulich wrote:
>>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>> There is also a difference in timing; vm_event_init_domain() is called
>> when vm_event is started on the domain, not when the domain is
>> constructed.  IMO, the two should happen at the same time to be
>> consistent.  I'm not fussed at which point, as it would be fine (in
>> principle) for d->vm_event to be NULL in most cases.
> 
> I very much support that last aspect - there shouldn't be any memory
> allocated here if the domain isn't going to make use of VM events.

Unfortunately it's not as simple as that.

While I didn't write that code, it would seem that the data is
allocated at domain creation because it holds information about 3
different vm_event subsystems - monitor, paging, and memshare.

vm_event_init_domain() is then not called when vm_event in general is
started on the domain, but when a specific vm_event part is being
initialized (monitor usually, but could be paging, etc.).
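
(For reference, the structure in question looks roughly like this -
shape only, the exact layout in the tree may differ:)

/* Rough shape of the per-domain vm_event bookkeeping. */
struct vm_event_per_domain
{
    /* Memory sharing support. */
    struct vm_event_domain share;
    /* Memory paging support. */
    struct vm_event_domain paging;
    /* Monitor support. */
    struct vm_event_domain monitor;
};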


Thanks,
Razvan
Jan Beulich June 27, 2017, 11:26 a.m. UTC | #10
>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 10:32 AM >>>
>On 06/27/2017 09:21 AM, Jan Beulich wrote:
>>>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>>> There is also a difference in timing; vm_event_init_domain() is called
>>> when vm_event is started on the domain, not when the domain is
>>> constructed.  IMO, the two should happen at the same time to be
>>> consistent.  I'm not fussed at which point, as it would be fine (in
>>> principle) for d->vm_event to be NULL in most cases.
>> 
>> I very much support that last aspect - there shouldn't be any memory
>> allocated here if the domain isn't going to make use of VM events.
>
>Unfortunately it's not as simple as that.
>
>While I didn't write that code, it would seem that on domain creation
>that data is being allocated because it holds information about 3
>different vm_event subsystems - monitor, paging, and memshare.

But it can't be that difficult to make it work the suggested way?

Jan
Razvan Cojocaru June 27, 2017, 11:38 a.m. UTC | #11
On 06/27/2017 02:26 PM, Jan Beulich wrote:
>>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 10:32 AM >>>
>> On 06/27/2017 09:21 AM, Jan Beulich wrote:
>>>>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>>>> There is also a difference in timing; vm_event_init_domain() is called
>>>> when vm_event is started on the domain, not when the domain is
>>>> constructed.  IMO, the two should happen at the same time to be
>>>> consistent.  I'm not fussed at which point, as it would be fine (in
>>>> principle) for d->vm_event to be NULL in most cases.
>>>
>>> I very much support that last aspect - there shouldn't be any memory
>>> allocated here if the domain isn't going to make use of VM events.
>>
>> Unfortunately it's not as simple as that.
>>
>> While I didn't write that code, it would seem that on domain creation
>> that data is being allocated because it holds information about 3
>> different vm_event subsystems - monitor, paging, and memshare.
> 
> But it can't be that difficult to make it work the suggested way?

We can lazy-allocate that the first time any one of the three gets
initialized (monitor, paging, share), and update all the code that
checks d->vm_event->member to check that d->vm_event is not NULL before
that.
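
Roughly along these lines (a sketch of the idea only, untested, and
the helper name is made up):

/* Sketch only: allocate d->vm_event lazily, on first use. */
static int vm_event_alloc(struct domain *d)
{
    if ( d->vm_event )
        return 0;

    d->vm_event = xzalloc(struct vm_event_per_domain);

    return d->vm_event ? 0 : -ENOMEM;
}

vm_event_enable() and the other entry points would call this before
touching d->vm_event, and everything that currently dereferences
d->vm_event->member would first check d->vm_event for NULL.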

Any objections?


Thanks,
Razvan
Jan Beulich June 27, 2017, 11:45 a.m. UTC | #12
>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 1:38 PM >>>
>On 06/27/2017 02:26 PM, Jan Beulich wrote:
>>>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 10:32 AM >>>
>>> On 06/27/2017 09:21 AM, Jan Beulich wrote:
>>>>>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>>>>> There is also a difference in timing; vm_event_init_domain() is called
>>>>> when vm_event is started on the domain, not when the domain is
>>>>> constructed.  IMO, the two should happen at the same time to be
>>>>> consistent.  I'm not fussed at which point, as it would be fine (in
>>>>> principle) for d->vm_event to be NULL in most cases.
>>>>
>>>> I very much support that last aspect - there shouldn't be any memory
>>>> allocated here if the domain isn't going to make use of VM events.
>>>
>>> Unfortunately it's not as simple as that.
>>>
>>> While I didn't write that code, it would seem that on domain creation
>>> that data is being allocated because it holds information about 3
>>> different vm_event subsystems - monitor, paging, and memshare.
>> 
>> But it can't be that difficult to make it work the suggested way?
>
>We can lazy-allocate that the first time any one of the three gets
>initialized (monitor, paging, share), and update all the code that
>checks d->vm_event->member to check that d->vm_event is not NULL before
>that.
>
>Any objections?

None here.

Jan
Razvan Cojocaru June 27, 2017, 2:25 p.m. UTC | #13
On 06/27/2017 02:45 PM, Jan Beulich wrote:
>>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 1:38 PM >>>
>> On 06/27/2017 02:26 PM, Jan Beulich wrote:
>>>>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 10:32 AM >>>
>>>> On 06/27/2017 09:21 AM, Jan Beulich wrote:
>>>>>>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>>>>>> There is also a difference in timing; vm_event_init_domain() is called
>>>>>> when vm_event is started on the domain, not when the domain is
>>>>>> constructed.  IMO, the two should happen at the same time to be
>>>>>> consistent.  I'm not fussed at which point, as it would be fine (in
>>>>>> principle) for d->vm_event to be NULL in most cases.
>>>>>
>>>>> I very much support that last aspect - there shouldn't be any memory
>>>>> allocated here if the domain isn't going to make use of VM events.
>>>>
>>>> Unfortunately it's not as simple as that.
>>>>
>>>> While I didn't write that code, it would seem that on domain creation
>>>> that data is being allocated because it holds information about 3
>>>> different vm_event subsystems - monitor, paging, and memshare.
>>>
>>> But it can't be that difficult to make it work the suggested way?
>>
>> We can lazy-allocate that the first time any one of the three gets
>> initialized (monitor, paging, share), and update all the code that
>> checks d->vm_event->member to check that d->vm_event is not NULL before
>> that.
>>
>> Any objections?
> 
> None here.

That's actually much more involved than I thought: almost every vm_event
function assumes that for a created domain d->vm_event is not NULL, and
takes a struct vm_event_domain *ved parameter to differentiate between
monitor, mem paging and mem sharing. So while this technically is not a
hard thing to fix at all, it would touch a lot of ARM and x86 code and
be quite an overhaul of vm_event.

My immediate reaction was to add small helper functions such as:

static struct vm_event_domain *vm_event_get_ved(
    struct domain *d,
    enum vm_event_domain_type domain_type)
{
    if ( !d->vm_event )
        return NULL;

    switch ( domain_type )
    {
#ifdef CONFIG_HAS_MEM_PAGING
    case VM_EVENT_DOMAIN_TYPE_MEM_PAGING:
        return &d->vm_event->paging;
#endif
    case VM_EVENT_DOMAIN_TYPE_MONITOR:
        return &d->vm_event->monitor;
#ifdef CONFIG_HAS_MEM_SHARING
    case VM_EVENT_DOMAIN_TYPE_MEM_SHARING:
        return &d->vm_event->share;
#endif
    default:
        return NULL;
    }
}

and try to get all places to use that line of thinking, but there are
quite a lot of them.
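
A typical call site would then end up looking roughly like this
(illustrative only, not actual tree code):

    struct vm_event_domain *ved =
        vm_event_get_ved(d, VM_EVENT_DOMAIN_TYPE_MONITOR);

    if ( !ved )
        return -ENODEV; /* or simply bail out, depending on the caller */

    vm_event_ring_lock(ved);
    /* ... existing ring handling ... */
    vm_event_ring_unlock(ved);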

Tamas, any thoughts on this before going deeper?


Thanks,
Razvan
Tamas K Lengyel June 27, 2017, 5:37 p.m. UTC | #14
On Tue, Jun 27, 2017 at 8:25 AM, Razvan Cojocaru
<rcojocaru@bitdefender.com> wrote:
> On 06/27/2017 02:45 PM, Jan Beulich wrote:
>>>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 1:38 PM >>>
>>> On 06/27/2017 02:26 PM, Jan Beulich wrote:
>>>>>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 06/27/17 10:32 AM >>>
>>>>> On 06/27/2017 09:21 AM, Jan Beulich wrote:
>>>>>>>>> Andrew Cooper <andrew.cooper3@citrix.com> 06/26/17 5:11 PM >>>
>>>>>>> There is also a difference in timing; vm_event_init_domain() is called
>>>>>>> when vm_event is started on the domain, not when the domain is
>>>>>>> constructed.  IMO, the two should happen at the same time to be
>>>>>>> consistent.  I'm not fussed at which point, as it would be fine (in
>>>>>>> principle) for d->vm_event to be NULL in most cases.
>>>>>>
>>>>>> I very much support that last aspect - there's shouldn't be any memory
>>>>>> allocated here if the domain isn't going to make use of VM events.
>>>>>
>>>>> Unfortunately it's not as simple as that.
>>>>>
>>>>> While I didn't write that code, it would seem that on domain creation
>>>>> that data is being allocated because it holds information about 3
>>>>> different vm_event subsystems - monitor, paging, and memshare.
>>>>
>>>> But it can't be that difficult to make it work the suggested way?
>>>
>>> We can lazy-allocate that the first time any one of the three gets
>>> initialized (monitor, paging, share), and update all the code that
>>> checks d->vm_event->member to check that d->vm_event is not NULL before
>>> that.
>>>
>>> Any objections?
>>
>> None here.
>
> That's actually much more involved than I thought: almost every vm_event
> function assumes that for a created domain d->vm_event is not NULL, and
> takes a struct vm_event_domain *ved parameter to differentiate between
> monitor, mem paging and mem sharing. So while this technically is not a
> hard thing to fix at all, it would touch a lot of ARM and x86 code and
> be quite an overhaul of vm_event.

That sounds right, since currently if a domain is created it is
guaranteed to have d->vm_event allocated. That is quite a large struct
so I think it would be a nice optimization to only allocate it when
needed. If we are reworking this code, I would prefer to see that
structure actually being removed altogether and just have three
pointers to vm_event_domain's (monitor/sharing/paging), which get
allocated when needed. The pointers for paging and sharing could then
further be wrapped in #ifdef CONFIG checks, so we only allocate memory
for what we actually need.
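
I.e. something along these lines in struct domain (sketch only, the
field names are just illustrative):

    /*
     * Proposed replacement for the single xzalloc'd
     * struct vm_event_per_domain:
     */
#ifdef CONFIG_HAS_MEM_PAGING
    struct vm_event_domain *vm_event_paging;   /* allocated on first use */
#endif
#ifdef CONFIG_HAS_MEM_SHARING
    struct vm_event_domain *vm_event_share;    /* allocated on first use */
#endif
    struct vm_event_domain *vm_event_monitor;  /* allocated on first use */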

>
> My immediate reaction was to add small helper functions such as:
>
>  42 static struct vm_event_domain *vm_event_get_ved(
>  43     struct domain *d,
>  44     enum vm_event_domain_type domain_type)
>  45 {
>  46     if ( !d->vm_event )
>  47         return NULL;
>  48
>  49     switch ( domain_type )
>  50     {
>  51 #ifdef CONFIG_HAS_MEM_PAGING
>  52     case VM_EVENT_DOMAIN_TYPE_MEM_PAGING:
>  53         return &d->vm_event->paging;
>  54 #endif
>  55     case VM_EVENT_DOMAIN_TYPE_MONITOR:
>  56         return &d->vm_event->monitor;
>  57 #ifdef CONFIG_HAS_MEM_SHARING
>  58     case VM_EVENT_DOMAIN_TYPE_MEM_SHARING:
>  59         return &d->vm_event->share;
>  60 #endif
>  61     default:
>  62         return NULL;
>  63     }
>  64 }
>
> and try to get all places to use that line of thinking, but there's
> quite a lot of them.
>
> Tamas, any thoughts on this before going deeper?

Having this helper would be fine (even with my proposed changes
above). And yes, I can see this would involve touching a lot of code,
but I presume it will be mostly mechanical.

Tamas

Patch

diff --git a/xen/common/domain.c b/xen/common/domain.c
index b22aacc..89a8f1d 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -362,11 +362,10 @@  struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
 
         poolid = 0;
 
-        err = -ENOMEM;
-        d->vm_event = xzalloc(struct vm_event_per_domain);
-        if ( !d->vm_event )
+        if ( (err = init_domain_vm_event(d)) != 0 )
             goto fail;
 
+        err = -ENOMEM;
         d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
         if ( !d->pbuf )
             goto fail;
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 9291db6..294ddd7 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -39,6 +39,26 @@ 
 #define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
 #define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
 
+int init_domain_vm_event(struct domain *d)
+{
+    d->vm_event = xzalloc(struct vm_event_per_domain);
+
+    if ( !d->vm_event )
+        return -ENOMEM;
+
+#ifdef CONFIG_HAS_MEM_PAGING
+    init_waitqueue_head(&d->vm_event->paging.wq);
+#endif
+
+    init_waitqueue_head(&d->vm_event->monitor.wq);
+
+#ifdef CONFIG_HAS_MEM_SHARING
+    init_waitqueue_head(&d->vm_event->share.wq);
+#endif
+
+    return 0;
+}
+
 static int vm_event_enable(
     struct domain *d,
     xen_domctl_vm_event_op_t *vec,
@@ -93,9 +113,6 @@  static int vm_event_enable(
     /* Save the pause flag for this particular ring. */
     ved->pause_flag = pause_flag;
 
-    /* Initialize the last-chance wait queue. */
-    init_waitqueue_head(&ved->wq);
-
     vm_event_ring_unlock(ved);
     return 0;
 
diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
index 2fb3951..482243e 100644
--- a/xen/include/xen/vm_event.h
+++ b/xen/include/xen/vm_event.h
@@ -80,6 +80,8 @@  void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp);
 
 void vm_event_monitor_next_interrupt(struct vcpu *v);
 
+int init_domain_vm_event(struct domain *d);
+
 #endif /* __VM_EVENT_H__ */
 
 /*