diff mbox

[v5,3/8] xen: delay allocation of grant table sub structures

Message ID 20170908065634.5420-4-jgross@suse.com (mailing list archive)
State New, archived
Headers show

Commit Message

Juergen Gross Sept. 8, 2017, 6:56 a.m. UTC
Delay the allocation of the grant table sub structures in order to
allow modifying parameters needed for sizing of these structures on a
per-domain basis. Either do it from gnttab_setup_table() or just
before the domain is started the first time.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
---
V4:
- make ret more local (Wei Liu)

V3:
- move call of grant_table_init() from gnttab_setup_table() to
  gnttab_grow_table() (Paul Durrant)
---
 xen/common/domain.c           |  17 +++++-
 xen/common/grant_table.c      | 138 ++++++++++++++++++++++++------------------
 xen/include/xen/grant_table.h |   2 +
 3 files changed, 96 insertions(+), 61 deletions(-)

Comments

Jan Beulich Sept. 8, 2017, 3:28 p.m. UTC | #1
>>> On 08.09.17 at 08:56, <jgross@suse.com> wrote:
> Delay the allocation of the grant table sub structures in order to
> allow modifying parameters needed for sizing of these structures on a
> per-domain basis. Either do it from gnttab_setup_table() or just
> before the domain is started the first time.

The reference to gnttab_setup_table() is now sort of stale.

> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -363,6 +363,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>              goto fail;
>          init_status |= INIT_gnttab;
>  
> +        if ( domid == 0 && grant_table_init(d) )
> +            goto fail;

Besides not really liking the special case, why can't
grant_table_create() make this call, keeping more grant table
logic within grant_table.c? And if you special case Dom0,
wouldn't it be better to (also) special case the hardware domain
(in case that's not Dom0)?

> @@ -1029,8 +1033,17 @@ int domain_unpause_by_systemcontroller(struct domain *d)
>       * Creation is considered finished when the controller reference count
>       * first drops to 0.
>       */
> -    if ( new == 0 )
> +    if ( new == 0 && !d->creation_finished )
> +    {
> +        int ret = grant_table_init(d);
> +
> +        if ( ret )
> +        {
> +            __domain_pause_by_systemcontroller(d, NULL);
> +            return ret;
> +        }
>          d->creation_finished = true;
> +    }

Adding a grant table call here looks rather arbitrary, if not hackish.
Why can't you call it from the domctl you're going to add in a later
patch, requiring the tool stack to issue that domctl in all cases, just
like e.g. a max_vcpus one is always necessary? That would also
avoid a possibly confusing error (from the unpause, i.e. not
obviously related to grant table setup failure). Of course that will
require merging this patch with the other one to avoid an
intermediate state in which the call wouldn't be made at all.

> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -1655,6 +1655,78 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>      gt->nr_status_frames = 0;
>  }
>  
> +int
> +grant_table_init(struct domain *d)
> +{
> +    struct grant_table *gt = d->grant_table;
> +    unsigned int i, j;
> +
> +    if ( gt->nr_grant_frames )
> +        return 0;
> +
> +    gt->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
> +
> +    /* Active grant table. */
> +    if ( (gt->active = xzalloc_array(struct active_grant_entry *,
> +                                     max_nr_active_grant_frames)) == NULL )
> +        goto no_mem_1;
> +    for ( i = 0;
> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
> +    {
> +        if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
> +            goto no_mem_2;
> +        clear_page(gt->active[i]);
> +        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
> +            spin_lock_init(&gt->active[i][j].lock);
> +    }
> +
> +    /* Tracking of mapped foreign frames table */
> +    gt->maptrack = vzalloc(max_maptrack_frames * sizeof(*gt->maptrack));
> +    if ( gt->maptrack == NULL )
> +        goto no_mem_2;
> +
> +    /* Shared grant table. */
> +    if ( (gt->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
> +        goto no_mem_3;
> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
> +    {
> +        if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
> +            goto no_mem_4;
> +        clear_page(gt->shared_raw[i]);
> +    }
> +
> +    /* Status pages for grant table - for version 2 */
> +    gt->status = xzalloc_array(grant_status_t *,
> +                               grant_to_status_frames(max_grant_frames));
> +    if ( gt->status == NULL )
> +        goto no_mem_4;
> +
> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
> +        gnttab_create_shared_page(d, gt, i);
> +
> +    gt->nr_status_frames = 0;
> +
> +    return 0;
> +
> + no_mem_4:
> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
> +        free_xenheap_page(gt->shared_raw[i]);
> +    xfree(gt->shared_raw);
> +    gt->shared_raw = NULL;
> + no_mem_3:
> +    vfree(gt->maptrack);
> +    gt->maptrack = NULL;
> + no_mem_2:
> +    for ( i = 0;
> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
> +        free_xenheap_page(gt->active[i]);
> +    xfree(gt->active);
> +    gt->active = NULL;
> + no_mem_1:
> +    gt->nr_grant_frames = 0;
> +    return -ENOMEM;
> +}

The redundancy between this code and gnttab_grow_table() has
always bothered me, and now would seem to be a good occasion
to do away with it. Why don't you defer to gnttab_grow_table()
anything that function already does (keeping the respective limits
at zero in here)?

Jan
Juergen Gross Sept. 11, 2017, 9:03 a.m. UTC | #2
On 08/09/17 17:28, Jan Beulich wrote:
>>>> On 08.09.17 at 08:56, <jgross@suse.com> wrote:
>> Delay the allocation of the grant table sub structures in order to
>> allow modifying parameters needed for sizing of these structures on a
>> per-domain basis. Either do it from gnttab_setup_table() or just
>> before the domain is started the first time.
> 
> The reference to gnttab_setup_table() is now sort of stale.

Hmm, yes.

> 
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -363,6 +363,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>>              goto fail;
>>          init_status |= INIT_gnttab;
>>  
>> +        if ( domid == 0 && grant_table_init(d) )
>> +            goto fail;
> 
> Besides not really liking the special case, why can't
> grant_table_create() make this call, keeping more grant table
> logic within grant_table.c?

Right, this is better.

> And if you special case Dom0,
> wouldn't it be better to (also) special case the hardware domain
> (in case that's not Dom0)?

As a hardware domain not being dom0 would need to be created via the
appropriate domctl hypercalls I don't see why the measures for all
other domains wouldn't be enough for that case.

> 
>> @@ -1029,8 +1033,17 @@ int domain_unpause_by_systemcontroller(struct domain *d)
>>       * Creation is considered finished when the controller reference count
>>       * first drops to 0.
>>       */
>> -    if ( new == 0 )
>> +    if ( new == 0 && !d->creation_finished )
>> +    {
>> +        int ret = grant_table_init(d);
>> +
>> +        if ( ret )
>> +        {
>> +            __domain_pause_by_systemcontroller(d, NULL);
>> +            return ret;
>> +        }
>>          d->creation_finished = true;
>> +    }
> 
> Adding a grant table call here looks rather arbitrary, if not hackish.

Would it still be hackish if I'd add a generic function doing last
init calls just before the domain starts to run for the first time?
The call to grant_table_init() would be just the first such late init
call for the domain.

> Why can't you call it from the domctl you're going to add in a later
> patch, requiring the tool stack to issue that domctl in all cases, just
> like e.g. a max_vcpus one is always necessary? That would also
> avoid a possibly confusing error (from the unpause, i.e. not
> obviously related to grant table setup failure). Of course that will
> require merging this patch with the other one to avoid an
> intermediate state in which the call wouldn't be made at all.

This would be another possibility, yes.

Instead of merging the patches I'd just move patches 6-8 before this one
to have everything in place, including the needed tools side.

> 
>> --- a/xen/common/grant_table.c
>> +++ b/xen/common/grant_table.c
>> @@ -1655,6 +1655,78 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>>      gt->nr_status_frames = 0;
>>  }
>>  
>> +int
>> +grant_table_init(struct domain *d)
>> +{
>> +    struct grant_table *gt = d->grant_table;
>> +    unsigned int i, j;
>> +
>> +    if ( gt->nr_grant_frames )
>> +        return 0;
>> +
>> +    gt->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
>> +
>> +    /* Active grant table. */
>> +    if ( (gt->active = xzalloc_array(struct active_grant_entry *,
>> +                                     max_nr_active_grant_frames)) == NULL )
>> +        goto no_mem_1;
>> +    for ( i = 0;
>> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>> +    {
>> +        if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
>> +            goto no_mem_2;
>> +        clear_page(gt->active[i]);
>> +        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
>> +            spin_lock_init(&gt->active[i][j].lock);
>> +    }
>> +
>> +    /* Tracking of mapped foreign frames table */
>> +    gt->maptrack = vzalloc(max_maptrack_frames * sizeof(*gt->maptrack));
>> +    if ( gt->maptrack == NULL )
>> +        goto no_mem_2;
>> +
>> +    /* Shared grant table. */
>> +    if ( (gt->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
>> +        goto no_mem_3;
>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>> +    {
>> +        if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
>> +            goto no_mem_4;
>> +        clear_page(gt->shared_raw[i]);
>> +    }
>> +
>> +    /* Status pages for grant table - for version 2 */
>> +    gt->status = xzalloc_array(grant_status_t *,
>> +                               grant_to_status_frames(max_grant_frames));
>> +    if ( gt->status == NULL )
>> +        goto no_mem_4;
>> +
>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>> +        gnttab_create_shared_page(d, gt, i);
>> +
>> +    gt->nr_status_frames = 0;
>> +
>> +    return 0;
>> +
>> + no_mem_4:
>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>> +        free_xenheap_page(gt->shared_raw[i]);
>> +    xfree(gt->shared_raw);
>> +    gt->shared_raw = NULL;
>> + no_mem_3:
>> +    vfree(gt->maptrack);
>> +    gt->maptrack = NULL;
>> + no_mem_2:
>> +    for ( i = 0;
>> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>> +        free_xenheap_page(gt->active[i]);
>> +    xfree(gt->active);
>> +    gt->active = NULL;
>> + no_mem_1:
>> +    gt->nr_grant_frames = 0;
>> +    return -ENOMEM;
>> +}
> 
> The redundancy between this code and gnttab_grow_table() has
> always bothered me, and now would seem to be a good occasion
> to do away with it. Why don't you defer to gnttab_grow_table()
> anything that function already does (keeping the respective limits
> at zero in here)?

Just to be sure I understand you correctly: you want to get rid of
INITIAL_NR_GRANT_FRAMES and just grow from 0 on instead of starting at
the current value 4, right?

So I would just allocate the arrays (gt->active, gt->maptrack,
gt->shared_raw, gt->status) in grant_table_init().


Juergen
Jan Beulich Sept. 11, 2017, 9:23 a.m. UTC | #3
>>> On 11.09.17 at 11:03, <jgross@suse.com> wrote:
> On 08/09/17 17:28, Jan Beulich wrote:
>>>>> On 08.09.17 at 08:56, <jgross@suse.com> wrote:
>> And if you special case Dom0,
>> wouldn't it be better to (also) special case the hardware domain
>> (in case that's not Dom0)?
> 
> As a hardware domain not being dom0 would need to be created via the
> appropriate domctl hypercalls I don't see why the measures for all
> other domains wouldn't be enough for that case.

Yes, that's true especially when making the domctl mandatory to be
used. Whether suitable default values for that case wouldn't better
live in a single place (the hypervisor) is a point to be considered
here, though (by default values I mean ones to be used when the
config file doesn't specify any, not ones to be used by the domctl
handler if the passed in values are zero or some other "use
defaults" indicator, as you had elsewhere).

>>> @@ -1029,8 +1033,17 @@ int domain_unpause_by_systemcontroller(struct domain *d)
>>>       * Creation is considered finished when the controller reference count
>>>       * first drops to 0.
>>>       */
>>> -    if ( new == 0 )
>>> +    if ( new == 0 && !d->creation_finished )
>>> +    {
>>> +        int ret = grant_table_init(d);
>>> +
>>> +        if ( ret )
>>> +        {
>>> +            __domain_pause_by_systemcontroller(d, NULL);
>>> +            return ret;
>>> +        }
>>>          d->creation_finished = true;
>>> +    }
>> 
>> Adding a grant table call here looks rather arbitrary, if not hackish.
> 
> Would it still be hackish if I'd add a generic function doing last
> init calls just before the domain starts to run for the first time?
> The call to grant_table_init() would be just the first such late init
> call for the domain.

Generalizing this would make things look better, yes, but that
would then still not deal with the bad error reporting which
results.

>> Why can't you call it from the domctl you're going to add in a later
>> patch, requiring the tool stack to issue that domctl in all cases, just
>> like e.g. a max_vcpus one is always necessary? That would also
>> avoid a possibly confusing error (from the unpause, i.e. not
>> obviously related to grant table setup failure). Of course that will
>> require merging this patch with the other one to avoid an
>> intermediate state in which the call wouldn't be made at all.
> 
> This would be another possibility, yes.
> 
> Instead of merging the patches I'd just move patches 6-8 before this one
> to have everything in place, including the needed tools side.

Right, later I had realized too that simple re-ordering would be
sufficient.

>>> --- a/xen/common/grant_table.c
>>> +++ b/xen/common/grant_table.c
>>> @@ -1655,6 +1655,78 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>>>      gt->nr_status_frames = 0;
>>>  }
>>>  
>>> +int
>>> +grant_table_init(struct domain *d)
>>> +{
>>> +    struct grant_table *gt = d->grant_table;
>>> +    unsigned int i, j;
>>> +
>>> +    if ( gt->nr_grant_frames )
>>> +        return 0;
>>> +
>>> +    gt->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
>>> +
>>> +    /* Active grant table. */
>>> +    if ( (gt->active = xzalloc_array(struct active_grant_entry *,
>>> +                                     max_nr_active_grant_frames)) == NULL )
>>> +        goto no_mem_1;
>>> +    for ( i = 0;
>>> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>>> +    {
>>> +        if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
>>> +            goto no_mem_2;
>>> +        clear_page(gt->active[i]);
>>> +        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
>>> +            spin_lock_init(&gt->active[i][j].lock);
>>> +    }
>>> +
>>> +    /* Tracking of mapped foreign frames table */
>>> +    gt->maptrack = vzalloc(max_maptrack_frames * sizeof(*gt->maptrack));
>>> +    if ( gt->maptrack == NULL )
>>> +        goto no_mem_2;
>>> +
>>> +    /* Shared grant table. */
>>> +    if ( (gt->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
>>> +        goto no_mem_3;
>>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>>> +    {
>>> +        if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
>>> +            goto no_mem_4;
>>> +        clear_page(gt->shared_raw[i]);
>>> +    }
>>> +
>>> +    /* Status pages for grant table - for version 2 */
>>> +    gt->status = xzalloc_array(grant_status_t *,
>>> +                               grant_to_status_frames(max_grant_frames));
>>> +    if ( gt->status == NULL )
>>> +        goto no_mem_4;
>>> +
>>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>>> +        gnttab_create_shared_page(d, gt, i);
>>> +
>>> +    gt->nr_status_frames = 0;
>>> +
>>> +    return 0;
>>> +
>>> + no_mem_4:
>>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>>> +        free_xenheap_page(gt->shared_raw[i]);
>>> +    xfree(gt->shared_raw);
>>> +    gt->shared_raw = NULL;
>>> + no_mem_3:
>>> +    vfree(gt->maptrack);
>>> +    gt->maptrack = NULL;
>>> + no_mem_2:
>>> +    for ( i = 0;
>>> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>>> +        free_xenheap_page(gt->active[i]);
>>> +    xfree(gt->active);
>>> +    gt->active = NULL;
>>> + no_mem_1:
>>> +    gt->nr_grant_frames = 0;
>>> +    return -ENOMEM;
>>> +}
>> 
>> The redundancy between this code and gnttab_grow_table() has
>> always bothered me, and now would seem to be a good occasion
>> to do away with it. Why don't you defer to gnttab_grow_table()
>> anything that function already does (keeping the respective limits
>> at zero in here)?
> 
> Just to be sure I understand you correctly: you want to get rid of
> INITIAL_NR_GRANT_FRAMES and just grow from 0 on instead of starting at
> the current value 4, right?

Yes, the use of INITIAL_NR_GRANT_FRAMES would move to that
first "grow" invocation.

Jan
Juergen Gross Sept. 11, 2017, 9:39 a.m. UTC | #4
On 11/09/17 11:23, Jan Beulich wrote:
>>>> On 11.09.17 at 11:03, <jgross@suse.com> wrote:
>> On 08/09/17 17:28, Jan Beulich wrote:
>>>>>> On 08.09.17 at 08:56, <jgross@suse.com> wrote:
>>> And if you special case Dom0,
>>> wouldn't it be better to (also) special case the hardware domain
>>> (in case that's not Dom0)?
>>
>> As a hardware domain not being dom0 would need to be created via the
>> appropriate domctl hypercalls I don't see why the measures for all
>> other domains wouldn't be enough for that case.
> 
> Yes, that's true especially when making the domctl mandatory to be
> used. Whether suitable default values for that case wouldn't better
> live in a single place (the hypervisor) is a point to be considered
> here, though (by default values I mean ones to be used when the
> config file doesn't specify any, not ones to be used by the domctl
> handler if the passed in values are zero or some other "use
> defaults" indicator, as you had elsewhere).

But this is exactly what happens: the hypervisor defaults are being
used in case nothing is specified in the domain's config file: a value
of 0 for a value (grant table frame limit or maptrack frame limit)
specified in the domctl will just use the default values.

Or did I misunderstand you here?

> 
>>>> @@ -1029,8 +1033,17 @@ int domain_unpause_by_systemcontroller(struct domain *d)
>>>>       * Creation is considered finished when the controller reference count
>>>>       * first drops to 0.
>>>>       */
>>>> -    if ( new == 0 )
>>>> +    if ( new == 0 && !d->creation_finished )
>>>> +    {
>>>> +        int ret = grant_table_init(d);
>>>> +
>>>> +        if ( ret )
>>>> +        {
>>>> +            __domain_pause_by_systemcontroller(d, NULL);
>>>> +            return ret;
>>>> +        }
>>>>          d->creation_finished = true;
>>>> +    }
>>>
>>> Adding a grant table call here looks rather arbitrary, if not hackish.
>>
>> Would it still be hackish if I'd add a generic function doing last
>> init calls just before the domain starts to run for the first time?
>> The call to grant_table_init() would be just the first such late init
>> call for the domain.
> 
> Generalizing this would make things look better, yes, but that
> would then still not deal with the bad error reporting which
> results.

In case nobody objects I'll go with making the new domctl mandatory
then.

> 
>>> Why can't you call it from the domctl you're going to add in a later
>>> patch, requiring the tool stack to issue that domctl in all cases, just
>>> like e.g. a max_vcpus one is always necessary? That would also
>>> avoid a possibly confusing error (from the unpause, i.e. not
>>> obviously related to grant table setup failure). Of course that will
>>> require merging this patch with the other one to avoid an
>>> intermediate state in which the call wouldn't be made at all.
>>
>> This would be another possibility, yes.
>>
>> Instead of merging the patches I'd just move patches 6-8 before this one
>> to have everything in place, including the needed tools side.
> 
> Right, later I had realized too that simple re-ordering would be
> sufficient.
> 
>>>> --- a/xen/common/grant_table.c
>>>> +++ b/xen/common/grant_table.c
>>>> @@ -1655,6 +1655,78 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>>>>      gt->nr_status_frames = 0;
>>>>  }
>>>>  
>>>> +int
>>>> +grant_table_init(struct domain *d)
>>>> +{
>>>> +    struct grant_table *gt = d->grant_table;
>>>> +    unsigned int i, j;
>>>> +
>>>> +    if ( gt->nr_grant_frames )
>>>> +        return 0;
>>>> +
>>>> +    gt->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
>>>> +
>>>> +    /* Active grant table. */
>>>> +    if ( (gt->active = xzalloc_array(struct active_grant_entry *,
>>>> +                                     max_nr_active_grant_frames)) == NULL )
>>>> +        goto no_mem_1;
>>>> +    for ( i = 0;
>>>> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>>>> +    {
>>>> +        if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
>>>> +            goto no_mem_2;
>>>> +        clear_page(gt->active[i]);
>>>> +        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
>>>> +            spin_lock_init(&gt->active[i][j].lock);
>>>> +    }
>>>> +
>>>> +    /* Tracking of mapped foreign frames table */
>>>> +    gt->maptrack = vzalloc(max_maptrack_frames * sizeof(*gt->maptrack));
>>>> +    if ( gt->maptrack == NULL )
>>>> +        goto no_mem_2;
>>>> +
>>>> +    /* Shared grant table. */
>>>> +    if ( (gt->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
>>>> +        goto no_mem_3;
>>>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>>>> +    {
>>>> +        if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
>>>> +            goto no_mem_4;
>>>> +        clear_page(gt->shared_raw[i]);
>>>> +    }
>>>> +
>>>> +    /* Status pages for grant table - for version 2 */
>>>> +    gt->status = xzalloc_array(grant_status_t *,
>>>> +                               grant_to_status_frames(max_grant_frames));
>>>> +    if ( gt->status == NULL )
>>>> +        goto no_mem_4;
>>>> +
>>>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>>>> +        gnttab_create_shared_page(d, gt, i);
>>>> +
>>>> +    gt->nr_status_frames = 0;
>>>> +
>>>> +    return 0;
>>>> +
>>>> + no_mem_4:
>>>> +    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
>>>> +        free_xenheap_page(gt->shared_raw[i]);
>>>> +    xfree(gt->shared_raw);
>>>> +    gt->shared_raw = NULL;
>>>> + no_mem_3:
>>>> +    vfree(gt->maptrack);
>>>> +    gt->maptrack = NULL;
>>>> + no_mem_2:
>>>> +    for ( i = 0;
>>>> +          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>>>> +        free_xenheap_page(gt->active[i]);
>>>> +    xfree(gt->active);
>>>> +    gt->active = NULL;
>>>> + no_mem_1:
>>>> +    gt->nr_grant_frames = 0;
>>>> +    return -ENOMEM;
>>>> +}
>>>
>>> The redundancy between this code and gnttab_grow_table() has
>>> always bothered me, and now would seem to be a good occasion
>>> to do away with it. Why don't you defer to gnttab_grow_table()
>>> anything that function already does (keeping the respective limits
>>> at zero in here)?
>>
>> Just to be sure I understand you correctly: you want to get rid of
>> INITIAL_NR_GRANT_FRAMES and just grow from 0 on instead of starting at
>> the current value 4, right?
> 
> Yes, the use of INITIAL_NR_GRANT_FRAMES would move to that
> first "grow" invocation.

Hmm, shouldn't we just grow one frame after the other? Is it really true
that most domains will need more than 1500 grants? A simple test domain
with one disk and one NIC seems to be okay with a little bit more than
300 grants, so 1 grant table frame would be enough for that case.


Juergen
Jan Beulich Sept. 11, 2017, 11:02 a.m. UTC | #5
>>> On 11.09.17 at 11:39, <jgross@suse.com> wrote:
> On 11/09/17 11:23, Jan Beulich wrote:
>>>>> On 11.09.17 at 11:03, <jgross@suse.com> wrote:
>>> On 08/09/17 17:28, Jan Beulich wrote:
>>>>>>> On 08.09.17 at 08:56, <jgross@suse.com> wrote:
>>>> And if you special case Dom0,
>>>> wouldn't it be better to (also) special case the hardware domain
>>>> (in case that's not Dom0)?
>>>
>>> As a hardware domain not being dom0 would need to be created via the
>>> appropriate domctl hypercalls I don't see why the measures for all
>>> other domains wouldn't be enough for that case.
>> 
>> Yes, that's true especially when making the domctl mandatory to be
>> used. Whether suitable default values for that case wouldn't better
>> live in a single place (the hypervisor) is a point to be considered
>> here, though (by default values I mean ones to be used when the
>> config file doesn't specify any, not ones to be used by the domctl
>> handler if the passed in values are zero or some other "use
>> defaults" indicator, as you had elsewhere).
> 
> But this is exactly what happens: the hypervisor defaults are being
> used in case nothing is specified in the domain's config file: a value
> of 0 for a value (grant table frame limit or maptrack frame limit)
> specified in the domctl will just use the default values.
> 
> Or did I misunderstand you here?

Hypervisor defaults are in general meaningless when the domctl
becomes mandatory (as indicated elsewhere, at least for the
maptrack table size I view zero as a valid to use option). The only
time hypervisor defaults would continue to be needed would be
for Dom0 and, maybe (for consistency as explained) for the
hardware and/or control domains.

>>> Just to be sure I understand you correctly: you want to get rid of
>>> INITIAL_NR_GRANT_FRAMES and just grow from 0 on instead of starting at
>>> the current value 4, right?
>> 
>> Yes, the use of INITIAL_NR_GRANT_FRAMES would move to that
>> first "grow" invocation.
> 
> Hmm, shouldn't we just grow one frame after the other? Is it really true
> that most domains will need more than 1500 grants? A simple test domain
> with one disk and one NIC seems to be okay with a little bit more than
> 300 grants, so 1 grant table frame would be enough for that case.

Yes and no. Yes because indeed many domains will not need more.
No, however, because of the risk of memory shortage: By giving all
domains a certain minimum, they can prepare themselves for how
much (or really how little) they can do without depending on there
being memory available to grow the tables later on. IOW I don't
generally object to the limit being reduced, but not as a side effect
of the re-work. In case of problems this needs to be easy to revert
(or adjust). I.e. the change can be in the same series, but ought to
be a separate patch.

Jan
Juergen Gross Sept. 11, 2017, 11:36 a.m. UTC | #6
On 11/09/17 13:02, Jan Beulich wrote:
>>>> On 11.09.17 at 11:39, <jgross@suse.com> wrote:
>> On 11/09/17 11:23, Jan Beulich wrote:
>>>>>> On 11.09.17 at 11:03, <jgross@suse.com> wrote:
>>>> On 08/09/17 17:28, Jan Beulich wrote:
>>>>>>>> On 08.09.17 at 08:56, <jgross@suse.com> wrote:
>>>>> And if you special case Dom0,
>>>>> wouldn't it be better to (also) special case the hardware domain
>>>>> (in case that's not Dom0)?
>>>>
>>>> As a hardware domain not being dom0 would need to be created via the
>>>> appropriate domctl hypercalls I don't see why the measures for all
>>>> other domains wouldn't be enough for that case.
>>>
>>> Yes, that's true especially when making the domctl mandatory to be
>>> used. Whether suitable default values for that case wouldn't better
>>> live in a single place (the hypervisor) is a point to be considered
>>> here, though (by default values I mean ones to be used when the
>>> config file doesn't specify any, not ones to be used by the domctl
>>> handler if the passed in values are zero or some other "use
>>> defaults" indicator, as you had elsewhere).
>>
>> But this is exactly what happens: the hypervisor defaults are being
>> used in case nothing is specified in the domain's config file: a value
>> of 0 for a value (grant table frame limit or maptrack frame limit)
>> specified in the domctl will just use the default values.
>>
>> Or did I misunderstand you here?
> 
> Hypervisor defaults are in general meaningless when the domctl
> becomes mandatory (as indicated elsewhere, at least for the
> maptrack table size I view zero as a valid to use option). The only
> time hypervisor defaults would continue to be needed would be
> for Dom0 and, maybe (for consistency as explained) for the
> hardware and/or control domains.
> 
>>>> Just to be sure I understand you correctly: you want to get rid of
>>>> INITIAL_NR_GRANT_FRAMES and just grow from 0 on instead of starting at
>>>> the current value 4, right?
>>>
>>> Yes, the use of INITIAL_NR_GRANT_FRAMES would move to that
>>> first "grow" invocation.
>>
>> Hmm, shouldn't we just grow one frame after the other? Is it really true
>> that most domains will need more than 1500 grants? A simple test domain
>> with one disk and one NIC seems to be okay with a little bit more than
>> 300 grants, so 1 grant table frame would be enough for that case.
> 
> Yes and no. Yes because indeed many domains will not need more.
> No, however, because of the risk of memory shortage: By giving all
> domains a certain minimum, they can prepare themselves for how
> much (or really how little) they can do without depending on there
> being memory available to grow the tables later on. IOW I don't
> generally object to the limit being reduced, but not as a side effect
> of the re-work. In case of problems this needs to be easy to revert
> (or adjust). I.e. the change can be in the same series, but ought to
> be a separate patch.

Okay, I'll add another patch for that purpose.


Juergen
diff mbox

Patch

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5aebcf265f..983f3336a9 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -363,6 +363,9 @@  struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
             goto fail;
         init_status |= INIT_gnttab;
 
+        if ( domid == 0 && grant_table_init(d) )
+            goto fail;
+
         poolid = 0;
 
         err = -ENOMEM;
@@ -998,7 +1001,8 @@  int __domain_pause_by_systemcontroller(struct domain *d,
         prev = cmpxchg(&d->controller_pause_count, old, new);
     } while ( prev != old );
 
-    pause_fn(d);
+    if ( pause_fn )
+        pause_fn(d);
 
     return 0;
 }
@@ -1029,8 +1033,17 @@  int domain_unpause_by_systemcontroller(struct domain *d)
      * Creation is considered finished when the controller reference count
      * first drops to 0.
      */
-    if ( new == 0 )
+    if ( new == 0 && !d->creation_finished )
+    {
+        int ret = grant_table_init(d);
+
+        if ( ret )
+        {
+            __domain_pause_by_systemcontroller(d, NULL);
+            return ret;
+        }
         d->creation_finished = true;
+    }
 
     domain_unpause(d);
 
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 4520e36d90..29e7fa539b 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1655,6 +1655,78 @@  gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
     gt->nr_status_frames = 0;
 }
 
+int
+grant_table_init(struct domain *d)
+{
+    struct grant_table *gt = d->grant_table;
+    unsigned int i, j;
+
+    if ( gt->nr_grant_frames )
+        return 0;
+
+    gt->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
+
+    /* Active grant table. */
+    if ( (gt->active = xzalloc_array(struct active_grant_entry *,
+                                     max_nr_active_grant_frames)) == NULL )
+        goto no_mem_1;
+    for ( i = 0;
+          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
+    {
+        if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
+            goto no_mem_2;
+        clear_page(gt->active[i]);
+        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+            spin_lock_init(&gt->active[i][j].lock);
+    }
+
+    /* Tracking of mapped foreign frames table */
+    gt->maptrack = vzalloc(max_maptrack_frames * sizeof(*gt->maptrack));
+    if ( gt->maptrack == NULL )
+        goto no_mem_2;
+
+    /* Shared grant table. */
+    if ( (gt->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
+        goto no_mem_3;
+    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+    {
+        if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
+            goto no_mem_4;
+        clear_page(gt->shared_raw[i]);
+    }
+
+    /* Status pages for grant table - for version 2 */
+    gt->status = xzalloc_array(grant_status_t *,
+                               grant_to_status_frames(max_grant_frames));
+    if ( gt->status == NULL )
+        goto no_mem_4;
+
+    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+        gnttab_create_shared_page(d, gt, i);
+
+    gt->nr_status_frames = 0;
+
+    return 0;
+
+ no_mem_4:
+    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+        free_xenheap_page(gt->shared_raw[i]);
+    xfree(gt->shared_raw);
+    gt->shared_raw = NULL;
+ no_mem_3:
+    vfree(gt->maptrack);
+    gt->maptrack = NULL;
+ no_mem_2:
+    for ( i = 0;
+          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
+        free_xenheap_page(gt->active[i]);
+    xfree(gt->active);
+    gt->active = NULL;
+ no_mem_1:
+    gt->nr_grant_frames = 0;
+    return -ENOMEM;
+}
+
 /*
  * Grow the grant table. The caller must hold the grant table's
  * write lock before calling this function.
@@ -1665,6 +1737,12 @@  gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
     struct grant_table *gt = d->grant_table;
     unsigned int i, j;
 
+    if ( !gt->nr_grant_frames && grant_table_init(d) )
+    {
+        gdprintk(XENLOG_INFO, "Allocation failure in grant table init.\n");
+        return 0;
+    }
+
     ASSERT(req_nr_frames <= max_grant_frames);
 
     gdprintk(XENLOG_INFO,
@@ -3380,75 +3458,17 @@  grant_table_create(
     struct domain *d)
 {
     struct grant_table *t;
-    unsigned int i, j;
 
     if ( (t = xzalloc(struct grant_table)) == NULL )
-        goto no_mem_0;
+        return -ENOMEM;
 
     /* Simple stuff. */
     percpu_rwlock_resource_init(&t->lock, grant_rwlock);
     spin_lock_init(&t->maptrack_lock);
-    t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
-
-    /* Active grant table. */
-    if ( (t->active = xzalloc_array(struct active_grant_entry *,
-                                    max_nr_active_grant_frames)) == NULL )
-        goto no_mem_1;
-    for ( i = 0;
-          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
-    {
-        if ( (t->active[i] = alloc_xenheap_page()) == NULL )
-            goto no_mem_2;
-        clear_page(t->active[i]);
-        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
-            spin_lock_init(&t->active[i][j].lock);
-    }
-
-    /* Tracking of mapped foreign frames table */
-    t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
-    if ( t->maptrack == NULL )
-        goto no_mem_2;
-
-    /* Shared grant table. */
-    if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
-        goto no_mem_3;
-    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
-    {
-        if ( (t->shared_raw[i] = alloc_xenheap_page()) == NULL )
-            goto no_mem_4;
-        clear_page(t->shared_raw[i]);
-    }
-
-    /* Status pages for grant table - for version 2 */
-    t->status = xzalloc_array(grant_status_t *,
-                              grant_to_status_frames(max_grant_frames));
-    if ( t->status == NULL )
-        goto no_mem_4;
-
-    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
-        gnttab_create_shared_page(d, t, i);
-
-    t->nr_status_frames = 0;
 
     /* Okay, install the structure. */
     d->grant_table = t;
     return 0;
-
- no_mem_4:
-    for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
-        free_xenheap_page(t->shared_raw[i]);
-    xfree(t->shared_raw);
- no_mem_3:
-    vfree(t->maptrack);
- no_mem_2:
-    for ( i = 0;
-          i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
-        free_xenheap_page(t->active[i]);
-    xfree(t->active);
- no_mem_1:
-    xfree(t);
- no_mem_0:
-    return -ENOMEM;
 }
 
 void
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 43b07e60c5..84a8d61616 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -35,6 +35,8 @@  extern unsigned int max_grant_frames;
 /* Create/destroy per-domain grant table context. */
 int grant_table_create(
     struct domain *d);
+int grant_table_init(
+    struct domain *d);
 void grant_table_destroy(
     struct domain *d);
 void grant_table_init_vcpu(struct vcpu *v);