diff mbox

[v7] x86/mem-sharing: mem-sharing a range of memory

Message ID 1468876480-27373-1-git-send-email-tamas.lengyel@zentific.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tamas Lengyel July 18, 2016, 9:14 p.m. UTC
Currently mem-sharing can be performed on a page-by-page basis from the control
domain. However, this process is quite wasteful when a range of pages have to
be deduplicated.

This patch introduces a new mem_sharing memop for range sharing where
the user doesn't have to separately nominate each page in both the source and
destination domain, and the looping over all pages happen in the hypervisor.
This significantly reduces the overhead of sharing a range of memory.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>

v7: style fixes
---
 tools/libxc/include/xenctrl.h        |  15 ++++
 tools/libxc/xc_memshr.c              |  19 +++++
 tools/tests/mem-sharing/memshrtool.c |  22 ++++++
 xen/arch/x86/mm/mem_sharing.c        | 140 +++++++++++++++++++++++++++++++++++
 xen/include/public/memory.h          |  10 ++-
 5 files changed, 205 insertions(+), 1 deletion(-)

Comments

Andrew Cooper July 18, 2016, 9:47 p.m. UTC | #1
On 18/07/2016 22:14, Tamas K Lengyel wrote:
> Currently mem-sharing can be performed on a page-by-page basis from the control
> domain. However, this process is quite wasteful when a range of pages have to
> be deduplicated.
>
> This patch introduces a new mem_sharing memop for range sharing where
> the user doesn't have to separately nominate each page in both the source and
> destination domain, and the looping over all pages happen in the hypervisor.
> This significantly reduces the overhead of sharing a range of memory.
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
> Acked-by: Wei Liu <wei.liu2@citrix.com>

Some style nits, and one functional suggestion.

If you are happy with the suggestion, then Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>

> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index a522423..6d00228 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1294,6 +1294,58 @@ int relinquish_shared_pages(struct domain *d)
>      return rc;
>  }
>  
> +static int range_share(struct domain *d, struct domain *cd,
> +                      struct mem_sharing_op_range *range)

Alignment.

> +{
> +    int rc = 0;
> +    shr_handle_t sh, ch;
> +    unsigned long start =
> +        range->_scratchspace ? range->_scratchspace : range->start;

This can be shortened to "unsigned long start = range->_scratchspace ?:
range->start;" and fit on a single line.

> +
> +    while( range->end >= start )
> +    {
> +        /*
> +         * We only break out if we run out of memory as individual pages may
> +         * legitimately be unsharable and we just want to skip over those.
> +         */
> +        rc = mem_sharing_nominate_page(d, start, 0, &sh);
> +        if ( rc == -ENOMEM )
> +            break;

Newline here please

> +        if ( !rc )
> +        {
> +            rc = mem_sharing_nominate_page(cd, start, 0, &ch);
> +            if ( rc == -ENOMEM )
> +                break;

And here.

> +            if ( !rc )
> +            {
> +                /* If we get here this should be guaranteed to succeed. */
> +                rc = mem_sharing_share_pages(d, start, sh,
> +                                             cd, start, ch);
> +                ASSERT(!rc);
> +            }
> +        }
> +
> +        /* Check for continuation if it's not the last iteration. */
> +        if ( range->end >= ++start && hypercall_preempt_check() )
> +        {
> +            rc = 1;
> +            break;
> +        }
> +    }
> +
> +    range->_scratchspace = start;
> +
> +    /*
> +     * We only propagate -ENOMEM as individual pages may fail with -EINVAL,
> +     * and for range sharing we only care if -ENOMEM was encountered so we reset
> +     * rc here.
> +     */
> +    if ( rc < 0 && rc != -ENOMEM )

Would you mind putting in an ASSERT(rc == -EINVAL) here, if we believe
that to be an ok case to ignore?  In the future if more errors get
raised, we don't want to silently lose a more serious error which should
be propagated up.

> +        rc = 0;
> +
> +    return rc;
> +}
> +
>  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>  {
>      int rc;
> @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>          }
>          break;
>  
> +        case XENMEM_sharing_op_range_share:
> +        {
> +            unsigned long max_sgfn, max_cgfn;
> +            struct domain *cd;
> +
> +            rc = -EINVAL;
> +            if( mso.u.range._pad[0] || mso.u.range._pad[1] ||
> +                mso.u.range._pad[2] )
> +                goto out;
> +
> +            /*
> +             * We use _scratchscape for the hypercall continuation value.
> +             * Ideally the user sets this to 0 in the beginning but
> +             * there is no good way of enforcing that here, so we just check
> +             * that it's at least in range.
> +             */
> +            if ( mso.u.range._scratchspace &&
> +                (mso.u.range._scratchspace < mso.u.range.start ||
> +                 mso.u.range._scratchspace > mso.u.range.end) )

Alignment (extra space) for these two lines.

> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 29ec571..e0bc018 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -500,7 +501,14 @@ struct xen_mem_sharing_op {
>              uint64_aligned_t client_gfn;    /* IN: the client gfn */
>              uint64_aligned_t client_handle; /* IN: handle to the client page */
>              domid_t  client_domain; /* IN: the client domain id */
> -        } share; 
> +        } share;
> +        struct mem_sharing_op_range {         /* OP_RANGE_SHARE */

Alignment of the comment.

~Andrew

> +            uint64_aligned_t start;          /* IN: start gfn. */
> +            uint64_aligned_t end;            /* IN: end gfn (inclusive) */
> +            uint64_aligned_t _scratchspace;  /* Must be set to 0 */
> +            domid_t client_domain;           /* IN: the client domain id */
> +            uint16_t _pad[3];                /* Must be set to 0 */
> +        } range;
>          struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
>              union {
>                  uint64_aligned_t gfn;      /* IN: gfn to debug          */
Julien Grall July 19, 2016, 7:54 a.m. UTC | #2
Hello Tamas,

On 18/07/2016 22:14, Tamas K Lengyel wrote:
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index e904bd5..0ca94cd 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -2334,6 +2334,21 @@ int xc_memshr_add_to_physmap(xc_interface *xch,
>                      domid_t client_domain,
>                      unsigned long client_gfn);
>
> +/* Allows to deduplicate a range of memory of a client domain. Using
> + * this function is equivalent of calling xc_memshr_nominate_gfn for each gfn
> + * in the two domains followed by xc_memshr_share_gfns.
> + *
> + * May fail with -EINVAL if the source and client domain have different
> + * memory size or if memory sharing is not enabled on either of the domains.
> + * May also fail with -ENOMEM if there isn't enough memory available to store
> + * the sharing metadata before deduplication can happen.
> + */
> +int xc_memshr_range_share(xc_interface *xch,
> +                          domid_t source_domain,
> +                          domid_t client_domain,
> +                          unsigned long start,
> +                          unsigned long end);

I know the rest of memshr interface in libxc is using "unsigned long". 
However, this should really be "uint64_t" to match the interface and 
avoid issue with 32-bit toolstack on 64-bit hypervisor.

> +
>  /* Debug calls: return the number of pages referencing the shared frame backing
>   * the input argument. Should be one or greater.
>   *

[...]

> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index a522423..6d00228 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c

[...]

> @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>          }
>          break;
>
> +        case XENMEM_sharing_op_range_share:
> +        {
> +            unsigned long max_sgfn, max_cgfn;
> +            struct domain *cd;
> +
> +            rc = -EINVAL;
> +            if( mso.u.range._pad[0] || mso.u.range._pad[1] ||

NIT: missing space after the "if".

> +                mso.u.range._pad[2] )
> +                goto out;
> +

Regards,
Tamas Lengyel July 19, 2016, 4:27 p.m. UTC | #3
On Mon, Jul 18, 2016 at 3:47 PM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 18/07/2016 22:14, Tamas K Lengyel wrote:
>> Currently mem-sharing can be performed on a page-by-page basis from the control
>> domain. However, this process is quite wasteful when a range of pages have to
>> be deduplicated.
>>
>> This patch introduces a new mem_sharing memop for range sharing where
>> the user doesn't have to separately nominate each page in both the source and
>> destination domain, and the looping over all pages happen in the hypervisor.
>> This significantly reduces the overhead of sharing a range of memory.
>>
>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
>> Acked-by: Wei Liu <wei.liu2@citrix.com>
>
> Some style nits, and one functional suggestion.
>
> If you are happy with the suggestion, then Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Thanks!

>
>> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
>> index a522423..6d00228 100644
>> --- a/xen/arch/x86/mm/mem_sharing.c
>> +++ b/xen/arch/x86/mm/mem_sharing.c
>> @@ -1294,6 +1294,58 @@ int relinquish_shared_pages(struct domain *d)
>>      return rc;
>>  }
>>
>> +static int range_share(struct domain *d, struct domain *cd,
>> +                      struct mem_sharing_op_range *range)
>
> Alignment.
>
>> +{
>> +    int rc = 0;
>> +    shr_handle_t sh, ch;
>> +    unsigned long start =
>> +        range->_scratchspace ? range->_scratchspace : range->start;
>
> This can be shortened to "unsigned long start = range->_scratchspace ?:
> range->start;" and fit on a single line.

I'm not that familiar with this style of the syntax, does that have
the effect of setting start = _scratchspace when _scratchspace is not
0?

>
>> +
>> +    while( range->end >= start )
>> +    {
>> +        /*
>> +         * We only break out if we run out of memory as individual pages may
>> +         * legitimately be unsharable and we just want to skip over those.
>> +         */
>> +        rc = mem_sharing_nominate_page(d, start, 0, &sh);
>> +        if ( rc == -ENOMEM )
>> +            break;
>
> Newline here please
>
>> +        if ( !rc )
>> +        {
>> +            rc = mem_sharing_nominate_page(cd, start, 0, &ch);
>> +            if ( rc == -ENOMEM )
>> +                break;
>
> And here.
>
>> +            if ( !rc )
>> +            {
>> +                /* If we get here this should be guaranteed to succeed. */
>> +                rc = mem_sharing_share_pages(d, start, sh,
>> +                                             cd, start, ch);
>> +                ASSERT(!rc);
>> +            }
>> +        }
>> +
>> +        /* Check for continuation if it's not the last iteration. */
>> +        if ( range->end >= ++start && hypercall_preempt_check() )
>> +        {
>> +            rc = 1;
>> +            break;
>> +        }
>> +    }
>> +
>> +    range->_scratchspace = start;
>> +
>> +    /*
>> +     * We only propagate -ENOMEM as individual pages may fail with -EINVAL,
>> +     * and for range sharing we only care if -ENOMEM was encountered so we reset
>> +     * rc here.
>> +     */
>> +    if ( rc < 0 && rc != -ENOMEM )
>
> Would you mind putting in an ASSERT(rc == -EINVAL) here, if we believe
> that to be an ok case to ignore?  In the future if more errors get
> raised, we don't want to silently lose a more serious error which should
> be propagated up.

Well, in that case I can just change the if statement to rc == -EINVAL.

>
>> +        rc = 0;
>> +
>> +    return rc;
>> +}
>> +
>>  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>>  {
>>      int rc;
>> @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>>          }
>>          break;
>>
>> +        case XENMEM_sharing_op_range_share:
>> +        {
>> +            unsigned long max_sgfn, max_cgfn;
>> +            struct domain *cd;
>> +
>> +            rc = -EINVAL;
>> +            if( mso.u.range._pad[0] || mso.u.range._pad[1] ||
>> +                mso.u.range._pad[2] )
>> +                goto out;
>> +
>> +            /*
>> +             * We use _scratchscape for the hypercall continuation value.
>> +             * Ideally the user sets this to 0 in the beginning but
>> +             * there is no good way of enforcing that here, so we just check
>> +             * that it's at least in range.
>> +             */
>> +            if ( mso.u.range._scratchspace &&
>> +                (mso.u.range._scratchspace < mso.u.range.start ||
>> +                 mso.u.range._scratchspace > mso.u.range.end) )
>
> Alignment (extra space) for these two lines.

You mean add an extra space or that there is an extra space?

>
>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>> index 29ec571..e0bc018 100644
>> --- a/xen/include/public/memory.h
>> +++ b/xen/include/public/memory.h
>> @@ -500,7 +501,14 @@ struct xen_mem_sharing_op {
>>              uint64_aligned_t client_gfn;    /* IN: the client gfn */
>>              uint64_aligned_t client_handle; /* IN: handle to the client page */
>>              domid_t  client_domain; /* IN: the client domain id */
>> -        } share;
>> +        } share;
>> +        struct mem_sharing_op_range {         /* OP_RANGE_SHARE */
>
> Alignment of the comment.
>
> ~Andrew
>
>> +            uint64_aligned_t start;          /* IN: start gfn. */
>> +            uint64_aligned_t end;            /* IN: end gfn (inclusive) */
>> +            uint64_aligned_t _scratchspace;  /* Must be set to 0 */
>> +            domid_t client_domain;           /* IN: the client domain id */
>> +            uint16_t _pad[3];                /* Must be set to 0 */
>> +        } range;
>>          struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
>>              union {
>>                  uint64_aligned_t gfn;      /* IN: gfn to debug          */
>
Tamas Lengyel July 19, 2016, 4:29 p.m. UTC | #4
On Tue, Jul 19, 2016 at 1:54 AM, Julien Grall <julien.grall@arm.com> wrote:
> Hello Tamas,
>
> On 18/07/2016 22:14, Tamas K Lengyel wrote:
>>
>> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
>> index e904bd5..0ca94cd 100644
>> --- a/tools/libxc/include/xenctrl.h
>> +++ b/tools/libxc/include/xenctrl.h
>> @@ -2334,6 +2334,21 @@ int xc_memshr_add_to_physmap(xc_interface *xch,
>>                      domid_t client_domain,
>>                      unsigned long client_gfn);
>>
>> +/* Allows to deduplicate a range of memory of a client domain. Using
>> + * this function is equivalent of calling xc_memshr_nominate_gfn for each
>> gfn
>> + * in the two domains followed by xc_memshr_share_gfns.
>> + *
>> + * May fail with -EINVAL if the source and client domain have different
>> + * memory size or if memory sharing is not enabled on either of the
>> domains.
>> + * May also fail with -ENOMEM if there isn't enough memory available to
>> store
>> + * the sharing metadata before deduplication can happen.
>> + */
>> +int xc_memshr_range_share(xc_interface *xch,
>> +                          domid_t source_domain,
>> +                          domid_t client_domain,
>> +                          unsigned long start,
>> +                          unsigned long end);
>
>
> I know the rest of memshr interface in libxc is using "unsigned long".
> However, this should really be "uint64_t" to match the interface and avoid
> issue with 32-bit toolstack on 64-bit hypervisor.

Sounds good to me.

>
>> +
>>  /* Debug calls: return the number of pages referencing the shared frame
>> backing
>>   * the input argument. Should be one or greater.
>>   *
>
>
> [...]
>
>> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
>> index a522423..6d00228 100644
>> --- a/xen/arch/x86/mm/mem_sharing.c
>> +++ b/xen/arch/x86/mm/mem_sharing.c
>
>
> [...]
>
>> @@ -1468,6 +1520,94 @@ int
>> mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>>          }
>>          break;
>>
>> +        case XENMEM_sharing_op_range_share:
>> +        {
>> +            unsigned long max_sgfn, max_cgfn;
>> +            struct domain *cd;
>> +
>> +            rc = -EINVAL;
>> +            if( mso.u.range._pad[0] || mso.u.range._pad[1] ||
>
>
> NIT: missing space after the "if".
>
>> +                mso.u.range._pad[2] )
>> +                goto out;
>> +
>
>
> Regards,
>
> --
> Julien Grall

Thanks!
Tamas
Andrew Cooper July 19, 2016, 4:49 p.m. UTC | #5
On 19/07/16 17:27, Tamas K Lengyel wrote:
>
>>> +{
>>> +    int rc = 0;
>>> +    shr_handle_t sh, ch;
>>> +    unsigned long start =
>>> +        range->_scratchspace ? range->_scratchspace : range->start;
>> This can be shortened to "unsigned long start = range->_scratchspace ?:
>> range->start;" and fit on a single line.
> I'm not that familiar with this style of the syntax, does that have
> the effect of setting start = _scratchspace when _scratchspace is not
> 0?

It is a GCC extension
https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which
allows you to omit the middle parameter if it is identical to the first.

It is very useful for chaining together a load of items where you want
to stop at the first non-zero one.

x = a ?: b ?: c ?: d;

but can also be used with functions calls which 0 success, nonzero error
semantics:

rc = a() ?: b() ?: c() ?: d();

If you don't need to do any special cleanup in-between them.

>>> +    /*
>>> +     * We only propagate -ENOMEM as individual pages may fail with -EINVAL,
>>> +     * and for range sharing we only care if -ENOMEM was encountered so we reset
>>> +     * rc here.
>>> +     */
>>> +    if ( rc < 0 && rc != -ENOMEM )
>> Would you mind putting in an ASSERT(rc == -EINVAL) here, if we believe
>> that to be an ok case to ignore?  In the future if more errors get
>> raised, we don't want to silently lose a more serious error which should
>> be propagated up.
> Well, in that case I can just change the if statement to rc == -EINVAL.

That is a much better suggestion.

>>> @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>>>          }
>>>          break;
>>>
>>> +        case XENMEM_sharing_op_range_share:
>>> +        {
>>> +            unsigned long max_sgfn, max_cgfn;
>>> +            struct domain *cd;
>>> +
>>> +            rc = -EINVAL;
>>> +            if( mso.u.range._pad[0] || mso.u.range._pad[1] ||
>>> +                mso.u.range._pad[2] )
>>> +                goto out;
>>> +
>>> +            /*
>>> +             * We use _scratchscape for the hypercall continuation value.
>>> +             * Ideally the user sets this to 0 in the beginning but
>>> +             * there is no good way of enforcing that here, so we just check
>>> +             * that it's at least in range.
>>> +             */
>>> +            if ( mso.u.range._scratchspace &&
>>> +                (mso.u.range._scratchspace < mso.u.range.start ||
>>> +                 mso.u.range._scratchspace > mso.u.range.end) )
>> Alignment (extra space) for these two lines.
> You mean add an extra space or that there is an extra space?

Please add an extra space in.  It should look like:

if ( mso.u.range._scratchspace &&
     (mso.u.range._scratchspace ...
      mso.u.range._scratchspace ...

~Andrew
Tamas Lengyel July 19, 2016, 4:54 p.m. UTC | #6
On Tue, Jul 19, 2016 at 10:49 AM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 19/07/16 17:27, Tamas K Lengyel wrote:
>>
>>>> +{
>>>> +    int rc = 0;
>>>> +    shr_handle_t sh, ch;
>>>> +    unsigned long start =
>>>> +        range->_scratchspace ? range->_scratchspace : range->start;
>>> This can be shortened to "unsigned long start = range->_scratchspace ?:
>>> range->start;" and fit on a single line.
>> I'm not that familiar with this style of the syntax, does that have
>> the effect of setting start = _scratchspace when _scratchspace is not
>> 0?
>
> It is a GCC extension
> https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which
> allows you to omit the middle parameter if it is identical to the first.

Are we OK with using syntax that is based on a compiler extension? I
recall some cases where that was frowned upon (like using the 0b
prefix).

Tamas
Andrew Cooper July 19, 2016, 4:55 p.m. UTC | #7
On 19/07/16 17:54, Tamas K Lengyel wrote:
> On Tue, Jul 19, 2016 at 10:49 AM, Andrew Cooper
> <andrew.cooper3@citrix.com> wrote:
>> On 19/07/16 17:27, Tamas K Lengyel wrote:
>>>>> +{
>>>>> +    int rc = 0;
>>>>> +    shr_handle_t sh, ch;
>>>>> +    unsigned long start =
>>>>> +        range->_scratchspace ? range->_scratchspace : range->start;
>>>> This can be shortened to "unsigned long start = range->_scratchspace ?:
>>>> range->start;" and fit on a single line.
>>> I'm not that familiar with this style of the syntax, does that have
>>> the effect of setting start = _scratchspace when _scratchspace is not
>>> 0?
>> It is a GCC extension
>> https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which
>> allows you to omit the middle parameter if it is identical to the first.
> Are we OK with using syntax that is based on a compiler extension? I
> recall some cases where that was frowned upon (like using the 0b
> prefix).

We already use these all over the place.

The problem with 0b is that it isn't supported in all versions of GCC we
support.

~Andrew
Tamas Lengyel July 19, 2016, 4:58 p.m. UTC | #8
On Tue, Jul 19, 2016 at 10:55 AM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 19/07/16 17:54, Tamas K Lengyel wrote:
>> On Tue, Jul 19, 2016 at 10:49 AM, Andrew Cooper
>> <andrew.cooper3@citrix.com> wrote:
>>> On 19/07/16 17:27, Tamas K Lengyel wrote:
>>>>>> +{
>>>>>> +    int rc = 0;
>>>>>> +    shr_handle_t sh, ch;
>>>>>> +    unsigned long start =
>>>>>> +        range->_scratchspace ? range->_scratchspace : range->start;
>>>>> This can be shortened to "unsigned long start = range->_scratchspace ?:
>>>>> range->start;" and fit on a single line.
>>>> I'm not that familiar with this style of the syntax, does that have
>>>> the effect of setting start = _scratchspace when _scratchspace is not
>>>> 0?
>>> It is a GCC extension
>>> https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which
>>> allows you to omit the middle parameter if it is identical to the first.
>> Are we OK with using syntax that is based on a compiler extension? I
>> recall some cases where that was frowned upon (like using the 0b
>> prefix).
>
> We already use these all over the place.
>
> The problem with 0b is that it isn't supported in all versions of GCC we
> support.
>

Alright, sound good!

Thanks,
Tamas
diff mbox

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index e904bd5..0ca94cd 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2334,6 +2334,21 @@  int xc_memshr_add_to_physmap(xc_interface *xch,
                     domid_t client_domain,
                     unsigned long client_gfn);
 
+/* Allows to deduplicate a range of memory of a client domain. Using
+ * this function is equivalent of calling xc_memshr_nominate_gfn for each gfn
+ * in the two domains followed by xc_memshr_share_gfns.
+ *
+ * May fail with -EINVAL if the source and client domain have different
+ * memory size or if memory sharing is not enabled on either of the domains.
+ * May also fail with -ENOMEM if there isn't enough memory available to store
+ * the sharing metadata before deduplication can happen.
+ */
+int xc_memshr_range_share(xc_interface *xch,
+                          domid_t source_domain,
+                          domid_t client_domain,
+                          unsigned long start,
+                          unsigned long end);
+
 /* Debug calls: return the number of pages referencing the shared frame backing
  * the input argument. Should be one or greater. 
  *
diff --git a/tools/libxc/xc_memshr.c b/tools/libxc/xc_memshr.c
index deb0aa4..9eb08b7 100644
--- a/tools/libxc/xc_memshr.c
+++ b/tools/libxc/xc_memshr.c
@@ -181,6 +181,25 @@  int xc_memshr_add_to_physmap(xc_interface *xch,
     return xc_memshr_memop(xch, source_domain, &mso);
 }
 
+int xc_memshr_range_share(xc_interface *xch,
+                          domid_t source_domain,
+                          domid_t client_domain,
+                          unsigned long start,
+                          unsigned long end)
+{
+    xen_mem_sharing_op_t mso;
+
+    memset(&mso, 0, sizeof(mso));
+
+    mso.op = XENMEM_sharing_op_range_share;
+
+    mso.u.range.client_domain = client_domain;
+    mso.u.range.start = start;
+    mso.u.range.end = end;
+
+    return xc_memshr_memop(xch, source_domain, &mso);
+}
+
 int xc_memshr_domain_resume(xc_interface *xch,
                             domid_t domid)
 {
diff --git a/tools/tests/mem-sharing/memshrtool.c b/tools/tests/mem-sharing/memshrtool.c
index 437c7c9..ae1cff3 100644
--- a/tools/tests/mem-sharing/memshrtool.c
+++ b/tools/tests/mem-sharing/memshrtool.c
@@ -24,6 +24,8 @@  static int usage(const char* prog)
     printf("  nominate <domid> <gfn>  - Nominate a page for sharing.\n");
     printf("  share <domid> <gfn> <handle> <source> <source-gfn> <source-handle>\n");
     printf("                          - Share two pages.\n");
+    printf("  range <source-domid> <destination-domid> <start-gfn> <end-gfn>\n");
+    printf("                          - Share pages between domains in range.\n");
     printf("  unshare <domid> <gfn>   - Unshare a page by grabbing a writable map.\n");
     printf("  add-to-physmap <domid> <gfn> <source> <source-gfn> <source-handle>\n");
     printf("                          - Populate a page in a domain with a shared page.\n");
@@ -180,6 +182,26 @@  int main(int argc, const char** argv)
         }
         printf("Audit returned %d errors.\n", rc);
     }
+    else if( !strcasecmp(cmd, "range") )
+    {
+        domid_t sdomid, cdomid;
+        int rc;
+        unsigned long start, end;
+
+        if ( argc != 6 )
+            return usage(argv[0]);
 
+        sdomid = strtol(argv[2], NULL, 0);
+        cdomid = strtol(argv[3], NULL, 0);
+        start = strtoul(argv[4], NULL, 0);
+        end = strtoul(argv[5], NULL, 0);
+
+        rc = xc_memshr_range_share(xch, sdomid, cdomid, start, end);
+        if ( rc < 0 )
+        {
+            printf("error executing xc_memshr_range_share: %s\n", strerror(errno));
+            return rc;
+        }
+    }
     return 0;
 }
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index a522423..6d00228 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1294,6 +1294,58 @@  int relinquish_shared_pages(struct domain *d)
     return rc;
 }
 
+static int range_share(struct domain *d, struct domain *cd,
+                      struct mem_sharing_op_range *range)
+{
+    int rc = 0;
+    shr_handle_t sh, ch;
+    unsigned long start =
+        range->_scratchspace ? range->_scratchspace : range->start;
+
+    while( range->end >= start )
+    {
+        /*
+         * We only break out if we run out of memory as individual pages may
+         * legitimately be unsharable and we just want to skip over those.
+         */
+        rc = mem_sharing_nominate_page(d, start, 0, &sh);
+        if ( rc == -ENOMEM )
+            break;
+        if ( !rc )
+        {
+            rc = mem_sharing_nominate_page(cd, start, 0, &ch);
+            if ( rc == -ENOMEM )
+                break;
+            if ( !rc )
+            {
+                /* If we get here this should be guaranteed to succeed. */
+                rc = mem_sharing_share_pages(d, start, sh,
+                                             cd, start, ch);
+                ASSERT(!rc);
+            }
+        }
+
+        /* Check for continuation if it's not the last iteration. */
+        if ( range->end >= ++start && hypercall_preempt_check() )
+        {
+            rc = 1;
+            break;
+        }
+    }
+
+    range->_scratchspace = start;
+
+    /*
+     * We only propagate -ENOMEM as individual pages may fail with -EINVAL,
+     * and for range sharing we only care if -ENOMEM was encountered so we reset
+     * rc here.
+     */
+    if ( rc < 0 && rc != -ENOMEM )
+        rc = 0;
+
+    return rc;
+}
+
 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 {
     int rc;
@@ -1468,6 +1520,94 @@  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
         }
         break;
 
+        case XENMEM_sharing_op_range_share:
+        {
+            unsigned long max_sgfn, max_cgfn;
+            struct domain *cd;
+
+            rc = -EINVAL;
+            if( mso.u.range._pad[0] || mso.u.range._pad[1] ||
+                mso.u.range._pad[2] )
+                goto out;
+
+            /*
+             * We use _scratchscape for the hypercall continuation value.
+             * Ideally the user sets this to 0 in the beginning but
+             * there is no good way of enforcing that here, so we just check
+             * that it's at least in range.
+             */
+            if ( mso.u.range._scratchspace &&
+                (mso.u.range._scratchspace < mso.u.range.start ||
+                 mso.u.range._scratchspace > mso.u.range.end) )
+                goto out;
+
+            if ( !mem_sharing_enabled(d) )
+                goto out;
+
+            rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain,
+                                                   &cd);
+            if ( rc )
+                goto out;
+
+            /*
+             * We reuse XENMEM_sharing_op_share XSM check here as this is
+             * essentially the same concept repeated over multiple pages.
+             */
+            rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd,
+                                    XENMEM_sharing_op_share);
+            if ( rc )
+            {
+                rcu_unlock_domain(cd);
+                goto out;
+            }
+
+            if ( !mem_sharing_enabled(cd) )
+            {
+                rcu_unlock_domain(cd);
+                rc = -EINVAL;
+                goto out;
+            }
+
+            /*
+             * Sanity check only, the client should keep the domains paused for
+             * the duration of this op.
+             */
+            if ( !atomic_read(&d->pause_count) ||
+                 !atomic_read(&cd->pause_count) )
+            {
+                rcu_unlock_domain(cd);
+                rc = -EINVAL;
+                goto out;
+            }
+
+            max_sgfn = domain_get_maximum_gpfn(d);
+            max_cgfn = domain_get_maximum_gpfn(cd);
+
+            if ( max_sgfn < mso.u.range.start || max_sgfn < mso.u.range.end ||
+                 max_cgfn < mso.u.range.start || max_cgfn < mso.u.range.end )
+            {
+                rcu_unlock_domain(cd);
+                rc = -EINVAL;
+                goto out;
+            }
+
+            rc = range_share(d, cd, &mso.u.range);
+            rcu_unlock_domain(cd);
+
+            if ( rc > 0 )
+            {
+                if ( __copy_to_guest(arg, &mso, 1) )
+                    rc = -EFAULT;
+                else
+                    rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
+                                                       "lh", XENMEM_sharing_op,
+                                                       arg);
+            }
+            else
+                mso.u.range._scratchspace = 0;
+        }
+        break;
+
         case XENMEM_sharing_op_debug_gfn:
         {
             unsigned long gfn = mso.u.debug.u.gfn;
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 29ec571..e0bc018 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -465,6 +465,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
 #define XENMEM_sharing_op_debug_gref        5
 #define XENMEM_sharing_op_add_physmap       6
 #define XENMEM_sharing_op_audit             7
+#define XENMEM_sharing_op_range_share       8
 
 #define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
 #define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
@@ -500,7 +501,14 @@  struct xen_mem_sharing_op {
             uint64_aligned_t client_gfn;    /* IN: the client gfn */
             uint64_aligned_t client_handle; /* IN: handle to the client page */
             domid_t  client_domain; /* IN: the client domain id */
-        } share; 
+        } share;
+        struct mem_sharing_op_range {         /* OP_RANGE_SHARE */
+            uint64_aligned_t start;          /* IN: start gfn. */
+            uint64_aligned_t end;            /* IN: end gfn (inclusive) */
+            uint64_aligned_t _scratchspace;  /* Must be set to 0 */
+            domid_t client_domain;           /* IN: the client domain id */
+            uint16_t _pad[3];                /* Must be set to 0 */
+        } range;
         struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
             union {
                 uint64_aligned_t gfn;      /* IN: gfn to debug          */