Message ID | 1468876480-27373-1-git-send-email-tamas.lengyel@zentific.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 18/07/2016 22:14, Tamas K Lengyel wrote: > Currently mem-sharing can be performed on a page-by-page basis from the control > domain. However, this process is quite wasteful when a range of pages have to > be deduplicated. > > This patch introduces a new mem_sharing memop for range sharing where > the user doesn't have to separately nominate each page in both the source and > destination domain, and the looping over all pages happen in the hypervisor. > This significantly reduces the overhead of sharing a range of memory. > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com> > Acked-by: Wei Liu <wei.liu2@citrix.com> Some style nits, and one functional suggestion. If you are happy with the suggestion, then Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > index a522423..6d00228 100644 > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c > @@ -1294,6 +1294,58 @@ int relinquish_shared_pages(struct domain *d) > return rc; > } > > +static int range_share(struct domain *d, struct domain *cd, > + struct mem_sharing_op_range *range) Alignment. > +{ > + int rc = 0; > + shr_handle_t sh, ch; > + unsigned long start = > + range->_scratchspace ? range->_scratchspace : range->start; This can be shortened to "unsigned long start = range->_scratchspace ?: range->start;" and fit on a single line. > + > + while( range->end >= start ) > + { > + /* > + * We only break out if we run out of memory as individual pages may > + * legitimately be unsharable and we just want to skip over those. > + */ > + rc = mem_sharing_nominate_page(d, start, 0, &sh); > + if ( rc == -ENOMEM ) > + break; Newline here please > + if ( !rc ) > + { > + rc = mem_sharing_nominate_page(cd, start, 0, &ch); > + if ( rc == -ENOMEM ) > + break; And here. > + if ( !rc ) > + { > + /* If we get here this should be guaranteed to succeed. */ > + rc = mem_sharing_share_pages(d, start, sh, > + cd, start, ch); > + ASSERT(!rc); > + } > + } > + > + /* Check for continuation if it's not the last iteration. */ > + if ( range->end >= ++start && hypercall_preempt_check() ) > + { > + rc = 1; > + break; > + } > + } > + > + range->_scratchspace = start; > + > + /* > + * We only propagate -ENOMEM as individual pages may fail with -EINVAL, > + * and for range sharing we only care if -ENOMEM was encountered so we reset > + * rc here. > + */ > + if ( rc < 0 && rc != -ENOMEM ) Would you mind putting in an ASSERT(rc == -EINVAL) here, if we believe that to be an ok case to ignore? In the future if more errors get raised, we don't want to silently lose a more serious error which should be propagated up. > + rc = 0; > + > + return rc; > +} > + > int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) > { > int rc; > @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) > } > break; > > + case XENMEM_sharing_op_range_share: > + { > + unsigned long max_sgfn, max_cgfn; > + struct domain *cd; > + > + rc = -EINVAL; > + if( mso.u.range._pad[0] || mso.u.range._pad[1] || > + mso.u.range._pad[2] ) > + goto out; > + > + /* > + * We use _scratchscape for the hypercall continuation value. > + * Ideally the user sets this to 0 in the beginning but > + * there is no good way of enforcing that here, so we just check > + * that it's at least in range. > + */ > + if ( mso.u.range._scratchspace && > + (mso.u.range._scratchspace < mso.u.range.start || > + mso.u.range._scratchspace > mso.u.range.end) ) Alignment (extra space) for these two lines. > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h > index 29ec571..e0bc018 100644 > --- a/xen/include/public/memory.h > +++ b/xen/include/public/memory.h > @@ -500,7 +501,14 @@ struct xen_mem_sharing_op { > uint64_aligned_t client_gfn; /* IN: the client gfn */ > uint64_aligned_t client_handle; /* IN: handle to the client page */ > domid_t client_domain; /* IN: the client domain id */ > - } share; > + } share; > + struct mem_sharing_op_range { /* OP_RANGE_SHARE */ Alignment of the comment. ~Andrew > + uint64_aligned_t start; /* IN: start gfn. */ > + uint64_aligned_t end; /* IN: end gfn (inclusive) */ > + uint64_aligned_t _scratchspace; /* Must be set to 0 */ > + domid_t client_domain; /* IN: the client domain id */ > + uint16_t _pad[3]; /* Must be set to 0 */ > + } range; > struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ > union { > uint64_aligned_t gfn; /* IN: gfn to debug */
Hello Tamas, On 18/07/2016 22:14, Tamas K Lengyel wrote: > diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h > index e904bd5..0ca94cd 100644 > --- a/tools/libxc/include/xenctrl.h > +++ b/tools/libxc/include/xenctrl.h > @@ -2334,6 +2334,21 @@ int xc_memshr_add_to_physmap(xc_interface *xch, > domid_t client_domain, > unsigned long client_gfn); > > +/* Allows to deduplicate a range of memory of a client domain. Using > + * this function is equivalent of calling xc_memshr_nominate_gfn for each gfn > + * in the two domains followed by xc_memshr_share_gfns. > + * > + * May fail with -EINVAL if the source and client domain have different > + * memory size or if memory sharing is not enabled on either of the domains. > + * May also fail with -ENOMEM if there isn't enough memory available to store > + * the sharing metadata before deduplication can happen. > + */ > +int xc_memshr_range_share(xc_interface *xch, > + domid_t source_domain, > + domid_t client_domain, > + unsigned long start, > + unsigned long end); I know the rest of memshr interface in libxc is using "unsigned long". However, this should really be "uint64_t" to match the interface and avoid issue with 32-bit toolstack on 64-bit hypervisor. > + > /* Debug calls: return the number of pages referencing the shared frame backing > * the input argument. Should be one or greater. > * [...] > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > index a522423..6d00228 100644 > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c [...] > @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) > } > break; > > + case XENMEM_sharing_op_range_share: > + { > + unsigned long max_sgfn, max_cgfn; > + struct domain *cd; > + > + rc = -EINVAL; > + if( mso.u.range._pad[0] || mso.u.range._pad[1] || NIT: missing space after the "if". > + mso.u.range._pad[2] ) > + goto out; > + Regards,
On Mon, Jul 18, 2016 at 3:47 PM, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > On 18/07/2016 22:14, Tamas K Lengyel wrote: >> Currently mem-sharing can be performed on a page-by-page basis from the control >> domain. However, this process is quite wasteful when a range of pages have to >> be deduplicated. >> >> This patch introduces a new mem_sharing memop for range sharing where >> the user doesn't have to separately nominate each page in both the source and >> destination domain, and the looping over all pages happen in the hypervisor. >> This significantly reduces the overhead of sharing a range of memory. >> >> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com> >> Acked-by: Wei Liu <wei.liu2@citrix.com> > > Some style nits, and one functional suggestion. > > If you are happy with the suggestion, then Reviewed-by: Andrew Cooper > <andrew.cooper3@citrix.com> Thanks! > >> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c >> index a522423..6d00228 100644 >> --- a/xen/arch/x86/mm/mem_sharing.c >> +++ b/xen/arch/x86/mm/mem_sharing.c >> @@ -1294,6 +1294,58 @@ int relinquish_shared_pages(struct domain *d) >> return rc; >> } >> >> +static int range_share(struct domain *d, struct domain *cd, >> + struct mem_sharing_op_range *range) > > Alignment. > >> +{ >> + int rc = 0; >> + shr_handle_t sh, ch; >> + unsigned long start = >> + range->_scratchspace ? range->_scratchspace : range->start; > > This can be shortened to "unsigned long start = range->_scratchspace ?: > range->start;" and fit on a single line. I'm not that familiar with this style of the syntax, does that have the effect of setting start = _scratchspace when _scratchspace is not 0? > >> + >> + while( range->end >= start ) >> + { >> + /* >> + * We only break out if we run out of memory as individual pages may >> + * legitimately be unsharable and we just want to skip over those. >> + */ >> + rc = mem_sharing_nominate_page(d, start, 0, &sh); >> + if ( rc == -ENOMEM ) >> + break; > > Newline here please > >> + if ( !rc ) >> + { >> + rc = mem_sharing_nominate_page(cd, start, 0, &ch); >> + if ( rc == -ENOMEM ) >> + break; > > And here. > >> + if ( !rc ) >> + { >> + /* If we get here this should be guaranteed to succeed. */ >> + rc = mem_sharing_share_pages(d, start, sh, >> + cd, start, ch); >> + ASSERT(!rc); >> + } >> + } >> + >> + /* Check for continuation if it's not the last iteration. */ >> + if ( range->end >= ++start && hypercall_preempt_check() ) >> + { >> + rc = 1; >> + break; >> + } >> + } >> + >> + range->_scratchspace = start; >> + >> + /* >> + * We only propagate -ENOMEM as individual pages may fail with -EINVAL, >> + * and for range sharing we only care if -ENOMEM was encountered so we reset >> + * rc here. >> + */ >> + if ( rc < 0 && rc != -ENOMEM ) > > Would you mind putting in an ASSERT(rc == -EINVAL) here, if we believe > that to be an ok case to ignore? In the future if more errors get > raised, we don't want to silently lose a more serious error which should > be propagated up. Well, in that case I can just change the if statement to rc == -EINVAL. > >> + rc = 0; >> + >> + return rc; >> +} >> + >> int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) >> { >> int rc; >> @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) >> } >> break; >> >> + case XENMEM_sharing_op_range_share: >> + { >> + unsigned long max_sgfn, max_cgfn; >> + struct domain *cd; >> + >> + rc = -EINVAL; >> + if( mso.u.range._pad[0] || mso.u.range._pad[1] || >> + mso.u.range._pad[2] ) >> + goto out; >> + >> + /* >> + * We use _scratchscape for the hypercall continuation value. >> + * Ideally the user sets this to 0 in the beginning but >> + * there is no good way of enforcing that here, so we just check >> + * that it's at least in range. >> + */ >> + if ( mso.u.range._scratchspace && >> + (mso.u.range._scratchspace < mso.u.range.start || >> + mso.u.range._scratchspace > mso.u.range.end) ) > > Alignment (extra space) for these two lines. You mean add an extra space or that there is an extra space? > >> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h >> index 29ec571..e0bc018 100644 >> --- a/xen/include/public/memory.h >> +++ b/xen/include/public/memory.h >> @@ -500,7 +501,14 @@ struct xen_mem_sharing_op { >> uint64_aligned_t client_gfn; /* IN: the client gfn */ >> uint64_aligned_t client_handle; /* IN: handle to the client page */ >> domid_t client_domain; /* IN: the client domain id */ >> - } share; >> + } share; >> + struct mem_sharing_op_range { /* OP_RANGE_SHARE */ > > Alignment of the comment. > > ~Andrew > >> + uint64_aligned_t start; /* IN: start gfn. */ >> + uint64_aligned_t end; /* IN: end gfn (inclusive) */ >> + uint64_aligned_t _scratchspace; /* Must be set to 0 */ >> + domid_t client_domain; /* IN: the client domain id */ >> + uint16_t _pad[3]; /* Must be set to 0 */ >> + } range; >> struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ >> union { >> uint64_aligned_t gfn; /* IN: gfn to debug */ >
On Tue, Jul 19, 2016 at 1:54 AM, Julien Grall <julien.grall@arm.com> wrote: > Hello Tamas, > > On 18/07/2016 22:14, Tamas K Lengyel wrote: >> >> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h >> index e904bd5..0ca94cd 100644 >> --- a/tools/libxc/include/xenctrl.h >> +++ b/tools/libxc/include/xenctrl.h >> @@ -2334,6 +2334,21 @@ int xc_memshr_add_to_physmap(xc_interface *xch, >> domid_t client_domain, >> unsigned long client_gfn); >> >> +/* Allows to deduplicate a range of memory of a client domain. Using >> + * this function is equivalent of calling xc_memshr_nominate_gfn for each >> gfn >> + * in the two domains followed by xc_memshr_share_gfns. >> + * >> + * May fail with -EINVAL if the source and client domain have different >> + * memory size or if memory sharing is not enabled on either of the >> domains. >> + * May also fail with -ENOMEM if there isn't enough memory available to >> store >> + * the sharing metadata before deduplication can happen. >> + */ >> +int xc_memshr_range_share(xc_interface *xch, >> + domid_t source_domain, >> + domid_t client_domain, >> + unsigned long start, >> + unsigned long end); > > > I know the rest of memshr interface in libxc is using "unsigned long". > However, this should really be "uint64_t" to match the interface and avoid > issue with 32-bit toolstack on 64-bit hypervisor. Sounds good to me. > >> + >> /* Debug calls: return the number of pages referencing the shared frame >> backing >> * the input argument. Should be one or greater. >> * > > > [...] > >> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c >> index a522423..6d00228 100644 >> --- a/xen/arch/x86/mm/mem_sharing.c >> +++ b/xen/arch/x86/mm/mem_sharing.c > > > [...] > >> @@ -1468,6 +1520,94 @@ int >> mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) >> } >> break; >> >> + case XENMEM_sharing_op_range_share: >> + { >> + unsigned long max_sgfn, max_cgfn; >> + struct domain *cd; >> + >> + rc = -EINVAL; >> + if( mso.u.range._pad[0] || mso.u.range._pad[1] || > > > NIT: missing space after the "if". > >> + mso.u.range._pad[2] ) >> + goto out; >> + > > > Regards, > > -- > Julien Grall Thanks! Tamas
On 19/07/16 17:27, Tamas K Lengyel wrote: > >>> +{ >>> + int rc = 0; >>> + shr_handle_t sh, ch; >>> + unsigned long start = >>> + range->_scratchspace ? range->_scratchspace : range->start; >> This can be shortened to "unsigned long start = range->_scratchspace ?: >> range->start;" and fit on a single line. > I'm not that familiar with this style of the syntax, does that have > the effect of setting start = _scratchspace when _scratchspace is not > 0? It is a GCC extension https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which allows you to omit the middle parameter if it is identical to the first. It is very useful for chaining together a load of items where you want to stop at the first non-zero one. x = a ?: b ?: c ?: d; but can also be used with functions calls which 0 success, nonzero error semantics: rc = a() ?: b() ?: c() ?: d(); If you don't need to do any special cleanup in-between them. >>> + /* >>> + * We only propagate -ENOMEM as individual pages may fail with -EINVAL, >>> + * and for range sharing we only care if -ENOMEM was encountered so we reset >>> + * rc here. >>> + */ >>> + if ( rc < 0 && rc != -ENOMEM ) >> Would you mind putting in an ASSERT(rc == -EINVAL) here, if we believe >> that to be an ok case to ignore? In the future if more errors get >> raised, we don't want to silently lose a more serious error which should >> be propagated up. > Well, in that case I can just change the if statement to rc == -EINVAL. That is a much better suggestion. >>> @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) >>> } >>> break; >>> >>> + case XENMEM_sharing_op_range_share: >>> + { >>> + unsigned long max_sgfn, max_cgfn; >>> + struct domain *cd; >>> + >>> + rc = -EINVAL; >>> + if( mso.u.range._pad[0] || mso.u.range._pad[1] || >>> + mso.u.range._pad[2] ) >>> + goto out; >>> + >>> + /* >>> + * We use _scratchscape for the hypercall continuation value. >>> + * Ideally the user sets this to 0 in the beginning but >>> + * there is no good way of enforcing that here, so we just check >>> + * that it's at least in range. >>> + */ >>> + if ( mso.u.range._scratchspace && >>> + (mso.u.range._scratchspace < mso.u.range.start || >>> + mso.u.range._scratchspace > mso.u.range.end) ) >> Alignment (extra space) for these two lines. > You mean add an extra space or that there is an extra space? Please add an extra space in. It should look like: if ( mso.u.range._scratchspace && (mso.u.range._scratchspace ... mso.u.range._scratchspace ... ~Andrew
On Tue, Jul 19, 2016 at 10:49 AM, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > On 19/07/16 17:27, Tamas K Lengyel wrote: >> >>>> +{ >>>> + int rc = 0; >>>> + shr_handle_t sh, ch; >>>> + unsigned long start = >>>> + range->_scratchspace ? range->_scratchspace : range->start; >>> This can be shortened to "unsigned long start = range->_scratchspace ?: >>> range->start;" and fit on a single line. >> I'm not that familiar with this style of the syntax, does that have >> the effect of setting start = _scratchspace when _scratchspace is not >> 0? > > It is a GCC extension > https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which > allows you to omit the middle parameter if it is identical to the first. Are we OK with using syntax that is based on a compiler extension? I recall some cases where that was frowned upon (like using the 0b prefix). Tamas
On 19/07/16 17:54, Tamas K Lengyel wrote: > On Tue, Jul 19, 2016 at 10:49 AM, Andrew Cooper > <andrew.cooper3@citrix.com> wrote: >> On 19/07/16 17:27, Tamas K Lengyel wrote: >>>>> +{ >>>>> + int rc = 0; >>>>> + shr_handle_t sh, ch; >>>>> + unsigned long start = >>>>> + range->_scratchspace ? range->_scratchspace : range->start; >>>> This can be shortened to "unsigned long start = range->_scratchspace ?: >>>> range->start;" and fit on a single line. >>> I'm not that familiar with this style of the syntax, does that have >>> the effect of setting start = _scratchspace when _scratchspace is not >>> 0? >> It is a GCC extension >> https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which >> allows you to omit the middle parameter if it is identical to the first. > Are we OK with using syntax that is based on a compiler extension? I > recall some cases where that was frowned upon (like using the 0b > prefix). We already use these all over the place. The problem with 0b is that it isn't supported in all versions of GCC we support. ~Andrew
On Tue, Jul 19, 2016 at 10:55 AM, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > On 19/07/16 17:54, Tamas K Lengyel wrote: >> On Tue, Jul 19, 2016 at 10:49 AM, Andrew Cooper >> <andrew.cooper3@citrix.com> wrote: >>> On 19/07/16 17:27, Tamas K Lengyel wrote: >>>>>> +{ >>>>>> + int rc = 0; >>>>>> + shr_handle_t sh, ch; >>>>>> + unsigned long start = >>>>>> + range->_scratchspace ? range->_scratchspace : range->start; >>>>> This can be shortened to "unsigned long start = range->_scratchspace ?: >>>>> range->start;" and fit on a single line. >>>> I'm not that familiar with this style of the syntax, does that have >>>> the effect of setting start = _scratchspace when _scratchspace is not >>>> 0? >>> It is a GCC extension >>> https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/Conditionals.html which >>> allows you to omit the middle parameter if it is identical to the first. >> Are we OK with using syntax that is based on a compiler extension? I >> recall some cases where that was frowned upon (like using the 0b >> prefix). > > We already use these all over the place. > > The problem with 0b is that it isn't supported in all versions of GCC we > support. > Alright, sound good! Thanks, Tamas
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h index e904bd5..0ca94cd 100644 --- a/tools/libxc/include/xenctrl.h +++ b/tools/libxc/include/xenctrl.h @@ -2334,6 +2334,21 @@ int xc_memshr_add_to_physmap(xc_interface *xch, domid_t client_domain, unsigned long client_gfn); +/* Allows to deduplicate a range of memory of a client domain. Using + * this function is equivalent of calling xc_memshr_nominate_gfn for each gfn + * in the two domains followed by xc_memshr_share_gfns. + * + * May fail with -EINVAL if the source and client domain have different + * memory size or if memory sharing is not enabled on either of the domains. + * May also fail with -ENOMEM if there isn't enough memory available to store + * the sharing metadata before deduplication can happen. + */ +int xc_memshr_range_share(xc_interface *xch, + domid_t source_domain, + domid_t client_domain, + unsigned long start, + unsigned long end); + /* Debug calls: return the number of pages referencing the shared frame backing * the input argument. Should be one or greater. * diff --git a/tools/libxc/xc_memshr.c b/tools/libxc/xc_memshr.c index deb0aa4..9eb08b7 100644 --- a/tools/libxc/xc_memshr.c +++ b/tools/libxc/xc_memshr.c @@ -181,6 +181,25 @@ int xc_memshr_add_to_physmap(xc_interface *xch, return xc_memshr_memop(xch, source_domain, &mso); } +int xc_memshr_range_share(xc_interface *xch, + domid_t source_domain, + domid_t client_domain, + unsigned long start, + unsigned long end) +{ + xen_mem_sharing_op_t mso; + + memset(&mso, 0, sizeof(mso)); + + mso.op = XENMEM_sharing_op_range_share; + + mso.u.range.client_domain = client_domain; + mso.u.range.start = start; + mso.u.range.end = end; + + return xc_memshr_memop(xch, source_domain, &mso); +} + int xc_memshr_domain_resume(xc_interface *xch, domid_t domid) { diff --git a/tools/tests/mem-sharing/memshrtool.c b/tools/tests/mem-sharing/memshrtool.c index 437c7c9..ae1cff3 100644 --- a/tools/tests/mem-sharing/memshrtool.c +++ b/tools/tests/mem-sharing/memshrtool.c @@ -24,6 +24,8 @@ static int usage(const char* prog) printf(" nominate <domid> <gfn> - Nominate a page for sharing.\n"); printf(" share <domid> <gfn> <handle> <source> <source-gfn> <source-handle>\n"); printf(" - Share two pages.\n"); + printf(" range <source-domid> <destination-domid> <start-gfn> <end-gfn>\n"); + printf(" - Share pages between domains in range.\n"); printf(" unshare <domid> <gfn> - Unshare a page by grabbing a writable map.\n"); printf(" add-to-physmap <domid> <gfn> <source> <source-gfn> <source-handle>\n"); printf(" - Populate a page in a domain with a shared page.\n"); @@ -180,6 +182,26 @@ int main(int argc, const char** argv) } printf("Audit returned %d errors.\n", rc); } + else if( !strcasecmp(cmd, "range") ) + { + domid_t sdomid, cdomid; + int rc; + unsigned long start, end; + + if ( argc != 6 ) + return usage(argv[0]); + sdomid = strtol(argv[2], NULL, 0); + cdomid = strtol(argv[3], NULL, 0); + start = strtoul(argv[4], NULL, 0); + end = strtoul(argv[5], NULL, 0); + + rc = xc_memshr_range_share(xch, sdomid, cdomid, start, end); + if ( rc < 0 ) + { + printf("error executing xc_memshr_range_share: %s\n", strerror(errno)); + return rc; + } + } return 0; } diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c index a522423..6d00228 100644 --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -1294,6 +1294,58 @@ int relinquish_shared_pages(struct domain *d) return rc; } +static int range_share(struct domain *d, struct domain *cd, + struct mem_sharing_op_range *range) +{ + int rc = 0; + shr_handle_t sh, ch; + unsigned long start = + range->_scratchspace ? range->_scratchspace : range->start; + + while( range->end >= start ) + { + /* + * We only break out if we run out of memory as individual pages may + * legitimately be unsharable and we just want to skip over those. + */ + rc = mem_sharing_nominate_page(d, start, 0, &sh); + if ( rc == -ENOMEM ) + break; + if ( !rc ) + { + rc = mem_sharing_nominate_page(cd, start, 0, &ch); + if ( rc == -ENOMEM ) + break; + if ( !rc ) + { + /* If we get here this should be guaranteed to succeed. */ + rc = mem_sharing_share_pages(d, start, sh, + cd, start, ch); + ASSERT(!rc); + } + } + + /* Check for continuation if it's not the last iteration. */ + if ( range->end >= ++start && hypercall_preempt_check() ) + { + rc = 1; + break; + } + } + + range->_scratchspace = start; + + /* + * We only propagate -ENOMEM as individual pages may fail with -EINVAL, + * and for range sharing we only care if -ENOMEM was encountered so we reset + * rc here. + */ + if ( rc < 0 && rc != -ENOMEM ) + rc = 0; + + return rc; +} + int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) { int rc; @@ -1468,6 +1520,94 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) } break; + case XENMEM_sharing_op_range_share: + { + unsigned long max_sgfn, max_cgfn; + struct domain *cd; + + rc = -EINVAL; + if( mso.u.range._pad[0] || mso.u.range._pad[1] || + mso.u.range._pad[2] ) + goto out; + + /* + * We use _scratchscape for the hypercall continuation value. + * Ideally the user sets this to 0 in the beginning but + * there is no good way of enforcing that here, so we just check + * that it's at least in range. + */ + if ( mso.u.range._scratchspace && + (mso.u.range._scratchspace < mso.u.range.start || + mso.u.range._scratchspace > mso.u.range.end) ) + goto out; + + if ( !mem_sharing_enabled(d) ) + goto out; + + rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain, + &cd); + if ( rc ) + goto out; + + /* + * We reuse XENMEM_sharing_op_share XSM check here as this is + * essentially the same concept repeated over multiple pages. + */ + rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, + XENMEM_sharing_op_share); + if ( rc ) + { + rcu_unlock_domain(cd); + goto out; + } + + if ( !mem_sharing_enabled(cd) ) + { + rcu_unlock_domain(cd); + rc = -EINVAL; + goto out; + } + + /* + * Sanity check only, the client should keep the domains paused for + * the duration of this op. + */ + if ( !atomic_read(&d->pause_count) || + !atomic_read(&cd->pause_count) ) + { + rcu_unlock_domain(cd); + rc = -EINVAL; + goto out; + } + + max_sgfn = domain_get_maximum_gpfn(d); + max_cgfn = domain_get_maximum_gpfn(cd); + + if ( max_sgfn < mso.u.range.start || max_sgfn < mso.u.range.end || + max_cgfn < mso.u.range.start || max_cgfn < mso.u.range.end ) + { + rcu_unlock_domain(cd); + rc = -EINVAL; + goto out; + } + + rc = range_share(d, cd, &mso.u.range); + rcu_unlock_domain(cd); + + if ( rc > 0 ) + { + if ( __copy_to_guest(arg, &mso, 1) ) + rc = -EFAULT; + else + rc = hypercall_create_continuation(__HYPERVISOR_memory_op, + "lh", XENMEM_sharing_op, + arg); + } + else + mso.u.range._scratchspace = 0; + } + break; + case XENMEM_sharing_op_debug_gfn: { unsigned long gfn = mso.u.debug.u.gfn; diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h index 29ec571..e0bc018 100644 --- a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -465,6 +465,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); #define XENMEM_sharing_op_debug_gref 5 #define XENMEM_sharing_op_add_physmap 6 #define XENMEM_sharing_op_audit 7 +#define XENMEM_sharing_op_range_share 8 #define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) #define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) @@ -500,7 +501,14 @@ struct xen_mem_sharing_op { uint64_aligned_t client_gfn; /* IN: the client gfn */ uint64_aligned_t client_handle; /* IN: handle to the client page */ domid_t client_domain; /* IN: the client domain id */ - } share; + } share; + struct mem_sharing_op_range { /* OP_RANGE_SHARE */ + uint64_aligned_t start; /* IN: start gfn. */ + uint64_aligned_t end; /* IN: end gfn (inclusive) */ + uint64_aligned_t _scratchspace; /* Must be set to 0 */ + domid_t client_domain; /* IN: the client domain id */ + uint16_t _pad[3]; /* Must be set to 0 */ + } range; struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ union { uint64_aligned_t gfn; /* IN: gfn to debug */