Message ID | 20170804022025.25293-5-blackskygg@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
I skim through this patch and have some questions. On Fri, Aug 04, 2017 at 10:20:25AM +0800, Zhongze Liu wrote: > + > +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, > + libxl_static_shm *sshm) > +{ > + int rc, aborting; > + char *sshm_path, *dom_path, *dom_role_path; > + char *ents[11]; > + struct xs_permissions noperm; > + xs_transaction_t xt = XBT_NULL; > + > + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); > + dom_path = libxl__xs_get_dompath(gc, domid); > + /* the domain should be in xenstore by now */ > + assert(dom_path); > + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); > + > + > + retry_transaction: > + /* Within the transaction, goto out by default means aborting */ > + aborting = 1; > + rc = libxl__xs_transaction_start(gc, &xt); > + if (rc) { goto out; } if (rc) goto out; > + > + if (NULL == libxl__xs_read(gc, xt, sshm_path)) { !libxl__xs_read We don't use "Yoda conditions". > + rc = libxl__xs_write_checked(gc, xt, dom_role_path, "master"); > + if (rc) { goto out; }; > + > + ents[0] = "master"; > + ents[1] = GCSPRINTF("%"PRIu32, domid); > + ents[2] = "begin"; > + ents[3] = GCSPRINTF("0x%"PRIx64, sshm->begin); > + ents[4] = "end"; > + ents[5] = GCSPRINTF("0x%"PRIx64, sshm->end); > + ents[6] = "prot"; > + ents[7] = libxl__strdup(gc, libxl_sshm_prot_to_string(sshm->prot)); > + ents[8] = "cache_policy"; > + ents[9] = libxl__strdup(gc, > + libxl_sshm_cachepolicy_to_string(sshm->cache_policy)); > + ents[10] = NULL; > + These aren't going to change from iteration to iteration, so you can prepare them before entering the loop. BTW it would be cleaner to use a for (;;) {} or while (1) {} loop to implement this instead of using goto label. You can then eliminate the TRY_TRANSACTION_OR_FAIL macro. > + /* could only be accessed by Dom0 */ > + noperm.id = 0; > + noperm.perms = XS_PERM_NONE; > + libxl__xs_mknod(gc, xt, sshm_path, &noperm, 1); > + libxl__xs_writev(gc, xt, sshm_path, ents); > + } else { > + SSHM_ERROR(domid, sshm->id, "can only have one master."); > + rc = ERROR_FAIL; > + goto out; > + } > + > + aborting = rc = 0; > + > + out: > + TRY_TRANSACTION_OR_FAIL(aborting); > + return rc; > +} > + [...] > +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, > + uint32_t domid, const char *id, bool master) > +{ > + char *sshm_path, *slaves_path; > + > + sshm_path = libxl__xs_get_sshmpath(gc, id); > + slaves_path = GCSPRINTF("%s/slaves", sshm_path); > + > + if (master) { > + /* we know that domid can't be both a master and a slave for one id, Is this enforced in code? > + * so the number of slaves won't change during iteration. Simply check > + * sshm_path/slavea to tell if there are still living slaves. */ > + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { > + SSHM_ERROR(domid, id, > + "can't remove master when there are living slaves."); > + return ERROR_FAIL; Isn't this going to leave a half-destructed domain in userspace components? Maybe we should proceed anyway? > + } > + libxl__xs_path_cleanup(gc, xt, sshm_path); > + } else { > + libxl__xs_path_cleanup(gc, xt, > + GCSPRINTF("%s/%"PRIu32, slaves_path, domid)); Is this really enough? What will happen to the mapping? You merely delete the xenstore node for it but the mapping is still there. I suppose you're relying on the hypervisor to remove the mapping from p2m? > + } > + > + return 0; > +} > + [...] > diff --git a/tools/libxl/libxl_xshelp.c b/tools/libxl/libxl_xshelp.c > index c4a18df353..d91fbf5fda 100644 > --- a/tools/libxl/libxl_xshelp.c > +++ b/tools/libxl/libxl_xshelp.c > @@ -193,6 +193,14 @@ char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid) > return s; > } > > +char *libxl__xs_get_sshmpath(libxl__gc *gc, const char *id) > +{ > + char *s = GCSPRINTF("/local/static_shm/%s", id); > + if (!s) > + LOGE(ERROR, "cannot allocate static shm path"); GCSPRINTF can't fail. You can eliminate the check. > + return s; > +} > + > int libxl__xs_read_mandatory(libxl__gc *gc, xs_transaction_t t, > const char *path, const char **result_out) > { > -- > 2.13.3 >
Hi Wei, Thank you for reviewing my patch. 2017-08-04 23:20 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: > I skim through this patch and have some questions. > > On Fri, Aug 04, 2017 at 10:20:25AM +0800, Zhongze Liu wrote: >> + >> +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, >> + libxl_static_shm *sshm) >> +{ >> + int rc, aborting; >> + char *sshm_path, *dom_path, *dom_role_path; >> + char *ents[11]; >> + struct xs_permissions noperm; >> + xs_transaction_t xt = XBT_NULL; >> + >> + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); >> + dom_path = libxl__xs_get_dompath(gc, domid); >> + /* the domain should be in xenstore by now */ >> + assert(dom_path); >> + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); >> + >> + >> + retry_transaction: >> + /* Within the transaction, goto out by default means aborting */ >> + aborting = 1; >> + rc = libxl__xs_transaction_start(gc, &xt); >> + if (rc) { goto out; } > > if (rc) goto out; OK. Will remove all the {}. BTW, do I have to place "goto out;" in a newline? > >> + >> + if (NULL == libxl__xs_read(gc, xt, sshm_path)) { > > !libxl__xs_read > > We don't use "Yoda conditions". I'll turn all the Yoda conditions into "natural" ones. > >> + rc = libxl__xs_write_checked(gc, xt, dom_role_path, "master"); >> + if (rc) { goto out; }; >> + >> + ents[0] = "master"; >> + ents[1] = GCSPRINTF("%"PRIu32, domid); >> + ents[2] = "begin"; >> + ents[3] = GCSPRINTF("0x%"PRIx64, sshm->begin); >> + ents[4] = "end"; >> + ents[5] = GCSPRINTF("0x%"PRIx64, sshm->end); >> + ents[6] = "prot"; >> + ents[7] = libxl__strdup(gc, libxl_sshm_prot_to_string(sshm->prot)); >> + ents[8] = "cache_policy"; >> + ents[9] = libxl__strdup(gc, >> + libxl_sshm_cachepolicy_to_string(sshm->cache_policy)); >> + ents[10] = NULL; >> + > > These aren't going to change from iteration to iteration, so you can > prepare them before entering the loop. > > BTW it would be cleaner to use a for (;;) {} or while (1) {} loop to > implement this instead of using goto label. You can then eliminate the > TRY_TRANSACTION_OR_FAIL macro. OK. I'll move the entries out of the iteration. And yes, using an infinite loop and break on appropriate conditions will make it more concise. > >> + /* could only be accessed by Dom0 */ >> + noperm.id = 0; >> + noperm.perms = XS_PERM_NONE; >> + libxl__xs_mknod(gc, xt, sshm_path, &noperm, 1); >> + libxl__xs_writev(gc, xt, sshm_path, ents); >> + } else { >> + SSHM_ERROR(domid, sshm->id, "can only have one master."); >> + rc = ERROR_FAIL; >> + goto out; >> + } >> + >> + aborting = rc = 0; >> + >> + out: >> + TRY_TRANSACTION_OR_FAIL(aborting); >> + return rc; >> +} >> + > [...] >> +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, >> + uint32_t domid, const char *id, bool master) >> +{ >> + char *sshm_path, *slaves_path; >> + >> + sshm_path = libxl__xs_get_sshmpath(gc, id); >> + slaves_path = GCSPRINTF("%s/slaves", sshm_path); >> + >> + if (master) { >> + /* we know that domid can't be both a master and a slave for one id, > > Is this enforced in code? Yes...and...no. I've done this in libxl__sshm_add_slave() by doing: + if (NULL != libxl__xs_read(gc, xt, dom_sshm_path)) { + SSHM_ERROR(domid, sshm->id, + "domain tried to share the same region twice."); + rc = ERROR_FAIL; + goto out; + } Maybe the comment is a little bit confusing. What I was planning to do is to place such a check in both *_add_slave() and *_add_master(), so that one ID can't appear twice within one domain, so that one domain will not be able to be both a master and a slave. > >> + * so the number of slaves won't change during iteration. Simply check >> + * sshm_path/slavea to tell if there are still living slaves. */ >> + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { >> + SSHM_ERROR(domid, id, >> + "can't remove master when there are living slaves."); >> + return ERROR_FAIL; > > Isn't this going to leave a half-destructed domain in userspace > components? Maybe we should proceed anyway? This is also among the points that I'm not very sure. What is the best way to handle this type of error during domain destruction? > >> + } >> + libxl__xs_path_cleanup(gc, xt, sshm_path); >> + } else { >> + libxl__xs_path_cleanup(gc, xt, >> + GCSPRINTF("%s/%"PRIu32, slaves_path, domid)); > > Is this really enough? What will happen to the mapping? You merely > delete the xenstore node for it but the mapping is still there. > > I suppose you're relying on the hypervisor to remove the mapping from > p2m? Yes, the "sharing" here simply means adding one physical page to the slave domains stage-2 mapping. The only thing that's changed is the refcount, and I rely one the teardown code to decrease the refcount, which is implemented on ARM but unimplemented on x86. > >> + } >> + >> + return 0; >> +} >> + > [...] >> diff --git a/tools/libxl/libxl_xshelp.c b/tools/libxl/libxl_xshelp.c >> index c4a18df353..d91fbf5fda 100644 >> --- a/tools/libxl/libxl_xshelp.c >> +++ b/tools/libxl/libxl_xshelp.c >> @@ -193,6 +193,14 @@ char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid) >> return s; >> } >> >> +char *libxl__xs_get_sshmpath(libxl__gc *gc, const char *id) >> +{ >> + char *s = GCSPRINTF("/local/static_shm/%s", id); >> + if (!s) >> + LOGE(ERROR, "cannot allocate static shm path"); > > GCSPRINTF can't fail. You can eliminate the check. I was also confused about the other similar checks around the file since GCSPRINTF will die on failure. Em...It seems that I've copied the previous errors. Then I'll remove this function and replace it with a macro in libxl_sshm.c instead. > >> + return s; >> +} >> + >> int libxl__xs_read_mandatory(libxl__gc *gc, xs_transaction_t t, >> const char *path, const char **result_out) >> { >> -- >> 2.13.3 >> Cheers, Zhongze Liu.
On Sat, Aug 05, 2017 at 01:26:37AM +0800, Zhongze Liu wrote: > Hi Wei, > > Thank you for reviewing my patch. > > 2017-08-04 23:20 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: > > I skim through this patch and have some questions. > > > > On Fri, Aug 04, 2017 at 10:20:25AM +0800, Zhongze Liu wrote: > >> + > >> +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, > >> + libxl_static_shm *sshm) > >> +{ > >> + int rc, aborting; > >> + char *sshm_path, *dom_path, *dom_role_path; > >> + char *ents[11]; > >> + struct xs_permissions noperm; > >> + xs_transaction_t xt = XBT_NULL; > >> + > >> + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); > >> + dom_path = libxl__xs_get_dompath(gc, domid); > >> + /* the domain should be in xenstore by now */ > >> + assert(dom_path); > >> + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); > >> + > >> + > >> + retry_transaction: > >> + /* Within the transaction, goto out by default means aborting */ > >> + aborting = 1; > >> + rc = libxl__xs_transaction_start(gc, &xt); > >> + if (rc) { goto out; } > > > > if (rc) goto out; > > OK. Will remove all the {}. BTW, do I have to place "goto out;" in a newline? > Youc can look for examples in existing code and follow those. [...] > >> +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, > >> + uint32_t domid, const char *id, bool master) > >> +{ > >> + char *sshm_path, *slaves_path; > >> + > >> + sshm_path = libxl__xs_get_sshmpath(gc, id); > >> + slaves_path = GCSPRINTF("%s/slaves", sshm_path); > >> + > >> + if (master) { > >> + /* we know that domid can't be both a master and a slave for one id, > > > > Is this enforced in code? > > Yes...and...no. I've done this in libxl__sshm_add_slave() by doing: > > + if (NULL != libxl__xs_read(gc, xt, dom_sshm_path)) { > + SSHM_ERROR(domid, sshm->id, > + "domain tried to share the same region twice."); > + rc = ERROR_FAIL; > + goto out; > + } > > Maybe the comment is a little bit confusing. What I was planning to do is to > place such a check in both *_add_slave() and *_add_master(), so that one > ID can't appear twice within one domain, so that one domain will not be able > to be both a master and a slave. > OK this sounds plausible. > > > >> + * so the number of slaves won't change during iteration. Simply check > >> + * sshm_path/slavea to tell if there are still living slaves. */ > >> + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { > >> + SSHM_ERROR(domid, id, > >> + "can't remove master when there are living slaves."); > >> + return ERROR_FAIL; > > > > Isn't this going to leave a half-destructed domain in userspace > > components? Maybe we should proceed anyway? > > This is also among the points that I'm not very sure. What is the best way > to handle this type of error during domain destruction? > I think we should destroy everything in relation to the guest in Dom0 (or other service domains). Some pages for the master guests might be referenced by slaves, but they will eventually be freed (hence the domain struct will be freed) within Xen. Do experiment with this to see if my prediction is right. It also occurs to me you need to guard against circular references. That is, DomA and DomB have a mutual master-slave relationship. > >> +char *libxl__xs_get_sshmpath(libxl__gc *gc, const char *id) > >> +{ > >> + char *s = GCSPRINTF("/local/static_shm/%s", id); > >> + if (!s) > >> + LOGE(ERROR, "cannot allocate static shm path"); > > > > GCSPRINTF can't fail. You can eliminate the check. > > I was also confused about the other similar checks around the file > since GCSPRINTF will die on failure. Em...It seems that I've copied > the previous errors. > > Then I'll remove this function and replace it with a macro in > libxl_sshm.c instead. > Using a static inline function is better.
On Tue, Aug 08, 2017 at 11:49:35AM +0100, Wei Liu wrote: > On Sat, Aug 05, 2017 at 01:26:37AM +0800, Zhongze Liu wrote: > > Hi Wei, > > > > Thank you for reviewing my patch. > > > > 2017-08-04 23:20 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: > > > I skim through this patch and have some questions. > > > > > > On Fri, Aug 04, 2017 at 10:20:25AM +0800, Zhongze Liu wrote: > > >> + > > >> +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, > > >> + libxl_static_shm *sshm) > > >> +{ > > >> + int rc, aborting; > > >> + char *sshm_path, *dom_path, *dom_role_path; > > >> + char *ents[11]; > > >> + struct xs_permissions noperm; > > >> + xs_transaction_t xt = XBT_NULL; > > >> + > > >> + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); > > >> + dom_path = libxl__xs_get_dompath(gc, domid); > > >> + /* the domain should be in xenstore by now */ > > >> + assert(dom_path); > > >> + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); > > >> + > > >> + > > >> + retry_transaction: > > >> + /* Within the transaction, goto out by default means aborting */ > > >> + aborting = 1; > > >> + rc = libxl__xs_transaction_start(gc, &xt); > > >> + if (rc) { goto out; } > > > > > > if (rc) goto out; > > > > OK. Will remove all the {}. BTW, do I have to place "goto out;" in a newline? > > > > Youc can look for examples in existing code and follow those. > > [...] > > >> +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, > > >> + uint32_t domid, const char *id, bool master) > > >> +{ > > >> + char *sshm_path, *slaves_path; > > >> + > > >> + sshm_path = libxl__xs_get_sshmpath(gc, id); > > >> + slaves_path = GCSPRINTF("%s/slaves", sshm_path); > > >> + > > >> + if (master) { > > >> + /* we know that domid can't be both a master and a slave for one id, > > > > > > Is this enforced in code? > > > > Yes...and...no. I've done this in libxl__sshm_add_slave() by doing: > > > > + if (NULL != libxl__xs_read(gc, xt, dom_sshm_path)) { > > + SSHM_ERROR(domid, sshm->id, > > + "domain tried to share the same region twice."); > > + rc = ERROR_FAIL; > > + goto out; > > + } > > > > Maybe the comment is a little bit confusing. What I was planning to do is to > > place such a check in both *_add_slave() and *_add_master(), so that one > > ID can't appear twice within one domain, so that one domain will not be able > > to be both a master and a slave. > > > > OK this sounds plausible. > > > > > > >> + * so the number of slaves won't change during iteration. Simply check > > >> + * sshm_path/slavea to tell if there are still living slaves. */ > > >> + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { > > >> + SSHM_ERROR(domid, id, > > >> + "can't remove master when there are living slaves."); > > >> + return ERROR_FAIL; > > > > > > Isn't this going to leave a half-destructed domain in userspace > > > components? Maybe we should proceed anyway? > > > > This is also among the points that I'm not very sure. What is the best way > > to handle this type of error during domain destruction? > > > > I think we should destroy everything in relation to the guest in Dom0 > (or other service domains). Some pages for the master guests might be > referenced by slaves, but they will eventually be freed (hence the > domain struct will be freed) within Xen. Do experiment with this to see > if my prediction is right. > > It also occurs to me you need to guard against circular references. That > is, DomA and DomB have a mutual master-slave relationship. > This probably can't happen because you can't construct such pair of guests in the first place.
2017-08-08 18:56 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: > On Tue, Aug 08, 2017 at 11:49:35AM +0100, Wei Liu wrote: >> On Sat, Aug 05, 2017 at 01:26:37AM +0800, Zhongze Liu wrote: >> > Hi Wei, >> > >> > Thank you for reviewing my patch. >> > >> > 2017-08-04 23:20 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: >> > > I skim through this patch and have some questions. >> > > >> > > On Fri, Aug 04, 2017 at 10:20:25AM +0800, Zhongze Liu wrote: >> > >> + >> > >> +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, >> > >> + libxl_static_shm *sshm) >> > >> +{ >> > >> + int rc, aborting; >> > >> + char *sshm_path, *dom_path, *dom_role_path; >> > >> + char *ents[11]; >> > >> + struct xs_permissions noperm; >> > >> + xs_transaction_t xt = XBT_NULL; >> > >> + >> > >> + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); >> > >> + dom_path = libxl__xs_get_dompath(gc, domid); >> > >> + /* the domain should be in xenstore by now */ >> > >> + assert(dom_path); >> > >> + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); >> > >> + >> > >> + >> > >> + retry_transaction: >> > >> + /* Within the transaction, goto out by default means aborting */ >> > >> + aborting = 1; >> > >> + rc = libxl__xs_transaction_start(gc, &xt); >> > >> + if (rc) { goto out; } >> > > >> > > if (rc) goto out; >> > >> > OK. Will remove all the {}. BTW, do I have to place "goto out;" in a newline? >> > >> >> Youc can look for examples in existing code and follow those. >> >> [...] >> > >> +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, >> > >> + uint32_t domid, const char *id, bool master) >> > >> +{ >> > >> + char *sshm_path, *slaves_path; >> > >> + >> > >> + sshm_path = libxl__xs_get_sshmpath(gc, id); >> > >> + slaves_path = GCSPRINTF("%s/slaves", sshm_path); >> > >> + >> > >> + if (master) { >> > >> + /* we know that domid can't be both a master and a slave for one id, >> > > >> > > Is this enforced in code? >> > >> > Yes...and...no. I've done this in libxl__sshm_add_slave() by doing: >> > >> > + if (NULL != libxl__xs_read(gc, xt, dom_sshm_path)) { >> > + SSHM_ERROR(domid, sshm->id, >> > + "domain tried to share the same region twice."); >> > + rc = ERROR_FAIL; >> > + goto out; >> > + } >> > >> > Maybe the comment is a little bit confusing. What I was planning to do is to >> > place such a check in both *_add_slave() and *_add_master(), so that one >> > ID can't appear twice within one domain, so that one domain will not be able >> > to be both a master and a slave. >> > >> >> OK this sounds plausible. >> >> > > >> > >> + * so the number of slaves won't change during iteration. Simply check >> > >> + * sshm_path/slavea to tell if there are still living slaves. */ >> > >> + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { >> > >> + SSHM_ERROR(domid, id, >> > >> + "can't remove master when there are living slaves."); >> > >> + return ERROR_FAIL; >> > > >> > > Isn't this going to leave a half-destructed domain in userspace >> > > components? Maybe we should proceed anyway? >> > >> > This is also among the points that I'm not very sure. What is the best way >> > to handle this type of error during domain destruction? >> > >> >> I think we should destroy everything in relation to the guest in Dom0 >> (or other service domains). Some pages for the master guests might be >> referenced by slaves, but they will eventually be freed (hence the >> domain struct will be freed) within Xen. Do experiment with this to see >> if my prediction is right. >> >> It also occurs to me you need to guard against circular references. That >> is, DomA and DomB have a mutual master-slave relationship. >> > > This probably can't happen because you can't construct such pair of > guests in the first place. Yes, indeed. Because masters can only be created prior the slaves. So it must be a forest-like structure.
2017-08-08 18:49 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: > On Sat, Aug 05, 2017 at 01:26:37AM +0800, Zhongze Liu wrote: >> Hi Wei, >> >> Thank you for reviewing my patch. >> >> 2017-08-04 23:20 GMT+08:00 Wei Liu <wei.liu2@citrix.com>: >> > I skim through this patch and have some questions. >> > >> > On Fri, Aug 04, 2017 at 10:20:25AM +0800, Zhongze Liu wrote: >> >> + >> >> +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, >> >> + libxl_static_shm *sshm) >> >> +{ >> >> + int rc, aborting; >> >> + char *sshm_path, *dom_path, *dom_role_path; >> >> + char *ents[11]; >> >> + struct xs_permissions noperm; >> >> + xs_transaction_t xt = XBT_NULL; >> >> + >> >> + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); >> >> + dom_path = libxl__xs_get_dompath(gc, domid); >> >> + /* the domain should be in xenstore by now */ >> >> + assert(dom_path); >> >> + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); >> >> + >> >> + >> >> + retry_transaction: >> >> + /* Within the transaction, goto out by default means aborting */ >> >> + aborting = 1; >> >> + rc = libxl__xs_transaction_start(gc, &xt); >> >> + if (rc) { goto out; } >> > >> > if (rc) goto out; >> >> OK. Will remove all the {}. BTW, do I have to place "goto out;" in a newline? >> > > Youc can look for examples in existing code and follow those. > > [...] >> >> +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, >> >> + uint32_t domid, const char *id, bool master) >> >> +{ >> >> + char *sshm_path, *slaves_path; >> >> + >> >> + sshm_path = libxl__xs_get_sshmpath(gc, id); >> >> + slaves_path = GCSPRINTF("%s/slaves", sshm_path); >> >> + >> >> + if (master) { >> >> + /* we know that domid can't be both a master and a slave for one id, >> > >> > Is this enforced in code? >> >> Yes...and...no. I've done this in libxl__sshm_add_slave() by doing: >> >> + if (NULL != libxl__xs_read(gc, xt, dom_sshm_path)) { >> + SSHM_ERROR(domid, sshm->id, >> + "domain tried to share the same region twice."); >> + rc = ERROR_FAIL; >> + goto out; >> + } >> >> Maybe the comment is a little bit confusing. What I was planning to do is to >> place such a check in both *_add_slave() and *_add_master(), so that one >> ID can't appear twice within one domain, so that one domain will not be able >> to be both a master and a slave. >> > > OK this sounds plausible. > >> > >> >> + * so the number of slaves won't change during iteration. Simply check >> >> + * sshm_path/slavea to tell if there are still living slaves. */ >> >> + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { >> >> + SSHM_ERROR(domid, id, >> >> + "can't remove master when there are living slaves."); >> >> + return ERROR_FAIL; >> > >> > Isn't this going to leave a half-destructed domain in userspace >> > components? Maybe we should proceed anyway? >> >> This is also among the points that I'm not very sure. What is the best way >> to handle this type of error during domain destruction? >> > > I think we should destroy everything in relation to the guest in Dom0 > (or other service domains). Some pages for the master guests might be > referenced by slaves, but they will eventually be freed (hence the > domain struct will be freed) within Xen. Do experiment with this to see > if my prediction is right. Yes, that's right. I'll turn the erros into warnings and proceed anyway. > > It also occurs to me you need to guard against circular references. That > is, DomA and DomB have a mutual master-slave relationship. > >> >> +char *libxl__xs_get_sshmpath(libxl__gc *gc, const char *id) >> >> +{ >> >> + char *s = GCSPRINTF("/local/static_shm/%s", id); >> >> + if (!s) >> >> + LOGE(ERROR, "cannot allocate static shm path"); >> > >> > GCSPRINTF can't fail. You can eliminate the check. >> >> I was also confused about the other similar checks around the file >> since GCSPRINTF will die on failure. Em...It seems that I've copied >> the previous errors. >> >> Then I'll remove this function and replace it with a macro in >> libxl_sshm.c instead. >> > > Using a static inline function is better.
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 3b63fb2cad..fd624b28f3 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -138,7 +138,7 @@ LIBXL_OBJS = flexarray.o libxl.o libxl_create.o libxl_dm.o libxl_pci.o \ libxl_dom_suspend.o libxl_dom_save.o libxl_usb.o \ libxl_vtpm.o libxl_nic.o libxl_disk.o libxl_console.o \ libxl_cpupool.o libxl_mem.o libxl_sched.o libxl_tmem.o \ - libxl_9pfs.o libxl_domain.o \ + libxl_9pfs.o libxl_domain.o libxl_sshm.o \ $(LIBXL_OBJS-y) LIBXL_OBJS += libxl_genid.o LIBXL_OBJS += _libxl_types.o libxl_flask.o _libxl_types_internal.o diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 1158303e1a..06dba2178d 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -918,6 +918,13 @@ static void initiate_domain_create(libxl__egc *egc, goto error_out; } + if (d_config->c_info.type != LIBXL_DOMAIN_TYPE_HVM && + d_config->num_sshms != 0) { + LOGD(ERROR, domid, "static_shm is only applicable to HVM domains"); + ret = ERROR_INVAL; + goto error_out; + } + ret = libxl__domain_make(gc, d_config, &domid, &state->config); if (ret) { LOGD(ERROR, domid, "cannot make domain: %d", ret); diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index f54fd49a73..1958667344 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -1194,6 +1194,13 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, goto out; } + /* the p2m has been setup, we could map the static shared memory now. */ + rc = libxl__sshm_add(gc, domid, d_config->sshms, d_config->num_sshms); + if (rc != 0) { + LOG(ERROR, "failed to map static shared memory"); + goto out; + } + rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port, &state->store_mfn, state->console_port, &state->console_mfn, state->store_domid, diff --git a/tools/libxl/libxl_domain.c b/tools/libxl/libxl_domain.c index 08eccd082b..0702a575f2 100644 --- a/tools/libxl/libxl_domain.c +++ b/tools/libxl/libxl_domain.c @@ -1028,6 +1028,12 @@ void libxl__destroy_domid(libxl__egc *egc, libxl__destroy_domid_state *dis) goto out; } + rc = libxl__sshm_del(gc, domid); + if (rc) { + LOGD(ERROR, domid, "Deleting static shm failed."); + goto out; + } + if (libxl__device_pci_destroy_all(gc, domid) < 0) LOGD(ERROR, domid, "Pci shutdown failed"); rc = xc_domain_pause(ctx->xch, domid); diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 724750967c..9c9f69c50f 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -721,6 +721,7 @@ _hidden char **libxl__xs_directory(libxl__gc *gc, xs_transaction_t t, const char *path, unsigned int *nb); /* On error: returns NULL, sets errno (no logging) */ _hidden char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid); +_hidden char *libxl__xs_get_sshmpath(libxl__gc *gc, const char *id); _hidden int libxl__backendpath_parse_domid(libxl__gc *gc, const char *be_path, libxl_domid *domid_out); @@ -4353,6 +4354,19 @@ static inline bool libxl__acpi_defbool_val(const libxl_domain_build_info *b_info #endif /* + * Set up static shared ram pages for HVM domains to communicate + * + * This function should only be called after the memory map is constructed + * and before any further memory access. */ +int libxl__sshm_add(libxl__gc *gc, uint32_t domid, + libxl_static_shm *sshm, int len); + +int libxl__sshm_del(libxl__gc *gc, uint32_t domid); + +int libxl__sshm_check_slaves_overlap(libxl__gc *gc, uint32_t domid, + libxl_static_shm *sshms, int len); + +/* * Local variables: * mode: C * c-basic-offset: 4 diff --git a/tools/libxl/libxl_sshm.c b/tools/libxl/libxl_sshm.c new file mode 100644 index 0000000000..c7e9b8c1ee --- /dev/null +++ b/tools/libxl/libxl_sshm.c @@ -0,0 +1,370 @@ +#include "libxl_osdeps.h" +#include "libxl_internal.h" +#include <stdio.h> + +#define TRY_TRANSACTION_OR_FAIL(aborting) do { \ + if (!xs_transaction_end(CTX->xsh, xt, aborting) && !aborting) { \ + if (EAGAIN == errno) { \ + goto retry_transaction; \ + } else { \ + rc = ERROR_FAIL; \ + } \ + } \ + }while(0); + +#define SSHM_ERROR(domid, sshmid, f, ...) \ + LOGD(ERROR, domid, "static_shm id = %s:" f, sshmid, ##__VA_ARGS__) + + +/* The caller have to guarentee that {s,m}begin < {s,m}end */ +static int libxl__sshm_do_map(libxl__gc *gc, uint32_t mid, uint32_t sid, + libxl_static_shm *sshm, + uint64_t mbegin, uint64_t mend) +{ + int rc; + int i; + unsigned int num_mpages, num_spages, offset; + int *errs; + xen_ulong_t *idxs; + xen_pfn_t *gpfns; + + num_mpages = (mend - mbegin) >> 12; + num_spages = (sshm->end - sshm->begin) >> 12; + offset = sshm->offset >> 12; + + /* Check range. Test offset < mpages first to avoid overflow */ + if ((offset >= num_mpages) || (num_mpages - offset < num_spages)) { + SSHM_ERROR(sid, sshm->id, "exceeds master's address space."); + rc = ERROR_INVAL; + goto out; + } + + /* fill out the pfn's and do the mapping */ + errs = libxl__calloc(gc, num_spages, sizeof(int)); + idxs = libxl__calloc(gc, num_spages, sizeof(xen_ulong_t)); + gpfns = libxl__calloc(gc, num_spages, sizeof(xen_pfn_t)); + for (i = 0; i < num_spages; i++) { + idxs[i] = (mbegin >> 12) + offset + i; + gpfns[i]= (sshm->begin >> 12) + i; + } + rc = xc_domain_add_to_physmap_batch(CTX->xch, + sid, mid, + XENMAPSPACE_gmfn_foreign, + num_spages, + idxs, gpfns, errs); + + for (i = 0; i< num_spages; i++) { + if (errs[i]) { + SSHM_ERROR(sid, sshm->id, + "can't map at address 0x%"PRIx64".", + sshm->begin + (offset << 12) ); + rc = ERROR_FAIL; + } + } + if (rc) { goto out; } + + rc = 0; + + out: + return rc; +} + +static int libxl__sshm_add_master(libxl__gc *gc, uint32_t domid, + libxl_static_shm *sshm) +{ + int rc, aborting; + char *sshm_path, *dom_path, *dom_role_path; + char *ents[11]; + struct xs_permissions noperm; + xs_transaction_t xt = XBT_NULL; + + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); + dom_path = libxl__xs_get_dompath(gc, domid); + /* the domain should be in xenstore by now */ + assert(dom_path); + dom_role_path = GCSPRINTF("%s/static_shm/%s/role", dom_path, sshm->id); + + + retry_transaction: + /* Within the transaction, goto out by default means aborting */ + aborting = 1; + rc = libxl__xs_transaction_start(gc, &xt); + if (rc) { goto out; } + + if (NULL == libxl__xs_read(gc, xt, sshm_path)) { + rc = libxl__xs_write_checked(gc, xt, dom_role_path, "master"); + if (rc) { goto out; }; + + ents[0] = "master"; + ents[1] = GCSPRINTF("%"PRIu32, domid); + ents[2] = "begin"; + ents[3] = GCSPRINTF("0x%"PRIx64, sshm->begin); + ents[4] = "end"; + ents[5] = GCSPRINTF("0x%"PRIx64, sshm->end); + ents[6] = "prot"; + ents[7] = libxl__strdup(gc, libxl_sshm_prot_to_string(sshm->prot)); + ents[8] = "cache_policy"; + ents[9] = libxl__strdup(gc, + libxl_sshm_cachepolicy_to_string(sshm->cache_policy)); + ents[10] = NULL; + + /* could only be accessed by Dom0 */ + noperm.id = 0; + noperm.perms = XS_PERM_NONE; + libxl__xs_mknod(gc, xt, sshm_path, &noperm, 1); + libxl__xs_writev(gc, xt, sshm_path, ents); + } else { + SSHM_ERROR(domid, sshm->id, "can only have one master."); + rc = ERROR_FAIL; + goto out; + } + + aborting = rc = 0; + + out: + TRY_TRANSACTION_OR_FAIL(aborting); + return rc; +} + +static int libxl__sshm_add_slave(libxl__gc *gc, uint32_t domid, + libxl_static_shm *sshm) +{ + int rc, aborting; + char *sshm_path, *slave_path, *dom_path, *dom_sshm_path, *dom_role_path; + char *ents[9]; + const char *xs_value; + libxl_static_shm master_sshm; + uint32_t master_domid; + xs_transaction_t xt = XBT_NULL; + + sshm_path = libxl__xs_get_sshmpath(gc, sshm->id); + slave_path = GCSPRINTF("%s/slaves/%"PRIu32, sshm_path, domid); + dom_path = libxl__xs_get_dompath(gc, domid); + /* the domain should be in xenstore by now */ + assert(dom_path); + dom_sshm_path = GCSPRINTF("%s/static_shm/%s", dom_path, sshm->id); + dom_role_path = GCSPRINTF("%s/role", dom_sshm_path); + + retry_transaction: + /* Within the transaction, goto out by default means aborting */ + aborting = 1; + rc = libxl__xs_transaction_start(gc, &xt); + if (rc) { goto out; } + + if (NULL == libxl__xs_read(gc, xt, sshm_path)) { + SSHM_ERROR(domid, sshm->id, "no master found."); + rc = ERROR_FAIL; + goto out; + } else { + /* check the master info to see if we could do the mapping */ + if (NULL != libxl__xs_read(gc, xt, dom_sshm_path)) { + SSHM_ERROR(domid, sshm->id, + "domain tried to share the same region twice."); + rc = ERROR_FAIL; + goto out; + } + + rc = libxl__xs_read_checked(gc, xt, + GCSPRINTF("%s/prot", sshm_path), + &xs_value); + if (rc) { goto out; } + libxl_sshm_prot_from_string(xs_value, &master_sshm.prot); + + rc = libxl__xs_read_checked(gc, xt, + GCSPRINTF("%s/begin", sshm_path), + &xs_value); + if (rc) { goto out; } + master_sshm.begin = strtoull(xs_value, NULL, 16); + + rc = libxl__xs_read_checked(gc, xt, + GCSPRINTF("%s/end", sshm_path), + &xs_value); + if (rc) { goto out; } + master_sshm.end = strtoull(xs_value, NULL, 16); + + rc = libxl__xs_read_checked(gc, xt, + GCSPRINTF("%s/master", sshm_path), + &xs_value); + if (rc) { goto out; } + master_domid = strtoull(xs_value, NULL, 16); + + /* check if the slave is asking too much permission */ + if (LIBXL_SSHM_PROT_UNKNOWN == sshm->prot) { + sshm->prot = master_sshm.prot; + } + if (master_sshm.prot < sshm->prot) { + SSHM_ERROR(domid, sshm->id, "slave is asking too much permission."); + rc = ERROR_INVAL; + goto out; + } + + /* all checks passed, do the job */ + rc = libxl__sshm_do_map(gc, master_domid, domid, sshm, + master_sshm.begin, master_sshm.end); + if (rc) { + rc = ERROR_INVAL; + goto out; + } + + rc = libxl__xs_write_checked(gc, xt, dom_role_path, "slave"); + if (rc) { goto out; } + + /* fill in slave info */ + ents[0] = "begin"; + ents[1] = GCSPRINTF("0x%"PRIx64, sshm->begin); + ents[2] = "end"; + ents[3] = GCSPRINTF("0x%"PRIx64, sshm->end); + ents[4] = "offset"; + ents[5] = GCSPRINTF("0x%"PRIx64, sshm->offset); + ents[6] = "prot"; + ents[7] = libxl__strdup(gc, libxl_sshm_prot_to_string(sshm->prot)); + ents[8] = NULL; + libxl__xs_writev(gc, xt, slave_path, ents); + } + + aborting = rc = 0; + + out: + TRY_TRANSACTION_OR_FAIL(aborting); + return rc; +} + +/* Compare function for sorting sshm ranges by sshm->begin */ +static int sshm_range_cmp(const void *a, const void *b) +{ + libxl_static_shm *const *sshma = a, *const *sshmb = b; + return (*sshma)->begin > (*sshmb)->begin ? 1 : -1; +} + +/* check if the sshm slave configs in @sshm overlap */ +static int libxl__sshm_check_overlap(libxl__gc *gc, uint32_t domid, + libxl_static_shm *sshms, int len) +{ + + const libxl_static_shm **slave_sshms = NULL; + int num_slaves; + int i; + + slave_sshms = libxl__calloc(gc, len, sizeof(slave_sshms[0])); + num_slaves = 0; + for (i = 0; i < len; ++i) { + if (LIBXL_SSHM_ROLE_SLAVE == sshms[i].role) + slave_sshms[num_slaves++] = sshms + i; + } + qsort(slave_sshms, num_slaves, sizeof(slave_sshms[0]), sshm_range_cmp); + + for (i = 0; i < num_slaves - 1; ++i) { + if (slave_sshms[i+1]->begin < slave_sshms[i]->end) { + SSHM_ERROR(domid, slave_sshms[i+1]->id, "slave ranges overlap."); + return ERROR_INVAL; + } + } + + return 0; +} + +static int libxl__sshm_del_single(libxl__gc *gc, xs_transaction_t xt, + uint32_t domid, const char *id, bool master) +{ + char *sshm_path, *slaves_path; + + sshm_path = libxl__xs_get_sshmpath(gc, id); + slaves_path = GCSPRINTF("%s/slaves", sshm_path); + + if (master) { + /* we know that domid can't be both a master and a slave for one id, + * so the number of slaves won't change during iteration. Simply check + * sshm_path/slavea to tell if there are still living slaves. */ + if (NULL != libxl__xs_read(gc, xt, slaves_path)) { + SSHM_ERROR(domid, id, + "can't remove master when there are living slaves."); + return ERROR_FAIL; + } + libxl__xs_path_cleanup(gc, xt, sshm_path); + } else { + libxl__xs_path_cleanup(gc, xt, + GCSPRINTF("%s/%"PRIu32, slaves_path, domid)); + } + + return 0; +} + +/* Delete an static_shm entry in the xensotre. Will also return success if + * the path doesn't exist. */ +int libxl__sshm_del(libxl__gc *gc, uint32_t domid) +{ + int rc, aborting; + xs_transaction_t xt = XBT_NULL; + char *dom_path, *dom_sshm_path; + const char *role; + char **sshm_ents; + unsigned int sshm_num; + int i; + + if (LIBXL_DOMAIN_TYPE_HVM != libxl__domain_type(gc, domid)) + return 0; + + dom_path = libxl__xs_get_dompath(gc, domid); + dom_sshm_path = GCSPRINTF("%s/static_shm", dom_path); + + retry_transaction: + /* Within the transaction, goto out by default means aborting */ + aborting = 1; + rc = libxl__xs_transaction_start(gc, &xt); + if (rc) { goto out; } + + if (NULL == libxl__xs_read(gc, xt, dom_sshm_path)) { + /* no sshms, just do nothing */ + rc = aborting = 0; + goto out; + } + + sshm_ents = libxl__xs_directory(gc, xt, dom_sshm_path, &sshm_num); + + for (i = 0; i < sshm_num; ++i) { + rc = libxl__xs_read_checked(gc, xt, + GCSPRINTF("%s/%s/role", dom_sshm_path, sshm_ents[i]), &role); + if (rc) { goto out; } + + rc = libxl__sshm_del_single(gc, xt, domid, + sshm_ents[i], role[0] == 'm' ? 1 : 0); + if (rc) { goto out; } + } + + libxl__xs_path_cleanup(gc, xt, dom_sshm_path); + + aborting = rc = 0; + + out: + TRY_TRANSACTION_OR_FAIL(aborting); + return rc; +} + +int libxl__sshm_add(libxl__gc *gc, uint32_t domid, + libxl_static_shm *sshms, int len) +{ + int rc, i; + + if (LIBXL_DOMAIN_TYPE_HVM != libxl__domain_type(gc, domid)) + return 0; + rc = libxl__sshm_check_overlap(gc, domid, sshms, len); + if (rc) { return rc; }; + + for (i = 0; i < len; ++i) { + if (LIBXL_SSHM_ROLE_SLAVE == sshms[i].role) { + rc = libxl__sshm_add_slave(gc, domid, sshms+i); + } else { + rc = libxl__sshm_add_master(gc, domid, sshms+i); + } + if (rc) { return rc; }; + } + + return 0; +} +/* + * Local variables: + * mode: C + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/tools/libxl/libxl_xshelp.c b/tools/libxl/libxl_xshelp.c index c4a18df353..d91fbf5fda 100644 --- a/tools/libxl/libxl_xshelp.c +++ b/tools/libxl/libxl_xshelp.c @@ -193,6 +193,14 @@ char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid) return s; } +char *libxl__xs_get_sshmpath(libxl__gc *gc, const char *id) +{ + char *s = GCSPRINTF("/local/static_shm/%s", id); + if (!s) + LOGE(ERROR, "cannot allocate static shm path"); + return s; +} + int libxl__xs_read_mandatory(libxl__gc *gc, xs_transaction_t t, const char *path, const char **result_out) {
creation: * Check for further errors in the static_shm configs: overlapping areas, invalid ranges, duplicated master domain, no master domain etc. * Add code for writing infomations of static shared memory areas into the appropriate xenstore paths. * use xc_domain_add_to_physmap_batch to do the page sharing. destruction: * Check for errors that try to remove master while there'are still living slaves. * Cleanup related xenstore paths. This is for the proposal "Allow setting up shared memory areas between VMs from xl config file" (see [1]). [1] https://lists.xenproject.org/archives/html/xen-devel/2017-07/msg03047.html Signed-off-by: Zhongze Liu <blackskygg@gmail.com> --- Cc: Andrew Cooper <andrew.cooper3@citrix.com> Cc: George Dunlap <George.Dunlap@eu.citrix.com> Cc: Ian Jackson <ian.jackson@eu.citrix.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Stefano Stabellini <sstabellini@kernel.org> Cc: Tim Deegan <tim@xen.org> Cc: Wei Liu <wei.liu2@citrix.com> Cc: Julien Grall <julien.grall@arm.com> Cc: xen-devel@lists.xen.org --- tools/libxl/Makefile | 2 +- tools/libxl/libxl_create.c | 7 + tools/libxl/libxl_dom.c | 7 + tools/libxl/libxl_domain.c | 6 + tools/libxl/libxl_internal.h | 14 ++ tools/libxl/libxl_sshm.c | 370 +++++++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_xshelp.c | 8 + 7 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 tools/libxl/libxl_sshm.c