Message ID | 50fc6fbbfd80c25d2ad1752fb945cdfc7d847f20.1566503584.git.qemu_oss@crudebyte.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | 9p: Fix file ID collisions | expand |
On Thu, 22 Aug 2019 21:33:37 +0200 Christian Schoenebeck <qemu_oss@crudebyte.com> wrote: > 'warn' (default): Only log an error message (once) on host if more than one > device is shared by same export, except of that just ignore this config > error though. This is the default behaviour for not breaking existing > installations implying that they really know what they are doing. > > 'forbid': Like 'warn', but except of just logging an error this > also denies access of guest to additional devices. > > 'remap': Allows to share more than one device per export by remapping > inodes from host to guest appropriately. To support multiple devices on the > 9p share, and avoid qid path collisions we take the device id as input to > generate a unique QID path. The lowest 48 bits of the path will be set > equal to the file inode, and the top bits will be uniquely assigned based > on the top 16 bits of the inode and the device id. > > Signed-off-by: Antonios Motakis <antonios.motakis@huawei.com> > [CS: - Rebased to https://github.com/gkurz/qemu/commits/9p-next > (SHA1 177fd3b6a8). > - Updated hash calls to new xxhash API. > - Added virtfs option 'multidevs', original patch simply did the inode > remapping without being asked. > - Updated docs for new option 'multidevs'. > - Capture root_ino in v9fs_device_realize_common() as well, not just > the device id. > - Fixed v9fs_do_readdir() not having remapped inodes. > - Log error message when running out of prefixes in > qid_path_prefixmap(). > - Fixed definition of QPATH_INO_MASK. > - Dropped unnecessary parantheses in qpp_lookup_func(). > - Dropped unnecessary g_malloc0() result checks. ] > Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com> > --- Looks good but needs some more polishing. See below. > fsdev/file-op-9p.h | 5 ++ > fsdev/qemu-fsdev-opts.c | 7 +- > fsdev/qemu-fsdev.c | 11 +++ > hw/9pfs/9p.c | 182 ++++++++++++++++++++++++++++++++++++++++++------ > hw/9pfs/9p.h | 13 ++++ > qemu-options.hx | 33 +++++++-- > vl.c | 6 +- > 7 files changed, 229 insertions(+), 28 deletions(-) > > diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h > index c757c8099f..f2f7772c86 100644 > --- a/fsdev/file-op-9p.h > +++ b/fsdev/file-op-9p.h > @@ -59,6 +59,11 @@ typedef struct ExtendedOps { > #define V9FS_RDONLY 0x00000040 > #define V9FS_PROXY_SOCK_FD 0x00000080 > #define V9FS_PROXY_SOCK_NAME 0x00000100 > +/* > + * multidevs option (either one of the two applies exclusively) > + */ > +#define V9FS_REMAP_INODES 0x00000200 > +#define V9FS_FORBID_MULTIDEVS 0x00000400 > > #define V9FS_SEC_MASK 0x0000003C > > diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c > index 7c31ffffaf..07a18c6e48 100644 > --- a/fsdev/qemu-fsdev-opts.c > +++ b/fsdev/qemu-fsdev-opts.c > @@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = { > }, { > .name = "readonly", > .type = QEMU_OPT_BOOL, > - > + }, { > + .name = "multidevs", > + .type = QEMU_OPT_STRING, > }, { > .name = "socket", > .type = QEMU_OPT_STRING, > @@ -76,6 +78,9 @@ static QemuOptsList qemu_virtfs_opts = { > .name = "readonly", > .type = QEMU_OPT_BOOL, > }, { > + .name = "multidevs", > + .type = QEMU_OPT_STRING, > + }, { > .name = "socket", > .type = QEMU_OPT_STRING, > }, { > diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c > index 077a8c4e2b..ed03d559a9 100644 > --- a/fsdev/qemu-fsdev.c > +++ b/fsdev/qemu-fsdev.c > @@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = { > "writeout", > "fmode", > "dmode", > + "multidevs", So we only allow this for the "local" backend. Any reason not to add this to "proxy" as well ? I didn't do it for the "throttling" options because it is a feature I didn't care to support much, but "multidevs" is more a fix than a fancy feature. > "throttling.bps-total", > "throttling.bps-read", > "throttling.bps-write", > @@ -121,6 +122,7 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp) > const char *fsdev_id = qemu_opts_id(opts); > const char *fsdriver = qemu_opt_get(opts, "fsdriver"); > const char *writeout = qemu_opt_get(opts, "writeout"); > + const char *multidevs = qemu_opt_get(opts, "multidevs"); > bool ro = qemu_opt_get_bool(opts, "readonly", 0); > > if (!fsdev_id) { > @@ -161,6 +163,15 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp) > } else { > fsle->fse.export_flags &= ~V9FS_RDONLY; > } > + if (multidevs) { > + if (!strcmp(multidevs, "remap")) { > + fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; > + fsle->fse.export_flags |= V9FS_REMAP_INODES; > + } else if (!strcmp(multidevs, "forbid")) { > + fsle->fse.export_flags &= ~V9FS_REMAP_INODES; > + fsle->fse.export_flags |= V9FS_FORBID_MULTIDEVS; > + } And... } else if (!strcmp(multidevs, "warn")) { fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; fsle->fse.export_flags &= ~V9FS_REMAP_INODES; } else { error_setg(errp, "invalid multidevs property '%s'", multidevs); return -1; } ... because we're a bit pedantic for command line options :) > + } > > if (fsle->fse.ops->parse_opts) { > if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) { > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > index 8cc65c2c67..c96ea51116 100644 > --- a/hw/9pfs/9p.c > +++ b/hw/9pfs/9p.c > @@ -25,6 +25,7 @@ > #include "trace.h" > #include "migration/blocker.h" > #include "sysemu/qtest.h" > +#include "qemu/xxhash.h" > > int open_fd_hw; > int total_open_fd; > @@ -571,22 +572,109 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) > P9_STAT_MODE_NAMED_PIPE | \ > P9_STAT_MODE_SOCKET) > > -/* This is the algorithm from ufs in spfs */ > + > +/* creative abuse of tb_hash_func7, which is based on xxhash */ > +static uint32_t qpp_hash(QppEntry e) > +{ > + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); > +} > + > +static bool qpp_lookup_func(const void *obj, const void *userp) > +{ > + const QppEntry *e1 = obj, *e2 = userp; > + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; > +} > + > +static void qpp_table_remove(void *p, uint32_t h, void *up) > +{ > + g_free(p); > +} > + > +static void qpp_table_destroy(struct qht *ht) > +{ > + qht_iter(ht, qpp_table_remove, NULL); > + qht_destroy(ht); > +} Ok to have a function for this instead of open-coding but I'd like to see qpp_table_init() for consistency. > + > +/* stat_to_qid needs to map inode number (64 bits) and device id (32 bits) > + * to a unique QID path (64 bits). To avoid having to map and keep track > + * of up to 2^64 objects, we map only the 16 highest bits of the inode plus > + * the device id to the 16 highest bits of the QID path. The 48 lowest bits > + * of the QID path equal to the lowest bits of the inode number. > + * > + * This takes advantage of the fact that inode number are usually not > + * random but allocated sequentially, so we have fewer items to keep > + * track of. > + */ > +static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf, > + uint64_t *path) > +{ > + QppEntry lookup = { > + .dev = stbuf->st_dev, > + .ino_prefix = (uint16_t) (stbuf->st_ino >> 48) > + }, *val; > + uint32_t hash = qpp_hash(lookup); > + > + val = qht_lookup(&pdu->s->qpp_table, &lookup, hash); > + > + if (!val) { > + if (pdu->s->qp_prefix_next == 0) { > + /* we ran out of prefixes */ > + error_report_once( > + "9p: No more prefixes available for remapping inodes from " > + "host to guest." > + ); > + return -ENFILE; > + } > + > + val = g_malloc0(sizeof(QppEntry)); > + *val = lookup; > + > + /* new unique inode prefix and device combo */ > + val->qp_prefix = pdu->s->qp_prefix_next++; > + qht_insert(&pdu->s->qpp_table, val, hash, NULL); > + } > + > + *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK); > + return 0; > +} > + > static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) > { > + int err; > size_t size; > > - if (pdu->s->dev_id != stbuf->st_dev) { > - error_report_once( > - "9p: Multiple devices detected in same VirtFS export. " > - "You must use a separate export for each device." > - ); > - return -ENODEV; > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > + /* map inode+device to qid path (fast path) */ > + err = qid_path_prefixmap(pdu, stbuf, &qidp->path); > + if (err) { > + return err; > + } > + } else { > + if (pdu->s->dev_id != stbuf->st_dev) { > + if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { > + error_report_once( > + "9p: Multiple devices detected in same VirtFS export. " > + "Access of guest to additional devices is (partly) " > + "denied due to virtfs option 'multidevs=forbid' being " > + "effective." > + ); > + return -ENODEV; > + } else { > + error_report_once( Please use warn_report_once(). > + "9p: Multiple devices detected in same VirtFS export, " > + "which might lead to file ID collisions and severe " > + "misbehaviours on guest! You should either use a " > + "separate export for each device shared from host or " > + "use virtfs option 'multidevs=remap'!" > + ); > + } > + } > + memset(&qidp->path, 0, sizeof(qidp->path)); > + size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > + memcpy(&qidp->path, &stbuf->st_ino, size); > } > > - memset(&qidp->path, 0, sizeof(qidp->path)); > - size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > - memcpy(&qidp->path, &stbuf->st_ino, size); > qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); > qidp->type = 0; > if (S_ISDIR(stbuf->st_mode)) { > @@ -616,6 +704,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, > return 0; > } > > +static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, > + struct dirent *dent, V9fsQID *qidp) > +{ > + struct stat stbuf; > + V9fsPath path; > + int err; > + > + v9fs_path_init(&path); > + > + err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); > + if (err < 0) { > + goto out; > + } > + err = v9fs_co_lstat(pdu, &path, &stbuf); > + if (err < 0) { > + goto out; > + } > + err = stat_to_qid(pdu, &stbuf, qidp); > + > +out: > + v9fs_path_free(&path); > + return err; > +} > + > V9fsPDU *pdu_alloc(V9fsState *s) > { > V9fsPDU *pdu = NULL; > @@ -1964,16 +2076,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, > v9fs_string_free(&name); > return count; > } > - /* > - * Fill up just the path field of qid because the client uses > - * only that. To fill the entire qid structure we will have > - * to stat each dirent found, which is expensive > - */ > - size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > - memcpy(&qid.path, &dent->d_ino, size); > - /* Fill the other fields with dummy values */ > - qid.type = 0; > - qid.version = 0; > + > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > + /* > + * dirent_to_qid() implies expensive stat call for each entry, > + * we must do that here though since inode remapping requires > + * the device id, which in turn might be different for > + * different entries; we cannot make any assumption to avoid > + * that here. > + */ > + err = dirent_to_qid(pdu, fidp, dent, &qid); > + if (err < 0) { > + v9fs_readdir_unlock(&fidp->fs.dir); > + v9fs_co_seekdir(pdu, fidp, saved_dir_pos); > + v9fs_string_free(&name); > + return err; > + } > + } else { > + /* > + * Fill up just the path field of qid because the client uses > + * only that. To fill the entire qid structure we will have > + * to stat each dirent found, which is expensive. For the > + * latter reason we don't call dirent_to_qid() here. Only drawback > + * is that no multi-device export detection of stat_to_qid() > + * would be done and provided as error to the user here. But > + * user would get that error anyway when accessing those > + * files/dirs through other ways. > + */ > + size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > + memcpy(&qid.path, &dent->d_ino, size); > + /* Fill the other fields with dummy values */ > + qid.type = 0; > + qid.version = 0; > + } > > /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ > len = pdu_marshal(pdu, 11 + count, "Qqbs", > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, > goto out; > } > > + s->root_ino = stat.st_ino; > s->dev_id = stat.st_dev; > > + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ > + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); > + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ > + > s->ctx.fst = &fse->fst; > fsdev_throttle_init(s->ctx.fst); > > @@ -3687,6 +3827,7 @@ out: > } > g_free(s->tag); > g_free(s->ctx.fs_root); > + qpp_table_destroy(&s->qpp_table); This causes QEMU to crash if we get there before qht_init() was called. This should be guarded by a s->qpp_table.map != NULL check. I've just posted a patch that simplifies error handling in this function, you in Cc. The patch is also in 9p-next. Please rebase on top of it. > v9fs_path_free(&path); > } > return rc; > @@ -3699,6 +3840,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) > } > fsdev_throttle_cleanup(s->ctx.fst); > g_free(s->tag); > + qpp_table_destroy(&s->qpp_table); > g_free(s->ctx.fs_root); > } > > diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h > index 5e316178d5..a283b0193e 100644 > --- a/hw/9pfs/9p.h > +++ b/hw/9pfs/9p.h > @@ -8,6 +8,7 @@ > #include "fsdev/9p-iov-marshal.h" > #include "qemu/thread.h" > #include "qemu/coroutine.h" > +#include "qemu/qht.h" > > enum { > P9_TLERROR = 6, > @@ -235,6 +236,15 @@ struct V9fsFidState > V9fsFidState *rclm_lst; > }; > > +#define QPATH_INO_MASK ((1ULL << 48) - 1) > + > +/* QID path prefix entry, see stat_to_qid */ > +typedef struct { > + dev_t dev; > + uint16_t ino_prefix; > + uint16_t qp_prefix; > +} QppEntry; > + > struct V9fsState > { > QLIST_HEAD(, V9fsPDU) free_list; > @@ -256,7 +266,10 @@ struct V9fsState > Error *migration_blocker; > V9fsConf fsconf; > V9fsQID root_qid; > + ino_t root_ino; > dev_t dev_id; > + struct qht qpp_table; > + uint16_t qp_prefix_next; > }; > > /* 9p2000.L open flags */ > diff --git a/qemu-options.hx b/qemu-options.hx > index 9621e934c0..603e5e8e15 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -1335,17 +1335,17 @@ ETEXI > > DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, > "-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n" > - " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n" > - "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n" > - "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n" > + " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n" > + "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly][,multidevs=remap|forbid|warn]\n" > + "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly][,multidevs=remap|forbid|warn]\n" Either enable support in "proxy" or don't update the "proxy" documentation :) > "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n", > QEMU_ARCH_ALL) > > STEXI > > -@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}] > -@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] > -@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] > +@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}] > +@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] > +@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] Ditto. > @itemx -virtfs synth,mount_tag=@var{mount_tag} > @findex -virtfs > > @@ -1399,6 +1399,27 @@ Specifies the default mode for newly created directories on the host. Works > only with security models "mapped-xattr" and "mapped-file". > @item mount_tag=@var{mount_tag} > Specifies the tag name to be used by the guest to mount this export point. > +@item multidevs=@var{multidevs} > +Specifies how to deal with multiple devices being shared with a 9p export. > +Supported behaviours are either "remap", "forbid" or "warn". The latter is > +the default behaviour on which virtfs 9p expects only one device to be > +shared with the same export, and if more than one device is shared and > +accessed via the same 9p export then only a warning message is logged > +(once) by qemu on host side. In order to avoid file ID collisions on guest > +you should either create a separate virtfs export for each device to be > +shared with guests (recommended way) or you might use "remap" instead which > +allows you to share multiple devices with only one export instead, which is > +achieved by remapping the original inode numbers from host to guest in a > +way that would prevent such collisions. Remapping inodes in such use cases > +is required because the original device IDs from host are never passed and > +exposed on guest. Instead all files of an export shared with virtfs always > +share the same device id on guest. So two files with identical inode > +numbers but from actually different devices on host would otherwise cause a > +file ID collision and hence potential misbehaviours on guest. "forbid" on > +the other hand assumes like "warn" that only one device is shared by the > +same export, however it will not only log a warning message but also > +deny access to additional devices on guest. Note though that "forbid" does > +currently not block all possible file access operations. Maybe provide a list of such operations that won't be blocked ? > @end table > ETEXI > > diff --git a/vl.c b/vl.c > index b426b32134..9cb29b483d 100644 > --- a/vl.c > +++ b/vl.c > @@ -3320,7 +3320,7 @@ int main(int argc, char **argv, char **envp) > case QEMU_OPTION_virtfs: { > QemuOpts *fsdev; > QemuOpts *device; > - const char *writeout, *sock_fd, *socket, *path, *security_model; > + const char *writeout, *sock_fd, *socket, *path, *security_model, *multidevs; > > olist = qemu_find_opts("virtfs"); > if (!olist) { > @@ -3380,6 +3380,10 @@ int main(int argc, char **argv, char **envp) > qemu_opt_set_bool(fsdev, "readonly", > qemu_opt_get_bool(opts, "readonly", 0), > &error_abort); > + multidevs = qemu_opt_get(opts, "multidevs"); > + if (multidevs) { > + qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort); > + } > device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, > &error_abort); > qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);
On Thu, 22 Aug 2019 21:33:37 +0200 Christian Schoenebeck <qemu_oss@crudebyte.com> wrote: > 'warn' (default): Only log an error message (once) on host if more than one > device is shared by same export, except of that just ignore this config > error though. This is the default behaviour for not breaking existing > installations implying that they really know what they are doing. > > 'forbid': Like 'warn', but except of just logging an error this > also denies access of guest to additional devices. > > 'remap': Allows to share more than one device per export by remapping > inodes from host to guest appropriately. To support multiple devices on the > 9p share, and avoid qid path collisions we take the device id as input to > generate a unique QID path. The lowest 48 bits of the path will be set > equal to the file inode, and the top bits will be uniquely assigned based > on the top 16 bits of the inode and the device id. > > Signed-off-by: Antonios Motakis <antonios.motakis@huawei.com> > [CS: - Rebased to https://github.com/gkurz/qemu/commits/9p-next > (SHA1 177fd3b6a8). > - Updated hash calls to new xxhash API. > - Added virtfs option 'multidevs', original patch simply did the inode > remapping without being asked. > - Updated docs for new option 'multidevs'. > - Capture root_ino in v9fs_device_realize_common() as well, not just > the device id. > - Fixed v9fs_do_readdir() not having remapped inodes. > - Log error message when running out of prefixes in > qid_path_prefixmap(). > - Fixed definition of QPATH_INO_MASK. > - Dropped unnecessary parantheses in qpp_lookup_func(). > - Dropped unnecessary g_malloc0() result checks. ] > Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com> > --- Some more comments below. > fsdev/file-op-9p.h | 5 ++ > fsdev/qemu-fsdev-opts.c | 7 +- > fsdev/qemu-fsdev.c | 11 +++ > hw/9pfs/9p.c | 182 ++++++++++++++++++++++++++++++++++++++++++------ > hw/9pfs/9p.h | 13 ++++ > qemu-options.hx | 33 +++++++-- > vl.c | 6 +- > 7 files changed, 229 insertions(+), 28 deletions(-) > > diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h > index c757c8099f..f2f7772c86 100644 > --- a/fsdev/file-op-9p.h > +++ b/fsdev/file-op-9p.h > @@ -59,6 +59,11 @@ typedef struct ExtendedOps { > #define V9FS_RDONLY 0x00000040 > #define V9FS_PROXY_SOCK_FD 0x00000080 > #define V9FS_PROXY_SOCK_NAME 0x00000100 > +/* > + * multidevs option (either one of the two applies exclusively) > + */ > +#define V9FS_REMAP_INODES 0x00000200 > +#define V9FS_FORBID_MULTIDEVS 0x00000400 > > #define V9FS_SEC_MASK 0x0000003C > > diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c > index 7c31ffffaf..07a18c6e48 100644 > --- a/fsdev/qemu-fsdev-opts.c > +++ b/fsdev/qemu-fsdev-opts.c > @@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = { > }, { > .name = "readonly", > .type = QEMU_OPT_BOOL, > - > + }, { > + .name = "multidevs", > + .type = QEMU_OPT_STRING, > }, { > .name = "socket", > .type = QEMU_OPT_STRING, > @@ -76,6 +78,9 @@ static QemuOptsList qemu_virtfs_opts = { > .name = "readonly", > .type = QEMU_OPT_BOOL, > }, { > + .name = "multidevs", > + .type = QEMU_OPT_STRING, > + }, { > .name = "socket", > .type = QEMU_OPT_STRING, > }, { > diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c > index 077a8c4e2b..ed03d559a9 100644 > --- a/fsdev/qemu-fsdev.c > +++ b/fsdev/qemu-fsdev.c > @@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = { > "writeout", > "fmode", > "dmode", > + "multidevs", > "throttling.bps-total", > "throttling.bps-read", > "throttling.bps-write", > @@ -121,6 +122,7 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp) > const char *fsdev_id = qemu_opts_id(opts); > const char *fsdriver = qemu_opt_get(opts, "fsdriver"); > const char *writeout = qemu_opt_get(opts, "writeout"); > + const char *multidevs = qemu_opt_get(opts, "multidevs"); > bool ro = qemu_opt_get_bool(opts, "readonly", 0); > > if (!fsdev_id) { > @@ -161,6 +163,15 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp) > } else { > fsle->fse.export_flags &= ~V9FS_RDONLY; > } > + if (multidevs) { > + if (!strcmp(multidevs, "remap")) { > + fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; > + fsle->fse.export_flags |= V9FS_REMAP_INODES; > + } else if (!strcmp(multidevs, "forbid")) { > + fsle->fse.export_flags &= ~V9FS_REMAP_INODES; > + fsle->fse.export_flags |= V9FS_FORBID_MULTIDEVS; > + } > + } > > if (fsle->fse.ops->parse_opts) { > if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) { > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > index 8cc65c2c67..c96ea51116 100644 > --- a/hw/9pfs/9p.c > +++ b/hw/9pfs/9p.c > @@ -25,6 +25,7 @@ > #include "trace.h" > #include "migration/blocker.h" > #include "sysemu/qtest.h" > +#include "qemu/xxhash.h" > > int open_fd_hw; > int total_open_fd; > @@ -571,22 +572,109 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) > P9_STAT_MODE_NAMED_PIPE | \ > P9_STAT_MODE_SOCKET) > > -/* This is the algorithm from ufs in spfs */ > + > +/* creative abuse of tb_hash_func7, which is based on xxhash */ > +static uint32_t qpp_hash(QppEntry e) > +{ > + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); > +} > + > +static bool qpp_lookup_func(const void *obj, const void *userp) > +{ > + const QppEntry *e1 = obj, *e2 = userp; > + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; > +} > + > +static void qpp_table_remove(void *p, uint32_t h, void *up) > +{ > + g_free(p); > +} > + > +static void qpp_table_destroy(struct qht *ht) > +{ > + qht_iter(ht, qpp_table_remove, NULL); > + qht_destroy(ht); > +} > + > +/* stat_to_qid needs to map inode number (64 bits) and device id (32 bits) > + * to a unique QID path (64 bits). To avoid having to map and keep track > + * of up to 2^64 objects, we map only the 16 highest bits of the inode plus > + * the device id to the 16 highest bits of the QID path. The 48 lowest bits > + * of the QID path equal to the lowest bits of the inode number. > + * > + * This takes advantage of the fact that inode number are usually not > + * random but allocated sequentially, so we have fewer items to keep > + * track of. > + */ > +static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf, > + uint64_t *path) > +{ > + QppEntry lookup = { > + .dev = stbuf->st_dev, > + .ino_prefix = (uint16_t) (stbuf->st_ino >> 48) > + }, *val; > + uint32_t hash = qpp_hash(lookup); > + > + val = qht_lookup(&pdu->s->qpp_table, &lookup, hash); > + > + if (!val) { > + if (pdu->s->qp_prefix_next == 0) { > + /* we ran out of prefixes */ > + error_report_once( > + "9p: No more prefixes available for remapping inodes from " > + "host to guest." > + ); > + return -ENFILE; > + } > + > + val = g_malloc0(sizeof(QppEntry)); > + *val = lookup; > + > + /* new unique inode prefix and device combo */ > + val->qp_prefix = pdu->s->qp_prefix_next++; > + qht_insert(&pdu->s->qpp_table, val, hash, NULL); > + } > + > + *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK); > + return 0; > +} > + > static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) > { > + int err; > size_t size; > > - if (pdu->s->dev_id != stbuf->st_dev) { > - error_report_once( > - "9p: Multiple devices detected in same VirtFS export. " > - "You must use a separate export for each device." > - ); > - return -ENODEV; > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > + /* map inode+device to qid path (fast path) */ > + err = qid_path_prefixmap(pdu, stbuf, &qidp->path); > + if (err) { > + return err; > + } > + } else { > + if (pdu->s->dev_id != stbuf->st_dev) { > + if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { > + error_report_once( > + "9p: Multiple devices detected in same VirtFS export. " > + "Access of guest to additional devices is (partly) " > + "denied due to virtfs option 'multidevs=forbid' being " > + "effective." > + ); > + return -ENODEV; > + } else { > + error_report_once( > + "9p: Multiple devices detected in same VirtFS export, " > + "which might lead to file ID collisions and severe " > + "misbehaviours on guest! You should either use a " > + "separate export for each device shared from host or " > + "use virtfs option 'multidevs=remap'!" > + ); > + } > + } > + memset(&qidp->path, 0, sizeof(qidp->path)); > + size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > + memcpy(&qidp->path, &stbuf->st_ino, size); > } > > - memset(&qidp->path, 0, sizeof(qidp->path)); > - size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > - memcpy(&qidp->path, &stbuf->st_ino, size); > qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); > qidp->type = 0; > if (S_ISDIR(stbuf->st_mode)) { > @@ -616,6 +704,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, > return 0; > } > > +static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, > + struct dirent *dent, V9fsQID *qidp) > +{ > + struct stat stbuf; > + V9fsPath path; > + int err; > + > + v9fs_path_init(&path); > + > + err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); > + if (err < 0) { > + goto out; > + } > + err = v9fs_co_lstat(pdu, &path, &stbuf); > + if (err < 0) { > + goto out; > + } > + err = stat_to_qid(pdu, &stbuf, qidp); > + > +out: > + v9fs_path_free(&path); > + return err; > +} > + > V9fsPDU *pdu_alloc(V9fsState *s) > { > V9fsPDU *pdu = NULL; > @@ -1964,16 +2076,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, > v9fs_string_free(&name); > return count; > } > - /* > - * Fill up just the path field of qid because the client uses > - * only that. To fill the entire qid structure we will have > - * to stat each dirent found, which is expensive > - */ > - size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > - memcpy(&qid.path, &dent->d_ino, size); > - /* Fill the other fields with dummy values */ > - qid.type = 0; > - qid.version = 0; > + > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > + /* > + * dirent_to_qid() implies expensive stat call for each entry, > + * we must do that here though since inode remapping requires > + * the device id, which in turn might be different for > + * different entries; we cannot make any assumption to avoid > + * that here. > + */ > + err = dirent_to_qid(pdu, fidp, dent, &qid); > + if (err < 0) { > + v9fs_readdir_unlock(&fidp->fs.dir); > + v9fs_co_seekdir(pdu, fidp, saved_dir_pos); > + v9fs_string_free(&name); > + return err; > + } > + } else { > + /* > + * Fill up just the path field of qid because the client uses > + * only that. To fill the entire qid structure we will have > + * to stat each dirent found, which is expensive. For the > + * latter reason we don't call dirent_to_qid() here. Only drawback > + * is that no multi-device export detection of stat_to_qid() > + * would be done and provided as error to the user here. But > + * user would get that error anyway when accessing those > + * files/dirs through other ways. > + */ > + size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > + memcpy(&qid.path, &dent->d_ino, size); > + /* Fill the other fields with dummy values */ > + qid.type = 0; > + qid.version = 0; > + } > > /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ > len = pdu_marshal(pdu, 11 + count, "Qqbs", > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, > goto out; > } > > + s->root_ino = stat.st_ino; This isn't used anywhere. It looks like a leftover of the readdir fix in v5. > s->dev_id = stat.st_dev; > > + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ > + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); > + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ > + > s->ctx.fst = &fse->fst; > fsdev_throttle_init(s->ctx.fst); > > @@ -3687,6 +3827,7 @@ out: > } > g_free(s->tag); > g_free(s->ctx.fs_root); > + qpp_table_destroy(&s->qpp_table); > v9fs_path_free(&path); > } > return rc; > @@ -3699,6 +3840,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) > } > fsdev_throttle_cleanup(s->ctx.fst); > g_free(s->tag); > + qpp_table_destroy(&s->qpp_table); > g_free(s->ctx.fs_root); > } > > diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h > index 5e316178d5..a283b0193e 100644 > --- a/hw/9pfs/9p.h > +++ b/hw/9pfs/9p.h > @@ -8,6 +8,7 @@ > #include "fsdev/9p-iov-marshal.h" > #include "qemu/thread.h" > #include "qemu/coroutine.h" > +#include "qemu/qht.h" > > enum { > P9_TLERROR = 6, > @@ -235,6 +236,15 @@ struct V9fsFidState > V9fsFidState *rclm_lst; > }; > > +#define QPATH_INO_MASK ((1ULL << 48) - 1) > + > +/* QID path prefix entry, see stat_to_qid */ > +typedef struct { > + dev_t dev; > + uint16_t ino_prefix; > + uint16_t qp_prefix; > +} QppEntry; > + > struct V9fsState > { > QLIST_HEAD(, V9fsPDU) free_list; > @@ -256,7 +266,10 @@ struct V9fsState > Error *migration_blocker; > V9fsConf fsconf; > V9fsQID root_qid; > + ino_t root_ino; Thinking again, I'm wondering if root_ino and dev_id could be used instead of root_qid in v9fs_walk()... Would you have a look at that if you decide to fix the readdir issue ? > dev_t dev_id; > + struct qht qpp_table; > + uint16_t qp_prefix_next; > }; > > /* 9p2000.L open flags */ > diff --git a/qemu-options.hx b/qemu-options.hx > index 9621e934c0..603e5e8e15 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -1335,17 +1335,17 @@ ETEXI > > DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, > "-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n" > - " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n" > - "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n" > - "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n" > + " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n" > + "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly][,multidevs=remap|forbid|warn]\n" > + "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly][,multidevs=remap|forbid|warn]\n" > "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n", > QEMU_ARCH_ALL) > > STEXI > > -@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}] > -@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] > -@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] > +@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}] > +@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] > +@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] > @itemx -virtfs synth,mount_tag=@var{mount_tag} > @findex -virtfs > > @@ -1399,6 +1399,27 @@ Specifies the default mode for newly created directories on the host. Works > only with security models "mapped-xattr" and "mapped-file". > @item mount_tag=@var{mount_tag} > Specifies the tag name to be used by the guest to mount this export point. > +@item multidevs=@var{multidevs} > +Specifies how to deal with multiple devices being shared with a 9p export. > +Supported behaviours are either "remap", "forbid" or "warn". The latter is > +the default behaviour on which virtfs 9p expects only one device to be > +shared with the same export, and if more than one device is shared and > +accessed via the same 9p export then only a warning message is logged > +(once) by qemu on host side. In order to avoid file ID collisions on guest > +you should either create a separate virtfs export for each device to be > +shared with guests (recommended way) or you might use "remap" instead which > +allows you to share multiple devices with only one export instead, which is > +achieved by remapping the original inode numbers from host to guest in a > +way that would prevent such collisions. Remapping inodes in such use cases > +is required because the original device IDs from host are never passed and > +exposed on guest. Instead all files of an export shared with virtfs always > +share the same device id on guest. So two files with identical inode > +numbers but from actually different devices on host would otherwise cause a > +file ID collision and hence potential misbehaviours on guest. "forbid" on > +the other hand assumes like "warn" that only one device is shared by the > +same export, however it will not only log a warning message but also > +deny access to additional devices on guest. Note though that "forbid" does > +currently not block all possible file access operations. > @end table > ETEXI > > diff --git a/vl.c b/vl.c > index b426b32134..9cb29b483d 100644 > --- a/vl.c > +++ b/vl.c > @@ -3320,7 +3320,7 @@ int main(int argc, char **argv, char **envp) > case QEMU_OPTION_virtfs: { > QemuOpts *fsdev; > QemuOpts *device; > - const char *writeout, *sock_fd, *socket, *path, *security_model; > + const char *writeout, *sock_fd, *socket, *path, *security_model, *multidevs; > > olist = qemu_find_opts("virtfs"); > if (!olist) { > @@ -3380,6 +3380,10 @@ int main(int argc, char **argv, char **envp) > qemu_opt_set_bool(fsdev, "readonly", > qemu_opt_get_bool(opts, "readonly", 0), > &error_abort); > + multidevs = qemu_opt_get(opts, "multidevs"); > + if (multidevs) { > + qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort); > + } > device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, > &error_abort); > qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);
On Donnerstag, 29. August 2019 18:55:28 CEST Greg Kurz wrote: > > diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c > > index 7c31ffffaf..07a18c6e48 100644 > > --- a/fsdev/qemu-fsdev-opts.c > > +++ b/fsdev/qemu-fsdev-opts.c > > @@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = { > > > > }, { > > > > .name = "readonly", > > .type = QEMU_OPT_BOOL, > > > > - > > + }, { > > + .name = "multidevs", > > + .type = QEMU_OPT_STRING, > > > > }, { > > > > .name = "socket", > > .type = QEMU_OPT_STRING, > > > > @@ -76,6 +78,9 @@ static QemuOptsList qemu_virtfs_opts = { > > > > .name = "readonly", > > .type = QEMU_OPT_BOOL, > > > > }, { > > > > + .name = "multidevs", > > + .type = QEMU_OPT_STRING, > > + }, { > > > > .name = "socket", > > .type = QEMU_OPT_STRING, > > > > }, { > > > > diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c > > index 077a8c4e2b..ed03d559a9 100644 > > --- a/fsdev/qemu-fsdev.c > > +++ b/fsdev/qemu-fsdev.c > > @@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = { > > > > "writeout", > > "fmode", > > "dmode", > > > > + "multidevs", > > So we only allow this for the "local" backend. Any reason not to > add this to "proxy" as well ? > > I didn't do it for the "throttling" options because it is a > feature I didn't care to support much, but "multidevs" is more > a fix than a fancy feature. Well, to be honest I haven't cared about the proxy backend at all. Like I mentioned before, I am a bit sceptical that the proxy feature is actually used by people in real life at all (at least in its current implementation). So personally I don't see much sense in investing time for adding & testing new things on this backend ATM. > > + if (multidevs) { > > + if (!strcmp(multidevs, "remap")) { > > + fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; > > + fsle->fse.export_flags |= V9FS_REMAP_INODES; > > + } else if (!strcmp(multidevs, "forbid")) { > > + fsle->fse.export_flags &= ~V9FS_REMAP_INODES; > > + fsle->fse.export_flags |= V9FS_FORBID_MULTIDEVS; > > + } > > And... > > } else if (!strcmp(multidevs, "warn")) { > fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; > fsle->fse.export_flags &= ~V9FS_REMAP_INODES; > } else { > error_setg(errp, "invalid multidevs property '%s'", multidevs); > return -1; > } > > ... because we're a bit pedantic for command line options :) And I thought I adapted to relaxed handling of CLI arguments. See existing option "writeout". :) > > > + } > > > > if (fsle->fse.ops->parse_opts) { > > > > if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) { > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > index 8cc65c2c67..c96ea51116 100644 > > --- a/hw/9pfs/9p.c > > +++ b/hw/9pfs/9p.c > > @@ -25,6 +25,7 @@ > > > > #include "trace.h" > > #include "migration/blocker.h" > > #include "sysemu/qtest.h" > > > > +#include "qemu/xxhash.h" > > > > int open_fd_hw; > > int total_open_fd; > > > > @@ -571,22 +572,109 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) > > > > P9_STAT_MODE_NAMED_PIPE | \ > > P9_STAT_MODE_SOCKET) > > > > -/* This is the algorithm from ufs in spfs */ > > + > > +/* creative abuse of tb_hash_func7, which is based on xxhash */ > > +static uint32_t qpp_hash(QppEntry e) > > +{ > > + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); > > +} > > + > > +static bool qpp_lookup_func(const void *obj, const void *userp) > > +{ > > + const QppEntry *e1 = obj, *e2 = userp; > > + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; > > +} > > + > > +static void qpp_table_remove(void *p, uint32_t h, void *up) > > +{ > > + g_free(p); > > +} > > + > > +static void qpp_table_destroy(struct qht *ht) > > +{ > > + qht_iter(ht, qpp_table_remove, NULL); > > + qht_destroy(ht); > > +} > > Ok to have a function for this instead of open-coding but I'd > like to see qpp_table_init() for consistency. Well, these are just qht_init() one-liners, but if you really want to have dedicated, local init functions for them, okay. > > static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID > > *qidp) { > > > > + int err; > > > > size_t size; > > > > - if (pdu->s->dev_id != stbuf->st_dev) { > > - error_report_once( > > - "9p: Multiple devices detected in same VirtFS export. " > > - "You must use a separate export for each device." > > - ); > > - return -ENODEV; > > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > > + /* map inode+device to qid path (fast path) */ > > + err = qid_path_prefixmap(pdu, stbuf, &qidp->path); > > + if (err) { > > + return err; > > + } > > + } else { > > + if (pdu->s->dev_id != stbuf->st_dev) { > > + if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { > > + error_report_once( > > + "9p: Multiple devices detected in same VirtFS export. > > " + "Access of guest to additional devices is (partly) > > " + "denied due to virtfs option 'multidevs=forbid' > > being " + "effective." > > + ); > > + return -ENODEV; > > + } else { > > + error_report_once( > > Please use warn_report_once(). Sure! > > + "9p: Multiple devices detected in same VirtFS export, > > " + "which might lead to file ID collisions and severe > > " + "misbehaviours on guest! You should either use a " > > + "separate export for each device shared from host or > > " + "use virtfs option 'multidevs=remap'!" > > + ); > > + } > > + } > > + memset(&qidp->path, 0, sizeof(qidp->path)); > > + size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > > + memcpy(&qidp->path, &stbuf->st_ino, size); > > > > } > > > > - memset(&qidp->path, 0, sizeof(qidp->path)); > > - size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > > - memcpy(&qidp->path, &stbuf->st_ino, size); > > > > qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); > > qidp->type = 0; > > if (S_ISDIR(stbuf->st_mode)) { > > > > @@ -616,6 +704,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, > > V9fsFidState *fidp,> > > return 0; > > > > } > > > > +static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, > > + struct dirent *dent, V9fsQID *qidp) > > +{ > > + struct stat stbuf; > > + V9fsPath path; > > + int err; > > + > > + v9fs_path_init(&path); > > + > > + err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); > > + if (err < 0) { > > + goto out; > > + } > > + err = v9fs_co_lstat(pdu, &path, &stbuf); > > + if (err < 0) { > > + goto out; > > + } > > + err = stat_to_qid(pdu, &stbuf, qidp); > > + > > +out: > > + v9fs_path_free(&path); > > + return err; > > +} > > + > > > > V9fsPDU *pdu_alloc(V9fsState *s) > > { > > > > V9fsPDU *pdu = NULL; > > > > @@ -1964,16 +2076,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU > > *pdu, V9fsFidState *fidp,> > > v9fs_string_free(&name); > > return count; > > > > } > > > > - /* > > - * Fill up just the path field of qid because the client uses > > - * only that. To fill the entire qid structure we will have > > - * to stat each dirent found, which is expensive > > - */ > > - size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > > - memcpy(&qid.path, &dent->d_ino, size); > > - /* Fill the other fields with dummy values */ > > - qid.type = 0; > > - qid.version = 0; > > + > > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > > + /* > > + * dirent_to_qid() implies expensive stat call for each > > entry, > > + * we must do that here though since inode remapping requires > > + * the device id, which in turn might be different for > > + * different entries; we cannot make any assumption to avoid > > + * that here. > > + */ > > + err = dirent_to_qid(pdu, fidp, dent, &qid); > > + if (err < 0) { > > + v9fs_readdir_unlock(&fidp->fs.dir); > > + v9fs_co_seekdir(pdu, fidp, saved_dir_pos); > > + v9fs_string_free(&name); > > + return err; > > + } > > + } else { > > + /* > > + * Fill up just the path field of qid because the client uses > > + * only that. To fill the entire qid structure we will have > > + * to stat each dirent found, which is expensive. For the > > + * latter reason we don't call dirent_to_qid() here. Only > > drawback + * is that no multi-device export detection of > > stat_to_qid() + * would be done and provided as error to the > > user here. But + * user would get that error anyway when > > accessing those + * files/dirs through other ways. > > + */ > > + size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > > + memcpy(&qid.path, &dent->d_ino, size); > > + /* Fill the other fields with dummy values */ > > + qid.type = 0; > > + qid.version = 0; > > + } > > > > /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ > > len = pdu_marshal(pdu, 11 + count, "Qqbs", > > > > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const > > V9fsTransport *t,> > > goto out; > > > > } > > > > + s->root_ino = stat.st_ino; > > > > s->dev_id = stat.st_dev; > > > > + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ > > + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); > > + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ > > + > > > > s->ctx.fst = &fse->fst; > > fsdev_throttle_init(s->ctx.fst); > > > > @@ -3687,6 +3827,7 @@ out: > > } > > g_free(s->tag); > > g_free(s->ctx.fs_root); > > > > + qpp_table_destroy(&s->qpp_table); > > This causes QEMU to crash if we get there before qht_init() was called. > This should be guarded by a s->qpp_table.map != NULL check. Touché. I'll add that to qp_table_destroy() for simplicity. > I've just posted a patch that simplifies error handling in this > function, you in Cc. The patch is also in 9p-next. Please rebase > on top of it. Ok. > > diff --git a/qemu-options.hx b/qemu-options.hx > > index 9621e934c0..603e5e8e15 100644 > > --- a/qemu-options.hx > > +++ b/qemu-options.hx > > @@ -1335,17 +1335,17 @@ ETEXI > > > > DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, > > > > "-virtfs > > local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-fil > > e|passthrough|none\n"> > > - " > > [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n" - > > "-virtfs > > proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly] > > \n" - "-virtfs > > proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonl > > y]\n" + " > > [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,mul > > tidevs=remap|forbid|warn]\n" + "-virtfs > > proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly] > > [,multidevs=remap|forbid|warn]\n" + "-virtfs > > proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonl > > y][,multidevs=remap|forbid|warn]\n" > Either enable support in "proxy" or don't update the "proxy" documentation > :) Then, if you don't mind, I'll just drop it from the proxy documentation, for the reasons I described above. > > "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n", > > QEMU_ARCH_ALL) > > > > STEXI > > > > -@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} > > ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] > > [,fmode=@var{fmode}][,dmode=@var{dmode}] -@itemx -virtfs > > proxy,socket=@var{socket},mount_tag=@var{mount_tag} > > [,writeout=@var{writeout}][,readonly] -@itemx -virtfs > > proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} > > [,writeout=@var{writeout}][,readonly] +@item -virtfs > > local,path=@var{path},mount_tag=@var{mount_tag} > > ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] > > [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}] > > +@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} > > [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] +@itemx > > -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} > > [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] > Ditto. Ack. > > @itemx -virtfs synth,mount_tag=@var{mount_tag} > > @findex -virtfs > > > > @@ -1399,6 +1399,27 @@ Specifies the default mode for newly created > > directories on the host. Works> > > only with security models "mapped-xattr" and "mapped-file". > > @item mount_tag=@var{mount_tag} > > Specifies the tag name to be used by the guest to mount this export > > point. > > > > +@item multidevs=@var{multidevs} > > +Specifies how to deal with multiple devices being shared with a 9p > > export. > > +Supported behaviours are either "remap", "forbid" or "warn". The latter > > is > > +the default behaviour on which virtfs 9p expects only one device to be > > +shared with the same export, and if more than one device is shared and > > +accessed via the same 9p export then only a warning message is logged > > +(once) by qemu on host side. In order to avoid file ID collisions on > > guest > > +you should either create a separate virtfs export for each device to be > > +shared with guests (recommended way) or you might use "remap" instead > > which +allows you to share multiple devices with only one export instead, > > which is +achieved by remapping the original inode numbers from host to > > guest in a +way that would prevent such collisions. Remapping inodes in > > such use cases +is required because the original device IDs from host are > > never passed and +exposed on guest. Instead all files of an export shared > > with virtfs always +share the same device id on guest. So two files with > > identical inode +numbers but from actually different devices on host > > would otherwise cause a +file ID collision and hence potential > > misbehaviours on guest. "forbid" on +the other hand assumes like "warn" > > that only one device is shared by the +same export, however it will not > > only log a warning message but also +deny access to additional devices on > > guest. Note though that "forbid" does +currently not block all possible > > file access operations. > > Maybe provide a list of such operations that won't be blocked ? I feel punished for being so verbose. Okaaay. :) > > > @end table > > ETEXI > > > > diff --git a/vl.c b/vl.c > > index b426b32134..9cb29b483d 100644 > > --- a/vl.c > > +++ b/vl.c > > @@ -3320,7 +3320,7 @@ int main(int argc, char **argv, char **envp) > > > > case QEMU_OPTION_virtfs: { > > > > QemuOpts *fsdev; > > QemuOpts *device; > > > > - const char *writeout, *sock_fd, *socket, *path, > > *security_model; + const char *writeout, *sock_fd, > > *socket, *path, *security_model, *multidevs;> > > olist = qemu_find_opts("virtfs"); > > if (!olist) { > > > > @@ -3380,6 +3380,10 @@ int main(int argc, char **argv, char **envp) > > > > qemu_opt_set_bool(fsdev, "readonly", > > > > qemu_opt_get_bool(opts, "readonly", 0), > > &error_abort); > > > > + multidevs = qemu_opt_get(opts, "multidevs"); > > + if (multidevs) { > > + qemu_opt_set(fsdev, "multidevs", multidevs, > > &error_abort); + } > > > > device = qemu_opts_create(qemu_find_opts("device"), NULL, > > 0, > > > > &error_abort); > > > > qemu_opt_set(device, "driver", "virtio-9p-pci", > > &error_abort);
On Freitag, 30. August 2019 14:22:38 CEST Greg Kurz wrote: > Some more comments below. [snip] > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > index 8cc65c2c67..c96ea51116 100644 > > --- a/hw/9pfs/9p.c > > +++ b/hw/9pfs/9p.c > > @@ -25,6 +25,7 @@ > > > > #include "trace.h" > > #include "migration/blocker.h" > > #include "sysemu/qtest.h" [snip] > > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const > > V9fsTransport *t,> > > goto out; > > > > } > > > > + s->root_ino = stat.st_ino; > > This isn't used anywhere. It looks like a leftover of the readdir fix > in v5. Yes, both correct. I intentionally left it though, since I found it a natural complement always capturing the root inode along to the root device. > > s->dev_id = stat.st_dev; > > > > + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ > > + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); > > + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ > > + > > > > s->ctx.fst = &fse->fst; > > fsdev_throttle_init(s->ctx.fst); > > > > @@ -3687,6 +3827,7 @@ out: > > } > > g_free(s->tag); > > g_free(s->ctx.fs_root); > > > > + qpp_table_destroy(&s->qpp_table); > > > > v9fs_path_free(&path); > > > > } > > return rc; > > > > @@ -3699,6 +3840,7 @@ void v9fs_device_unrealize_common(V9fsState *s, > > Error **errp)> > > } > > fsdev_throttle_cleanup(s->ctx.fst); > > g_free(s->tag); > > > > + qpp_table_destroy(&s->qpp_table); > > > > g_free(s->ctx.fs_root); > > > > } > > > > diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h > > index 5e316178d5..a283b0193e 100644 > > --- a/hw/9pfs/9p.h > > +++ b/hw/9pfs/9p.h > > @@ -8,6 +8,7 @@ > > > > #include "fsdev/9p-iov-marshal.h" > > #include "qemu/thread.h" > > #include "qemu/coroutine.h" > > > > +#include "qemu/qht.h" > > > > enum { > > > > P9_TLERROR = 6, > > > > @@ -235,6 +236,15 @@ struct V9fsFidState > > > > V9fsFidState *rclm_lst; > > > > }; > > > > +#define QPATH_INO_MASK ((1ULL << 48) - 1) > > + > > +/* QID path prefix entry, see stat_to_qid */ > > +typedef struct { > > + dev_t dev; > > + uint16_t ino_prefix; > > + uint16_t qp_prefix; > > +} QppEntry; > > + > > > > struct V9fsState > > { > > > > QLIST_HEAD(, V9fsPDU) free_list; > > > > @@ -256,7 +266,10 @@ struct V9fsState > > > > Error *migration_blocker; > > V9fsConf fsconf; > > V9fsQID root_qid; > > > > + ino_t root_ino; > > Thinking again, I'm wondering if root_ino and dev_id could be used > instead of root_qid in v9fs_walk()... Would you have a look at that > if you decide to fix the readdir issue ? I keep it in mind when looking at the postponed readdir() issue again. > > dev_t dev_id; > > > > + struct qht qpp_table; > > + uint16_t qp_prefix_next; > > > > }; > > > > /* 9p2000.L open flags */ > >
On Sun, 01 Sep 2019 20:40:34 +0200 Christian Schoenebeck <qemu_oss@crudebyte.com> wrote: > On Donnerstag, 29. August 2019 18:55:28 CEST Greg Kurz wrote: > > > diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c > > > index 7c31ffffaf..07a18c6e48 100644 > > > --- a/fsdev/qemu-fsdev-opts.c > > > +++ b/fsdev/qemu-fsdev-opts.c > > > @@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = { > > > > > > }, { > > > > > > .name = "readonly", > > > .type = QEMU_OPT_BOOL, > > > > > > - > > > + }, { > > > + .name = "multidevs", > > > + .type = QEMU_OPT_STRING, > > > > > > }, { > > > > > > .name = "socket", > > > .type = QEMU_OPT_STRING, > > > > > > @@ -76,6 +78,9 @@ static QemuOptsList qemu_virtfs_opts = { > > > > > > .name = "readonly", > > > .type = QEMU_OPT_BOOL, > > > > > > }, { > > > > > > + .name = "multidevs", > > > + .type = QEMU_OPT_STRING, > > > + }, { > > > > > > .name = "socket", > > > .type = QEMU_OPT_STRING, > > > > > > }, { > > > > > > diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c > > > index 077a8c4e2b..ed03d559a9 100644 > > > --- a/fsdev/qemu-fsdev.c > > > +++ b/fsdev/qemu-fsdev.c > > > @@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = { > > > > > > "writeout", > > > "fmode", > > > "dmode", > > > > > > + "multidevs", > > > > So we only allow this for the "local" backend. Any reason not to > > add this to "proxy" as well ? > > > > I didn't do it for the "throttling" options because it is a > > feature I didn't care to support much, but "multidevs" is more > > a fix than a fancy feature. > > Well, to be honest I haven't cared about the proxy backend at all. Like I > mentioned before, I am a bit sceptical that the proxy feature is actually used > by people in real life at all (at least in its current implementation). So > personally I don't see much sense in investing time for adding & testing new > things on this backend ATM. > It's fine by me, as my ultimate plan is to deprecate proxy at some point after virtio-fs gets merged :) So in this case, you just need to not document the option as being available for proxy. > > > + if (multidevs) { > > > + if (!strcmp(multidevs, "remap")) { > > > + fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; > > > + fsle->fse.export_flags |= V9FS_REMAP_INODES; > > > + } else if (!strcmp(multidevs, "forbid")) { > > > + fsle->fse.export_flags &= ~V9FS_REMAP_INODES; > > > + fsle->fse.export_flags |= V9FS_FORBID_MULTIDEVS; > > > + } > > > > And... > > > > } else if (!strcmp(multidevs, "warn")) { > > fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; > > fsle->fse.export_flags &= ~V9FS_REMAP_INODES; > > } else { > > error_setg(errp, "invalid multidevs property '%s'", multidevs); > > return -1; > > } > > > > ... because we're a bit pedantic for command line options :) > > And I thought I adapted to relaxed handling of CLI arguments. See existing > option "writeout". :) > Yeah... it's a mess. I don't think people really cared at the time, but I personally prefer to have stricter checking. Feel free to send fixes :) > > > > > + } > > > > > > if (fsle->fse.ops->parse_opts) { > > > > > > if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) { > > > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > > index 8cc65c2c67..c96ea51116 100644 > > > --- a/hw/9pfs/9p.c > > > +++ b/hw/9pfs/9p.c > > > @@ -25,6 +25,7 @@ > > > > > > #include "trace.h" > > > #include "migration/blocker.h" > > > #include "sysemu/qtest.h" > > > > > > +#include "qemu/xxhash.h" > > > > > > int open_fd_hw; > > > int total_open_fd; > > > > > > @@ -571,22 +572,109 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) > > > > > > P9_STAT_MODE_NAMED_PIPE | \ > > > P9_STAT_MODE_SOCKET) > > > > > > -/* This is the algorithm from ufs in spfs */ > > > + > > > +/* creative abuse of tb_hash_func7, which is based on xxhash */ > > > +static uint32_t qpp_hash(QppEntry e) > > > +{ > > > + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); > > > +} > > > + > > > +static bool qpp_lookup_func(const void *obj, const void *userp) > > > +{ > > > + const QppEntry *e1 = obj, *e2 = userp; > > > + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; > > > +} > > > + > > > +static void qpp_table_remove(void *p, uint32_t h, void *up) > > > +{ > > > + g_free(p); > > > +} > > > + > > > +static void qpp_table_destroy(struct qht *ht) > > > +{ > > > + qht_iter(ht, qpp_table_remove, NULL); > > > + qht_destroy(ht); > > > +} > > > > Ok to have a function for this instead of open-coding but I'd > > like to see qpp_table_init() for consistency. > > Well, these are just qht_init() one-liners, but if you really want to have > dedicated, local init functions for them, okay. > Yeah, even if it's a one-liner, I prefer consistency. Alternatively, with an idempotent v9fs_device_unrealize_common() like in [1], you'd have only one user for qpp_table_destroy() and you can open-code it. This would address my consistency concern even better :) [1] https://github.com/gkurz/qemu/commit/7fc4c49e910df2e155b36bf0a05de9209bd92da9 > > > static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID > > > *qidp) { > > > > > > + int err; > > > > > > size_t size; > > > > > > - if (pdu->s->dev_id != stbuf->st_dev) { > > > - error_report_once( > > > - "9p: Multiple devices detected in same VirtFS export. " > > > - "You must use a separate export for each device." > > > - ); > > > - return -ENODEV; > > > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > > > + /* map inode+device to qid path (fast path) */ > > > + err = qid_path_prefixmap(pdu, stbuf, &qidp->path); > > > + if (err) { > > > + return err; > > > + } > > > + } else { > > > + if (pdu->s->dev_id != stbuf->st_dev) { > > > + if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { > > > + error_report_once( > > > + "9p: Multiple devices detected in same VirtFS export. > > > " + "Access of guest to additional devices is (partly) > > > " + "denied due to virtfs option 'multidevs=forbid' > > > being " + "effective." > > > + ); > > > + return -ENODEV; > > > + } else { > > > + error_report_once( > > > > Please use warn_report_once(). > > Sure! > > > > + "9p: Multiple devices detected in same VirtFS export, > > > " + "which might lead to file ID collisions and severe > > > " + "misbehaviours on guest! You should either use a " > > > + "separate export for each device shared from host or > > > " + "use virtfs option 'multidevs=remap'!" > > > + ); > > > + } > > > + } > > > + memset(&qidp->path, 0, sizeof(qidp->path)); > > > + size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > > > + memcpy(&qidp->path, &stbuf->st_ino, size); > > > > > > } > > > > > > - memset(&qidp->path, 0, sizeof(qidp->path)); > > > - size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); > > > - memcpy(&qidp->path, &stbuf->st_ino, size); > > > > > > qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); > > > qidp->type = 0; > > > if (S_ISDIR(stbuf->st_mode)) { > > > > > > @@ -616,6 +704,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, > > > V9fsFidState *fidp,> > > > return 0; > > > > > > } > > > > > > +static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, > > > + struct dirent *dent, V9fsQID *qidp) > > > +{ > > > + struct stat stbuf; > > > + V9fsPath path; > > > + int err; > > > + > > > + v9fs_path_init(&path); > > > + > > > + err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); > > > + if (err < 0) { > > > + goto out; > > > + } > > > + err = v9fs_co_lstat(pdu, &path, &stbuf); > > > + if (err < 0) { > > > + goto out; > > > + } > > > + err = stat_to_qid(pdu, &stbuf, qidp); > > > + > > > +out: > > > + v9fs_path_free(&path); > > > + return err; > > > +} > > > + > > > > > > V9fsPDU *pdu_alloc(V9fsState *s) > > > { > > > > > > V9fsPDU *pdu = NULL; > > > > > > @@ -1964,16 +2076,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU > > > *pdu, V9fsFidState *fidp,> > > > v9fs_string_free(&name); > > > return count; > > > > > > } > > > > > > - /* > > > - * Fill up just the path field of qid because the client uses > > > - * only that. To fill the entire qid structure we will have > > > - * to stat each dirent found, which is expensive > > > - */ > > > - size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > > > - memcpy(&qid.path, &dent->d_ino, size); > > > - /* Fill the other fields with dummy values */ > > > - qid.type = 0; > > > - qid.version = 0; > > > + > > > + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { > > > + /* > > > + * dirent_to_qid() implies expensive stat call for each > > > entry, > > > + * we must do that here though since inode remapping requires > > > + * the device id, which in turn might be different for > > > + * different entries; we cannot make any assumption to avoid > > > + * that here. > > > + */ > > > + err = dirent_to_qid(pdu, fidp, dent, &qid); > > > + if (err < 0) { > > > + v9fs_readdir_unlock(&fidp->fs.dir); > > > + v9fs_co_seekdir(pdu, fidp, saved_dir_pos); > > > + v9fs_string_free(&name); > > > + return err; > > > + } > > > + } else { > > > + /* > > > + * Fill up just the path field of qid because the client uses > > > + * only that. To fill the entire qid structure we will have > > > + * to stat each dirent found, which is expensive. For the > > > + * latter reason we don't call dirent_to_qid() here. Only > > > drawback + * is that no multi-device export detection of > > > stat_to_qid() + * would be done and provided as error to the > > > user here. But + * user would get that error anyway when > > > accessing those + * files/dirs through other ways. > > > + */ > > > + size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); > > > + memcpy(&qid.path, &dent->d_ino, size); > > > + /* Fill the other fields with dummy values */ > > > + qid.type = 0; > > > + qid.version = 0; > > > + } > > > > > > /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ > > > len = pdu_marshal(pdu, 11 + count, "Qqbs", > > > > > > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const > > > V9fsTransport *t,> > > > goto out; > > > > > > } > > > > > > + s->root_ino = stat.st_ino; > > > > > > s->dev_id = stat.st_dev; > > > > > > + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ > > > + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); > > > + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ > > > + > > > > > > s->ctx.fst = &fse->fst; > > > fsdev_throttle_init(s->ctx.fst); > > > > > > @@ -3687,6 +3827,7 @@ out: > > > } > > > g_free(s->tag); > > > g_free(s->ctx.fs_root); > > > > > > + qpp_table_destroy(&s->qpp_table); > > > > This causes QEMU to crash if we get there before qht_init() was called. > > This should be guarded by a s->qpp_table.map != NULL check. > > Touché. > > I'll add that to qp_table_destroy() for simplicity. > > > I've just posted a patch that simplifies error handling in this > > function, you in Cc. The patch is also in 9p-next. Please rebase > > on top of it. > > Ok. > > > > diff --git a/qemu-options.hx b/qemu-options.hx > > > index 9621e934c0..603e5e8e15 100644 > > > --- a/qemu-options.hx > > > +++ b/qemu-options.hx > > > @@ -1335,17 +1335,17 @@ ETEXI > > > > > > DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, > > > > > > "-virtfs > > > local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-fil > > > e|passthrough|none\n"> > > > - " > > > [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n" - > > > "-virtfs > > > proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly] > > > \n" - "-virtfs > > > proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonl > > > y]\n" + " > > > [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,mul > > > tidevs=remap|forbid|warn]\n" + "-virtfs > > > proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly] > > > [,multidevs=remap|forbid|warn]\n" + "-virtfs > > > proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonl > > > y][,multidevs=remap|forbid|warn]\n" > > Either enable support in "proxy" or don't update the "proxy" documentation > > :) > > Then, if you don't mind, I'll just drop it from the proxy documentation, for > the reasons I described above. > Agreed. > > > "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n", > > > QEMU_ARCH_ALL) > > > > > > STEXI > > > > > > -@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} > > > ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] > > > [,fmode=@var{fmode}][,dmode=@var{dmode}] -@itemx -virtfs > > > proxy,socket=@var{socket},mount_tag=@var{mount_tag} > > > [,writeout=@var{writeout}][,readonly] -@itemx -virtfs > > > proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} > > > [,writeout=@var{writeout}][,readonly] +@item -virtfs > > > local,path=@var{path},mount_tag=@var{mount_tag} > > > ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] > > > [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}] > > > +@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} > > > [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] +@itemx > > > -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} > > > [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] > > Ditto. > > Ack. > > > > @itemx -virtfs synth,mount_tag=@var{mount_tag} > > > @findex -virtfs > > > > > > @@ -1399,6 +1399,27 @@ Specifies the default mode for newly created > > > directories on the host. Works> > > > only with security models "mapped-xattr" and "mapped-file". > > > @item mount_tag=@var{mount_tag} > > > Specifies the tag name to be used by the guest to mount this export > > > point. > > > > > > +@item multidevs=@var{multidevs} > > > +Specifies how to deal with multiple devices being shared with a 9p > > > export. > > > +Supported behaviours are either "remap", "forbid" or "warn". The latter > > > is > > > +the default behaviour on which virtfs 9p expects only one device to be > > > +shared with the same export, and if more than one device is shared and > > > +accessed via the same 9p export then only a warning message is logged > > > +(once) by qemu on host side. In order to avoid file ID collisions on > > > guest > > > +you should either create a separate virtfs export for each device to be > > > +shared with guests (recommended way) or you might use "remap" instead > > > which +allows you to share multiple devices with only one export instead, > > > which is +achieved by remapping the original inode numbers from host to > > > guest in a +way that would prevent such collisions. Remapping inodes in > > > such use cases +is required because the original device IDs from host are > > > never passed and +exposed on guest. Instead all files of an export shared > > > with virtfs always +share the same device id on guest. So two files with > > > identical inode +numbers but from actually different devices on host > > > would otherwise cause a +file ID collision and hence potential > > > misbehaviours on guest. "forbid" on +the other hand assumes like "warn" > > > that only one device is shared by the +same export, however it will not > > > only log a warning message but also +deny access to additional devices on > > > guest. Note though that "forbid" does +currently not block all possible > > > file access operations. > > > > Maybe provide a list of such operations that won't be blocked ? > > I feel punished for being so verbose. Okaaay. :) > :) > > > > > @end table > > > ETEXI > > > > > > diff --git a/vl.c b/vl.c > > > index b426b32134..9cb29b483d 100644 > > > --- a/vl.c > > > +++ b/vl.c > > > @@ -3320,7 +3320,7 @@ int main(int argc, char **argv, char **envp) > > > > > > case QEMU_OPTION_virtfs: { > > > > > > QemuOpts *fsdev; > > > QemuOpts *device; > > > > > > - const char *writeout, *sock_fd, *socket, *path, > > > *security_model; + const char *writeout, *sock_fd, > > > *socket, *path, *security_model, *multidevs;> > > > olist = qemu_find_opts("virtfs"); > > > if (!olist) { > > > > > > @@ -3380,6 +3380,10 @@ int main(int argc, char **argv, char **envp) > > > > > > qemu_opt_set_bool(fsdev, "readonly", > > > > > > qemu_opt_get_bool(opts, "readonly", 0), > > > &error_abort); > > > > > > + multidevs = qemu_opt_get(opts, "multidevs"); > > > + if (multidevs) { > > > + qemu_opt_set(fsdev, "multidevs", multidevs, > > > &error_abort); + } > > > > > > device = qemu_opts_create(qemu_find_opts("device"), NULL, > > > 0, > > > > > > &error_abort); > > > > > > qemu_opt_set(device, "driver", "virtio-9p-pci", > > > &error_abort); >
On Sun, 01 Sep 2019 20:56:16 +0200 Christian Schoenebeck <qemu_oss@crudebyte.com> wrote: > On Freitag, 30. August 2019 14:22:38 CEST Greg Kurz wrote: > > Some more comments below. > [snip] > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > > index 8cc65c2c67..c96ea51116 100644 > > > --- a/hw/9pfs/9p.c > > > +++ b/hw/9pfs/9p.c > > > @@ -25,6 +25,7 @@ > > > > > > #include "trace.h" > > > #include "migration/blocker.h" > > > #include "sysemu/qtest.h" > [snip] > > > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const > > > V9fsTransport *t,> > > > goto out; > > > > > > } > > > > > > + s->root_ino = stat.st_ino; > > > > This isn't used anywhere. It looks like a leftover of the readdir fix > > in v5. > > Yes, both correct. I intentionally left it though, since I found it a natural > complement always capturing the root inode along to the root device. > Fair enough. The local backend opens an fd to the root directory, to be used by any access to the 9p share. I think root_dev/root_ino should be obtained with fstat() on this fd, to be sure they are consistent. Maybe add an extra struct stat * argument to the init function ? I'd rather see this done as a preparatory "backend to cache 9p root device/inode during init" patch. > > > s->dev_id = stat.st_dev; > > > > > > + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ > > > + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); > > > + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ > > > + > > > > > > s->ctx.fst = &fse->fst; > > > fsdev_throttle_init(s->ctx.fst); > > > > > > @@ -3687,6 +3827,7 @@ out: > > > } > > > g_free(s->tag); > > > g_free(s->ctx.fs_root); > > > > > > + qpp_table_destroy(&s->qpp_table); > > > > > > v9fs_path_free(&path); > > > > > > } > > > return rc; > > > > > > @@ -3699,6 +3840,7 @@ void v9fs_device_unrealize_common(V9fsState *s, > > > Error **errp)> > > > } > > > fsdev_throttle_cleanup(s->ctx.fst); > > > g_free(s->tag); > > > > > > + qpp_table_destroy(&s->qpp_table); > > > > > > g_free(s->ctx.fs_root); > > > > > > } > > > > > > diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h > > > index 5e316178d5..a283b0193e 100644 > > > --- a/hw/9pfs/9p.h > > > +++ b/hw/9pfs/9p.h > > > @@ -8,6 +8,7 @@ > > > > > > #include "fsdev/9p-iov-marshal.h" > > > #include "qemu/thread.h" > > > #include "qemu/coroutine.h" > > > > > > +#include "qemu/qht.h" > > > > > > enum { > > > > > > P9_TLERROR = 6, > > > > > > @@ -235,6 +236,15 @@ struct V9fsFidState > > > > > > V9fsFidState *rclm_lst; > > > > > > }; > > > > > > +#define QPATH_INO_MASK ((1ULL << 48) - 1) > > > + > > > +/* QID path prefix entry, see stat_to_qid */ > > > +typedef struct { > > > + dev_t dev; > > > + uint16_t ino_prefix; > > > + uint16_t qp_prefix; > > > +} QppEntry; > > > + > > > > > > struct V9fsState > > > { > > > > > > QLIST_HEAD(, V9fsPDU) free_list; > > > > > > @@ -256,7 +266,10 @@ struct V9fsState > > > > > > Error *migration_blocker; > > > V9fsConf fsconf; > > > V9fsQID root_qid; > > > > > > + ino_t root_ino; > > > > Thinking again, I'm wondering if root_ino and dev_id could be used > > instead of root_qid in v9fs_walk()... Would you have a look at that > > if you decide to fix the readdir issue ? > > I keep it in mind when looking at the postponed readdir() issue again. > > > > dev_t dev_id; > > > > > > + struct qht qpp_table; > > > + uint16_t qp_prefix_next; > > > > > > }; > > > > > > /* 9p2000.L open flags */ > > > >
On Montag, 2. September 2019 12:16:26 CEST Greg Kurz wrote: > > > > @@ -571,22 +572,109 @@ static void coroutine_fn virtfs_reset(V9fsPDU > > > > *pdu) > > > > > > > > P9_STAT_MODE_NAMED_PIPE | \ > > > > P9_STAT_MODE_SOCKET) > > > > > > > > -/* This is the algorithm from ufs in spfs */ > > > > + > > > > +/* creative abuse of tb_hash_func7, which is based on xxhash */ > > > > +static uint32_t qpp_hash(QppEntry e) > > > > +{ > > > > + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); > > > > +} > > > > + > > > > +static bool qpp_lookup_func(const void *obj, const void *userp) > > > > +{ > > > > + const QppEntry *e1 = obj, *e2 = userp; > > > > + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; > > > > +} > > > > + > > > > +static void qpp_table_remove(void *p, uint32_t h, void *up) > > > > +{ > > > > + g_free(p); > > > > +} > > > > + > > > > +static void qpp_table_destroy(struct qht *ht) > > > > +{ > > > > + qht_iter(ht, qpp_table_remove, NULL); > > > > + qht_destroy(ht); > > > > +} > > > > > > Ok to have a function for this instead of open-coding but I'd > > > like to see qpp_table_init() for consistency. > > > > Well, these are just qht_init() one-liners, but if you really want to have > > dedicated, local init functions for them, okay. > > Yeah, even if it's a one-liner, I prefer consistency. Alternatively, with > an idempotent v9fs_device_unrealize_common() like in [1], you'd have > only one user for qpp_table_destroy() and you can open-code it. This > would address my consistency concern even better :) > > [1] > https://github.com/gkurz/qemu/commit/7fc4c49e910df2e155b36bf0a05de9209bd92d I'll rather add qpp_table_init() then, because grouping the two calls qht_iter() and qht_destroy() together to a dedicated function qpp_table_destroy() still makes sense semantically IMO.
On Montag, 2. September 2019 13:49:34 CEST Greg Kurz wrote: > On Sun, 01 Sep 2019 20:56:16 +0200 > > Christian Schoenebeck <qemu_oss@crudebyte.com> wrote: > > On Freitag, 30. August 2019 14:22:38 CEST Greg Kurz wrote: > > > Some more comments below. > > > > [snip] > > > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > > > index 8cc65c2c67..c96ea51116 100644 > > > > --- a/hw/9pfs/9p.c > > > > +++ b/hw/9pfs/9p.c > > > > @@ -25,6 +25,7 @@ > > > > > > > > #include "trace.h" > > > > #include "migration/blocker.h" > > > > #include "sysemu/qtest.h" > > > > [snip] > > > > > > @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, > > > > const > > > > V9fsTransport *t,> > > > > > > > > goto out; > > > > > > > > } > > > > > > > > + s->root_ino = stat.st_ino; > > > > > > This isn't used anywhere. It looks like a leftover of the readdir fix > > > in v5. > > > > Yes, both correct. I intentionally left it though, since I found it a > > natural complement always capturing the root inode along to the root > > device. > Fair enough. The local backend opens an fd to the root directory, to be used > by any access to the 9p share. I think root_dev/root_ino should be obtained > with fstat() on this fd, to be sure they are consistent. Maybe add an extra > struct stat * argument to the init function ? I'd rather see this done as a > preparatory "backend to cache 9p root device/inode during init" patch. Convinced. I'll drop root_ino from this patch set for now.
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index c757c8099f..f2f7772c86 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -59,6 +59,11 @@ typedef struct ExtendedOps { #define V9FS_RDONLY 0x00000040 #define V9FS_PROXY_SOCK_FD 0x00000080 #define V9FS_PROXY_SOCK_NAME 0x00000100 +/* + * multidevs option (either one of the two applies exclusively) + */ +#define V9FS_REMAP_INODES 0x00000200 +#define V9FS_FORBID_MULTIDEVS 0x00000400 #define V9FS_SEC_MASK 0x0000003C diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c index 7c31ffffaf..07a18c6e48 100644 --- a/fsdev/qemu-fsdev-opts.c +++ b/fsdev/qemu-fsdev-opts.c @@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = { }, { .name = "readonly", .type = QEMU_OPT_BOOL, - + }, { + .name = "multidevs", + .type = QEMU_OPT_STRING, }, { .name = "socket", .type = QEMU_OPT_STRING, @@ -76,6 +78,9 @@ static QemuOptsList qemu_virtfs_opts = { .name = "readonly", .type = QEMU_OPT_BOOL, }, { + .name = "multidevs", + .type = QEMU_OPT_STRING, + }, { .name = "socket", .type = QEMU_OPT_STRING, }, { diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c index 077a8c4e2b..ed03d559a9 100644 --- a/fsdev/qemu-fsdev.c +++ b/fsdev/qemu-fsdev.c @@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = { "writeout", "fmode", "dmode", + "multidevs", "throttling.bps-total", "throttling.bps-read", "throttling.bps-write", @@ -121,6 +122,7 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp) const char *fsdev_id = qemu_opts_id(opts); const char *fsdriver = qemu_opt_get(opts, "fsdriver"); const char *writeout = qemu_opt_get(opts, "writeout"); + const char *multidevs = qemu_opt_get(opts, "multidevs"); bool ro = qemu_opt_get_bool(opts, "readonly", 0); if (!fsdev_id) { @@ -161,6 +163,15 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp) } else { fsle->fse.export_flags &= ~V9FS_RDONLY; } + if (multidevs) { + if (!strcmp(multidevs, "remap")) { + fsle->fse.export_flags &= ~V9FS_FORBID_MULTIDEVS; + fsle->fse.export_flags |= V9FS_REMAP_INODES; + } else if (!strcmp(multidevs, "forbid")) { + fsle->fse.export_flags &= ~V9FS_REMAP_INODES; + fsle->fse.export_flags |= V9FS_FORBID_MULTIDEVS; + } + } if (fsle->fse.ops->parse_opts) { if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) { diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 8cc65c2c67..c96ea51116 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -25,6 +25,7 @@ #include "trace.h" #include "migration/blocker.h" #include "sysemu/qtest.h" +#include "qemu/xxhash.h" int open_fd_hw; int total_open_fd; @@ -571,22 +572,109 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) P9_STAT_MODE_NAMED_PIPE | \ P9_STAT_MODE_SOCKET) -/* This is the algorithm from ufs in spfs */ + +/* creative abuse of tb_hash_func7, which is based on xxhash */ +static uint32_t qpp_hash(QppEntry e) +{ + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); +} + +static bool qpp_lookup_func(const void *obj, const void *userp) +{ + const QppEntry *e1 = obj, *e2 = userp; + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; +} + +static void qpp_table_remove(void *p, uint32_t h, void *up) +{ + g_free(p); +} + +static void qpp_table_destroy(struct qht *ht) +{ + qht_iter(ht, qpp_table_remove, NULL); + qht_destroy(ht); +} + +/* stat_to_qid needs to map inode number (64 bits) and device id (32 bits) + * to a unique QID path (64 bits). To avoid having to map and keep track + * of up to 2^64 objects, we map only the 16 highest bits of the inode plus + * the device id to the 16 highest bits of the QID path. The 48 lowest bits + * of the QID path equal to the lowest bits of the inode number. + * + * This takes advantage of the fact that inode number are usually not + * random but allocated sequentially, so we have fewer items to keep + * track of. + */ +static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf, + uint64_t *path) +{ + QppEntry lookup = { + .dev = stbuf->st_dev, + .ino_prefix = (uint16_t) (stbuf->st_ino >> 48) + }, *val; + uint32_t hash = qpp_hash(lookup); + + val = qht_lookup(&pdu->s->qpp_table, &lookup, hash); + + if (!val) { + if (pdu->s->qp_prefix_next == 0) { + /* we ran out of prefixes */ + error_report_once( + "9p: No more prefixes available for remapping inodes from " + "host to guest." + ); + return -ENFILE; + } + + val = g_malloc0(sizeof(QppEntry)); + *val = lookup; + + /* new unique inode prefix and device combo */ + val->qp_prefix = pdu->s->qp_prefix_next++; + qht_insert(&pdu->s->qpp_table, val, hash, NULL); + } + + *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK); + return 0; +} + static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) { + int err; size_t size; - if (pdu->s->dev_id != stbuf->st_dev) { - error_report_once( - "9p: Multiple devices detected in same VirtFS export. " - "You must use a separate export for each device." - ); - return -ENODEV; + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { + /* map inode+device to qid path (fast path) */ + err = qid_path_prefixmap(pdu, stbuf, &qidp->path); + if (err) { + return err; + } + } else { + if (pdu->s->dev_id != stbuf->st_dev) { + if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { + error_report_once( + "9p: Multiple devices detected in same VirtFS export. " + "Access of guest to additional devices is (partly) " + "denied due to virtfs option 'multidevs=forbid' being " + "effective." + ); + return -ENODEV; + } else { + error_report_once( + "9p: Multiple devices detected in same VirtFS export, " + "which might lead to file ID collisions and severe " + "misbehaviours on guest! You should either use a " + "separate export for each device shared from host or " + "use virtfs option 'multidevs=remap'!" + ); + } + } + memset(&qidp->path, 0, sizeof(qidp->path)); + size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); + memcpy(&qidp->path, &stbuf->st_ino, size); } - memset(&qidp->path, 0, sizeof(qidp->path)); - size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); - memcpy(&qidp->path, &stbuf->st_ino, size); qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); qidp->type = 0; if (S_ISDIR(stbuf->st_mode)) { @@ -616,6 +704,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, return 0; } +static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, + struct dirent *dent, V9fsQID *qidp) +{ + struct stat stbuf; + V9fsPath path; + int err; + + v9fs_path_init(&path); + + err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); + if (err < 0) { + goto out; + } + err = v9fs_co_lstat(pdu, &path, &stbuf); + if (err < 0) { + goto out; + } + err = stat_to_qid(pdu, &stbuf, qidp); + +out: + v9fs_path_free(&path); + return err; +} + V9fsPDU *pdu_alloc(V9fsState *s) { V9fsPDU *pdu = NULL; @@ -1964,16 +2076,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, v9fs_string_free(&name); return count; } - /* - * Fill up just the path field of qid because the client uses - * only that. To fill the entire qid structure we will have - * to stat each dirent found, which is expensive - */ - size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); - memcpy(&qid.path, &dent->d_ino, size); - /* Fill the other fields with dummy values */ - qid.type = 0; - qid.version = 0; + + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { + /* + * dirent_to_qid() implies expensive stat call for each entry, + * we must do that here though since inode remapping requires + * the device id, which in turn might be different for + * different entries; we cannot make any assumption to avoid + * that here. + */ + err = dirent_to_qid(pdu, fidp, dent, &qid); + if (err < 0) { + v9fs_readdir_unlock(&fidp->fs.dir); + v9fs_co_seekdir(pdu, fidp, saved_dir_pos); + v9fs_string_free(&name); + return err; + } + } else { + /* + * Fill up just the path field of qid because the client uses + * only that. To fill the entire qid structure we will have + * to stat each dirent found, which is expensive. For the + * latter reason we don't call dirent_to_qid() here. Only drawback + * is that no multi-device export detection of stat_to_qid() + * would be done and provided as error to the user here. But + * user would get that error anyway when accessing those + * files/dirs through other ways. + */ + size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); + memcpy(&qid.path, &dent->d_ino, size); + /* Fill the other fields with dummy values */ + qid.type = 0; + qid.version = 0; + } /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ len = pdu_marshal(pdu, 11 + count, "Qqbs", @@ -3672,8 +3807,13 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, goto out; } + s->root_ino = stat.st_ino; s->dev_id = stat.st_dev; + /* QID path hash table. 1 entry ought to be enough for anybody ;) */ + qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ + s->ctx.fst = &fse->fst; fsdev_throttle_init(s->ctx.fst); @@ -3687,6 +3827,7 @@ out: } g_free(s->tag); g_free(s->ctx.fs_root); + qpp_table_destroy(&s->qpp_table); v9fs_path_free(&path); } return rc; @@ -3699,6 +3840,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) } fsdev_throttle_cleanup(s->ctx.fst); g_free(s->tag); + qpp_table_destroy(&s->qpp_table); g_free(s->ctx.fs_root); } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 5e316178d5..a283b0193e 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -8,6 +8,7 @@ #include "fsdev/9p-iov-marshal.h" #include "qemu/thread.h" #include "qemu/coroutine.h" +#include "qemu/qht.h" enum { P9_TLERROR = 6, @@ -235,6 +236,15 @@ struct V9fsFidState V9fsFidState *rclm_lst; }; +#define QPATH_INO_MASK ((1ULL << 48) - 1) + +/* QID path prefix entry, see stat_to_qid */ +typedef struct { + dev_t dev; + uint16_t ino_prefix; + uint16_t qp_prefix; +} QppEntry; + struct V9fsState { QLIST_HEAD(, V9fsPDU) free_list; @@ -256,7 +266,10 @@ struct V9fsState Error *migration_blocker; V9fsConf fsconf; V9fsQID root_qid; + ino_t root_ino; dev_t dev_id; + struct qht qpp_table; + uint16_t qp_prefix_next; }; /* 9p2000.L open flags */ diff --git a/qemu-options.hx b/qemu-options.hx index 9621e934c0..603e5e8e15 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1335,17 +1335,17 @@ ETEXI DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, "-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n" - " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n" - "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n" - "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n" + " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n" + "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly][,multidevs=remap|forbid|warn]\n" + "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly][,multidevs=remap|forbid|warn]\n" "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n", QEMU_ARCH_ALL) STEXI -@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}] -@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] -@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] +@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}] +@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] +@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,multidevs=@var{multidevs}] @itemx -virtfs synth,mount_tag=@var{mount_tag} @findex -virtfs @@ -1399,6 +1399,27 @@ Specifies the default mode for newly created directories on the host. Works only with security models "mapped-xattr" and "mapped-file". @item mount_tag=@var{mount_tag} Specifies the tag name to be used by the guest to mount this export point. +@item multidevs=@var{multidevs} +Specifies how to deal with multiple devices being shared with a 9p export. +Supported behaviours are either "remap", "forbid" or "warn". The latter is +the default behaviour on which virtfs 9p expects only one device to be +shared with the same export, and if more than one device is shared and +accessed via the same 9p export then only a warning message is logged +(once) by qemu on host side. In order to avoid file ID collisions on guest +you should either create a separate virtfs export for each device to be +shared with guests (recommended way) or you might use "remap" instead which +allows you to share multiple devices with only one export instead, which is +achieved by remapping the original inode numbers from host to guest in a +way that would prevent such collisions. Remapping inodes in such use cases +is required because the original device IDs from host are never passed and +exposed on guest. Instead all files of an export shared with virtfs always +share the same device id on guest. So two files with identical inode +numbers but from actually different devices on host would otherwise cause a +file ID collision and hence potential misbehaviours on guest. "forbid" on +the other hand assumes like "warn" that only one device is shared by the +same export, however it will not only log a warning message but also +deny access to additional devices on guest. Note though that "forbid" does +currently not block all possible file access operations. @end table ETEXI diff --git a/vl.c b/vl.c index b426b32134..9cb29b483d 100644 --- a/vl.c +++ b/vl.c @@ -3320,7 +3320,7 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_virtfs: { QemuOpts *fsdev; QemuOpts *device; - const char *writeout, *sock_fd, *socket, *path, *security_model; + const char *writeout, *sock_fd, *socket, *path, *security_model, *multidevs; olist = qemu_find_opts("virtfs"); if (!olist) { @@ -3380,6 +3380,10 @@ int main(int argc, char **argv, char **envp) qemu_opt_set_bool(fsdev, "readonly", qemu_opt_get_bool(opts, "readonly", 0), &error_abort); + multidevs = qemu_opt_get(opts, "multidevs"); + if (multidevs) { + qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort); + } device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, &error_abort); qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);