diff mbox series

[v7,3/6] 9pfs: add new function v9fs_co_readdir_many()

Message ID 4035f3a28461ba21ab5e24c843556b0ec06246ab.1595166227.git.qemu_oss@crudebyte.com (mailing list archive)
State New, archived
Headers show
Series 9pfs: readdir optimization | expand

Commit Message

Christian Schoenebeck July 19, 2020, 12:29 p.m. UTC
The newly added function v9fs_co_readdir_many() retrieves multiple
directory entries with a single fs driver request. It is intended to
replace uses of v9fs_co_readdir(), which only retrieves a single
directory entry per fs driver request.

The reason for this planned replacement is that for every fs driver
request the coroutine is dispatched from the main I/O thread to a
background I/O thread and eventually dispatched back to the main I/O
thread. Hopping between threads adds latency. So if a 9pfs Treaddir
request reads a large number of directory entries, this currently
adds up to huge latencies of several hundred ms or even more. Using
v9fs_co_readdir_many() instead of v9fs_co_readdir() will therefore
provide significant performance improvements.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/9p.h    |  22 ++++++
 hw/9pfs/codir.c | 196 +++++++++++++++++++++++++++++++++++++++++++++---
 hw/9pfs/coth.h  |   3 +
 3 files changed, 210 insertions(+), 11 deletions(-)
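
For illustration, this is roughly how a T_readdir request handler is meant to
consume the new API — a minimal sketch, not code from this patch: the
v9fs_free_dirents() helper is only referenced by the patch's doc comment, and
marshal_entry() as well as the pdu/fidp/offset/maxsize variables stand in for
whatever the enclosing handler provides:

    struct V9fsDirEnt *entries = NULL, *e;
    int size;

    /* one fs driver request, one thread hop, arbitrarily many entries */
    size = v9fs_co_readdir_many(pdu, fidp, &entries, offset, maxsize, true);
    if (size < 0) {
        v9fs_free_dirents(entries);   /* required on error paths, too */
        return size;
    }
    for (e = entries; e != NULL; e = e->next) {
        /* e->dent is always set; e->st is non-NULL here since dostat=true */
        marshal_entry(pdu, e->dent, e->st);   /* hypothetical marshaller */
    }
    v9fs_free_dirents(entries);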

Comments

Christian Schoenebeck July 28, 2020, 8:33 a.m. UTC | #1
On Sonntag, 19. Juli 2020 14:29:13 CEST Christian Schoenebeck wrote:
> [...]
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 73f9a751e1..07da5d8d70 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -18,28 +18,202 @@
>  #include "qemu/main-loop.h"
>  #include "coth.h"
> 
> +/*
> + * This is solely executed on a background IO thread.
> + */
> +static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
> +{
> +    int err = 0;
> +    V9fsState *s = pdu->s;
> +    struct dirent *entry;
> +
> +    errno = 0;
> +    entry = s->ops->readdir(&s->ctx, &fidp->fs);
> +    if (!entry && errno) {
> +        *dent = NULL;
> +        err = -errno;
> +    } else {
> +        *dent = entry;
> +    }
> +    return err;
> +}
> +
> +/*
> + * TODO: This will be removed for performance reasons.
> + * Use v9fs_co_readdir_many() instead.
> + */
>  int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
>                                   struct dirent **dent)
>  {
>      int err;
> -    V9fsState *s = pdu->s;
> 
>      if (v9fs_request_cancelled(pdu)) {
>          return -EINTR;
>      }
> -    v9fs_co_run_in_worker(
> -        {
> -            struct dirent *entry;
> +    v9fs_co_run_in_worker({
> +        err = do_readdir(pdu, fidp, dent);
> +    });
> +    return err;
> +}

Ok, this ^ part (precisely: do_readdir() and v9fs_co_readdir()) can still be
sliced out into a separate patch, and that apparently makes sense, as it would
avoid cluttering this patch like ...

> +
> +/*
> + * This is solely executed on a background IO thread.
> + *
> + * See v9fs_co_readdir_many() (as its only user) below for details.
> + */
> +static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
> +                           struct V9fsDirEnt **entries, off_t offset,
> +                           int32_t maxsize, bool dostat)
> +{
> +    V9fsState *s = pdu->s;
> +    V9fsString name;
> +    int len, err = 0;
> +    int32_t size = 0;
> +    off_t saved_dir_pos;
> +    struct dirent *dent;
> +    struct V9fsDirEnt *e = NULL;
> +    V9fsPath path;
> +    struct stat stbuf;
> +
> +    *entries = NULL;
> +    v9fs_path_init(&path);
> +
> +    /*
> +     * TODO: Here should be a warn_report_once() if lock failed.
> +     *
> +     * With a good 9p client we should not get into concurrency here,
> +     * because a good client would not use the same fid for concurrent
> +     * requests. We do the lock here for safety reasons though. However
> +     * the client would then suffer performance issues, so better log that
> +     * issue here.
> +     */
> +    v9fs_readdir_lock(&fidp->fs.dir);
> +
> +    /* seek directory to requested initial position */
> +    if (offset == 0) {
> +        s->ops->rewinddir(&s->ctx, &fidp->fs);
> +    } else {
> +        s->ops->seekdir(&s->ctx, &fidp->fs, offset);
> +    }
> +
> +    /* save the directory position */
> +    saved_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
> +    if (saved_dir_pos < 0) {
> +        err = saved_dir_pos;
> +        goto out;
> +    }
> +
> +    while (true) {
> +        /* get directory entry from fs driver */
> +        err = do_readdir(pdu, fidp, &dent);
> +        if (err || !dent) {
> +            break;
> +        }
> 
> -            errno = 0;
> -            entry = s->ops->readdir(&s->ctx, &fidp->fs);
> -            if (!entry && errno) {

... here ...

> +        /*
> +         * stop this loop as soon as it would exceed the allowed maximum
> +         * response message size for the directory entries collected so far,
> +         * because anything beyond that size would need to be discarded by
> +         * 9p controller (main thread / top half) anyway
> +         */
> +        v9fs_string_init(&name);
> +        v9fs_string_sprintf(&name, "%s", dent->d_name);
> +        len = v9fs_readdir_response_size(&name);
> +        v9fs_string_free(&name);
> +        if (size + len > maxsize) {
> +            /* this is not an error case actually */
> +            break;
> +        }
> +
> +        /* append next node to result chain */
> +        if (!e) {
> +            *entries = e = g_malloc0(sizeof(V9fsDirEnt));
> +        } else {
> +            e = e->next = g_malloc0(sizeof(V9fsDirEnt));
> +        }
> +        e->dent = g_malloc0(sizeof(struct dirent));
> +        memcpy(e->dent, dent, sizeof(struct dirent));
> +
> +        /* perform a full stat() for directory entry if requested by caller */
> +        if (dostat) {
> +            err = s->ops->name_to_path(
> +                &s->ctx, &fidp->path, dent->d_name, &path
> +            );
> +            if (err < 0) {
>                  err = -errno;
> -            } else {
> -                *dent = entry;
> -                err = 0;

... and here.

> +                break;
>              }
> -        });
> [...]

So I'll prepare a v8 with this patch here split into two.

But this is it. I don't see another chunk in this patch set that could be
split down further in a useful way.

Best regards,
Christian Schoenebeck
Greg Kurz July 28, 2020, 8:46 a.m. UTC | #2
On Tue, 28 Jul 2020 10:33:42 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> [...]
> So I'll prepare a v8 with this patch here split into two.
> 
> But this is it. I don't see another chunk in this patch set that could be
> split down further in a useful way.

You're in charge now so feel free to do that if the development+testing
cost is acceptable to you. You already know my take on having smaller
patches :)

Cheers,

--
Greg
Christian Schoenebeck July 28, 2020, 9:34 a.m. UTC | #3
On Dienstag, 28. Juli 2020 10:46:00 CEST Greg Kurz wrote:
> > So I'll prepare a v8 with this patch here split into two.
> > 
> > But this is it. I don't see another chunk in this patch set that could be
> > split down further in a useful way.
> > 
> 
> You're in charge now so feel free to do that if the development+testing
> cost is acceptable to you. 

Yep, that's the plan. I have already tested this patch set thoroughly, so I
would like to avoid major changes to this series now that would require me to
restart major testing cycles.

In this particular case, this patch-split ends up in 100% identical code. So 
it is really just git history tweaking after all.

> You already know my take on having smaller
> patches :)

A pure desire for something is one thing; the actually available real-life
options are another. If you see more options than I can identify, you are
always invited to make your call.

Plus this patch series has already contained trivial patches for a long time
(e.g. patches 1, 2 and 6), and yet I haven't seen any ack from your side for
any of them. Consider yourself invited for that as well.

Best regards,
Christian Schoenebeck
Greg Kurz July 28, 2020, 9:46 a.m. UTC | #4
On Tue, 28 Jul 2020 11:34:12 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> [...]
> 
> Plus this patch series has already contained trivial patches for a long time
> (e.g. patches 1, 2 and 6), and yet I haven't seen any ack from your side for
> any of them. Consider yourself invited for that as well.

I'm now starting to reach a steady-state in my new job. I should hopefully
be able to find some cycles for the trivial patches at least.

Thanks for your patience.

diff mbox series

Patch

diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 561774e843..93b7030edf 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -215,6 +215,28 @@  static inline void v9fs_readdir_init(V9fsDir *dir)
     qemu_co_mutex_init(&dir->readdir_mutex);
 }
 
+/**
+ * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests,
+ * which is a chained list of directory entries.
+ */
+typedef struct V9fsDirEnt {
+    /* mandatory (must not be NULL) information for all readdir requests */
+    struct dirent *dent;
+    /*
+     * optional (may be NULL): A full stat of each directory entry is just
+     * done if explicitly told to fs driver.
+     */
+    struct stat *st;
+    /*
+     * instead of an array, directory entries are always returned as
+     * chained list, that's because the amount of entries retrieved by fs
+     * drivers is dependent on the individual entries' name (since response
+     * messages are size limited), so the final amount cannot be estimated
+     * before hand
+     */
+    struct V9fsDirEnt *next;
+} V9fsDirEnt;
+
 /*
  * Filled by fs driver on open and other
  * calls.
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 73f9a751e1..07da5d8d70 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -18,28 +18,202 @@ 
 #include "qemu/main-loop.h"
 #include "coth.h"
 
+/*
+ * This is solely executed on a background IO thread.
+ */
+static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
+{
+    int err = 0;
+    V9fsState *s = pdu->s;
+    struct dirent *entry;
+
+    errno = 0;
+    entry = s->ops->readdir(&s->ctx, &fidp->fs);
+    if (!entry && errno) {
+        *dent = NULL;
+        err = -errno;
+    } else {
+        *dent = entry;
+    }
+    return err;
+}
+
+/*
+ * TODO: This will be removed for performance reasons.
+ * Use v9fs_co_readdir_many() instead.
+ */
 int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
                                  struct dirent **dent)
 {
     int err;
-    V9fsState *s = pdu->s;
 
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
-    v9fs_co_run_in_worker(
-        {
-            struct dirent *entry;
+    v9fs_co_run_in_worker({
+        err = do_readdir(pdu, fidp, dent);
+    });
+    return err;
+}
+
+/*
+ * This is solely executed on a background IO thread.
+ *
+ * See v9fs_co_readdir_many() (as its only user) below for details.
+ */
+static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
+                           struct V9fsDirEnt **entries, off_t offset,
+                           int32_t maxsize, bool dostat)
+{
+    V9fsState *s = pdu->s;
+    V9fsString name;
+    int len, err = 0;
+    int32_t size = 0;
+    off_t saved_dir_pos;
+    struct dirent *dent;
+    struct V9fsDirEnt *e = NULL;
+    V9fsPath path;
+    struct stat stbuf;
+
+    *entries = NULL;
+    v9fs_path_init(&path);
+
+    /*
+     * TODO: Here should be a warn_report_once() if lock failed.
+     *
+     * With a good 9p client we should not get into concurrency here,
+     * because a good client would not use the same fid for concurrent
+     * requests. We do the lock here for safety reasons though. However
+     * the client would then suffer performance issues, so better log that
+     * issue here.
+     */
+    v9fs_readdir_lock(&fidp->fs.dir);
+
+    /* seek directory to requested initial position */
+    if (offset == 0) {
+        s->ops->rewinddir(&s->ctx, &fidp->fs);
+    } else {
+        s->ops->seekdir(&s->ctx, &fidp->fs, offset);
+    }
+
+    /* save the directory position */
+    saved_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
+    if (saved_dir_pos < 0) {
+        err = saved_dir_pos;
+        goto out;
+    }
+
+    while (true) {
+        /* get directory entry from fs driver */
+        err = do_readdir(pdu, fidp, &dent);
+        if (err || !dent) {
+            break;
+        }
 
-            errno = 0;
-            entry = s->ops->readdir(&s->ctx, &fidp->fs);
-            if (!entry && errno) {
+        /*
+         * stop this loop as soon as it would exceed the allowed maximum
+         * response message size for the directory entries collected so far,
+         * because anything beyond that size would need to be discarded by
+         * 9p controller (main thread / top half) anyway
+         */
+        v9fs_string_init(&name);
+        v9fs_string_sprintf(&name, "%s", dent->d_name);
+        len = v9fs_readdir_response_size(&name);
+        v9fs_string_free(&name);
+        if (size + len > maxsize) {
+            /* this is not an error case actually */
+            break;
+        }
+
+        /* append next node to result chain */
+        if (!e) {
+            *entries = e = g_malloc0(sizeof(V9fsDirEnt));
+        } else {
+            e = e->next = g_malloc0(sizeof(V9fsDirEnt));
+        }
+        e->dent = g_malloc0(sizeof(struct dirent));
+        memcpy(e->dent, dent, sizeof(struct dirent));
+
+        /* perform a full stat() for directory entry if requested by caller */
+        if (dostat) {
+            err = s->ops->name_to_path(
+                &s->ctx, &fidp->path, dent->d_name, &path
+            );
+            if (err < 0) {
                 err = -errno;
-            } else {
-                *dent = entry;
-                err = 0;
+                break;
             }
-        });
+
+            err = s->ops->lstat(&s->ctx, &path, &stbuf);
+            if (err < 0) {
+                err = -errno;
+                break;
+            }
+
+            e->st = g_malloc0(sizeof(struct stat));
+            memcpy(e->st, &stbuf, sizeof(struct stat));
+        }
+
+        size += len;
+        saved_dir_pos = dent->d_off;
+    }
+
+    /* restore (last) saved position */
+    s->ops->seekdir(&s->ctx, &fidp->fs, saved_dir_pos);
+
+out:
+    v9fs_readdir_unlock(&fidp->fs.dir);
+    v9fs_path_free(&path);
+    if (err < 0) {
+        return err;
+    }
+    return size;
+}
+
+/**
+ * @brief Reads multiple directory entries in one rush.
+ *
+ * Retrieves the requested (max. amount of) directory entries from the fs
+ * driver. This function must only be called by the main IO thread (top half).
+ * Internally this function call will be dispatched to a background IO thread
+ * (bottom half) where it is eventually executed by the fs driver.
+ *
+ * @discussion Acquiring multiple directory entries in one rush from the fs
+ * driver, instead of retrieving each directory entry individually, is very
+ * beneficial from performance point of view. Because for every fs driver
+ * request latency is added, which in practice could lead to overall
+ * latencies of several hundred ms for reading all entries (of just a single
+ * directory) if every directory entry was individually requested from fs
+ * driver.
+ *
+ * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling
+ * v9fs_co_readdir_many(), both on success and on error cases of this
+ * function, to avoid memory leaks once @p entries are no longer needed.
+ *
+ * @param pdu - the causing 9p (T_readdir) client request
+ * @param fidp - already opened directory where readdir shall be performed on
+ * @param entries - output for directory entries (must not be NULL)
+ * @param offset - initial position inside the directory the function shall
+ *                 seek to before retrieving the directory entries
+ * @param maxsize - maximum result message body size (in bytes)
+ * @param dostat - whether a stat() should be performed and returned for
+ *                 each directory entry
+ * @returns resulting response message body size (in bytes) on success,
+ *          negative error code otherwise
+ */
+int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
+                                      struct V9fsDirEnt **entries,
+                                      off_t offset, int32_t maxsize,
+                                      bool dostat)
+{
+    int err = 0;
+
+    if (v9fs_request_cancelled(pdu)) {
+        return -EINTR;
+    }
+    v9fs_co_run_in_worker({
+        err = do_readdir_many(pdu, fidp, entries, offset, maxsize, dostat);
+    });
     return err;
 }
 
diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
index c2cdc7a9ea..fd4a45bc7c 100644
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -49,6 +49,9 @@ 
 void co_run_in_worker_bh(void *);
 int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
 int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **);
+int coroutine_fn v9fs_co_readdir_many(V9fsPDU *, V9fsFidState *,
+                                      struct V9fsDirEnt **, off_t, int32_t,
+                                      bool);
 off_t coroutine_fn v9fs_co_telldir(V9fsPDU *, V9fsFidState *);
 void coroutine_fn v9fs_co_seekdir(V9fsPDU *, V9fsFidState *, off_t);
 void coroutine_fn v9fs_co_rewinddir(V9fsPDU *, V9fsFidState *);
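
Note: the v9fs_free_dirents() helper that the doc comment obliges callers to
invoke is not part of this diff; it is introduced elsewhere in the series.
Since do_readdir_many() allocates each list node, its dirent copy and its
optional stat buffer with g_malloc0(), a matching cleanup routine would
plausibly look like this — a sketch under that assumption, not necessarily
the series' actual code:

    void v9fs_free_dirents(struct V9fsDirEnt *e)
    {
        struct V9fsDirEnt *next = NULL;

        for (; e; e = next) {
            next = e->next;
            g_free(e->dent);   /* dirent copy allocated via g_malloc0() */
            g_free(e->st);     /* NULL unless dostat was set; g_free(NULL) is a no-op */
            g_free(e);
        }
    }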