diff mbox series

[v2,04/11] 9p: darwin: Handle struct dirent differences

Message ID 20211122004913.20052-5-wwcohen@gmail.com (mailing list archive)
State New, archived
Headers show
Series 9p: Add support for darwin | expand

Commit Message

Will Cohen Nov. 22, 2021, 12:49 a.m. UTC
From: Keno Fischer <keno@juliacomputing.com>

On darwin d_seekoff exists, but is optional and does not seem to
be commonly used by file systems. Use `telldir` instead to obtain
the seek offset.

Signed-off-by: Keno Fischer <keno@juliacomputing.com>
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch <reactorcontrol@icloud.com>
Signed-off-by: Will Cohen <wwcohen@gmail.com>
---
 hw/9pfs/9p-synth.c |  2 ++
 hw/9pfs/9p.c       | 33 +++++++++++++++++++++++++++++++--
 hw/9pfs/codir.c    |  4 ++++
 3 files changed, 37 insertions(+), 2 deletions(-)

Comments

Christian Schoenebeck Nov. 24, 2021, 2:58 p.m. UTC | #1
On Montag, 22. November 2021 01:49:06 CET Will Cohen wrote:
> From: Keno Fischer <keno@juliacomputing.com>
> 
> On darwin d_seekoff exists, but is optional and does not seem to
> be commonly used by file systems. Use `telldir` instead to obtain
> the seek offset.

Are you sure d_seekoff doesn't work on macOS? Because using telldir() instead
is not the same thing. Accessing d_*off is just POD access, whereas telldir()
is a syscall. What you are trying in this patch with telldir() easily gets
hairy.

AFAIK there was d_off in previous versions of macOS, which was then replaced
by d_seekof in macOS 11.1, no?

> Signed-off-by: Keno Fischer <keno@juliacomputing.com>
> [Michael Roitzsch: - Rebase for NixOS]
> Signed-off-by: Michael Roitzsch <reactorcontrol@icloud.com>
> Signed-off-by: Will Cohen <wwcohen@gmail.com>
> ---
>  hw/9pfs/9p-synth.c |  2 ++
>  hw/9pfs/9p.c       | 33 +++++++++++++++++++++++++++++++--
>  hw/9pfs/codir.c    |  4 ++++
>  3 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
> index 4a4a776d06..09b9c25288 100644
> --- a/hw/9pfs/9p-synth.c
> +++ b/hw/9pfs/9p-synth.c
> @@ -222,7 +222,9 @@ static void synth_direntry(V9fsSynthNode *node,
>  {
>      strcpy(entry->d_name, node->name);
>      entry->d_ino = node->attr->inode;
> +#ifndef CONFIG_DARWIN
>      entry->d_off = off + 1;
> +#endif
>  }

^ That doesn't look like it would work. Compiling sure.

Have you tried running the test cases?
https://wiki.qemu.org/Documentation/9p#Test_Cases

>  static struct dirent *synth_get_dentry(V9fsSynthNode *dir,
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index f4f0c200c7..c06e8a85a0 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -2218,6 +2218,25 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU
> *pdu, V9fsFidState *fidp, return offset;
>  }
> 
> +/**
> + * Get the seek offset of a dirent. If not available from the structure
> itself, + * obtain it by calling telldir.
> + */
> +static int v9fs_dent_telldir(V9fsPDU *pdu, V9fsFidState *fidp,
> +                             struct dirent *dent)
> +{
> +#ifdef CONFIG_DARWIN
> +    /*
> +     * Darwin has d_seekoff, which appears to function similarly to d_off.
> +     * However, it does not appear to be supported on all file systems,
> +     * so use telldir for correctness.
> +     */
> +    return telldir(fidp->fs.dir.stream);
> +#else
> +    return dent->d_off;
> +#endif

The thing here is, we usually run fs syscalls as coroutines on a worker thread
as they might block for a long time, and in the meantime 9p server's main
thread could handle other tasks. Plus if a fs syscall gets stuck, we can abort
the request, which is not possible if its called directly from main thread.

https://wiki.qemu.org/Documentation/9p#Threads_and_Coroutines

dent->d_off is just POD access, so it is instantanious. But that does not mean
you should wrap that telldir() call now to be a background task, because that
will add other implications. I would rather prefer to clarify first whether
d_*off is really not working on macOS to avoid all the foreseeable trouble.

> +}
> +
>  static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
>                                                    V9fsFidState *fidp,
>                                                    uint32_t max_count)
> @@ -2281,7 +2300,11 @@ static int coroutine_fn
> v9fs_do_readdir_with_stat(V9fsPDU *pdu, count += len;
>          v9fs_stat_free(&v9stat);
>          v9fs_path_free(&path);
> -        saved_dir_pos = dent->d_off;
> +        saved_dir_pos = v9fs_dent_telldir(pdu, fidp, dent);
> +        if (saved_dir_pos < 0) {
> +            err = saved_dir_pos;
> +            break;
> +        }
>      }
> 
>      v9fs_readdir_unlock(&fidp->fs.dir);
> @@ -2420,6 +2443,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu,
> V9fsFidState *fidp, V9fsString name;
>      int len, err = 0;
>      int32_t count = 0;
> +    off_t off;
>      struct dirent *dent;
>      struct stat *st;
>      struct V9fsDirEnt *entries = NULL;
> @@ -2480,12 +2504,17 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU
> *pdu, V9fsFidState *fidp, qid.version = 0;
>          }
> 
> +        off = v9fs_dent_telldir(pdu, fidp, dent);
> +        if (off < 0) {
> +            err = off;
> +            break;
> +        }
>          v9fs_string_init(&name);
>          v9fs_string_sprintf(&name, "%s", dent->d_name);
> 
>          /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
>          len = pdu_marshal(pdu, 11 + count, "Qqbs",
> -                          &qid, dent->d_off,
> +                          &qid, off,
>                            dent->d_type, &name);
> 
>          v9fs_string_free(&name);
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 032cce04c4..78aca1d98b 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -167,7 +167,11 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState
> *fidp, }
> 
>          size += len;
> +#ifdef CONFIG_DARWIN
> +        saved_dir_pos = telldir(fidp->fs.dir.stream);
> +#else
>          saved_dir_pos = dent->d_off;
> +#endif
>      }
> 
>      /* restore (last) saved position */
Michael Roitzsch Nov. 24, 2021, 3:45 p.m. UTC | #2
Hi,

> Are you sure d_seekoff doesn't work on macOS?

I just tried on an APFS volume on macOS Monterey and d_seekoff is always 0, while telldir() outputs useful values.

> Because using telldir() instead
> is not the same thing. Accessing d_*off is just POD access, whereas telldir()
> is a syscall.

I am not sure this is the case. I have tried a quick test program:

#include <dirent.h>

int main(void)
{
	int result = 0;
	DIR *dir = opendir(".");
	while (readdir(dir)) {
		result += telldir(dir);
	}
	closedir(dir);
	return result;
}

I ran it with 'sudo dtruss ./test', which should give me a trace of the system calls. The relevant portion is:

open_nocancel(".\0", 0x1100004, 0x0)		 = 3 0
sysctlbyname(kern.secure_kernel, 0x12, 0x7FF7BE49810C, 0x7FF7BE498110, 0x0)		 = 0 0
fstatfs64(0x3, 0x7FF7BE498110, 0x0)		 = 0 0
getdirentries64(0x3, 0x7FF4A5808A00, 0x2000)		 = 1472 0
close_nocancel(0x3)		 = 0 0

The directory has more than 30 entries, but the loop does not appear to cause individual system calls. Instead, readdir() and telldir() appear to be library functions powered by this getdirentries64 syscall.

This is on Monterey. Unfortunately I cannot test if older versions of macOS are different.

Michael
Christian Schoenebeck Nov. 24, 2021, 7:09 p.m. UTC | #3
On Mittwoch, 24. November 2021 16:45:30 CET Michael Roitzsch wrote:
> Hi,
> 
> > Are you sure d_seekoff doesn't work on macOS?
> 
> I just tried on an APFS volume on macOS Monterey and d_seekoff is always 0,
> while telldir() outputs useful values.
> > Because using telldir() instead
> > is not the same thing. Accessing d_*off is just POD access, whereas
> > telldir() is a syscall.
> 
> I am not sure this is the case. I have tried a quick test program:
> 
> #include <dirent.h>
> 
> int main(void)
> {
> 	int result = 0;
> 	DIR *dir = opendir(".");
> 	while (readdir(dir)) {
> 		result += telldir(dir);
> 	}
> 	closedir(dir);
> 	return result;
> }
> 
> I ran it with 'sudo dtruss ./test', which should give me a trace of the
> system calls. The relevant portion is:
> 
> open_nocancel(".\0", 0x1100004, 0x0)		 = 3 0
> sysctlbyname(kern.secure_kernel, 0x12, 0x7FF7BE49810C, 0x7FF7BE498110,
> 0x0)		 = 0 0 fstatfs64(0x3, 0x7FF7BE498110, 0x0)		 = 0 0
> getdirentries64(0x3, 0x7FF4A5808A00, 0x2000)		 = 1472 0
> close_nocancel(0x3)		 = 0 0
> 
> The directory has more than 30 entries, but the loop does not appear to
> cause individual system calls. Instead, readdir() and telldir() appear to
> be library functions powered by this getdirentries64 syscall.

Right, telldir() is part of Apple's libc, no (fs) syscall involved:
https://opensource.apple.com/source/Libc/Libc-167/gen.subproj/telldir.c.auto.html

However instead of changing the (fs driver independent) 9p server [9p.c] code
and using fidp there, I would probably rather address this issue for macOS in
the individual fs drivers [9p-local.c, 9p-synth.c, 9p-proxy.c] directly on
dirent instead, and not rely on fidp not having mutated on server.

And please make sure the 9p test cases pass.

Best regards,
Christian Schoenebeck
Will Cohen Jan. 27, 2022, 9:48 p.m. UTC | #4
If acceptable, we'd still propose leaving these changes as is for
expediency's sake, rather than using our dirent and then translating it all
to save one read from the FS layer.

For v3, the synth tests will pass. We did have to modify the local
fs_unlinkat_dir test to correct AT_REMOVEDIR to P9_DOTL_AT_REMOVEDIR to get
that test to pass, but those now pass as well.


On Wed, Nov 24, 2021 at 2:09 PM Christian Schoenebeck <
qemu_oss@crudebyte.com> wrote:

> On Mittwoch, 24. November 2021 16:45:30 CET Michael Roitzsch wrote:
> > Hi,
> >
> > > Are you sure d_seekoff doesn't work on macOS?
> >
> > I just tried on an APFS volume on macOS Monterey and d_seekoff is always
> 0,
> > while telldir() outputs useful values.
> > > Because using telldir() instead
> > > is not the same thing. Accessing d_*off is just POD access, whereas
> > > telldir() is a syscall.
> >
> > I am not sure this is the case. I have tried a quick test program:
> >
> > #include <dirent.h>
> >
> > int main(void)
> > {
> >       int result = 0;
> >       DIR *dir = opendir(".");
> >       while (readdir(dir)) {
> >               result += telldir(dir);
> >       }
> >       closedir(dir);
> >       return result;
> > }
> >
> > I ran it with 'sudo dtruss ./test', which should give me a trace of the
> > system calls. The relevant portion is:
> >
> > open_nocancel(".\0", 0x1100004, 0x0)           = 3 0
> > sysctlbyname(kern.secure_kernel, 0x12, 0x7FF7BE49810C, 0x7FF7BE498110,
> > 0x0)           = 0 0 fstatfs64(0x3, 0x7FF7BE498110, 0x0)               =
> 0 0
> > getdirentries64(0x3, 0x7FF4A5808A00, 0x2000)           = 1472 0
> > close_nocancel(0x3)            = 0 0
> >
> > The directory has more than 30 entries, but the loop does not appear to
> > cause individual system calls. Instead, readdir() and telldir() appear to
> > be library functions powered by this getdirentries64 syscall.
>
> Right, telldir() is part of Apple's libc, no (fs) syscall involved:
>
> https://opensource.apple.com/source/Libc/Libc-167/gen.subproj/telldir.c.auto.html
>
> However instead of changing the (fs driver independent) 9p server [9p.c]
> code
> and using fidp there, I would probably rather address this issue for macOS
> in
> the individual fs drivers [9p-local.c, 9p-synth.c, 9p-proxy.c] directly on
> dirent instead, and not rely on fidp not having mutated on server.
>
> And please make sure the 9p test cases pass.
>
> Best regards,
> Christian Schoenebeck
>
>
>
Christian Schoenebeck Jan. 28, 2022, 3:48 p.m. UTC | #5
On Donnerstag, 27. Januar 2022 22:48:02 CET Will Cohen wrote:
> If acceptable, we'd still propose leaving these changes as is for
> expediency's sake, rather than using our dirent and then translating it all
> to save one read from the FS layer.

The problem is V9fsFidState *fidp is a shared resource that might become 
mutated by other threads in between and could therefore lead to concurrency 
issues and undefined behaviour, whereas struct dirent is a local resource not 
being shared.

See also BTW:
https://lore.kernel.org/qemu-devel/20220127212734.218900-1-vt@altlinux.org/

Best regards,
Christian Schoenebeck
diff mbox series

Patch

diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index 4a4a776d06..09b9c25288 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -222,7 +222,9 @@  static void synth_direntry(V9fsSynthNode *node,
 {
     strcpy(entry->d_name, node->name);
     entry->d_ino = node->attr->inode;
+#ifndef CONFIG_DARWIN
     entry->d_off = off + 1;
+#endif
 }
 
 static struct dirent *synth_get_dentry(V9fsSynthNode *dir,
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index f4f0c200c7..c06e8a85a0 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -2218,6 +2218,25 @@  static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
     return offset;
 }
 
+/**
+ * Get the seek offset of a dirent. If not available from the structure itself,
+ * obtain it by calling telldir.
+ */
+static int v9fs_dent_telldir(V9fsPDU *pdu, V9fsFidState *fidp,
+                             struct dirent *dent)
+{
+#ifdef CONFIG_DARWIN
+    /*
+     * Darwin has d_seekoff, which appears to function similarly to d_off.
+     * However, it does not appear to be supported on all file systems,
+     * so use telldir for correctness.
+     */
+    return telldir(fidp->fs.dir.stream);
+#else
+    return dent->d_off;
+#endif
+}
+
 static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
                                                   V9fsFidState *fidp,
                                                   uint32_t max_count)
@@ -2281,7 +2300,11 @@  static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
         count += len;
         v9fs_stat_free(&v9stat);
         v9fs_path_free(&path);
-        saved_dir_pos = dent->d_off;
+        saved_dir_pos = v9fs_dent_telldir(pdu, fidp, dent);
+        if (saved_dir_pos < 0) {
+            err = saved_dir_pos;
+            break;
+        }
     }
 
     v9fs_readdir_unlock(&fidp->fs.dir);
@@ -2420,6 +2443,7 @@  static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
     V9fsString name;
     int len, err = 0;
     int32_t count = 0;
+    off_t off;
     struct dirent *dent;
     struct stat *st;
     struct V9fsDirEnt *entries = NULL;
@@ -2480,12 +2504,17 @@  static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
             qid.version = 0;
         }
 
+        off = v9fs_dent_telldir(pdu, fidp, dent);
+        if (off < 0) {
+            err = off;
+            break;
+        }
         v9fs_string_init(&name);
         v9fs_string_sprintf(&name, "%s", dent->d_name);
 
         /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
         len = pdu_marshal(pdu, 11 + count, "Qqbs",
-                          &qid, dent->d_off,
+                          &qid, off,
                           dent->d_type, &name);
 
         v9fs_string_free(&name);
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 032cce04c4..78aca1d98b 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -167,7 +167,11 @@  static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
         }
 
         size += len;
+#ifdef CONFIG_DARWIN
+        saved_dir_pos = telldir(fidp->fs.dir.stream);
+#else
         saved_dir_pos = dent->d_off;
+#endif
     }
 
     /* restore (last) saved position */