diff mbox series

[v3,07/10] virtiofsd: Add lo_inode.fhandle

Message ID 20210730150134.216126-8-mreitz@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofsd: Allow using file handles instead of O_PATH FDs | expand

Commit Message

Max Reitz July 30, 2021, 3:01 p.m. UTC
This new field is an alternative to lo_inode.fd: Either of the two must
be set.  In case an O_PATH FD is needed for some lo_inode, it is either
taken from lo_inode.fd, if valid, or a temporary FD is opened with
open_by_handle_at().

Using a file handle instead of an FD has the advantage of keeping the
number of open file descriptors low.

Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
opened on the filesystem to which the file handle refers), but every
lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
keep a hash map of such FDs in mount_fds (mapping ID to FD).
get_file_handle(), which is added by a later patch, will ensure that
every mount ID for which we have generated a handle has a corresponding
entry in mount_fds.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c      | 116 ++++++++++++++++++++++----
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 2 files changed, 102 insertions(+), 15 deletions(-)

Comments

Vivek Goyal Aug. 9, 2021, 3:21 p.m. UTC | #1
On Fri, Jul 30, 2021 at 05:01:31PM +0200, Max Reitz wrote:
> This new field is an alternative to lo_inode.fd: Either of the two must
> be set.  In case an O_PATH FD is needed for some lo_inode, it is either
> taken from lo_inode.fd, if valid, or a temporary FD is opened with
> open_by_handle_at().
> 
> Using a file handle instead of an FD has the advantage of keeping the
> number of open file descriptors low.
> 
> Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
> opened on the filesystem to which the file handle refers), but every
> lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
> keep a hash map of such FDs in mount_fds (mapping ID to FD).
> get_file_handle(), which is added by a later patch, will ensure that
> every mount ID for which we have generated a handle has a corresponding
> entry in mount_fds.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c      | 116 ++++++++++++++++++++++----
>  tools/virtiofsd/passthrough_seccomp.c |   1 +
>  2 files changed, 102 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 292b7f7e27..487448d666 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -88,8 +88,25 @@ struct lo_key {
>      uint64_t mnt_id;
>  };
>  
> +struct lo_fhandle {
> +    union {
> +        struct file_handle handle;
> +        char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
> +    };
> +    int mount_id;
> +};
> +
> +/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
> +static GHashTable *mount_fds;
> +pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
> +

How about if we move this hash table inside "struct lo_data". That seems to
be one global data structure keeping all the info. Also it can be
cleaned up during lo_destroy().

Thanks
Vivek
Hanna Reitz Aug. 9, 2021, 4:41 p.m. UTC | #2
On 09.08.21 17:21, Vivek Goyal wrote:
> On Fri, Jul 30, 2021 at 05:01:31PM +0200, Max Reitz wrote:
>> This new field is an alternative to lo_inode.fd: Either of the two must
>> be set.  In case an O_PATH FD is needed for some lo_inode, it is either
>> taken from lo_inode.fd, if valid, or a temporary FD is opened with
>> open_by_handle_at().
>>
>> Using a file handle instead of an FD has the advantage of keeping the
>> number of open file descriptors low.
>>
>> Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
>> opened on the filesystem to which the file handle refers), but every
>> lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
>> keep a hash map of such FDs in mount_fds (mapping ID to FD).
>> get_file_handle(), which is added by a later patch, will ensure that
>> every mount ID for which we have generated a handle has a corresponding
>> entry in mount_fds.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
>> ---
>>   tools/virtiofsd/passthrough_ll.c      | 116 ++++++++++++++++++++++----
>>   tools/virtiofsd/passthrough_seccomp.c |   1 +
>>   2 files changed, 102 insertions(+), 15 deletions(-)
>>
>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
>> index 292b7f7e27..487448d666 100644
>> --- a/tools/virtiofsd/passthrough_ll.c
>> +++ b/tools/virtiofsd/passthrough_ll.c
>> @@ -88,8 +88,25 @@ struct lo_key {
>>       uint64_t mnt_id;
>>   };
>>   
>> +struct lo_fhandle {
>> +    union {
>> +        struct file_handle handle;
>> +        char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
>> +    };
>> +    int mount_id;
>> +};
>> +
>> +/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
>> +static GHashTable *mount_fds;
>> +pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
>> +
> How about if we move this hash table inside "struct lo_data". That seems to
> be one global data structure keeping all the info. Also it can be
> cleaned up during lo_destroy().

Yes, sounds good and right, will do.

Hanna
diff mbox series

Patch

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 292b7f7e27..487448d666 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -88,8 +88,25 @@  struct lo_key {
     uint64_t mnt_id;
 };
 
+struct lo_fhandle {
+    union {
+        struct file_handle handle;
+        char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+    };
+    int mount_id;
+};
+
+/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
+static GHashTable *mount_fds;
+pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
+
 struct lo_inode {
+    /*
+     * Either of fd or fhandle must be set (i.e. >= 0 or non-NULL,
+     * respectively).
+     */
     int fd;
+    struct lo_fhandle *fhandle;
 
     /*
      * Atomic reference count for this object.  The nlookup field holds a
@@ -302,6 +319,44 @@  static int temp_fd_steal(TempFd *temp_fd)
     }
 }
 
+/**
+ * Open the given file handle with the given flags.
+ *
+ * The mount FD to pass to open_by_handle_at() is taken from the
+ * mount_fds hash map.
+ *
+ * On error, return -errno.
+ */
+static int open_file_handle(const struct lo_fhandle *fh, int flags)
+{
+    gpointer mount_fd_ptr;
+    int mount_fd;
+    bool found;
+    int ret;
+
+    ret = pthread_rwlock_rdlock(&mount_fds_lock);
+    if (ret) {
+        return -ret;
+    }
+
+    /* mount_fd == 0 is valid, so we need lookup_extended */
+    found = g_hash_table_lookup_extended(mount_fds,
+                                         GINT_TO_POINTER(fh->mount_id),
+                                         NULL, &mount_fd_ptr);
+    pthread_rwlock_unlock(&mount_fds_lock);
+    if (!found) {
+        return -EINVAL;
+    }
+    mount_fd = GPOINTER_TO_INT(mount_fd_ptr);
+
+    ret = open_by_handle_at(mount_fd, (struct file_handle *)&fh->handle, flags);
+    if (ret < 0) {
+        return -errno;
+    }
+
+    return ret;
+}
+
 /*
  * Load capng's state from our saved state if the current thread
  * hadn't previously been loaded.
@@ -608,7 +663,11 @@  static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
     *inodep = NULL;
 
     if (g_atomic_int_dec_and_test(&inode->refcount)) {
-        close(inode->fd);
+        if (inode->fd >= 0) {
+            close(inode->fd);
+        } else {
+            g_free(inode->fhandle);
+        }
         free(inode);
     }
 }
@@ -635,10 +694,25 @@  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 
 static int lo_inode_fd(const struct lo_inode *inode, TempFd *tfd)
 {
-    *tfd = (TempFd) {
-        .fd = inode->fd,
-        .owned = false,
-    };
+    if (inode->fd >= 0) {
+        *tfd = (TempFd) {
+            .fd = inode->fd,
+            .owned = false,
+        };
+    } else {
+        int fd;
+
+        assert(inode->fhandle != NULL);
+        fd = open_file_handle(inode->fhandle, O_PATH);
+        if (fd < 0) {
+            return -errno;
+        }
+
+        *tfd = (TempFd) {
+            .fd = fd,
+            .owned = true,
+        };
+    }
 
     return 0;
 }
@@ -678,22 +752,32 @@  static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd *tfd)
 static int lo_inode_open(const struct lo_data *lo, const struct lo_inode *inode,
                          int open_flags, TempFd *tfd)
 {
-    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
+    g_autofree char *fd_str = NULL;
     int fd;
 
     if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
         return -EBADF;
     }
 
-    /*
-     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
-     * that the inode is not a special file but if an external process races
-     * with us then symlinks are traversed here. It is not possible to escape
-     * the shared directory since it is mounted as "/" though.
-     */
-    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
-    if (fd < 0) {
-        return -errno;
+    if (inode->fd >= 0) {
+        /*
+         * The file is a symlink so O_NOFOLLOW must be ignored. We checked
+         * earlier that the inode is not a special file but if an external
+         * process races with us then symlinks are traversed here. It is not
+         * possible to escape the shared directory since it is mounted as "/"
+         * though.
+         */
+        fd_str = g_strdup_printf("%d", inode->fd);
+        fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
+        if (fd < 0) {
+            return -errno;
+        }
+    } else {
+        assert(inode->fhandle != NULL);
+        fd = open_file_handle(inode->fhandle, open_flags);
+        if (fd < 0) {
+            return fd;
+        }
     }
 
     *tfd = (TempFd) {
@@ -4110,6 +4194,8 @@  int main(int argc, char *argv[])
     lo.root.fuse_ino = FUSE_ROOT_ID;
     lo.cache = CACHE_AUTO;
 
+    mount_fds = g_hash_table_new(NULL, NULL);
+
     /*
      * Set up the ino map like this:
      * [0] Reserved (will not be used)
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index f49ed94b5e..af04c638cb 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -77,6 +77,7 @@  static const int syscall_allowlist[] = {
     SCMP_SYS(statx),
     SCMP_SYS(open),
     SCMP_SYS(openat),
+    SCMP_SYS(open_by_handle_at),
     SCMP_SYS(ppoll),
     SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
     SCMP_SYS(preadv),