diff mbox

[v3,08/27] osdep: Add qemu_lock_fd and qemu_unlock_fd

Message ID 1461848266-32119-9-git-send-email-famz@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Fam Zheng April 28, 2016, 12:57 p.m. UTC
They are wrappers of POSIX fcntl file locking, with the additional
interception of open/close (through qemu_open and qemu_close) to offer a
better semantics that preserves the locks across multiple life cycles of
different fds on the same file.  The reason to make this semantics
change over the fcntl locks is to make the API cleaner for QEMU
subsystems.

More precisely, we remove this "feature" of fcntl lock:

    If a process closes any file descriptor referring to a file, then
    all of the process's locks on that file are released, regardless of
    the file descriptor(s) on which the locks were obtained.

as long as the fd is always open/closed through qemu_open and
qemu_close.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 include/qemu/osdep.h |   2 +
 util/osdep.c         | 200 +++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 188 insertions(+), 14 deletions(-)

Comments

Richard W.M. Jones May 4, 2016, 1:46 p.m. UTC | #1
On Thu, Apr 28, 2016 at 08:57:27PM +0800, Fam Zheng wrote:
> They are wrappers of POSIX fcntl file locking, with the additional
> interception of open/close (through qemu_open and qemu_close) to offer a
> better semantics that preserves the locks across multiple life cycles of
> different fds on the same file.  The reason to make this semantics
> change over the fcntl locks is to make the API cleaner for QEMU
> subsystems.
> 
> More precisely, we remove this "feature" of fcntl lock:
> 
>     If a process closes any file descriptor referring to a file, then
>     all of the process's locks on that file are released, regardless of
>     the file descriptor(s) on which the locks were obtained.
> 
> as long as the fd is always open/closed through qemu_open and
> qemu_close.

> +    ret = qemu_lock_do(fd, start, len, readonly ? F_RDLCK : F_WRLCK);
> +    if (ret == -1) {
> +        return -errno;
> +    }

It still appears this patch would break libguestfs's valid use case.

How about addressing what Dan wrote here:

  https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg02927.html

?

Rich.
Fam Zheng May 10, 2016, 2:43 a.m. UTC | #2
On Wed, 05/04 14:46, Richard W.M. Jones wrote:
> On Thu, Apr 28, 2016 at 08:57:27PM +0800, Fam Zheng wrote:
> > They are wrappers of POSIX fcntl file locking, with the additional
> > interception of open/close (through qemu_open and qemu_close) to offer a
> > better semantics that preserves the locks across multiple life cycles of
> > different fds on the same file.  The reason to make this semantics
> > change over the fcntl locks is to make the API cleaner for QEMU
> > subsystems.
> > 
> > More precisely, we remove this "feature" of fcntl lock:
> > 
> >     If a process closes any file descriptor referring to a file, then
> >     all of the process's locks on that file are released, regardless of
> >     the file descriptor(s) on which the locks were obtained.
> > 
> > as long as the fd is always open/closed through qemu_open and
> > qemu_close.
> 
> > +    ret = qemu_lock_do(fd, start, len, readonly ? F_RDLCK : F_WRLCK);
> > +    if (ret == -1) {
> > +        return -errno;
> > +    }
> 
> It still appears this patch would break libguestfs's valid use case.
> 
> How about addressing what Dan wrote here:
> 
>   https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg02927.html

OK. It means we then don't lock ro images at all. Fair enough and I will
respin.

Fam
diff mbox

Patch

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 408783f..089c13f 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -251,6 +251,8 @@  int qemu_madvise(void *addr, size_t len, int advice);
 
 int qemu_open(const char *name, int flags, ...);
 int qemu_close(int fd);
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool readonly);
+int qemu_unlock_fd(int fd, int64_t start, int64_t len);
 
 #if defined(__HAIKU__) && defined(__i386__)
 #define FMT_pid "%ld"
diff --git a/util/osdep.c b/util/osdep.c
index d56d071..1510cbf 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -41,6 +41,7 @@  extern int madvise(caddr_t, size_t, int);
 #include "qemu/sockets.h"
 #include "qemu/error-report.h"
 #include "monitor/monitor.h"
+#include <glib.h>
 
 static bool fips_enabled = false;
 
@@ -146,13 +147,9 @@  static int qemu_parse_fdset(const char *param)
 }
 #endif
 
-/*
- * Opens a file with FD_CLOEXEC set
- */
-int qemu_open(const char *name, int flags, ...)
+static int qemu_fd_open(const char *name, int flags, int mode)
 {
     int ret;
-    int mode = 0;
 
 #ifndef _WIN32
     const char *fdset_id_str;
@@ -189,13 +186,6 @@  int qemu_open(const char *name, int flags, ...)
     }
 #endif
 
-    if (flags & O_CREAT) {
-        va_list ap;
-
-        va_start(ap, flags);
-        mode = va_arg(ap, int);
-        va_end(ap);
-    }
 
 #ifdef O_CLOEXEC
     ret = open(name, flags | O_CLOEXEC, mode);
@@ -216,6 +206,188 @@  int qemu_open(const char *name, int flags, ...)
     return ret;
 }
 
+typedef struct LockEntry LockEntry;
+struct LockEntry {
+    int fd;
+    bool readonly;
+    int64_t start;
+    int64_t len;
+    QLIST_ENTRY(LockEntry) next;
+};
+
+typedef struct {
+    QLIST_HEAD(, LockEntry) records;
+} LockRecord;
+
+static GHashTable *fd_to_path;
+static GHashTable *lock_map;
+
+static void fd_map_init(void)
+{
+    if (!fd_to_path) {
+        fd_to_path = g_hash_table_new(g_direct_hash, g_direct_equal);
+        lock_map = g_hash_table_new(g_str_hash, g_str_equal);
+    }
+}
+
+static int qemu_lock_do(int fd, int64_t start, int64_t len, int fl_type)
+{
+    struct flock fl = (struct flock) {
+        .l_whence  = SEEK_SET,
+        /* Locking byte 1 avoids interfereing with virtlockd. */
+        .l_start = start,
+        .l_len = len,
+        .l_type = fl_type,
+    };
+
+    return fcntl(fd, F_SETLK, &fl);
+}
+
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool readonly)
+{
+    LockEntry *ent;
+    LockRecord *rec;
+    char *path;
+    int ret;
+
+    ret = qemu_lock_do(fd, start, len, readonly ? F_RDLCK : F_WRLCK);
+    if (ret == -1) {
+        return -errno;
+    }
+    path = g_hash_table_lookup(fd_to_path, GINT_TO_POINTER(fd));
+    assert(path);
+    rec = g_hash_table_lookup(lock_map, path);
+    if (!rec) {
+        gboolean inserted;
+        rec = g_new(LockRecord, 1);
+        QLIST_INIT(&rec->records);
+        inserted = g_hash_table_insert(lock_map, path, rec);
+        assert(inserted);
+    } else {
+        /* Check the range is not locked by any fd in the record yet. */
+        int64_t end = start + len;
+        QLIST_FOREACH(ent, &rec->records, next) {
+            int64_t ent_end = ent->start + ent->len;
+            if ((ent->start >= start && ent_end <= end) ||
+                (ent_end >= start && ent_end <= end)) {
+                /* There is an overlap between ent and us, only shared locking
+                 * is permitted. */
+                if (!(ent->readonly && readonly)) {
+                    return -EAGAIN;
+                }
+            }
+        }
+    }
+    ent = g_new(LockEntry, 1);
+    *ent = (LockEntry) {
+        .fd = fd,
+        .start = start,
+        .len = len,
+        .readonly = readonly,
+    };
+    QLIST_INSERT_HEAD(&rec->records, ent, next);
+    return 0;
+}
+
+int qemu_unlock_fd(int fd, int64_t start, int64_t len)
+{
+    LockEntry *ent;
+    LockRecord *rec;
+    int ret;
+    char *path;
+
+    ret = qemu_lock_do(fd, start, len, F_UNLCK);
+    if (ret == -1) {
+        return ret;
+    }
+    path = g_hash_table_lookup(fd_to_path, GINT_TO_POINTER(fd));
+    assert(path);
+    rec = g_hash_table_lookup(lock_map, path);
+    assert(ret);
+    QLIST_FOREACH(ent, &rec->records, next) {
+        if (ent->fd == fd && ent->start == start && ent->len == len) {
+            QLIST_REMOVE(ent, next);
+            break;
+        }
+    }
+    assert(ent);
+    return ret;
+}
+
+static void qemu_fd_add_record(int fd, const char *path)
+{
+    gboolean ret;
+    char *rpath;
+
+    fd_map_init();
+    rpath = realpath(path, NULL);
+    if (!rpath) {
+        fprintf(stderr, "Failed to get real path for file %s: %s\n", path,
+                strerror(errno));
+        rpath = strdup(path);
+    }
+    ret = g_hash_table_insert(fd_to_path, GINT_TO_POINTER(fd), g_strdup(rpath));
+    /* It's a caller bug if the record already exists */
+    assert(ret);
+    free(rpath);
+}
+
+static int qemu_fd_close(int fd)
+{
+    LockEntry *ent, *tmp;
+    LockRecord *rec;
+    char *path;
+    int ret;
+
+    assert(fd_to_path);
+    path = g_hash_table_lookup(fd_to_path, GINT_TO_POINTER(fd));
+    assert(path);
+    g_hash_table_remove(fd_to_path, GINT_TO_POINTER(fd));
+    rec = g_hash_table_lookup(lock_map, path);
+    ret = close(fd);
+
+    if (rec) {
+        QLIST_FOREACH_SAFE(ent, &rec->records, next, tmp) {
+            int r;
+            if (ent->fd == fd) {
+                QLIST_REMOVE(ent, next);
+                g_free(ent);
+                continue;
+            }
+            r = qemu_lock_do(ent->fd, ent->start, ent->len,
+                             ent->readonly ? F_RDLCK : F_WRLCK);
+            if (r == -1) {
+                fprintf(stderr, "Failed to acquire lock on fd %d: %s\n",
+                        ent->fd, strerror(errno));
+            }
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Opens a file with FD_CLOEXEC set
+ */
+int qemu_open(const char *name, int flags, ...)
+{
+    int mode = 0;
+    int ret;
+
+    if (flags & O_CREAT) {
+        va_list ap;
+
+        va_start(ap, flags);
+        mode = va_arg(ap, int);
+        va_end(ap);
+    }
+    ret = qemu_fd_open(name, flags, mode);
+    if (ret >= 0) {
+        qemu_fd_add_record(ret, name);
+    }
+    return ret;
+}
+
 int qemu_close(int fd)
 {
     int64_t fdset_id;
@@ -225,7 +397,7 @@  int qemu_close(int fd)
     if (fdset_id != -1) {
         int ret;
 
-        ret = close(fd);
+        ret = qemu_fd_close(fd);
         if (ret == 0) {
             monitor_fdset_dup_fd_remove(fd);
         }
@@ -233,7 +405,7 @@  int qemu_close(int fd)
         return ret;
     }
 
-    return close(fd);
+    return qemu_fd_close(fd);
 }
 
 /*