@@ -251,6 +251,8 @@ int qemu_madvise(void *addr, size_t len, int advice);
int qemu_open(const char *name, int flags, ...);
int qemu_close(int fd);
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool readonly);
+int qemu_unlock_fd(int fd, int64_t start, int64_t len);
#if defined(__HAIKU__) && defined(__i386__)
#define FMT_pid "%ld"
@@ -41,6 +41,7 @@ extern int madvise(caddr_t, size_t, int);
#include "qemu/sockets.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"
+#include <glib.h>
static bool fips_enabled = false;
@@ -146,13 +147,9 @@ static int qemu_parse_fdset(const char *param)
}
#endif
-/*
- * Opens a file with FD_CLOEXEC set
- */
-int qemu_open(const char *name, int flags, ...)
+static int qemu_fd_open(const char *name, int flags, int mode)
{
int ret;
- int mode = 0;
#ifndef _WIN32
const char *fdset_id_str;
@@ -189,13 +186,6 @@ int qemu_open(const char *name, int flags, ...)
}
#endif
- if (flags & O_CREAT) {
- va_list ap;
-
- va_start(ap, flags);
- mode = va_arg(ap, int);
- va_end(ap);
- }
#ifdef O_CLOEXEC
ret = open(name, flags | O_CLOEXEC, mode);
@@ -216,6 +206,188 @@ int qemu_open(const char *name, int flags, ...)
return ret;
}
+typedef struct LockEntry LockEntry;
+struct LockEntry {
+ int fd;
+ bool readonly;
+ int64_t start;
+ int64_t len;
+ QLIST_ENTRY(LockEntry) next;
+};
+
+typedef struct {
+ QLIST_HEAD(, LockEntry) records;
+} LockRecord;
+
+static GHashTable *fd_to_path;
+static GHashTable *lock_map;
+
+static void fd_map_init(void)
+{
+ if (!fd_to_path) {
+ fd_to_path = g_hash_table_new(g_direct_hash, g_direct_equal);
+ lock_map = g_hash_table_new(g_str_hash, g_str_equal);
+ }
+}
+
+static int qemu_lock_do(int fd, int64_t start, int64_t len, int fl_type)
+{
+ struct flock fl = (struct flock) {
+ .l_whence = SEEK_SET,
+ /* Locking byte 1 avoids interfereing with virtlockd. */
+ .l_start = start,
+ .l_len = len,
+ .l_type = fl_type,
+ };
+
+ return fcntl(fd, F_SETLK, &fl);
+}
+
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool readonly)
+{
+ LockEntry *ent;
+ LockRecord *rec;
+ char *path;
+ int ret;
+
+ ret = qemu_lock_do(fd, start, len, readonly ? F_RDLCK : F_WRLCK);
+ if (ret == -1) {
+ return -errno;
+ }
+ path = g_hash_table_lookup(fd_to_path, GINT_TO_POINTER(fd));
+ assert(path);
+ rec = g_hash_table_lookup(lock_map, path);
+ if (!rec) {
+ gboolean inserted;
+ rec = g_new(LockRecord, 1);
+ QLIST_INIT(&rec->records);
+ inserted = g_hash_table_insert(lock_map, path, rec);
+ assert(inserted);
+ } else {
+ /* Check the range is not locked by any fd in the record yet. */
+ int64_t end = start + len;
+ QLIST_FOREACH(ent, &rec->records, next) {
+ int64_t ent_end = ent->start + ent->len;
+ if ((ent->start >= start && ent_end <= end) ||
+ (ent_end >= start && ent_end <= end)) {
+ /* There is an overlap between ent and us, only shared locking
+ * is permitted. */
+ if (!(ent->readonly && readonly)) {
+ return -EAGAIN;
+ }
+ }
+ }
+ }
+ ent = g_new(LockEntry, 1);
+ *ent = (LockEntry) {
+ .fd = fd,
+ .start = start,
+ .len = len,
+ .readonly = readonly,
+ };
+ QLIST_INSERT_HEAD(&rec->records, ent, next);
+ return 0;
+}
+
+int qemu_unlock_fd(int fd, int64_t start, int64_t len)
+{
+ LockEntry *ent;
+ LockRecord *rec;
+ int ret;
+ char *path;
+
+ ret = qemu_lock_do(fd, start, len, F_UNLCK);
+ if (ret == -1) {
+ return ret;
+ }
+ path = g_hash_table_lookup(fd_to_path, GINT_TO_POINTER(fd));
+ assert(path);
+ rec = g_hash_table_lookup(lock_map, path);
+ assert(ret);
+ QLIST_FOREACH(ent, &rec->records, next) {
+ if (ent->fd == fd && ent->start == start && ent->len == len) {
+ QLIST_REMOVE(ent, next);
+ break;
+ }
+ }
+ assert(ent);
+ return ret;
+}
+
+static void qemu_fd_add_record(int fd, const char *path)
+{
+ gboolean ret;
+ char *rpath;
+
+ fd_map_init();
+ rpath = realpath(path, NULL);
+ if (!rpath) {
+ fprintf(stderr, "Failed to get real path for file %s: %s\n", path,
+ strerror(errno));
+ rpath = strdup(path);
+ }
+ ret = g_hash_table_insert(fd_to_path, GINT_TO_POINTER(fd), g_strdup(rpath));
+ /* It's a caller bug if the record already exists */
+ assert(ret);
+ free(rpath);
+}
+
+static int qemu_fd_close(int fd)
+{
+ LockEntry *ent, *tmp;
+ LockRecord *rec;
+ char *path;
+ int ret;
+
+ assert(fd_to_path);
+ path = g_hash_table_lookup(fd_to_path, GINT_TO_POINTER(fd));
+ assert(path);
+ g_hash_table_remove(fd_to_path, GINT_TO_POINTER(fd));
+ rec = g_hash_table_lookup(lock_map, path);
+ ret = close(fd);
+
+ if (rec) {
+ QLIST_FOREACH_SAFE(ent, &rec->records, next, tmp) {
+ int r;
+ if (ent->fd == fd) {
+ QLIST_REMOVE(ent, next);
+ g_free(ent);
+ continue;
+ }
+ r = qemu_lock_do(ent->fd, ent->start, ent->len,
+ ent->readonly ? F_RDLCK : F_WRLCK);
+ if (r == -1) {
+ fprintf(stderr, "Failed to acquire lock on fd %d: %s\n",
+ ent->fd, strerror(errno));
+ }
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Opens a file with FD_CLOEXEC set
+ */
+int qemu_open(const char *name, int flags, ...)
+{
+ int mode = 0;
+ int ret;
+
+ if (flags & O_CREAT) {
+ va_list ap;
+
+ va_start(ap, flags);
+ mode = va_arg(ap, int);
+ va_end(ap);
+ }
+ ret = qemu_fd_open(name, flags, mode);
+ if (ret >= 0) {
+ qemu_fd_add_record(ret, name);
+ }
+ return ret;
+}
+
int qemu_close(int fd)
{
int64_t fdset_id;
@@ -225,7 +397,7 @@ int qemu_close(int fd)
if (fdset_id != -1) {
int ret;
- ret = close(fd);
+ ret = qemu_fd_close(fd);
if (ret == 0) {
monitor_fdset_dup_fd_remove(fd);
}
@@ -233,7 +405,7 @@ int qemu_close(int fd)
return ret;
}
- return close(fd);
+ return qemu_fd_close(fd);
}
/*
They are wrappers of POSIX fcntl file locking, with the additional interception of open/close (through qemu_open and qemu_close) to offer a better semantics that preserves the locks across multiple life cycles of different fds on the same file. The reason to make this semantics change over the fcntl locks is to make the API cleaner for QEMU subsystems. More precisely, we remove this "feature" of fcntl lock: If a process closes any file descriptor referring to a file, then all of the process's locks on that file are released, regardless of the file descriptor(s) on which the locks were obtained. as long as the fd is always open/closed through qemu_open and qemu_close. Signed-off-by: Fam Zheng <famz@redhat.com> --- include/qemu/osdep.h | 2 + util/osdep.c | 200 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 188 insertions(+), 14 deletions(-)