@@ -555,10 +555,9 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *ptimeout)
}
}
-struct xs_tdb_record_hdr *db_fetch(const char *db_name, size_t *size)
+const struct xs_tdb_record_hdr *db_fetch(const char *db_name, size_t *size)
{
const struct xs_tdb_record_hdr *hdr;
- struct xs_tdb_record_hdr *p;
hdr = hashtable_search(nodes, db_name);
if (!hdr) {
@@ -569,22 +568,15 @@ struct xs_tdb_record_hdr *db_fetch(const char *db_name, size_t *size)
*size = sizeof(*hdr) + hdr->num_perms * sizeof(hdr->perms[0]) +
hdr->datalen + hdr->childlen;
- /* Return a copy, avoiding a potential modification in the DB. */
- p = talloc_memdup(NULL, hdr, *size);
- if (!p) {
- errno = ENOMEM;
- return NULL;
- }
-
trace_tdb("read %s size %zu\n", db_name, *size + strlen(db_name));
- return p;
+ return hdr;
}
static void get_acc_data(const char *name, struct node_account_data *acc)
{
size_t size;
- struct xs_tdb_record_hdr *hdr;
+ const struct xs_tdb_record_hdr *hdr;
if (acc->memory < 0) {
hdr = db_fetch(name, &size);
@@ -595,7 +587,6 @@ static void get_acc_data(const char *name, struct node_account_data *acc)
acc->memory = size;
acc->domid = hdr->perms[0].id;
}
- talloc_free(hdr);
}
}
@@ -708,7 +699,7 @@ struct node *read_node(struct connection *conn, const void *ctx,
const char *name)
{
size_t size;
- struct xs_tdb_record_hdr *hdr;
+ const struct xs_tdb_record_hdr *hdr;
struct node *node;
const char *db_name;
int err;
@@ -729,30 +720,30 @@ struct node *read_node(struct connection *conn, const void *ctx,
hdr = db_fetch(db_name, &size);
if (hdr == NULL) {
- if (errno == ENOENT) {
- node->generation = NO_GENERATION;
- err = access_node(conn, node, NODE_ACCESS_READ, NULL);
- errno = err ? : ENOENT;
- } else {
- log("DB error on read: %s", strerror(errno));
- errno = EIO;
- }
+ node->generation = NO_GENERATION;
+ err = access_node(conn, node, NODE_ACCESS_READ, NULL);
+ errno = err ? : ENOENT;
goto error;
}
node->parent = NULL;
- talloc_steal(node, hdr);
/* Datalen, childlen, number of permissions */
node->generation = hdr->generation;
node->perms.num = hdr->num_perms;
node->datalen = hdr->datalen;
node->childlen = hdr->childlen;
-
- /* Permissions are struct xs_permissions. */
- node->perms.p = hdr->perms;
- node->acc.domid = get_node_owner(node);
+ node->acc.domid = hdr->perms[0].id;
node->acc.memory = size;
+
+ /* Copy node data to new memory area, starting with permissions. */
+ size -= sizeof(*hdr);
+ node->perms.p = talloc_memdup(node, hdr->perms, size);
+ if (node->perms.p == NULL) {
+ errno = ENOMEM;
+ goto error;
+ }
+
if (domain_adjust_node_perms(node))
goto error;
@@ -362,7 +362,7 @@ extern xengnttab_handle **xgt_handle;
int remember_string(struct hashtable *hash, const char *str);
/* Data base access functions. */
-struct xs_tdb_record_hdr *db_fetch(const char *db_name, size_t *size);
+const struct xs_tdb_record_hdr *db_fetch(const char *db_name, size_t *size);
int db_write(struct connection *conn, const char *db_name, void *data,
size_t size, struct node_account_data *acc,
enum write_node_mode mode, bool no_quota_check);
@@ -357,20 +357,17 @@ static int finalize_transaction(struct connection *conn,
{
struct accessed_node *i, *n;
size_t size;
- struct xs_tdb_record_hdr *hdr;
+ const struct xs_tdb_record_hdr *hdr;
uint64_t gen;
list_for_each_entry_safe(i, n, &trans->accessed, list) {
if (i->check_gen) {
hdr = db_fetch(i->node, &size);
if (!hdr) {
- if (errno != ENOENT)
- return errno;
gen = NO_GENERATION;
} else {
gen = hdr->generation;
}
- talloc_free(hdr);
if (i->generation != gen)
return EAGAIN;
}
@@ -388,14 +385,26 @@ static int finalize_transaction(struct connection *conn,
if (i->ta_node) {
hdr = db_fetch(i->trans_name, &size);
if (hdr) {
+ /*
+ * Delete transaction entry and write it as
+ * no-TA entry. As we only hold a reference
+ * to the data, increment its ref count, then
+ * delete it from the DB. Now we own it and can
+ * drop the const attribute for changing the
+ * generation count.
+ */
enum write_node_mode mode;
+ struct xs_tdb_record_hdr *own;
- hdr->generation = ++generation;
+ talloc_increase_ref_count(hdr);
+ db_delete(conn, i->trans_name, NULL);
+
+ own = (struct xs_tdb_record_hdr *)hdr;
+ own->generation = ++generation;
mode = (i->generation == NO_GENERATION)
? NODE_CREATE : NODE_MODIFY;
- *is_corrupt |= db_write(conn, i->node, hdr,
+ *is_corrupt |= db_write(conn, i->node, own,
size, NULL, mode, true);
- db_delete(conn, i->trans_name, NULL);
} else {
*is_corrupt = true;
}
Today the node data is copied in db_fetch() on each database read in order to avoid accidental database modifications when working on a node. read_node() is the only caller of db_fetch() that doesn't free the returned data area immediately after using it. The other callers don't modify the returned data, so they don't need the data to be copied. Move the copying of the data into read_node(), which speeds up the other callers because no memory allocation and no copying are needed anymore. This also allows db_fetch() to return a pointer to const data. As db_fetch() can't return any error other than ENOENT now, error handling for the callers can be simplified. Signed-off-by: Juergen Gross <jgross@suse.com> --- V2: - new patch V3: - modify return type of db_fetch() to return a pointer to const (Julien Grall) - drop stale comment (Julien Grall) - fix transaction handling V4: - don't drop const attribute for hdr (Julien Grall) --- tools/xenstore/xenstored_core.c | 43 ++++++++++---------------- tools/xenstore/xenstored_core.h | 2 +- tools/xenstore/xenstored_transaction.c | 23 +++++++++----- 3 files changed, 34 insertions(+), 34 deletions(-)