diff mbox series

[nfs-utils,2/2] sqlite.c: Add a simple database health check

Message ID 20190626190432.16257-2-smayhew@redhat.com (mailing list archive)
State New, archived
Headers show
Series [nfs-utils,1/2] sqlite.c: restore zero-padding to the recovery table names | expand

Commit Message

Scott Mayhew June 26, 2019, 7:04 p.m. UTC
Commit a8133e1fd1742 removed the zero-padding from the table names and
broke grace period handling.  Add a simple health check for the database
to try to detect and fix these.  Other checks could be added in the
future if the database schema changes.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
---
 utils/nfsdcld/sqlite.c | 131 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
diff mbox series

Patch

diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
index 5d78d24..fa81df8 100644
--- a/utils/nfsdcld/sqlite.c
+++ b/utils/nfsdcld/sqlite.c
@@ -67,6 +67,7 @@ 
 #include "cld-internal.h"
 #include "conffile.h"
 #include "legacy.h"
+#include "nfslib.h"
 
 #define CLD_SQLITE_LATEST_SCHEMA_VERSION 3
 #define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack"
@@ -448,6 +449,129 @@  out:
 	return ret;
 }
 
+/*
+ * Helper for renaming a recovery table to fix the padding.
+ */
+static int
+sqlite_fix_table_name(const char *name)
+{
+	int ret;
+	uint64_t val;
+	char *err;
+
+	if (sscanf(name, "rec-%" PRIx64, &val) != 1)
+		return -EINVAL;
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
+			"RENAME TO \"rec-%016" PRIx64 "\";",
+			name, val);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return -EINVAL;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		return -EINVAL;
+	}
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to fix table for epoch %d: %s",
+		     val, err);
+		goto out;
+	}
+	xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val);
+out:
+	sqlite3_free(err);
+	return ret;
+}
+
+/*
+ * Callback for the sqlite_exec statement in sqlite_check_table_names.
+ * If the epoch encoded in the table name matches either the current
+ * epoch or the recovery epoch, then try to fix the padding.  Otherwise,
+ * we bail.
+ */
+static int
+sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols,
+			    char **UNUSED(colnames))
+{
+	int ret = SQLITE_OK;
+	uint64_t val;
+
+	if (ncols > 1)
+		return -EINVAL;
+	if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1)
+		return -EINVAL;
+	if (val == current_epoch || val == recovery_epoch) {
+		xlog(D_GENERAL, "found invalid table name %s for %s epoch",
+		     cols[0], val == current_epoch ? "current" : "recovery");
+		ret = sqlite_fix_table_name(cols[0]);
+	} else {
+		xlog(L_ERROR, "found invalid table name %s for unknown epoch %"
+		     PRId64, cols[0], val);
+		return -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Look for recovery table names where the epoch isn't zero-padded
+ */
+static int
+sqlite_check_table_names(void)
+{
+	int ret;
+	char *err;
+
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\" "
+			   "AND length(name) < 20;",
+			   sqlite_check_table_names_cb, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Table names check failed: %s", err);
+	}
+	sqlite3_free(err);
+	return ret;
+}
+
+/*
+ * Simple db health check.  For now we're just making sure that the recovery
+ * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex
+ * representation of the epoch value) and that epoch value matches either
+ * the current epoch or the recovery epoch.
+ */
+static int
+sqlite_check_db_health(void)
+{
+	int ret, ret2;
+	char *err;
+
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite_check_table_names();
+	if (ret != SQLITE_OK)
+		goto rollback;
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+
+cleanup:
+	sqlite3_free(err);
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+	return ret;
+rollback:
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto cleanup;
+}
+
 static int
 sqlite_attach_db(const char *path)
 {
@@ -673,6 +797,13 @@  sqlite_prepare_dbh(const char *topdir)
 	if (ret)
 		goto out_close;
 
+	ret = sqlite_check_db_health();
+	if (ret) {
+		xlog(L_ERROR, "Database health check failed! "
+			"Database must be fixed manually.");
+		goto out_close;
+	}
+
 	/* one-time "upgrade" from older client tracking methods */
 	if (first_time) {
 		sqlite_copy_cltrack_records(&num_cltrack_records);