@@ -50,7 +50,7 @@
*
* becomes this backwards information:
*
- * (child_agino*, dir_ino*, dir_gen, name*)
+ * (child_agino*, dir_ino*, dir_gen, name_cookie*)
*
* Key fields are starred.
*
@@ -58,6 +58,10 @@
* that names are stored separately in an xfblob data structure so that the
* rest of the information can be sorted and processed as fixed-size records;
* the incore parent pointer record contains a pointer to the strblob data.
+ * Because string blobs are deduplicated, there's a 1:1 mapping of name cookies
+ * to strings, which means that we can use the name cookie as a comparison key
+ * instead of loading the full dentry name every time we want to perform a
+ * comparison.
*/
struct ag_pptr {
@@ -121,15 +125,18 @@ parent_ptr_init(
struct xfs_mount *mp)
{
char *descr;
+ uint64_t iused;
xfs_agnumber_t agno;
int error;
if (!xfs_has_parent(mp))
return;
+ /* One hash bucket per inode, up to about 8M of memory on 64-bit. */
+ iused = min(mp->m_sb.sb_icount - mp->m_sb.sb_ifree, 1048573);
descr = kasprintf(GFP_KERNEL, "xfs_repair (%s): parent pointer names",
mp->m_fsname);
- error = strblobs_init(descr, &nameblobs);
+ error = strblobs_init(descr, iused, &nameblobs);
kfree(descr);
if (error)
do_error(_("init parent pointer names failed: %s\n"),
@@ -190,7 +197,7 @@ add_parent_ptr(
pthread_mutex_lock(&names_mutex);
error = strblobs_store(nameblobs, &ag_pptr.name_cookie, fname,
- ag_pptr.namelen);
+ ag_pptr.namelen, ag_pptr.namehash);
pthread_mutex_unlock(&names_mutex);
if (error)
do_error(_("storing name '%s' failed: %s\n"),
@@ -13,22 +13,42 @@
* =====================
*
* This data structure wraps the storage of strings with explicit length in an
- * xfblob structure.
+ * xfblob structure. It stores a hashtable of string checksums to provide
+ * fast(ish) lookups of existing strings to enable deduplication of the strings
+ * contained within.
*/
+struct strblob_hashent {
+ struct strblob_hashent *next;
+
+ xfblob_cookie str_cookie;
+ unsigned int str_len;
+ xfs_dahash_t str_hash;
+};
+
struct strblobs {
struct xfblob *strings;
+ unsigned int nr_buckets;
+
+ struct strblob_hashent *buckets[];
};
+static inline size_t strblobs_sizeof(unsigned int nr_buckets)
+{
+ return sizeof(struct strblobs) +
+ (nr_buckets * sizeof(struct strblobs_hashent *));
+}
+
/* Initialize a string blob structure. */
int
strblobs_init(
const char *descr,
+ unsigned int hash_buckets,
struct strblobs **sblobs)
{
struct strblobs *sb;
int error;
- sb = malloc(sizeof(struct strblobs));
+ sb = calloc(strblobs_sizeof(hash_buckets), 1);
if (!sb)
return ENOMEM;
@@ -36,6 +56,7 @@ strblobs_init(
if (error)
goto out_free;
+ sb->nr_buckets = hash_buckets;
*sblobs = sb;
return 0;
@@ -50,21 +71,132 @@ strblobs_destroy(
struct strblobs **sblobs)
{
struct strblobs *sb = *sblobs;
+ struct strblob_hashent *ent, *ent_next;
+ unsigned int bucket;
+
+ for (bucket = 0; bucket < sb->nr_buckets; bucket++) {
+ ent = sb->buckets[bucket];
+ while (ent != NULL) {
+ ent_next = ent->next;
+ free(ent);
+ ent = ent_next;
+ }
+ }
xfblob_destroy(sb->strings);
free(sb);
*sblobs = NULL;
}
+/*
+ * Search the string hashtable for a matching entry. Sets sets the cookie and
+ * returns 0 if one is found; ENOENT if there is no match; or a positive errno.
+ */
+static int
+__strblobs_lookup(
+ struct strblobs *sblobs,
+ xfblob_cookie *str_cookie,
+ const unsigned char *str,
+ unsigned int str_len,
+ xfs_dahash_t str_hash)
+{
+ struct strblob_hashent *ent;
+ unsigned char *buf = NULL;
+ unsigned int bucket;
+ int error;
+
+ bucket = str_hash % sblobs->nr_buckets;
+ ent = sblobs->buckets[bucket];
+
+ for (ent = sblobs->buckets[bucket]; ent != NULL; ent = ent->next) {
+ if (ent->str_len != str_len || ent->str_hash != str_hash)
+ continue;
+
+ if (!buf) {
+ buf = malloc(str_len);
+ if (!buf)
+ return ENOMEM;
+ }
+
+ error = strblobs_load(sblobs, ent->str_cookie, buf, str_len);
+ if (error)
+ goto out;
+
+ if (memcmp(str, buf, str_len))
+ continue;
+
+ *str_cookie = ent->str_cookie;
+ goto out;
+ }
+ error = ENOENT;
+
+out:
+ free(buf);
+ return error;
+}
+
+/*
+ * Search the string hashtable for a matching entry. Sets sets the cookie and
+ * returns 0 if one is found; ENOENT if there is no match; or a positive errno.
+ */
+int
+strblobs_lookup(
+ struct strblobs *sblobs,
+ xfblob_cookie *str_cookie,
+ const unsigned char *str,
+ unsigned int str_len,
+ xfs_dahash_t str_hash)
+{
+ return __strblobs_lookup(sblobs, str_cookie, str, str_len, str_hash);
+}
+
+/* Remember a string in the hashtable. */
+static int
+strblobs_hash(
+ struct strblobs *sblobs,
+ xfblob_cookie str_cookie,
+ const unsigned char *str,
+ unsigned int str_len,
+ xfs_dahash_t str_hash)
+{
+ struct strblob_hashent *ent;
+ unsigned int bucket;
+
+ bucket = str_hash % sblobs->nr_buckets;
+
+ ent = malloc(sizeof(struct strblob_hashent));
+ if (!ent)
+ return ENOMEM;
+
+ ent->str_cookie = str_cookie;
+ ent->str_len = str_len;
+ ent->str_hash = str_hash;
+ ent->next = sblobs->buckets[bucket];
+
+ sblobs->buckets[bucket] = ent;
+ return 0;
+}
+
/* Store a string and return a cookie for its retrieval. */
int
strblobs_store(
struct strblobs *sblobs,
xfblob_cookie *str_cookie,
const unsigned char *str,
- unsigned int str_len)
+ unsigned int str_len,
+ xfs_dahash_t str_hash)
{
- return -xfblob_store(sblobs->strings, str_cookie, str, str_len);
+ int error;
+
+ error = __strblobs_lookup(sblobs, str_cookie, str, str_len, str_hash);
+ if (error != ENOENT)
+ return error;
+
+ error = -xfblob_store(sblobs->strings, str_cookie, str, str_len);
+ if (error)
+ return error;
+
+ return strblobs_hash(sblobs, *str_cookie, str, str_len, str_hash);
}
/* Retrieve a previously stored string. */
@@ -8,12 +8,17 @@
struct strblobs;
-int strblobs_init(const char *descr, struct strblobs **sblobs);
+int strblobs_init(const char *descr, unsigned int hash_buckets,
+ struct strblobs **sblobs);
void strblobs_destroy(struct strblobs **sblobs);
int strblobs_store(struct strblobs *sblobs, xfblob_cookie *str_cookie,
- const unsigned char *str, unsigned int str_len);
+ const unsigned char *str, unsigned int str_len,
+ xfs_dahash_t hash);
int strblobs_load(struct strblobs *sblobs, xfblob_cookie str_cookie,
unsigned char *str, unsigned int str_len);
+int strblobs_lookup(struct strblobs *sblobs, xfblob_cookie *str_cookie,
+ const unsigned char *str, unsigned int str_len,
+ xfs_dahash_t hash);
#endif /* __REPAIR_STRBLOBS_H__ */