diff mbox series

fstests: Adds non-recursive and single file mode to fssum

Message ID 20200430035543.7178-1-raghavan.arvind@gmail.com (mailing list archive)
State New, archived
Headers show
Series fstests: Adds non-recursive and single file mode to fssum | expand

Commit Message

Arvind Raghavan April 30, 2020, 3:55 a.m. UTC
Requires patch: Fix duplicate CLI arguments in fssum

Adds flag '-R' for non-recursive mode. Useful for checking a directory
after an fsync, as POSIX standards only guarantee that the immediate
files in the directory are synced.

Added support for files as input, instead of just directories. fssum
computes the checksum of the single file.

Added a flag for including file size in metadata. Previously it was
always included. It is useful to ignore file sizes when testing an
fsync of a directory, as the data of its children is not guaranteed to
be persisted.

Signed-off-by: Arvind Raghavan <raghavan.arvind@gmail.com>
Signed-off-by: Jayashree Mohan <jaya@cs.utexas.edu>
Signed-off-by: Vijay Chidambaram <vijay@cs.utexas.edu>
---
 src/fssum.c | 162 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 122 insertions(+), 40 deletions(-)

Comments

Amir Goldstein May 1, 2020, 4:30 a.m. UTC | #1
Hi Arvind,

This looks great, but I have some suggestion to improve.
One thing is that should be a patch series with a cover letter
and not a single patch, because this way it is much harder to review
and bisect problems later on.

On Thu, Apr 30, 2020 at 8:34 PM Arvind Raghavan
<raghavan.arvind@gmail.com> wrote:
>
> Requires patch: Fix duplicate CLI arguments in fssum

This lines is irrelevant in git history so should be in notes after --- line

>
> Adds flag '-R' for non-recursive mode. Useful for checking a directory
> after an fsync, as POSIX standards only guarantee that the immediate
> files in the directory are synced.
>

Full stop. that should be a patch on its own.

> Added support for files as input, instead of just directories. fssum
> computes the checksum of the single file.
>

That one as well.

> Added a flag for including file size in metadata. Previously it was
> always included. It is useful to ignore file sizes when testing an
> fsync of a directory, as the data of its children is not guaranteed to
> be persisted.

And that one.

>
> Signed-off-by: Arvind Raghavan <raghavan.arvind@gmail.com>
> Signed-off-by: Jayashree Mohan <jaya@cs.utexas.edu>
> Signed-off-by: Vijay Chidambaram <vijay@cs.utexas.edu>
> ---
>  src/fssum.c | 162 +++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 122 insertions(+), 40 deletions(-)
>
> diff --git a/src/fssum.c b/src/fssum.c
> index 3d97a70b..9b32e8cb 100644
> --- a/src/fssum.c
> +++ b/src/fssum.c
> @@ -29,6 +29,7 @@
>  #include <inttypes.h>
>  #include <assert.h>
>  #include <endian.h>
> +#include <libgen.h>
>
>  #define CS_SIZE 16
>  #define CHUNKS 128
> @@ -56,6 +57,7 @@ typedef int (*sum_file_data_t)(int fd, sum_t *dst);
>
>  int gen_manifest = 0;
>  int in_manifest = 0;
> +int recursive = 1;
>  char *checksum = NULL;
>  struct excludes *excludes;
>  int n_excludes = 0;
> @@ -74,13 +76,14 @@ enum _flags {
>         FLAG_XATTRS,
>         FLAG_OPEN_ERROR,
>         FLAG_STRUCTURE,
> +       FLAG_SIZE,
>         NUM_FLAGS
>  };
>
> -const char flchar[] = "ugoamcdtes";
> +const char flchar[] = "ugoamcdtesz";
>  char line[65536];
>
> -int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 1, 0, 0};
> +int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1};
>
>  char *
>  getln(char *buf, int size, FILE *fp)
> @@ -146,12 +149,14 @@ usage(void)
>         fprintf(stderr, "         t       : include xattrs\n");
>         fprintf(stderr, "         e       : include open errors (aborts otherwise)\n");
>         fprintf(stderr, "         s       : include block structure (holes)\n");
> +       fprintf(stderr, "         z       : include file size\n");
>         fprintf(stderr, "    -[UGOAMCDTES]: exclude respective field from calculation\n");

missing Z

>         fprintf(stderr, "    -n           : reset all flags\n");
>         fprintf(stderr, "    -N           : set all flags\n");
>         fprintf(stderr, "    -x path      : exclude path when building checksum (multiple ok)\n");
> +       fprintf(stderr, "    -R           : traverse dirs non-recursively (recursive is default)\n");
>         fprintf(stderr, "    -h           : this help\n\n");
> -       fprintf(stderr, "The default field mask is ugoamCdtES. If the checksum/manifest is read from a\n");
> +       fprintf(stderr, "The default field mask is ugoamCdtESz. If the checksum/manifest is read from a\n");
>         fprintf(stderr, "file, the mask is taken from there and the values given on the command line\n");
>         fprintf(stderr, "are ignored.\n");
>         exit(-1);
> @@ -502,6 +507,92 @@ malformed:
>                 excess_file(fn);
>  }
>
> +void
> +sum_meta(sum_t *meta, struct stat64 *st) {
> +       if (!S_ISDIR(st->st_mode))
> +               sum_add_u64(meta, st->st_nlink);
> +       if (flags[FLAG_UID])
> +               sum_add_u64(meta, st->st_uid);
> +       if (flags[FLAG_GID])
> +               sum_add_u64(meta, st->st_gid);
> +       if (flags[FLAG_MODE])
> +               sum_add_u64(meta, st->st_mode);
> +       if (flags[FLAG_ATIME])
> +               sum_add_time(meta, st->st_atime);
> +       if (flags[FLAG_MTIME])
> +               sum_add_time(meta, st->st_mtime);
> +       if (flags[FLAG_CTIME])
> +               sum_add_time(meta, st->st_ctime);
> +}
> +void
> +print_manifest(char* path, sum_t *meta, sum_t *cs, struct stat64 *st) {
> +       if (gen_manifest || in_manifest) {
> +               char *fn;
> +               char *m;
> +               char *c;
> +
> +               if (S_ISDIR(st->st_mode))
> +                       strcat(path, "/");
> +               fn = escape(path);
> +               m = sum_to_string(meta);
> +               c = sum_to_string(cs);
> +
> +               if (gen_manifest)
> +                       fprintf(out_fp, "%s %s %s\n", fn, m, c);
> +               if (in_manifest)
> +                       check_manifest(fn, m, c, 0);
> +               free(c);
> +               free(m);
> +               free(fn);
> +       }
> +}
> +

Those helpers are great, but re-factoring should be a separate prep
patch. It is easy to review non-logic-changing-re-factoring and it is hard
to review logic changing and re-factoring together - to human ;-).

> +void
> +sum_file(int fd, sum_t *finalcs, struct stat64 *st, char *path) {
> +       int ret;
> +       sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ?
> +                       sum_file_data_strict : sum_file_data_permissive;
> +
> +       sum_t cs;
> +       sum_t meta;
> +
> +       sum_init(&cs);
> +       sum_init(&meta);
> +
> +       char* name = basename(path);
> +       sum_add(&meta, name, strlen(name));
> +       sum_meta(&meta, st);
> +
> +       if (flags[FLAG_XATTRS]) {
> +               ret = sum_xattrs(fd, &meta);
> +               if (ret < 0) {
> +                       fprintf(stderr, "failed to read xattrs from "
> +                                       "%s: %s\n", path, strerror(-ret));
> +                       exit(-1);
> +               }
> +       }
> +
> +       if (flags[FLAG_SIZE])
> +               sum_add_u64(&meta, st->st_size);
> +
> +       if (flags[FLAG_DATA]) {
> +               ret = sum_file_data(fd, &cs);
> +               if (ret < 0) {
> +                       fprintf(stderr, "read failed for %s: %s\n", path,
> +                                       strerror(errno));
> +                       exit(-1);
> +               }
> +       }
> +
> +       sum_fini(&cs);
> +       sum_fini(&meta);
> +
> +       print_manifest(path, &meta, &cs, st);
> +
> +       sum_add_sum(finalcs, &cs);
> +       sum_add_sum(finalcs, &meta);
> +}
> +
>  void
>  sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
>  {
> @@ -582,20 +673,7 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
>
>                 sum_add_u64(&meta, level);
>                 sum_add(&meta, namelist[i], strlen(namelist[i]));
> -               if (!S_ISDIR(st.st_mode))
> -                       sum_add_u64(&meta, st.st_nlink);
> -               if (flags[FLAG_UID])
> -                       sum_add_u64(&meta, st.st_uid);
> -               if (flags[FLAG_GID])
> -                       sum_add_u64(&meta, st.st_gid);
> -               if (flags[FLAG_MODE])
> -                       sum_add_u64(&meta, st.st_mode);
> -               if (flags[FLAG_ATIME])
> -                       sum_add_time(&meta, st.st_atime);
> -               if (flags[FLAG_MTIME])
> -                       sum_add_time(&meta, st.st_mtime);
> -               if (flags[FLAG_CTIME])
> -                       sum_add_time(&meta, st.st_ctime);
> +               sum_meta(&meta, &st);
>                 if (flags[FLAG_XATTRS] &&
>                     (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))) {
>                         fd = openat(dirfd, namelist[i], 0);
> @@ -618,7 +696,7 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
>                                 }
>                         }
>                 }
> -               if (S_ISDIR(st.st_mode)) {
> +               if (S_ISDIR(st.st_mode) && recursive) {
>                         fd = openat(dirfd, namelist[i], 0);
>                         if (fd == -1 && flags[FLAG_OPEN_ERROR]) {
>                                 sum_add_u64(&meta, errno);
> @@ -631,7 +709,8 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
>                                 close(fd);
>                         }
>                 } else if (S_ISREG(st.st_mode)) {
> -                       sum_add_u64(&meta, st.st_size);
> +                       if (flags[FLAG_SIZE])
> +                               sum_add_u64(&meta, st.st_size);
>                         if (flags[FLAG_DATA]) {
>                                 if (verbose)
>                                         fprintf(stderr, "file %s\n",
> @@ -672,25 +751,9 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
>                 }
>                 sum_fini(&cs);
>                 sum_fini(&meta);
> -               if (gen_manifest || in_manifest) {
> -                       char *fn;
> -                       char *m;
> -                       char *c;
> -
> -                       if (S_ISDIR(st.st_mode))
> -                               strcat(path, "/");
> -                       fn = escape(path);
> -                       m = sum_to_string(&meta);
> -                       c = sum_to_string(&cs);
> -
> -                       if (gen_manifest)
> -                               fprintf(out_fp, "%s %s %s\n", fn, m, c);
> -                       if (in_manifest)
> -                               check_manifest(fn, m, c, 0);
> -                       free(c);
> -                       free(m);
> -                       free(fn);
> -               }
> +
> +               print_manifest(path, &meta, &cs, &st);
> +
>                 sum_add_sum(dircs, &cs);
>                 sum_add_sum(dircs, &meta);
>  next:
> @@ -712,7 +775,7 @@ main(int argc, char *argv[])
>         int plen;
>         int elen;
>         int n_flags = 0;
> -       const char *allopts = "heEfuUgGoOaAmMcCdDtTsSnNw:r:vx:";
> +       const char *allopts = "heEfuUgGoOaAmMcCdDtTsSzZnNRw:r:vx:";
>
>         out_fp = stdout;
>         while ((c = getopt(argc, argv, allopts)) != EOF) {
> @@ -720,6 +783,9 @@ main(int argc, char *argv[])
>                 case 'f':
>                         gen_manifest = 1;
>                         break;
> +               case 'R':
> +                       recursive = 0;
> +                       break;
>                 case 'u':
>                 case 'U':
>                 case 'g':
> @@ -740,6 +806,8 @@ main(int argc, char *argv[])
>                 case 'E':
>                 case 's':
>                 case 'S':
> +               case 'z':
> +               case 'Z':
>                         ++n_flags;
>                         parse_flag(c);
>                         break;
> @@ -855,8 +923,22 @@ main(int argc, char *argv[])
>         if (gen_manifest)
>                 fprintf(out_fp, "Flags: %s\n", flagstring);
>
> +       struct stat64 path_st;
> +       if (fstat64(fd, &path_st)) {
> +               perror("fstat");
> +               exit(-1);
> +       }
> +
>         sum_init(&cs);
> -       sum(fd, 1, &cs, path, "");
> +       if (S_ISDIR(path_st.st_mode)) {
> +               sum(fd, 1, &cs, path, "");
> +       } else if (S_ISREG(path_st.st_mode)) {
> +               sum_file(fd, &cs, &path_st, path);
> +       } else {
> +               fprintf(stderr, "path must be file or dir: %s", path);
> +               exit(-1);
> +       }
> +

I would try something different - only after you try to write it we will
know if i was a good idea or not...
factor out sum_one() from the dir entries iteration in the re-factoring patch
then call sum_one() from main() for non-dir in the patch to support single
file sum. And there is not really a need to limit single file sum to
regular file.

It is tempting to just call sum_one() to let it handle file or dir for
either root or node dir, but that change may affect all existing test
and will be harder to verify. So I would rather that sum_one()
will be a stupified factor out of some code chunk to a helper.

Hope this is clear enough - if not, let me know.
Thanks,
Amir.
diff mbox series

Patch

diff --git a/src/fssum.c b/src/fssum.c
index 3d97a70b..9b32e8cb 100644
--- a/src/fssum.c
+++ b/src/fssum.c
@@ -29,6 +29,7 @@ 
 #include <inttypes.h>
 #include <assert.h>
 #include <endian.h>
+#include <libgen.h>
 
 #define CS_SIZE 16
 #define CHUNKS	128
@@ -56,6 +57,7 @@  typedef int (*sum_file_data_t)(int fd, sum_t *dst);
 
 int gen_manifest = 0;
 int in_manifest = 0;
+int recursive = 1;
 char *checksum = NULL;
 struct excludes *excludes;
 int n_excludes = 0;
@@ -74,13 +76,14 @@  enum _flags {
 	FLAG_XATTRS,
 	FLAG_OPEN_ERROR,
 	FLAG_STRUCTURE,
+	FLAG_SIZE,
 	NUM_FLAGS
 };
 
-const char flchar[] = "ugoamcdtes";
+const char flchar[] = "ugoamcdtesz";
 char line[65536];
 
-int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 1, 0, 0};
+int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1};
 
 char *
 getln(char *buf, int size, FILE *fp)
@@ -146,12 +149,14 @@  usage(void)
 	fprintf(stderr, "         t       : include xattrs\n");
 	fprintf(stderr, "         e       : include open errors (aborts otherwise)\n");
 	fprintf(stderr, "         s       : include block structure (holes)\n");
+	fprintf(stderr, "         z       : include file size\n");
 	fprintf(stderr, "    -[UGOAMCDTES]: exclude respective field from calculation\n");
 	fprintf(stderr, "    -n           : reset all flags\n");
 	fprintf(stderr, "    -N           : set all flags\n");
 	fprintf(stderr, "    -x path      : exclude path when building checksum (multiple ok)\n");
+	fprintf(stderr, "    -R           : traverse dirs non-recursively (recursive is default)\n");
 	fprintf(stderr, "    -h           : this help\n\n");
-	fprintf(stderr, "The default field mask is ugoamCdtES. If the checksum/manifest is read from a\n");
+	fprintf(stderr, "The default field mask is ugoamCdtESz. If the checksum/manifest is read from a\n");
 	fprintf(stderr, "file, the mask is taken from there and the values given on the command line\n");
 	fprintf(stderr, "are ignored.\n");
 	exit(-1);
@@ -502,6 +507,92 @@  malformed:
 		excess_file(fn);
 }
 
+void
+sum_meta(sum_t *meta, struct stat64 *st) {
+	if (!S_ISDIR(st->st_mode))
+		sum_add_u64(meta, st->st_nlink);
+	if (flags[FLAG_UID])
+		sum_add_u64(meta, st->st_uid);
+	if (flags[FLAG_GID])
+		sum_add_u64(meta, st->st_gid);
+	if (flags[FLAG_MODE])
+		sum_add_u64(meta, st->st_mode);
+	if (flags[FLAG_ATIME])
+		sum_add_time(meta, st->st_atime);
+	if (flags[FLAG_MTIME])
+		sum_add_time(meta, st->st_mtime);
+	if (flags[FLAG_CTIME])
+		sum_add_time(meta, st->st_ctime);
+}
+void
+print_manifest(char* path, sum_t *meta, sum_t *cs, struct stat64 *st) {
+	if (gen_manifest || in_manifest) {
+		char *fn;
+		char *m;
+		char *c;
+
+		if (S_ISDIR(st->st_mode))
+			strcat(path, "/");
+		fn = escape(path);
+		m = sum_to_string(meta);
+		c = sum_to_string(cs);
+
+		if (gen_manifest)
+			fprintf(out_fp, "%s %s %s\n", fn, m, c);
+		if (in_manifest)
+			check_manifest(fn, m, c, 0);
+		free(c);
+		free(m);
+		free(fn);
+	}
+}
+
+void
+sum_file(int fd, sum_t *finalcs, struct stat64 *st, char *path) {
+	int ret;
+	sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ?
+			sum_file_data_strict : sum_file_data_permissive;
+
+	sum_t cs;
+	sum_t meta;
+
+	sum_init(&cs);
+	sum_init(&meta);
+
+	char* name = basename(path);
+	sum_add(&meta, name, strlen(name));
+	sum_meta(&meta, st);
+
+	if (flags[FLAG_XATTRS]) {
+		ret = sum_xattrs(fd, &meta);
+		if (ret < 0) {
+			fprintf(stderr, "failed to read xattrs from "
+					"%s: %s\n", path, strerror(-ret));
+			exit(-1);
+		}
+	}
+
+	if (flags[FLAG_SIZE])
+		sum_add_u64(&meta, st->st_size);
+
+	if (flags[FLAG_DATA]) {
+		ret = sum_file_data(fd, &cs);
+		if (ret < 0) {
+			fprintf(stderr, "read failed for %s: %s\n", path,
+					strerror(errno));
+			exit(-1);
+		}
+	}
+
+	sum_fini(&cs);
+	sum_fini(&meta);
+
+	print_manifest(path, &meta, &cs, st);
+
+	sum_add_sum(finalcs, &cs);
+	sum_add_sum(finalcs, &meta);
+}
+
 void
 sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
 {
@@ -582,20 +673,7 @@  sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
 
 		sum_add_u64(&meta, level);
 		sum_add(&meta, namelist[i], strlen(namelist[i]));
-		if (!S_ISDIR(st.st_mode))
-			sum_add_u64(&meta, st.st_nlink);
-		if (flags[FLAG_UID])
-			sum_add_u64(&meta, st.st_uid);
-		if (flags[FLAG_GID])
-			sum_add_u64(&meta, st.st_gid);
-		if (flags[FLAG_MODE])
-			sum_add_u64(&meta, st.st_mode);
-		if (flags[FLAG_ATIME])
-			sum_add_time(&meta, st.st_atime);
-		if (flags[FLAG_MTIME])
-			sum_add_time(&meta, st.st_mtime);
-		if (flags[FLAG_CTIME])
-			sum_add_time(&meta, st.st_ctime);
+		sum_meta(&meta, &st);
 		if (flags[FLAG_XATTRS] &&
 		    (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))) {
 			fd = openat(dirfd, namelist[i], 0);
@@ -618,7 +696,7 @@  sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
 				}
 			}
 		}
-		if (S_ISDIR(st.st_mode)) {
+		if (S_ISDIR(st.st_mode) && recursive) {
 			fd = openat(dirfd, namelist[i], 0);
 			if (fd == -1 && flags[FLAG_OPEN_ERROR]) {
 				sum_add_u64(&meta, errno);
@@ -631,7 +709,8 @@  sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
 				close(fd);
 			}
 		} else if (S_ISREG(st.st_mode)) {
-			sum_add_u64(&meta, st.st_size);
+			if (flags[FLAG_SIZE])
+				sum_add_u64(&meta, st.st_size);
 			if (flags[FLAG_DATA]) {
 				if (verbose)
 					fprintf(stderr, "file %s\n",
@@ -672,25 +751,9 @@  sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
 		}
 		sum_fini(&cs);
 		sum_fini(&meta);
-		if (gen_manifest || in_manifest) {
-			char *fn;
-			char *m;
-			char *c;
-
-			if (S_ISDIR(st.st_mode))
-				strcat(path, "/");
-			fn = escape(path);
-			m = sum_to_string(&meta);
-			c = sum_to_string(&cs);
-
-			if (gen_manifest)
-				fprintf(out_fp, "%s %s %s\n", fn, m, c);
-			if (in_manifest)
-				check_manifest(fn, m, c, 0);
-			free(c);
-			free(m);
-			free(fn);
-		}
+
+		print_manifest(path, &meta, &cs, &st);
+
 		sum_add_sum(dircs, &cs);
 		sum_add_sum(dircs, &meta);
 next:
@@ -712,7 +775,7 @@  main(int argc, char *argv[])
 	int plen;
 	int elen;
 	int n_flags = 0;
-	const char *allopts = "heEfuUgGoOaAmMcCdDtTsSnNw:r:vx:";
+	const char *allopts = "heEfuUgGoOaAmMcCdDtTsSzZnNRw:r:vx:";
 
 	out_fp = stdout;
 	while ((c = getopt(argc, argv, allopts)) != EOF) {
@@ -720,6 +783,9 @@  main(int argc, char *argv[])
 		case 'f':
 			gen_manifest = 1;
 			break;
+		case 'R':
+			recursive = 0;
+			break;
 		case 'u':
 		case 'U':
 		case 'g':
@@ -740,6 +806,8 @@  main(int argc, char *argv[])
 		case 'E':
 		case 's':
 		case 'S':
+		case 'z':
+		case 'Z':
 			++n_flags;
 			parse_flag(c);
 			break;
@@ -855,8 +923,22 @@  main(int argc, char *argv[])
 	if (gen_manifest)
 		fprintf(out_fp, "Flags: %s\n", flagstring);
 
+	struct stat64 path_st;
+	if (fstat64(fd, &path_st)) {
+		perror("fstat");
+		exit(-1);
+	}
+
 	sum_init(&cs);
-	sum(fd, 1, &cs, path, "");
+	if (S_ISDIR(path_st.st_mode)) {
+		sum(fd, 1, &cs, path, "");
+	} else if (S_ISREG(path_st.st_mode)) {
+		sum_file(fd, &cs, &path_st, path);
+	} else {
+		fprintf(stderr, "path must be file or dir: %s", path);
+		exit(-1);
+	}
+
 	sum_fini(&cs);
 
 	close(fd);