Message ID | 6321f041bfb9f439198cc03d2f7cb8ce8c5db867.1301503683.git.list.btrfs@jan-o-sch.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Yes, this is over three months after the initial posting, but since nobody else has looked at it yet, and the patch is in my integration stack... I've not reviewed the whole thing -- just the "scrub start" code so far. I've removed the bits I've not checked from the file below. On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: No commit message at all? > Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> > --- > scrub.c | 1568 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 1568 insertions(+), 0 deletions(-) This is quite big to review in one lump... Is it possible to split the patch into functional sections? (Add shared infrastructure, then each of the four functions separately, maybe?) > diff --git a/scrub.c b/scrub.c > new file mode 100644 > index 0000000..22052ed > --- /dev/null > +++ b/scrub.c > @@ -0,0 +1,1568 @@ It seems to be conventional to put a GPL notice at the top of source files... :) > + > +#include <sys/ioctl.h> > +#include <sys/wait.h> > +#include <sys/stat.h> > +#include <sys/types.h> > +#include <sys/socket.h> > +#include <sys/un.h> > +#include <poll.h> > +#include <sys/file.h> > +#include <uuid/uuid.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <pthread.h> > +#include <ctype.h> > +#include <signal.h> > +#include <stdarg.h> > + > +#include "ctree.h" > +#include "ioctl.h" > +#include "btrfs_cmds.h" > +#include "utils.h" > +#include "volumes.h" > +#include "disk-io.h" > + > +#define SCRUB_DATA_FILE "/var/btrfs/scrub.status" > +#define SCRUB_PROGRESS_SOCKET_PATH "/var/btrfs/scrub.progress" I'd suggest /var/lib/btrfs/[...] instead. Putting it in the top level of /var seems a bit presumptuous (and contravenes the FHS). > +#define SCRUB_FILE_VERSION_PREFIX "scrub status:" I'd drop the : from this, since there's no colons used in other key names elsewhere (in scrub_read_file() and _scrub_kvread()), and the correction for the colon at [1] is confusing. > +#define SCRUB_FILE_VERSION "1" > + > +struct scrub_stats { > + time_t t_start; > + time_t t_resumed; > + u64 duration; > + u64 finished; > + u64 canceled; > +}; > + > +struct scrub_progress { > + struct btrfs_ioctl_scrub_args scrub_args; > + int fd; > + int ret; > + int skip; > + struct scrub_stats stats; > + struct scrub_file_record *resumed; > + int ioctl_errno; > + pthread_mutex_t progress_mutex; > +}; > + > +struct scrub_file_record { > + u8 fsid[BTRFS_FSID_SIZE]; > + u64 devid; > + struct scrub_stats stats; > + struct btrfs_scrub_progress p; > +}; > + > +struct scrub_progress_cycle { > + int fdmnt; > + int prg_fd; > + int do_record; > + struct btrfs_ioctl_fs_info_args *fi; > + struct scrub_progress *progress; > + struct scrub_progress *shared_progress; > + pthread_mutex_t *write_mutex; > +}; > + > +struct scrub_fs_stat { > + struct btrfs_scrub_progress p; > + struct scrub_stats s; > + int i; > +}; > + > +static void print_scrub_full(struct btrfs_scrub_progress *sp) > +{ > + printf("\tdata_extents_scrubbed: %lld\n", sp->data_extents_scrubbed); > + printf("\ttree_extents_scrubbed: %lld\n", sp->tree_extents_scrubbed); > + printf("\tdata_bytes_scrubbed: %lld\n", sp->data_bytes_scrubbed); > + printf("\ttree_bytes_scrubbed: %lld\n", sp->tree_bytes_scrubbed); > + printf("\tread_errors: %lld\n", sp->read_errors); > + printf("\tcsum_errors: %lld\n", sp->csum_errors); > + printf("\tverify_errors: %lld\n", sp->verify_errors); > + printf("\tno_csum: %lld\n", sp->no_csum); > + printf("\tcsum_discards: %lld\n", sp->csum_discards); > + printf("\tsuper_errors: %lld\n", sp->super_errors); > + printf("\tmalloc_errors: %lld\n", sp->malloc_errors); > + printf("\tuncorrectable_errors: %lld\n", sp->uncorrectable_errors); > + printf("\tcorrected_errors: %lld\n", sp->corrected_errors); > + printf("\tlast_physical: %lld\n", sp->last_physical); > +} > + > +#define err(test, ...) do { \ > + if (test) \ > + fprintf(stderr, __VA_ARGS__); \ > +} while (0) > + > +#define PRINT_SCRUB_ERROR(test, desc) do { \ > + if (test) \ > + printf(" %s=%llu", desc, test); \ > +} while (0) Extra line of space here, otherwise it looks like the function is part of the macro. (And in a number of other places throughout the file, too) > +static void print_scrub_summary(struct btrfs_scrub_progress *p) > +{ > + u64 err_cnt; > + u64 err_cnt2; > + > + err_cnt = p->read_errors + > + p->csum_errors + > + p->verify_errors + > + p->csum_discards + > + p->super_errors + > + p->malloc_errors; > + > + err_cnt2 = p->corrected_errors + p->uncorrectable_errors; > + > + printf("\ttotal bytes scrubbed: %s with %llu errors\n", > + pretty_sizes(p->data_bytes_scrubbed + p->tree_bytes_scrubbed), > + max(err_cnt, err_cnt2)); Memory leak: pretty_sizes() mallocs space for its result. > + if (err_cnt || err_cnt2) { > + printf("\terror details:"); > + PRINT_SCRUB_ERROR(p->read_errors, "read"); > + PRINT_SCRUB_ERROR(p->super_errors, "super"); > + PRINT_SCRUB_ERROR(p->malloc_errors, "malloc"); > + PRINT_SCRUB_ERROR(p->verify_errors, "verify"); > + PRINT_SCRUB_ERROR(p->csum_errors, "csum"); > + PRINT_SCRUB_ERROR(p->csum_discards, "csum-discards"); > + printf("\n"); > + printf("\tcorrected errors: %llu, uncorrectable errors: %llu\n", > + p->corrected_errors, p->uncorrectable_errors); > + } > +} > + > +#define _SCRUB_FS_STAT(p, name, fs_stat) fs_stat->p.name += p->name checkpatch.pl says: scrub.c:130: ERROR: Macros with complex values should be enclosed in parenthesis (and in general, checkpatch.pl is whinging about lots of whitespace/ indentation issues with this file) > +#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat) \ > +do { \ > + if (fs_stat->s.name > ss->name) { \ > + fs_stat->s.name = ss->name; \ > + } \ > +} while (0) > +#define _SCRUB_FS_STAT_ZMIN(ss, name, fs_stat) \ > +do { \ > + if (!fs_stat->s.name || fs_stat->s.name > ss->name) { \ > + fs_stat->s.name = ss->name; \ > + } \ > +} while (0) > +#define _SCRUB_FS_STAT_MAX(ss, name, fs_stat) \ Maybe use _SCRUB_FS_STAT_ZMAX to match the ZMIN usage above? > +do { \ > + if (!fs_stat->s.name || fs_stat->s.name < ss->name) { \ > + fs_stat->s.name = ss->name; \ > + } \ > +} while (0) Line of space needed here. > +static void add_to_fs_stat(struct btrfs_scrub_progress *p, > + struct scrub_stats *ss, > + struct scrub_fs_stat *fs_stat) > +{ > + _SCRUB_FS_STAT(p, data_extents_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, tree_extents_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, data_bytes_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, tree_bytes_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, read_errors, fs_stat); > + _SCRUB_FS_STAT(p, csum_errors, fs_stat); > + _SCRUB_FS_STAT(p, verify_errors, fs_stat); > + _SCRUB_FS_STAT(p, no_csum, fs_stat); > + _SCRUB_FS_STAT(p, csum_discards, fs_stat); > + _SCRUB_FS_STAT(p, super_errors, fs_stat); > + _SCRUB_FS_STAT(p, malloc_errors, fs_stat); > + _SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat); > + _SCRUB_FS_STAT(p, corrected_errors, fs_stat); > + _SCRUB_FS_STAT(p, last_physical, fs_stat); > + _SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat); > + _SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat); > + _SCRUB_FS_STAT_MAX(ss, duration, fs_stat); > + _SCRUB_FS_STAT_MAX(ss, canceled, fs_stat); > + _SCRUB_FS_STAT_MIN(ss, finished, fs_stat); > +} > + > +static void init_fs_stat(struct scrub_fs_stat *fs_stat) > +{ > + memset(fs_stat, 0, sizeof(*fs_stat)); > + fs_stat->s.finished = 2; What does 2 mean? ->s.finished seems to be a boolean everywhere except here. Can you turn this value into a more descriptive #define? Or just use 1? > +} > + > +static void _print_scrub_ss(struct scrub_stats *ss) > +{ > + char t[BTRFS_PATH_NAME_MAX+1]; Since this string is used for storing formatted dates, there's a little cognitive dissonance over it being long enough to store a btrfs path name... (Yeah, OK, it's a convenient large value, but still a tad confusing to use it here) > + struct tm tm; > + > + if (!ss || !ss->t_start) { > + printf("\tno stats available\n"); > + return; > + } > + if (ss->t_resumed) { > + localtime_r(&ss->t_resumed, &tm); > + strftime(t, sizeof(t), "%c", &tm); strftime doesn't append a terminating zero if the string is longer than the buffer it's filling. > + printf("\tscrub resumed at %s", t); > + } else { > + localtime_r(&ss->t_start, &tm); > + strftime(t, sizeof(t), "%c", &tm); > + printf("\tscrub started at %s", t); > + } > + if (ss->finished && !ss->canceled) { > + printf(" and finished after %llu seconds\n", > + ss->duration); > + } else if (ss->canceled) { > + printf(" and was aborted after %llu seconds\n", > + ss->duration); > + } else { > + printf(", running for %llu seconds\n", ss->duration); > + } > +} > + > +static void print_scrub_dev(struct btrfs_ioctl_dev_info_args *di, > + struct btrfs_scrub_progress *p, int raw, > + const char *append, struct scrub_stats *ss) > +{ > + printf("scrub device %s (id %llu) %s\n", di->path, di->devid, > + append ? append : ""); > + > + _print_scrub_ss(ss); > + > + if (p) { > + if (raw) > + print_scrub_full(p); > + else > + print_scrub_summary(p); > + } > +} > + > +static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw) > +{ > + _print_scrub_ss(&fs_stat->s); > + > + if (raw) > + print_scrub_full(&fs_stat->p); > + else > + print_scrub_summary(&fs_stat->p); > +} > + > +static void free_history(struct scrub_file_record **last_scrubs) > +{ > + struct scrub_file_record **l = last_scrubs; > + if (!l) > + return; > + while (*l) > + free(*l++); > + free(last_scrubs); > +} > + > +static int cancel_fd = -1; > +static void scrub_sigint_record_progress(int signal) What does this function have to do with recording progress? Seems a bit of a misnomer to me. (Call it scrub_sigint_cancel_scrub, maybe?) > +{ > + ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); > +} > + > +static int scrub_handle_sigint_parent(void) > +{ > + struct sigaction sa = { > + .sa_handler = SIG_IGN, > + .sa_flags = SA_RESTART, > + }; > + > + return sigaction(SIGINT, &sa, NULL); > +} > + > +static int scrub_handle_sigint_child(int fd) > +{ > + struct sigaction sa = { > + .sa_handler = fd == -1 ? SIG_DFL : scrub_sigint_record_progress, > + }; > + > + cancel_fd = fd; > + return sigaction(SIGINT, &sa, NULL); > +} > + > +static int _scrub_datafile(const char *fn_base, const char *fn_local, > + const char *fn_tmp, char *datafile, int max) > +{ > + int ret; > + > + strncpy(datafile, fn_base, max); You need to put a zero byte at datafile[max], otherwise it could be unterminated (if max <= strlen(fn_base)), and the strlen will then run off the end of the string. > + ret = strlen(datafile); > + > + if (ret + 1 >= max) > + return -EOVERFLOW; This will never happen (if you put the zero terminator in) > + datafile[ret] = '.'; > + strncpy(datafile+ret+1, fn_local, max-ret-1); ... and add a zero byte here, too (or use strncat) > + ret = strlen(datafile); > + > + if (ret + 1 >= max) > + return -EOVERFLOW; as above: won't happen > + if (fn_tmp) { > + datafile[ret] = '_'; > + strncpy(datafile+ret+1, fn_tmp, max-ret-1); ... and add a zero byte here (or use strncat) > + ret = strlen(datafile); > + > + if (ret >= max) > + return -EOVERFLOW; > + } > + > + return 0; > +} > + > +static int _scrub_open_file(const char *datafile, int m) Just a niggle: Why the leading _ when other scrub-specific functions don't have it? (There's about a dozen other such symbols -- was there a criterion you used to decide which ones have _ and which don't?) > +{ > + int fd; > + int ret; > + > + fd = open(datafile, m, 0600); > + if (fd < 0) > + return -errno; > + > + ret = flock(fd, LOCK_EX|LOCK_NB); > + if (ret) { > + ret = errno; > + close(fd); > + return -ret; > + } > + > + return fd; > +} > + > +static int scrub_open_file_r(const char *fn_base, const char *fn_local) > +{ > + int ret; > + char datafile[BTRFS_PATH_NAME_MAX+1]; > + ret = _scrub_datafile(fn_base, fn_local, NULL, > + datafile, sizeof(datafile)); > + if (ret < 0) > + return ret; > + return _scrub_open_file(datafile, O_RDONLY); > +} > + > +static int scrub_open_file_w(const char *fn_base, const char *fn_local, > + const char *tmp) > +{ > + int ret; > + char datafile[BTRFS_PATH_NAME_MAX+1]; > + ret = _scrub_datafile(fn_base, fn_local, tmp, > + datafile, sizeof(datafile)); > + if (ret < 0) > + return ret; > + return _scrub_open_file(datafile, O_WRONLY|O_CREAT); > +} > + > +static int scrub_rename_file(const char *fn_base, const char *fn_local, > + const char *tmp) > +{ > + int ret; > + char datafile_old[BTRFS_PATH_NAME_MAX+1]; > + char datafile_new[BTRFS_PATH_NAME_MAX+1]; > + ret = _scrub_datafile(fn_base, fn_local, tmp, > + datafile_old, sizeof(datafile_old)); > + if (ret < 0) > + return ret; > + ret = _scrub_datafile(fn_base, fn_local, NULL, > + datafile_new, sizeof(datafile_new)); > + if (ret < 0) > + return ret; > + ret = rename(datafile_old, datafile_new); > + return ret ? -errno : 0; > +} > + > +#define _SCRUB_KVREAD(i, name, avail, l, dest) \ > + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest.name) > +#define _SCRUB_KVREAD_STATS(i, name, avail, l, dest) \ > + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest->stats.name) > +/* > + * returns 0 if the key did not match (nothing was read) > + * 1 if the key did match (success) > + * -1 if the key did match and an error occured > + */ > +static int _scrub_kvread(int *i, int len, int avail, const char *buf, > + const char *key, u64 *dest) > +{ > + int j; > + > + if (*i+len+1 < avail && strncmp(&buf[*i], key, len-1) == 0) { > + *i += len-1; > + if (buf[*i] != ':') { > + return -1; > + } > + *i += 1; > + for (j=0; isdigit(buf[*i+j]) && *i+j < avail; ++j) > + ; > + if (*i+j >= avail) > + return -1; > + *dest = atoll(&buf[*i]); > + *i += j; > + return 1; > + } > + > + return 0; > +} > + > +#define _SCRUB_ILLEGAL do { \ I'd better call the police, then... :) I think you mean invalid (or unexpected, or unparsable), not illegal. (and likewise in the message, below, of course). > + if (report_errors) { \ > + fprintf(stderr, "WARNING: illegal data in line %d pos " \ > + "%d state %d (near \"%.*s\") at %s:%d\n", \ > + lineno, i, state, 20 > avail ? avail : 20, l+i, \ > + __FILE__, __LINE__); \ > + } \ > + goto skip; \ > +} while (0) Extra line of space here > +static struct scrub_file_record **scrub_read_file(int fd, int report_errors) > +{ > + int avail = 0; > + int old_avail = 0; > + char l[512]; > + int state = 0; > + int curr = -1; > + int i = 0; > + int j; > + int ret; > + int eof = 0; > + int lineno = 0; > + u64 version; > + char empty_uuid[BTRFS_FSID_SIZE] = {0}; > + struct scrub_file_record **p = NULL; > + > + if (fd < 0) > + return ERR_PTR(-EINVAL); > + > +again: > + old_avail = avail-i; > + BUG_ON(old_avail < 0); > + if (old_avail) > + memmove(l, l+i, old_avail); > + avail = read(fd, l+old_avail, sizeof(l)-old_avail); > + if (avail == 0) { > + eof = 1; > + } > + if (avail + old_avail == 0) { > + if (curr >= 0 && > + memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) { > + p[curr] = NULL; > + } else if (curr == -1) { > + p = ERR_PTR(-ENODATA); > + } > + return p; > + } > + if (avail == -1) > + return ERR_PTR(-errno); If avail == -1 (i.e. the read failed) and old_avail == 1 (i.e. there was only one character left in the buffer), then that triggers the code in the previous if statement (avail+old_avail == 0) as well. > + avail += old_avail; > + > + i = 0; > + while (i < avail) { > + switch (state) { > + case 0: /* start if file */ of > + ret = _scrub_kvread(&i, > + sizeof(SCRUB_FILE_VERSION_PREFIX)-1, avail, l, [1] Drop the colon from SCRUB_FILE_VERSION_PREFIX and leave out the -1 here. > + SCRUB_FILE_VERSION_PREFIX, &version); > + if (ret != 1) > + _SCRUB_ILLEGAL; > + if (version != atoll(SCRUB_FILE_VERSION)) > + return ERR_PTR(-ENOTSUP); > + state = 6; > + continue; > + case 1: /* start of line, alloc */ > + if (!eof && !memchr(l+i, '\n', avail-i)) > + goto again; This will cause problems (an infinite loop), I think, if there's an input line greater than 512 bytes in length -- you can't find a line ending, so you go back to "again", read in zero bytes because you've not consumed anything in your buffer, and come back here to discover that there's no line ending... > + ++lineno; > + if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid, > + BTRFS_FSID_SIZE) == 0) { > + state = 2; > + continue; > + } > + ++curr; > + p = realloc(p, (curr+2)*sizeof(*p)); > + if (p) > + p[curr] = malloc(sizeof(**p)); > + if (!p || !p[curr]) > + return ERR_PTR(-errno); > + memset(p[curr], 0, sizeof(**p)); > + p[curr+1] = NULL; > + ++state; You probably need a comment here (and below, as appropriate) to indicate that the lack of a continue or break is intended, and not a bug. > + case 2: /* start of line, skip space */ > + while (isspace(l[i]) && i<avail) { > + if (l[i] == '\n') > + ++lineno; > + ++i; > + } > + if (i >= avail || (!eof && !memchr(l+i, '\n', avail-i))) > + goto again; > + ++state; > + case 3: /* read fsid */ > + if (i == avail) > + continue; > + for (j=0; l[i+j] != ':' && i+j < avail; ++j) > + ; > + if (i+j+1 >= avail) > + _SCRUB_ILLEGAL; Possibly a comment needed here to indicate that state 1 guarantees a full line of text in the buffer, so hitting the end of the buffer here is a fatal error. (It took me a while to work out why this case was always an error). > + if (j != 36) > + _SCRUB_ILLEGAL; > + l[i+j] = '\0'; > + ret = uuid_parse(l+i, p[curr]->fsid); > + if (ret) > + _SCRUB_ILLEGAL; > + i += j + 1; > + ++state; > + case 4: /* read dev id */ > + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) > + ; > + if (!j || i+j+1 >= avail) j == 0 is clearer than !j here, IMO > + _SCRUB_ILLEGAL; > + p[curr]->devid = atoll(&l[i]); > + i += j + 1; Is there any reason that you couldn't just use strtoull here? We know that the string is terminated with a \n (by the guarantee of state 1), so strtoull will always finish within the buffer. > + ++state; > + case 5: /* read key/value pair */ > + ret = _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, tree_extents_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, data_bytes_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, tree_bytes_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, read_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, csum_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, verify_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, no_csum, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, csum_discards, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, super_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, malloc_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, uncorrectable_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, corrected_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, last_physical, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, finished, avail, > + l, &p[curr]->stats) || > + _SCRUB_KVREAD(&i, t_start, avail, > + l, (u64*)&p[curr]->stats) || > + _SCRUB_KVREAD(&i, t_resumed, avail, > + l, (u64*)&p[curr]->stats) || > + _SCRUB_KVREAD(&i, duration, avail, > + l, (u64*)&p[curr]->stats) || > + _SCRUB_KVREAD(&i, canceled, avail, > + l, &p[curr]->stats); > + if (ret != 1) > + _SCRUB_ILLEGAL; If there's a syntax error in the parser (i.e. the matched key is not followed by a colon, or we run out of data), then _SCRUB_KVREAD returns -1, which is converted to 1 by the ||, and the error is dropped silently. > + ++state; > + case 6: /* after number */ > + if (l[i] == '|') { > + state = 5; > + } else if (l[i] == '\n') { > + state = 1; > + } else { > + _SCRUB_ILLEGAL; > + } > + ++i; > + continue; > + case 99: /* skip rest of line */ > +skip: > + state = 99; > + do { > + ++i; > + if (l[i-1] == '\n') { > + state = 1; > + break; > + } > + } while (i < avail); > + continue; > + } > + BUG(); > + } > + goto again; > +} > +#undef _SCRUB_ILLEGAL [...] > +static int scrub_write_file(int fd, const char *fsid, > + struct scrub_progress* data, int n) > +{ > + int ret = 0; > + int i; > + char buf[1024]; > + struct scrub_progress local; > + struct scrub_progress *use; > + > + if (n < 1) { > + return -EINVAL; > + } > + > + ret = _scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX SCRUB_FILE_VERSION > + "\n", sizeof(SCRUB_FILE_VERSION_PREFIX)-1 If you leave out the colon from SCRUB_FILE_VERSION_PREFIX, then you don't need the -1 here. You can add a literal colon in the middle of the string concatenation between the prefix and the version number. > + + sizeof(SCRUB_FILE_VERSION)-1 + 1); > + if (ret) > + return -EOVERFLOW; > + [...] > +static struct scrub_file_record *last_dev_scrub( > + struct scrub_file_record *const *const past_scrubs, u64 devid) > +{ > + int i; > + > + if (!past_scrubs || IS_ERR(past_scrubs)) > + return NULL; > + > + for (i=0; past_scrubs[i]; ++i) > + if (past_scrubs[i]->devid == devid) > + return past_scrubs[i]; > + > + return NULL; > +} > + > +static int scrub_device_info(int fd, u64 devid, > + struct btrfs_ioctl_dev_info_args *di_args) > +{ > + int ret; > + > + di_args->devid = devid; > + memset(&di_args->uuid, '\0', sizeof(di_args->uuid)); > + > + ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); > + return ret ? -errno : 0; > +} > + > +static int scrub_fs_info(int fd, char *path, > + struct btrfs_ioctl_fs_info_args *fi_args, > + struct btrfs_ioctl_dev_info_args **di_ret) > +{ > + int ret = 0; > + int ndevs = 0; > + int i = 1; > + struct btrfs_fs_devices* fs_devices_mnt = NULL; > + struct btrfs_ioctl_dev_info_args *di_args; > + char mp[BTRFS_PATH_NAME_MAX+1]; > + > + memset(fi_args, 0, sizeof(*fi_args)); > + > + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); > + if (ret && errno == EINVAL) { > + /* path is no mounted btrfs. try if it's a device */ > + ret = check_mounted_where(fd, path, mp, sizeof(mp), > + &fs_devices_mnt); > + if (!ret) > + return -EINVAL; > + fi_args->num_devices = 1; Is this a valid assumption? What happens if I pass just one device of a multi-device FS to "btrfs scrub start"? > + fi_args->max_id = fs_devices_mnt->latest_devid; > + i = fs_devices_mnt->latest_devid; > + memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); > + close(fd); > + fd = open_file_or_dir(mp); > + if (fd < 0) > + return -errno; > + } else if (ret) { > + return -errno; > + } > + > + if (!fi_args->num_devices) > + return 0; > + > + di_args = *di_ret = malloc(fi_args->num_devices*sizeof(*di_args)); > + if (!di_args) > + return -errno; > + > + for (; i<=fi_args->max_id; ++i) { > + BUG_ON(ndevs >= fi_args->num_devices); > + ret = scrub_device_info(fd, i, &di_args[ndevs]); > + if (ret == -ENODEV) > + continue; > + if (ret) > + return ret; > + ++ndevs; > + } > + > + BUG_ON(ndevs == 0); > + > + return 0; > +} > + > +int mkdir_p(char *path) > +{ > + int i; > + int ret; > + > + for (i=1; i<strlen(path); ++i) { > + if (path[i] != '/') > + continue; > + path[i] = '\0'; > + ret = mkdir(path, 0777); > + if (ret && errno != EEXIST) > + return 1; > + path[i] = '/'; > + } > + > + return 0; > +} > + > +static int scrub_start(int argc, char **argv, int resume) > +{ > + int fdmnt; > + int prg_fd = -1; > + int fdres = -1; > + int ret; > + pid_t pid; > + int c; > + int i; > + int err = 0; This clashes with the macro err(). OK, I know the compiler's clever enough to disambiguate, but it leads to nastiness like [2], below. > + int print_raw = 0; > + char *path; > + int do_background = 1; > + int do_wait = 0; > + int do_print = 0; > + int do_quiet = 0; > + int do_record = 1; > + int readonly = 0; > + int do_stats_per_dev = 0; > + int n_start = 0; > + int n_skip = 0; > + int n_resume = 0; > + struct btrfs_ioctl_fs_info_args fi_args; > + struct btrfs_ioctl_dev_info_args *di_args = NULL; > + struct scrub_progress *sp = NULL; > + struct scrub_fs_stat fs_stat; > + struct timeval tv; > + struct sockaddr_un addr = { > + .sun_family = AF_UNIX, > + }; > + pthread_t *t_devs = NULL; > + pthread_t t_prog; > + pthread_attr_t t_attr; > + struct scrub_file_record **past_scrubs = NULL; > + struct scrub_file_record *last_scrub = NULL; > + char *datafile = strdup(SCRUB_DATA_FILE); This is never freed. > + char fsid[37]; Magic number. is there a #define in libuuid for this value? > + char sock_path[BTRFS_PATH_NAME_MAX+1] = ""; > + struct scrub_progress_cycle spc; > + pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER; > + void *terr; > + u64 devid; > + > + optind = 1; > + while ((c = getopt(argc, argv, "BdqrR")) != -1) { > + switch(c) { > + case 'B': > + do_background = 0; > + do_wait = 1; > + do_print = 1; > + break; > + case 'd': > + do_stats_per_dev = 1; > + break; > + case 'q': > + do_quiet = 1; > + break; > + case 'r': > + readonly = 1; > + break; > + case 'R': > + print_raw = 1; > + break; > + case '?': > + default: > + fprintf(stderr, "ERROR: scrub args invalid.\n" > + " -B do not background (implies -W)\n" What's -W? > + " -d stats per device (-B only)\n" > + " -q quiet\n" > + " -r read only mode\n"); > + return 1; > + } > + } > + > + /* try to catch most error cases before forking */ > + > + spc.progress = NULL; > + if (do_quiet && do_print) > + do_print = 0; > + > + if (mkdir_p(datafile)) { > + err(!do_quiet, "WARNING: cannot create scrub data " > + "file, mkdir %s failed: %s. Status recording " > + "disabled\n", datafile, strerror(errno)); > + do_record = 0; > + } > + > + path = argv[optind]; No bounds check: hrm@ruthven:btrfs-progs-unstable $ ./btrfs scrub start -B ERROR: can't access '(null)' > + fdmnt = open_file_or_dir(path); > + if (fdmnt < 0) { > + err(!do_quiet, "ERROR: can't access '%s'\n", path); > + return 12; > + } > + > + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); > + if (ret) { > + err(!do_quiet, "ERROR: getting dev info for scrub failed: " > + "%s\n", strerror(-ret)); > + err = 1; > + goto out; > + } > + if (!fi_args.num_devices) { > + err(!do_quiet, "ERROR: no devices found\n"); > + err = 1; > + goto out; > + } > + > + uuid_unparse(fi_args.fsid, fsid); > + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); > + if (fdres < 0 && fdres != -ENOENT) { > + err(!do_quiet, "WARNING: failed to open status file: " > + "%s\n", strerror(-fdres)); > + } else if (fdres >= 0) { > + past_scrubs = scrub_read_file(fdres, !do_quiet); > + if (IS_ERR(past_scrubs)) > + err(!do_quiet, "WARNING: failed to read status file: " > + "%s\n", strerror(-PTR_ERR(past_scrubs))); > + close(fdres); > + } > + > + t_devs = malloc(fi_args.num_devices*sizeof(*t_devs)); > + sp = calloc(1, fi_args.num_devices*sizeof(*sp)); Shouldn't that be calloc(fi_args.num_devices, sizeof(*sp)) ? (OK, it doesn't make any particular difference, but it just seems odd to keep a dog and bark yourself). > + spc.progress = calloc(1, fi_args.num_devices*2*sizeof(*spc.progress)); Woof! (And why do we need twice as many progress markers as devices?) > + if (!t_devs || !sp || !spc.progress) { > + err(!do_quiet, "ERROR: scrub failed: %s", strerror(errno)); > + err = 1; [2] Eugh. Calling what looks like a function, then assigning a value to it. Can you call the variable something else? (Or make the macro a more obvious macro: ERR() say?) > + goto out; > + } > + > + ret = pthread_attr_init(&t_attr); > + if (ret) { > + err(!do_quiet, "ERROR: pthread_attr_init failed: %s\n", > + strerror(ret)); > + err = 1; > + goto out; > + } > + > + for (i = 0; i < fi_args.num_devices; ++i) { > + devid = di_args[i].devid; > + ret = pthread_mutex_init(&sp[i].progress_mutex, NULL); > + if (ret) { > + err(!do_quiet, "ERROR: pthread_mutex_init failed: " > + "%s\n", strerror(ret)); > + err = 1; > + goto out; > + } > + last_scrub = last_dev_scrub(past_scrubs, devid); > + sp[i].scrub_args.devid = devid; > + sp[i].fd = fdmnt; > + if (resume && last_scrub && (last_scrub->stats.canceled || > + !last_scrub->stats.finished)) { > + ++n_resume; > + sp[i].scrub_args.start = last_scrub->p.last_physical; > + sp[i].resumed = last_scrub; > + } else if (resume) { > + ++n_skip; > + sp[i].skip = 1; > + sp[i].resumed = last_scrub; > + continue; > + } else { > + ++n_start; > + sp[i].scrub_args.start = 0ll; > + sp[i].resumed = NULL; > + } > + sp[i].skip = 0; > + sp[i].scrub_args.end = (u64)-1ll; > + sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; > + } > + > + if (!n_start && !n_resume) { > + if (!do_quiet) > + printf("scrub: nothing to resume for %s, fsid %s\n", > + path, fsid); > + err = 0; > + goto out; > + } > + > + ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0); > + while (ret != -1) { > + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, > + NULL, sock_path, sizeof(sock_path)); > + /* ignore EOVERFLOW, as strncpy follows anyway */ The name in sock_path could still be truncated on -EOVERFLOW, though. Is that always safe? > + strncpy(addr.sun_path, sock_path, > + sizeof(addr.sun_path)-1); > + ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); > + if (ret != -1 || errno != EADDRINUSE) > + break; If we failed to bind because the address was in use, is there much point in trying to connect to the socket here? > + ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); > + if (!ret || errno != ECONNREFUSED) { > + fprintf(stderr, "ERROR: scrub already running\n"); > + close(prg_fd); > + goto out; > + } > + ret = unlink(sock_path); Under the right (wrong) set of circumstances, isn't this loop going to busy-wait? > + } > + if (ret != -1) { > + ret = listen(prg_fd, 100); > + } > + if (ret == -1) { > + err(!do_quiet, "WARNING: failed to open the progress status " > + "socket at %s: %s. Progress cannot be queried\n", > + sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH, > + strerror(errno)); > + if (prg_fd != -1) { > + close(prg_fd); > + prg_fd = -1; > + if (sock_path[0]) > + unlink(sock_path); > + } > + } > + > + if (do_record) { > + /* write all-zero progress file for a start */ > + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, > + fi_args.num_devices); -HRM: Unchecked scrub_write_progress > + if (ret) { > + err(!do_quiet, "WARNING: failed to write the progress " > + "status file: %s. Status recording disabled\n", > + strerror(-ret)); > + do_record = 0; > + } > + } > + > + if (do_background) { > + pid = fork(); > + if (pid == -1) { > + err(!do_quiet, "ERROR: cannot scrub, fork failed: " > + "%s\n", strerror(errno)); > + err = 1; > + goto out; > + } > + > + if (pid) { > + int stat; > + scrub_handle_sigint_parent(); > + if (!do_quiet) > + printf("scrub %s on %s, fsid %s (pid=%d)\n", > + n_start ? "started" : "resumed", > + path, fsid, pid); > + if (!do_wait) { > + err = 0; > + goto out; > + } > + ret = wait(&stat); > + if (ret != pid) { > + err(!do_quiet, "ERROR: wait failed: (ret=%d) " > + "%s\n", ret, strerror(errno)); > + err = 1; > + goto out; > + } > + if (!WIFEXITED(stat) || WEXITSTATUS(stat)) { > + err(!do_quiet, "ERROR: scrub process failed\n"); > + err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1; > + goto out; > + } > + err = 0; > + goto out; > + } > + } > + > + scrub_handle_sigint_child(fdmnt); > + > + for (i = 0; i < fi_args.num_devices; ++i) { > + if (sp[i].skip) { > + sp[i].scrub_args.progress = sp[i].resumed->p; > + sp[i].stats = sp[i].resumed->stats; > + sp[i].ret = 0; > + sp[i].stats.finished = 1; > + continue; > + } > + devid = di_args[i].devid; > + gettimeofday(&tv, NULL); > + sp[i].stats.t_start = tv.tv_sec; > + ret = pthread_create(&t_devs[i], &t_attr, scrub_one_dev,&sp[i]); -HRM not checked scrub_one_dev() > + if (ret) { > + if (do_print) > + fprintf(stderr, "ERROR: creating " > + "scrub_one_dev[%llu] thread failed: " > + "%s\n", devid, strerror(ret)); > + err = 1; > + goto out; > + } > + } > + > + spc.fdmnt = fdmnt; > + spc.prg_fd = prg_fd; > + spc.do_record = do_record; > + spc.write_mutex = &spc_write_mutex; > + spc.shared_progress = sp; > + spc.fi = &fi_args; > + pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc); > + -HRM: Not checked: scrub_progress_cycle() > + err = 0; > + for (i = 0; i < fi_args.num_devices; ++i) { > + if (sp[i].skip) > + continue; > + devid = di_args[i].devid; > + ret = pthread_join(t_devs[i], NULL); > + if (ret) { > + if (do_print) > + fprintf(stderr, "ERROR: pthread_join failed " > + "for scrub_one_dev[%llu]: %s\n", devid, > + strerror(ret)); > + err++; > + continue; > + } > + if (sp[i].ret && sp[i].ioctl_errno == ENODEV) { > + if (do_print) > + fprintf(stderr, "WARNING: device %lld not " > + "present\n", devid); > + continue; > + } > + if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) { > + err++; > + } else if (sp[i].ret) { > + if (do_print) > + fprintf(stderr, "ERROR: scrubbing %s failed " > + "for device id %lld (%s)\n", path, > + devid, strerror(sp[i].ioctl_errno)); > + err++; > + continue; > + } > + } > + > + if (do_print) { > + const char *append = "done"; > + if (!do_stats_per_dev) > + init_fs_stat(&fs_stat); > + for (i = 0; i < fi_args.num_devices; ++i) { > + if (do_stats_per_dev) { > + print_scrub_dev(&di_args[i], > + &sp[i].scrub_args.progress, > + print_raw, > + sp[i].ret ? "canceled" : "done", > + &sp[i].stats); > + } else { > + if (sp[i].ret) > + append = "canceled"; > + add_to_fs_stat(&sp[i].scrub_args.progress, > + &sp[i].stats, &fs_stat); > + } > + } > + if (!do_stats_per_dev) { > + printf("scrub %s for %s\n", append, fsid); > + print_fs_stat(&fs_stat, print_raw); > + } > + } > + > + pthread_cancel(t_prog); > + ret = pthread_join(t_prog, &terr); Does this need to happen before the output above? Is there a possible race between scrub_progress_cycle() and the stats gathering code here? (I've not looked at scrub_progress_cycle() in detail yet, so I don't know). > + if (do_print && terr && terr != PTHREAD_CANCELED) { > + fprintf(stderr, "ERROR: recording progress " > + "failed: %s\n", strerror(-PTR_ERR(terr))); > + } > + > + if (do_record) { > + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, > + fi_args.num_devices); -HRM Not checked scrub_write_progress() > + if (ret && do_print) { > + fprintf(stderr, "ERROR: failed to record the result: " > + "%s\n", strerror(-ret)); > + } > + } > + > + scrub_handle_sigint_child(-1); > + > +out: > + free_history(past_scrubs); > + free(di_args); > + free(t_devs); > + free(sp); > + free(spc.progress); > + if (prg_fd > -1) { > + close(prg_fd); > + if (sock_path[0]) > + unlink(sock_path); > + } > + close(fdmnt); > + > + return !!err; > +} > + > +int do_scrub_start(int argc, char **argv) > +{ > + return scrub_start(argc, argv, 0); > +} > + > +int do_scrub_resume(int argc, char **argv) > +{ > + return scrub_start(argc, argv, 1); > +} [...]
On 10.07.2011 20:23, Hugo Mills wrote: > Yes, this is over three months after the initial posting, but since > nobody else has looked at it yet, and the patch is in my integration > stack... ... thanks! > I've not reviewed the whole thing -- just the "scrub start" code so > far. I've removed the bits I've not checked from the file below. I rebased the old branch I found to your current integration branch and fixed up a most of what you mentioned. I'll not send a new version out until after your complete review (or your statement that this is it or your statement that you would rather going on reviewing the revised version). Things I ripped out are accepted and corrected without resistance. Comments follow. > On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: > > No commit message at all? Didn't know what to put there. Cover letter says it all. And as mentioned, this is the initial implementation. >> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> >> --- >> scrub.c | 1568 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 files changed, 1568 insertions(+), 0 deletions(-) > > This is quite big to review in one lump... Is it possible to split > the patch into functional sections? (Add shared infrastructure, then > each of the four functions separately, maybe?) Thought about that, but it doesn't make sense to me. It is the initial implementation. A lot of the code is shared, thus adding one lump and patching the patch with four small additional commits wouldn't help much. >> diff --git a/scrub.c b/scrub.c >> new file mode 100644 >> index 0000000..22052ed >> --- /dev/null >> +++ b/scrub.c >> +#define SCRUB_DATA_FILE "/var/btrfs/scrub.status" >> +#define SCRUB_PROGRESS_SOCKET_PATH "/var/btrfs/scrub.progress" > > I'd suggest /var/lib/btrfs/[...] instead. Putting it in the top > level of /var seems a bit presumptuous (and contravenes the FHS). I wasn't sure if I can expect /var/lib to be present anywhere btrfs could run. But I changed it to what you suggested. >> + printf("\ttotal bytes scrubbed: %s with %llu errors\n", >> + pretty_sizes(p->data_bytes_scrubbed + p->tree_bytes_scrubbed), >> + max(err_cnt, err_cnt2)); > > Memory leak: pretty_sizes() mallocs space for its result. Pah... In a user space function of a run-once utility right before it exits. But I fixed that one, just to please you :-) >> +static void init_fs_stat(struct scrub_fs_stat *fs_stat) >> +{ >> + memset(fs_stat, 0, sizeof(*fs_stat)); >> + fs_stat->s.finished = 2; > > What does 2 mean? ->s.finished seems to be a boolean everywhere > except here. Can you turn this value into a more descriptive #define? > Or just use 1? Good question. I guess I once wanted to distinguish really finished scrub runs from not-even-started ones. I changed it to 1 (which makes it much more likely we'll need that distinction quite soon). >> +static int cancel_fd = -1; >> +static void scrub_sigint_record_progress(int signal) > > What does this function have to do with recording progress? Seems a > bit of a misnomer to me. (Call it scrub_sigint_cancel_scrub, maybe?) I added a comment and left the name unchanged. >> +{ >> + ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); >> +} >> + >> +static int scrub_handle_sigint_parent(void) >> +{ >> + struct sigaction sa = { >> + .sa_handler = SIG_IGN, >> + .sa_flags = SA_RESTART, >> + }; >> + >> + return sigaction(SIGINT, &sa, NULL); >> +} >> + >> +static int scrub_handle_sigint_child(int fd) >> +{ >> + struct sigaction sa = { >> + .sa_handler = fd == -1 ? SIG_DFL : scrub_sigint_record_progress, >> + }; >> + >> + cancel_fd = fd; >> + return sigaction(SIGINT, &sa, NULL); >> +} >> + >> +static int _scrub_datafile(const char *fn_base, const char *fn_local, >> + const char *fn_tmp, char *datafile, int max) >> +{ >> + int ret; >> + >> + strncpy(datafile, fn_base, max); > > You need to put a zero byte at datafile[max], otherwise it could be > unterminated (if max <= strlen(fn_base)), and the strlen will then run > off the end of the string. Damn. strncpy is a mess. I want strlcpy. I Modified the code another way. I rather return an error than throwing away bytes and continue happily. strncpy third arg always - 1, thus we always have a 0 byte at the end of the buffer. I then compare strlen to the buffer size. >> + ret = strlen(datafile); >> + >> + if (ret + 1 >= max) >> + return -EOVERFLOW; > > This will never happen (if you put the zero terminator in) > >> + datafile[ret] = '.'; >> + strncpy(datafile+ret+1, fn_local, max-ret-1); > > ... and add a zero byte here, too (or use strncat) > >> + ret = strlen(datafile); >> + >> + if (ret + 1 >= max) >> + return -EOVERFLOW; > > as above: won't happen > >> + if (fn_tmp) { >> + datafile[ret] = '_'; >> + strncpy(datafile+ret+1, fn_tmp, max-ret-1); > > ... and add a zero byte here (or use strncat) > >> + ret = strlen(datafile); >> + >> + if (ret >= max) >> + return -EOVERFLOW; >> + } >> + >> + return 0; >> +} >> + >> +static int _scrub_open_file(const char *datafile, int m) > > Just a niggle: Why the leading _ when other scrub-specific > functions don't have it? (There's about a dozen other such symbols -- > was there a criterion you used to decide which ones have _ and which > don't?) Looking at the function names today, I think the criterion was "everything i/o based get's a _", which is rubbish. I dropped all the '_'s. >> +static struct scrub_file_record **scrub_read_file(int fd, int report_errors) >> +{ >> + int avail = 0; >> + int old_avail = 0; >> + char l[512]; >> + int state = 0; >> + int curr = -1; >> + int i = 0; >> + int j; >> + int ret; >> + int eof = 0; >> + int lineno = 0; >> + u64 version; >> + char empty_uuid[BTRFS_FSID_SIZE] = {0}; >> + struct scrub_file_record **p = NULL; >> + >> + if (fd < 0) >> + return ERR_PTR(-EINVAL); >> + >> +again: >> + old_avail = avail-i; >> + BUG_ON(old_avail < 0); >> + if (old_avail) >> + memmove(l, l+i, old_avail); >> + avail = read(fd, l+old_avail, sizeof(l)-old_avail); >> + if (avail == 0) { >> + eof = 1; >> + } >> + if (avail + old_avail == 0) { >> + if (curr >= 0 && >> + memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) { >> + p[curr] = NULL; >> + } else if (curr == -1) { >> + p = ERR_PTR(-ENODATA); >> + } >> + return p; >> + } >> + if (avail == -1) >> + return ERR_PTR(-errno); > > If avail == -1 (i.e. the read failed) and old_avail == 1 > (i.e. there was only one character left in the buffer), then that > triggers the code in the previous if statement (avail+old_avail == 0) > as well. > >> + avail += old_avail; >> + >> + i = 0; >> + while (i < avail) { >> + switch (state) { >> + case 0: /* start if file */ > of > >> + ret = _scrub_kvread(&i, >> + sizeof(SCRUB_FILE_VERSION_PREFIX)-1, avail, l, > > [1] Drop the colon from SCRUB_FILE_VERSION_PREFIX and leave out the > -1 here. > >> + SCRUB_FILE_VERSION_PREFIX, &version); >> + if (ret != 1) >> + _SCRUB_ILLEGAL; >> + if (version != atoll(SCRUB_FILE_VERSION)) >> + return ERR_PTR(-ENOTSUP); >> + state = 6; >> + continue; >> + case 1: /* start of line, alloc */ >> + if (!eof && !memchr(l+i, '\n', avail-i)) >> + goto again; > > This will cause problems (an infinite loop), I think, if there's an > input line greater than 512 bytes in length -- you can't find a line > ending, so you go back to "again", read in zero bytes because you've > not consumed anything in your buffer, and come back here to discover > that there's no line ending... No, it will complain about a too long line. We go to "again", read 0 characters (which is fine, returns 0), then set the eof flag (which is kind of a bug) and stop processing. I.e., whenever we encounter a line longer than sizeof(l), we tell this fact to the user and stop processing for that file completely. I increased that buffer to 16k, which sounds big enough for quite some file format extensions. >> + ++lineno; >> + if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid, >> + BTRFS_FSID_SIZE) == 0) { >> + state = 2; >> + continue; >> + } >> + ++curr; >> + p = realloc(p, (curr+2)*sizeof(*p)); >> + if (p) >> + p[curr] = malloc(sizeof(**p)); >> + if (!p || !p[curr]) >> + return ERR_PTR(-errno); >> + memset(p[curr], 0, sizeof(**p)); >> + p[curr+1] = NULL; >> + ++state; > > You probably need a comment here (and below, as appropriate) to > indicate that the lack of a continue or break is intended, and not a > bug. > >> + case 2: /* start of line, skip space */ >> + while (isspace(l[i]) && i<avail) { >> + if (l[i] == '\n') >> + ++lineno; >> + ++i; >> + } >> + if (i >= avail || (!eof && !memchr(l+i, '\n', avail-i))) >> + goto again; >> + ++state; >> + case 3: /* read fsid */ >> + if (i == avail) >> + continue; >> + for (j=0; l[i+j] != ':' && i+j < avail; ++j) >> + ; >> + if (i+j+1 >= avail) >> + _SCRUB_ILLEGAL; > > Possibly a comment needed here to indicate that state 1 guarantees > a full line of text in the buffer, so hitting the end of the buffer > here is a fatal error. (It took me a while to work out why this case > was always an error). Comment added in "case 1", instead, because this holds true for each _SCRUB_INVALID (let's call it invalid) in this block. >> + if (j != 36) >> + _SCRUB_ILLEGAL; >> + l[i+j] = '\0'; >> + ret = uuid_parse(l+i, p[curr]->fsid); >> + if (ret) >> + _SCRUB_ILLEGAL; >> + i += j + 1; >> + ++state; >> + case 4: /* read dev id */ >> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) >> + ; >> + if (!j || i+j+1 >= avail) > > j == 0 is clearer than !j here, IMO > >> + _SCRUB_ILLEGAL; >> + p[curr]->devid = atoll(&l[i]); >> + i += j + 1; > > Is there any reason that you couldn't just use strtoull here? We > know that the string is terminated with a \n (by the guarantee of > state 1), so strtoull will always finish within the buffer. I just found it way easier to use atoll. We already know the first character really is a digit, so why bother with a more cumbersome function? >> + ++state; >> + case 5: /* read key/value pair */ >> + ret = _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, tree_extents_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, data_bytes_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, tree_bytes_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, read_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, csum_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, verify_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, no_csum, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, csum_discards, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, super_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, malloc_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, uncorrectable_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, corrected_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, last_physical, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, finished, avail, >> + l, &p[curr]->stats) || >> + _SCRUB_KVREAD(&i, t_start, avail, >> + l, (u64*)&p[curr]->stats) || >> + _SCRUB_KVREAD(&i, t_resumed, avail, >> + l, (u64*)&p[curr]->stats) || >> + _SCRUB_KVREAD(&i, duration, avail, >> + l, (u64*)&p[curr]->stats) || >> + _SCRUB_KVREAD(&i, canceled, avail, >> + l, &p[curr]->stats); >> + if (ret != 1) >> + _SCRUB_ILLEGAL; > > If there's a syntax error in the parser (i.e. the matched key is > not followed by a colon, or we run out of data), then _SCRUB_KVREAD > returns -1, which is converted to 1 by the ||, and the error is > dropped silently. Oops. Fixed by defining 0 to be the success return value for scrub_kvread (sounds better, anyway). The check then looks for ret != 0 instead. >> +static int scrub_fs_info(int fd, char *path, >> + struct btrfs_ioctl_fs_info_args *fi_args, >> + struct btrfs_ioctl_dev_info_args **di_ret) >> +{ >> + int ret = 0; >> + int ndevs = 0; >> + int i = 1; >> + struct btrfs_fs_devices* fs_devices_mnt = NULL; >> + struct btrfs_ioctl_dev_info_args *di_args; >> + char mp[BTRFS_PATH_NAME_MAX+1]; >> + >> + memset(fi_args, 0, sizeof(*fi_args)); >> + >> + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); >> + if (ret && errno == EINVAL) { >> + /* path is no mounted btrfs. try if it's a device */ >> + ret = check_mounted_where(fd, path, mp, sizeof(mp), >> + &fs_devices_mnt); >> + if (!ret) >> + return -EINVAL; >> + fi_args->num_devices = 1; > > Is this a valid assumption? What happens if I pass just one device > of a multi-device FS to "btrfs scrub start"? You tell scrub to scrub that one device, then. At least for raid1, this option makes sense. >> +static int scrub_start(int argc, char **argv, int resume) >> +{ >> + int fdmnt; >> + int prg_fd = -1; >> + int fdres = -1; >> + int ret; >> + pid_t pid; >> + int c; >> + int i; >> + int err = 0; > > This clashes with the macro err(). OK, I know the compiler's clever > enough to disambiguate, but it leads to nastiness like [2], below. Okok. Macro is now ERR(). >> + int print_raw = 0; >> + char *path; >> + int do_background = 1; >> + int do_wait = 0; >> + int do_print = 0; >> + int do_quiet = 0; >> + int do_record = 1; >> + int readonly = 0; >> + int do_stats_per_dev = 0; >> + int n_start = 0; >> + int n_skip = 0; >> + int n_resume = 0; >> + struct btrfs_ioctl_fs_info_args fi_args; >> + struct btrfs_ioctl_dev_info_args *di_args = NULL; >> + struct scrub_progress *sp = NULL; >> + struct scrub_fs_stat fs_stat; >> + struct timeval tv; >> + struct sockaddr_un addr = { >> + .sun_family = AF_UNIX, >> + }; >> + pthread_t *t_devs = NULL; >> + pthread_t t_prog; >> + pthread_attr_t t_attr; >> + struct scrub_file_record **past_scrubs = NULL; >> + struct scrub_file_record *last_scrub = NULL; >> + char *datafile = strdup(SCRUB_DATA_FILE); > > This is never freed. > >> + char fsid[37]; > > Magic number. is there a #define in libuuid for this value? At least the man page of uuid_parse clearly states uuids have 36 bytes plus a \0 in printf format. uuid/uuid.h doesn't contain such a constant. But volumes.c, print-tree.c and others do it with 37, too. >> + char sock_path[BTRFS_PATH_NAME_MAX+1] = ""; >> + struct scrub_progress_cycle spc; >> + pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER; >> + void *terr; >> + u64 devid; >> + >> + optind = 1; >> + while ((c = getopt(argc, argv, "BdqrR")) != -1) { >> + switch(c) { >> + case 'B': >> + do_background = 0; >> + do_wait = 1; >> + do_print = 1; >> + break; >> + case 'd': >> + do_stats_per_dev = 1; >> + break; >> + case 'q': >> + do_quiet = 1; >> + break; >> + case 'r': >> + readonly = 1; >> + break; >> + case 'R': >> + print_raw = 1; >> + break; >> + case '?': >> + default: >> + fprintf(stderr, "ERROR: scrub args invalid.\n" >> + " -B do not background (implies -W)\n" > > What's -W? A development option that was removed before submitting v2 and is removed for v3 now from that comment above as well. >> + " -d stats per device (-B only)\n" >> + " -q quiet\n" >> + " -r read only mode\n"); >> + return 1; >> + } >> + } >> + >> + /* try to catch most error cases before forking */ >> + >> + spc.progress = NULL; >> + if (do_quiet && do_print) >> + do_print = 0; >> + >> + if (mkdir_p(datafile)) { >> + err(!do_quiet, "WARNING: cannot create scrub data " >> + "file, mkdir %s failed: %s. Status recording " >> + "disabled\n", datafile, strerror(errno)); >> + do_record = 0; >> + } >> + >> + path = argv[optind]; > > No bounds check: > > hrm@ruthven:btrfs-progs-unstable $ ./btrfs scrub start -B > ERROR: can't access '(null)' > >> + fdmnt = open_file_or_dir(path); >> + if (fdmnt < 0) { >> + err(!do_quiet, "ERROR: can't access '%s'\n", path); >> + return 12; >> + } >> + >> + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); >> + if (ret) { >> + err(!do_quiet, "ERROR: getting dev info for scrub failed: " >> + "%s\n", strerror(-ret)); >> + err = 1; >> + goto out; >> + } >> + if (!fi_args.num_devices) { >> + err(!do_quiet, "ERROR: no devices found\n"); >> + err = 1; >> + goto out; >> + } >> + >> + uuid_unparse(fi_args.fsid, fsid); >> + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); >> + if (fdres < 0 && fdres != -ENOENT) { >> + err(!do_quiet, "WARNING: failed to open status file: " >> + "%s\n", strerror(-fdres)); >> + } else if (fdres >= 0) { >> + past_scrubs = scrub_read_file(fdres, !do_quiet); >> + if (IS_ERR(past_scrubs)) >> + err(!do_quiet, "WARNING: failed to read status file: " >> + "%s\n", strerror(-PTR_ERR(past_scrubs))); >> + close(fdres); >> + } >> + >> + t_devs = malloc(fi_args.num_devices*sizeof(*t_devs)); >> + sp = calloc(1, fi_args.num_devices*sizeof(*sp)); > > Shouldn't that be calloc(fi_args.num_devices, sizeof(*sp)) ? (OK, > it doesn't make any particular difference, but it just seems odd to > keep a dog and bark yourself). Sounds quite reasonable. >> + spc.progress = calloc(1, fi_args.num_devices*2*sizeof(*spc.progress)); > > Woof! (And why do we need twice as many progress markers as devices?) We need them for consistency reasons. Both are used alternately when progress is recorded to make sure we never write a half baked record. >> + if (!t_devs || !sp || !spc.progress) { >> + err(!do_quiet, "ERROR: scrub failed: %s", strerror(errno)); >> + err = 1; > > [2] Eugh. Calling what looks like a function, then assigning a > value to it. Can you call the variable something else? (Or make the > macro a more obvious macro: ERR() say?) > >> + goto out; >> + } >> + >> + ret = pthread_attr_init(&t_attr); >> + if (ret) { >> + err(!do_quiet, "ERROR: pthread_attr_init failed: %s\n", >> + strerror(ret)); >> + err = 1; >> + goto out; >> + } >> + >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + devid = di_args[i].devid; >> + ret = pthread_mutex_init(&sp[i].progress_mutex, NULL); >> + if (ret) { >> + err(!do_quiet, "ERROR: pthread_mutex_init failed: " >> + "%s\n", strerror(ret)); >> + err = 1; >> + goto out; >> + } >> + last_scrub = last_dev_scrub(past_scrubs, devid); >> + sp[i].scrub_args.devid = devid; >> + sp[i].fd = fdmnt; >> + if (resume && last_scrub && (last_scrub->stats.canceled || >> + !last_scrub->stats.finished)) { >> + ++n_resume; >> + sp[i].scrub_args.start = last_scrub->p.last_physical; >> + sp[i].resumed = last_scrub; >> + } else if (resume) { >> + ++n_skip; >> + sp[i].skip = 1; >> + sp[i].resumed = last_scrub; >> + continue; >> + } else { >> + ++n_start; >> + sp[i].scrub_args.start = 0ll; >> + sp[i].resumed = NULL; >> + } >> + sp[i].skip = 0; >> + sp[i].scrub_args.end = (u64)-1ll; >> + sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; >> + } >> + >> + if (!n_start && !n_resume) { >> + if (!do_quiet) >> + printf("scrub: nothing to resume for %s, fsid %s\n", >> + path, fsid); >> + err = 0; >> + goto out; >> + } >> + >> + ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0); >> + while (ret != -1) { >> + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, >> + NULL, sock_path, sizeof(sock_path)); >> + /* ignore EOVERFLOW, as strncpy follows anyway */ > > The name in sock_path could still be truncated on -EOVERFLOW, > though. Is that always safe? Yeah, no, that wasn't nice. It's changed now. >> + strncpy(addr.sun_path, sock_path, >> + sizeof(addr.sun_path)-1); >> + ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); >> + if (ret != -1 || errno != EADDRINUSE) >> + break; > > If we failed to bind because the address was in use, is there much > point in trying to connect to the socket here? Had to think about those lines for a moment, so I'll add a comment. It's code that cares for an orphan socket file: try to connect, if that's working, scrub must be running, so error out. If it doesn't, unlink the file and loop. >> + ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); >> + if (!ret || errno != ECONNREFUSED) { >> + fprintf(stderr, "ERROR: scrub already running\n"); >> + close(prg_fd); >> + goto out; >> + } >> + ret = unlink(sock_path); > > Under the right (wrong) set of circumstances, isn't this loop going > to busy-wait? In general, the loop can't execute more than twice. If you manage to recreate that socket file in exactly the same moment over and over again, that's possible, yes. But you would have to do that externally on your own, this can't happen if you only use btrfs scrub. >> + } >> + if (ret != -1) { >> + ret = listen(prg_fd, 100); >> + } >> + if (ret == -1) { >> + err(!do_quiet, "WARNING: failed to open the progress status " >> + "socket at %s: %s. Progress cannot be queried\n", >> + sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH, >> + strerror(errno)); >> + if (prg_fd != -1) { >> + close(prg_fd); >> + prg_fd = -1; >> + if (sock_path[0]) >> + unlink(sock_path); >> + } >> + } >> + >> + if (do_record) { >> + /* write all-zero progress file for a start */ >> + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, >> + fi_args.num_devices); > > -HRM: Unchecked scrub_write_progress > >> + if (ret) { >> + err(!do_quiet, "WARNING: failed to write the progress " >> + "status file: %s. Status recording disabled\n", >> + strerror(-ret)); >> + do_record = 0; >> + } >> + } >> + >> + if (do_background) { >> + pid = fork(); >> + if (pid == -1) { >> + err(!do_quiet, "ERROR: cannot scrub, fork failed: " >> + "%s\n", strerror(errno)); >> + err = 1; >> + goto out; >> + } >> + >> + if (pid) { >> + int stat; >> + scrub_handle_sigint_parent(); >> + if (!do_quiet) >> + printf("scrub %s on %s, fsid %s (pid=%d)\n", >> + n_start ? "started" : "resumed", >> + path, fsid, pid); >> + if (!do_wait) { >> + err = 0; >> + goto out; >> + } >> + ret = wait(&stat); >> + if (ret != pid) { >> + err(!do_quiet, "ERROR: wait failed: (ret=%d) " >> + "%s\n", ret, strerror(errno)); >> + err = 1; >> + goto out; >> + } >> + if (!WIFEXITED(stat) || WEXITSTATUS(stat)) { >> + err(!do_quiet, "ERROR: scrub process failed\n"); >> + err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1; >> + goto out; >> + } >> + err = 0; >> + goto out; >> + } >> + } >> + >> + scrub_handle_sigint_child(fdmnt); >> + >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + if (sp[i].skip) { >> + sp[i].scrub_args.progress = sp[i].resumed->p; >> + sp[i].stats = sp[i].resumed->stats; >> + sp[i].ret = 0; >> + sp[i].stats.finished = 1; >> + continue; >> + } >> + devid = di_args[i].devid; >> + gettimeofday(&tv, NULL); >> + sp[i].stats.t_start = tv.tv_sec; >> + ret = pthread_create(&t_devs[i], &t_attr, scrub_one_dev,&sp[i]); > > -HRM not checked scrub_one_dev() > >> + if (ret) { >> + if (do_print) >> + fprintf(stderr, "ERROR: creating " >> + "scrub_one_dev[%llu] thread failed: " >> + "%s\n", devid, strerror(ret)); >> + err = 1; >> + goto out; >> + } >> + } >> + >> + spc.fdmnt = fdmnt; >> + spc.prg_fd = prg_fd; >> + spc.do_record = do_record; >> + spc.write_mutex = &spc_write_mutex; >> + spc.shared_progress = sp; >> + spc.fi = &fi_args; >> + pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc); >> + > > -HRM: Not checked: scrub_progress_cycle() > >> + err = 0; >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + if (sp[i].skip) >> + continue; >> + devid = di_args[i].devid; HRM: mark1 >> + ret = pthread_join(t_devs[i], NULL); >> + if (ret) { >> + if (do_print) >> + fprintf(stderr, "ERROR: pthread_join failed " >> + "for scrub_one_dev[%llu]: %s\n", devid, >> + strerror(ret)); >> + err++; >> + continue; >> + } >> + if (sp[i].ret && sp[i].ioctl_errno == ENODEV) { >> + if (do_print) >> + fprintf(stderr, "WARNING: device %lld not " >> + "present\n", devid); >> + continue; >> + } >> + if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) { >> + err++; >> + } else if (sp[i].ret) { >> + if (do_print) >> + fprintf(stderr, "ERROR: scrubbing %s failed " >> + "for device id %lld (%s)\n", path, >> + devid, strerror(sp[i].ioctl_errno)); >> + err++; >> + continue; >> + } >> + } >> + >> + if (do_print) { >> + const char *append = "done"; >> + if (!do_stats_per_dev) >> + init_fs_stat(&fs_stat); >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + if (do_stats_per_dev) { >> + print_scrub_dev(&di_args[i], >> + &sp[i].scrub_args.progress, >> + print_raw, >> + sp[i].ret ? "canceled" : "done", >> + &sp[i].stats); >> + } else { >> + if (sp[i].ret) >> + append = "canceled"; >> + add_to_fs_stat(&sp[i].scrub_args.progress, >> + &sp[i].stats, &fs_stat); >> + } >> + } >> + if (!do_stats_per_dev) { >> + printf("scrub %s for %s\n", append, fsid); >> + print_fs_stat(&fs_stat, print_raw); >> + } >> + } >> + >> + pthread_cancel(t_prog); >> + ret = pthread_join(t_prog, &terr); > > Does this need to happen before the output above? Is there a No, it doesn't. Look at the line below the "mark1" mark above. (What a sentence!) There we join all the worker threads and print their results. After succeeding, all workers must be done, so the progress thread can't do anything useful anymore. > possible race between scrub_progress_cycle() and the stats gathering > code here? (I've not looked at scrub_progress_cycle() in detail yet, > so I don't know). As I see it, no. In addition to the fact that there won't come any fresh progress notification at this state, we have those progress fields twice to ensure consistency together with the progress_mutex taken by scrub_progress_cycle. -Jan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
OK, here's the remainder of my comments for this file. Not much for this bit -- just one comment about locking, a reminder, and an observation. On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: [...] > +static int _scrub_write_buf(int fd, const void *data, int len) > +{ > + int ret; > + ret = write(fd, data, len); > + return ret - len; > +} > + > +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) > + __attribute__ ((format (printf, 4, 5))); > +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) > +{ > + int ret; > + va_list args; > + > + va_start(args, fmt); > + ret = vsnprintf(buf, max, fmt, args); > + va_end(args); > + if (ret >= max) > + return ret - max; > + return _scrub_write_buf(fd, buf, ret); > +} > + > +#define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name = \ > + data->resumed->p.name + data->scrub_args.progress.name > +static struct scrub_progress *_scrub_resumed_stats(struct scrub_progress *data, > + struct scrub_progress *dest) > +{ > + if (!data->resumed || data->skip) > + return data; > + > + _SCRUB_SUM(dest, data, data_extents_scrubbed); > + _SCRUB_SUM(dest, data, tree_extents_scrubbed); > + _SCRUB_SUM(dest, data, data_bytes_scrubbed); > + _SCRUB_SUM(dest, data, tree_bytes_scrubbed); > + _SCRUB_SUM(dest, data, read_errors); > + _SCRUB_SUM(dest, data, csum_errors); > + _SCRUB_SUM(dest, data, verify_errors); > + _SCRUB_SUM(dest, data, no_csum); > + _SCRUB_SUM(dest, data, csum_discards); > + _SCRUB_SUM(dest, data, super_errors); > + _SCRUB_SUM(dest, data, malloc_errors); > + _SCRUB_SUM(dest, data, uncorrectable_errors); > + _SCRUB_SUM(dest, data, corrected_errors); > + _SCRUB_SUM(dest, data, last_physical); > + dest->stats.canceled = data->stats.canceled; > + dest->stats.finished = data->stats.finished; > + dest->stats.t_resumed = data->stats.t_start; > + dest->stats.t_start = data->resumed->stats.t_start; > + dest->stats.duration = data->resumed->stats.duration + > + data->stats.duration; > + dest->scrub_args.devid = data->scrub_args.devid; > + return dest; > +} > + > +#define _SCRUB_KVWRITE(fd, buf, name, use) \ > + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ > + use->scrub_args.progress.name) > +#define _SCRUB_KVWRITE_STATS(fd, buf, name, use) \ > + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ > + use->stats.name) > +static int _scrub_kvwrite(int fd, char *buf, int max, > + const char *key, u64 val) > +{ > + return _scrub_writev(fd, buf, max, "|%s:%lld", key, val); > +} > + > +static int scrub_write_file(int fd, const char *fsid, > + struct scrub_progress* data, int n) > +{ > + int ret = 0; > + int i; > + char buf[1024]; > + struct scrub_progress local; > + struct scrub_progress *use; > + > + if (n < 1) { > + return -EINVAL; > + } > + > + ret = _scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX SCRUB_FILE_VERSION > + "\n", sizeof(SCRUB_FILE_VERSION_PREFIX)-1 > + + sizeof(SCRUB_FILE_VERSION)-1 + 1); > + if (ret) > + return -EOVERFLOW; > + > + for (i=0; i<n; ++i) { > + use = _scrub_resumed_stats(&data[i], &local); > + if (_scrub_write_buf(fd, fsid, strlen(fsid)) || > + _scrub_write_buf(fd, ":", 1) || > + _scrub_writev(fd, buf, sizeof(buf), "%lld", > + use->scrub_args.devid) || > + _scrub_write_buf(fd, buf, ret) || > + _SCRUB_KVWRITE(fd, buf, data_extents_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, tree_extents_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, data_bytes_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, tree_bytes_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, read_errors, use) || > + _SCRUB_KVWRITE(fd, buf, csum_errors, use) || > + _SCRUB_KVWRITE(fd, buf, verify_errors, use) || > + _SCRUB_KVWRITE(fd, buf, no_csum, use) || > + _SCRUB_KVWRITE(fd, buf, csum_discards, use) || > + _SCRUB_KVWRITE(fd, buf, super_errors, use) || > + _SCRUB_KVWRITE(fd, buf, malloc_errors, use) || > + _SCRUB_KVWRITE(fd, buf, uncorrectable_errors, use) || > + _SCRUB_KVWRITE(fd, buf, corrected_errors, use) || > + _SCRUB_KVWRITE(fd, buf, last_physical, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, t_start, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, t_resumed, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, duration, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, canceled, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, finished, use) || > + _scrub_write_buf(fd, "\n", 1)) { > + return -EOVERFLOW; > + } > + } > + > + return 0; > +} > +#undef _SCRUB_KVWRITE > + > +static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, > + struct scrub_progress* data, int n) > +{ > + int ret; > + int fd; > + int old; > + > + ret = pthread_mutex_lock(m); > + if (ret) { > + ret = -errno; > + goto out; > + } > + > + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); Probably not massively important, but you don't check for the return value of this call or its counterpart at the end of this function. > + fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp"); > + if (fd < 0) { > + ret = fd; > + goto out; > + } > + ret = scrub_write_file(fd, fsid, data, n); > + if (ret) > + goto out; This leaks the file handle fd. > + ret = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp"); > + if (ret) > + goto out; As does this. > + ret = close(fd); > + if (ret) { > + ret = -errno; > + goto out; > + } > + > +out: > + if (ret) { > + pthread_mutex_unlock(m); > + } else { > + ret = pthread_mutex_unlock(m); > + if (ret) > + ret = -errno; > + } > + > + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old); > + > + return ret; > +} > + > +static void *scrub_one_dev(void *ctx) > +{ > + struct scrub_progress *sp = ctx; > + int ret; > + struct timeval tv; > + > + sp->stats.canceled = 0; > + sp->stats.duration = 0; > + sp->stats.finished = 0; > + > + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); > + gettimeofday(&tv, NULL); > + sp->ret = ret; > + sp->stats.duration = tv.tv_sec - sp->stats.t_start; > + sp->stats.canceled = !!ret; > + sp->ioctl_errno = errno; > + ret = pthread_mutex_lock(&sp->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + sp->stats.finished = 1; > + ret = pthread_mutex_unlock(&sp->progress_mutex); If you downgrade .finished to a plain int, rather than a u64, then is this locking actually be needed? You have one place where the lock is held to write a single value (which is atomic for an int, IIRC), and you have another place where you hold the lock to read and compare it. I don't see any problem with removing the lock for that. > + if (ret) > + return ERR_PTR(-errno); > + > + > + return NULL; > +} > + > +static void *progress_one_dev(void *ctx) > +{ > + struct scrub_progress *sp = ctx; > + > + sp->ret = ioctl(sp->fd, BTRFS_IOC_SCRUB_PROGRESS, &sp->scrub_args); > + sp->ioctl_errno = errno; > + > + return NULL; > +} > + > +static void *scrub_progress_cycle(void *ctx) > +{ > + int ret; > + int i; > + char fsid[37]; > + struct scrub_progress *sp; > + struct scrub_progress *sp_last; > + struct scrub_progress *sp_shared; > + struct timeval tv; > + struct scrub_progress_cycle *spc = ctx; > + int ndev = spc->fi->num_devices; > + int this = 1; > + int last = 0; > + int peer_fd = -1; > + struct pollfd accept_poll_fd = { > + .fd = spc->prg_fd, > + .events = POLLIN, > + .revents = 0, > + }; > + struct pollfd write_poll_fd = { > + .events = POLLOUT, > + .revents = 0, > + }; > + struct sockaddr_un peer; > + socklen_t peer_size = sizeof(peer); > + > + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &ret); > + uuid_unparse(spc->fi->fsid, fsid); > + > + for (i=0; i<ndev; ++i) { > + sp = &spc->progress[i]; > + sp_last = &spc->progress[i+ndev]; > + sp_shared = &spc->shared_progress[i]; > + sp->scrub_args.devid = sp_last->scrub_args.devid = > + sp_shared->scrub_args.devid; > + sp->fd = sp_last->fd = spc->fdmnt; > + sp->stats.t_start = sp_last->stats.t_start = > + sp_shared->stats.t_start; > + sp->resumed = sp_last->resumed = sp_shared->resumed; > + sp->skip = sp_last->skip = sp_shared->skip; > + sp->stats.finished = sp_last->stats.finished = > + sp_shared->stats.finished; > + } > + > + while (1) { > + ret = poll(&accept_poll_fd, 1, 5*1000); > + if (ret == -1) > + return ERR_PTR(-errno); > + if (ret) > + peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer, > + &peer_size); > + gettimeofday(&tv, NULL); > + this = (this+1)%2; > + last = (last+1)%2; > + for (i=0; i<ndev; ++i) { > + sp = &spc->progress[this*ndev+i]; > + sp_last = &spc->progress[last*ndev+i]; > + sp_shared = &spc->shared_progress[i]; > + if (sp->stats.finished) { > + continue; > + } > + progress_one_dev(sp); > + sp->stats.duration = tv.tv_sec - sp->stats.t_start; > + if (!sp->ret) > + continue; > + if (sp->ioctl_errno != ENOTCONN && > + sp->ioctl_errno != ENODEV) > + return ERR_PTR(-sp->ioctl_errno); > + /* > + * scrub finished or device removed, check the > + * finished flag. if unset, just use the last > + * result we got for the current write and go > + * on. flag should be set on next cycle, then. > + */ > + ret = pthread_mutex_lock(&sp_shared->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + if (!sp_shared->stats.finished) { > + ret = pthread_mutex_unlock( > + &sp_shared->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + memcpy(sp, sp_last, sizeof(*sp)); > + continue; > + } > + ret = pthread_mutex_unlock(&sp_shared->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + memcpy(sp, sp_shared, sizeof(*sp)); > + memcpy(sp_last, sp_shared, sizeof(*sp)); > + } > + if (peer_fd != -1) { > + write_poll_fd.fd = peer_fd; > + ret = poll(&write_poll_fd, 1, 0); > + if (ret == -1) > + return ERR_PTR(-errno); > + if (ret) { > + ret = scrub_write_file( > + peer_fd, fsid, > + &spc->progress[this*ndev], ndev); > + if (ret) > + return ERR_PTR(ret); > + } > + close(peer_fd); > + peer_fd = -1; > + } > + if (!spc->do_record) > + continue; > + ret = scrub_write_progress(spc->write_mutex, fsid, > + &spc->progress[this*ndev], ndev); > + if (ret) > + return ERR_PTR(ret); > + } > +} [...] > +int do_scrub_cancel(int argc, char **argv) > +{ > + char *path = argv[1]; > + int ret; > + int fdmnt; > + int err; > + char mp[BTRFS_PATH_NAME_MAX+1]; > + struct btrfs_fs_devices* fs_devices_mnt = NULL; > + > + fdmnt = open_file_or_dir(path); > + if (fdmnt < 0) { > + fprintf(stderr, "ERROR: scrub cancel failed\n"); > + return 12; > + } > + > +again: > + ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL); > + err = errno; > + close(fdmnt); > + > + if (ret && err == EINVAL) { > + /* path is no mounted btrfs. try if it's a device */ > + ret = check_mounted_where(fdmnt, path, mp, sizeof(mp), > + &fs_devices_mnt); > + close(fdmnt); > + if (ret) { > + fdmnt = open_file_or_dir(mp); > + if (fdmnt >= 0) { > + path = mp; > + goto again; > + } > + } > + } > + > + if (ret) { > + fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path, > + err == ENOTCONN ? "not running" : strerror(errno)); > + return 1; > + } > + > + printf("scrub cancelled\n"); > + > + return 0; > +} > + > +int do_scrub_status(int argc, char **argv) > +{ > + > + char *path; > + struct btrfs_ioctl_fs_info_args fi_args; > + struct btrfs_ioctl_dev_info_args *di_args = NULL; > + struct scrub_file_record **past_scrubs = NULL; > + struct scrub_file_record *last_scrub; > + struct scrub_fs_stat fs_stat; > + struct sockaddr_un addr = { > + .sun_family = AF_UNIX, > + }; > + int ret; > + int fdmnt; > + int i; > + optind = 1; > + int print_raw = 0; > + int do_stats_per_dev = 0; > + char c; > + char fsid[37]; > + int fdres = -1; > + int err = 0; > + > + while ((c = getopt(argc, argv, "dR")) != -1) { > + switch(c) { > + case 'd': > + do_stats_per_dev = 1; > + break; > + case 'R': > + print_raw = 1; > + break; > + case '?': > + default: > + fprintf(stderr, "ERROR: scrub status args invalid.\n" > + " -d stats per device\n"); > + return 1; > + } > + } > + > + path = argv[optind]; > + > + fdmnt = open_file_or_dir(path); > + if (fdmnt < 0) { > + fprintf(stderr, "ERROR: can't access to '%s'\n", path); > + return 12; > + } > + > + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); > + if (ret) { > + fprintf(stderr, "ERROR: getting dev info for scrub failed: " > + "%s\n", strerror(-ret)); > + err = 1; > + goto out; > + } > + if (!fi_args.num_devices) { > + fprintf(stderr, "ERROR: no devices found\n"); > + err = 1; > + goto out; > + } > + > + uuid_unparse(fi_args.fsid, fsid); > + > + fdres = socket(AF_UNIX, SOCK_STREAM, 0); > + if (fdres == -1) { > + fprintf(stderr, "ERROR: failed to create socket to " > + "receive progress information: %s\n", > + strerror(errno)); > + err = 1; > + goto out; > + } > + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, > + NULL, addr.sun_path, sizeof(addr.sun_path)-1); > + /* ignore EOVERFLOW, just use shorter name and hope for the best */ Same comment as in the previous mail about ignoring EOVERFLOW in this code... > + ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr)); > + if (ret == -1) { > + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); > + if (fdres < 0 && fdres != -ENOENT) { > + fprintf(stderr, "WARNING: failed to open status file: " > + "%s\n", strerror(-fdres)); > + err = 1; > + goto out; > + } > + } > + > + if (fdres >= 0) { > + past_scrubs = scrub_read_file(fdres, 1); > + if (IS_ERR(past_scrubs)) > + fprintf(stderr, "WARNING: failed to read status: %s\n", > + strerror(-PTR_ERR(past_scrubs))); > + } > + > + printf("scrub status for %s\n", fsid); > + > + /* > + * TODO: rather communicate with scrub process instead of > + * dumping the file stats for instant results > + */ > + if (do_stats_per_dev) { > + for (i = 0; i < fi_args.num_devices; ++i) { > + last_scrub = last_dev_scrub(past_scrubs, > + di_args[i].devid); > + if (!last_scrub) { > + print_scrub_dev(&di_args[i], NULL, print_raw, > + NULL, NULL); > + continue; > + } > + print_scrub_dev(&di_args[i], &last_scrub->p, print_raw, > + last_scrub->stats.finished ? > + "history" : "status", > + &last_scrub->stats); > + } > + } else { > + init_fs_stat(&fs_stat); > + for (i = 0; i < fi_args.num_devices; ++i) { > + last_scrub = last_dev_scrub(past_scrubs, > + di_args[i].devid); > + if (!last_scrub) > + continue; > + add_to_fs_stat(&last_scrub->p, &last_scrub->stats, > + &fs_stat); > + } > + print_fs_stat(&fs_stat, print_raw); > + } > + > +out: > + free_history(past_scrubs); > + free(di_args); > + close(fdmnt); > + if (fdres > -1) > + close(fdres); > + > + return err; > +} Hugo.
On Mon, Jul 11, 2011 at 04:29:24PM +0200, Jan Schmidt wrote: > On 10.07.2011 20:23, Hugo Mills wrote: > > Yes, this is over three months after the initial posting, but since > > nobody else has looked at it yet, and the patch is in my integration > > stack... > > ... thanks! > > > I've not reviewed the whole thing -- just the "scrub start" code so > > far. I've removed the bits I've not checked from the file below. > > I rebased the old branch I found to your current integration branch and > fixed up a most of what you mentioned. I'll not send a new version out > until after your complete review (or your statement that this is it or > your statement that you would rather going on reviewing the revised > version). Thanks. The other half has just gone out (with few comments). > Things I ripped out are accepted and corrected without resistance. > Comments follow. Only a couple of rejoinders below. > > On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: [...] > >> + case 4: /* read dev id */ > >> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) > >> + ; > >> + if (!j || i+j+1 >= avail) > > > > j == 0 is clearer than !j here, IMO > > > >> + _SCRUB_ILLEGAL; > >> + p[curr]->devid = atoll(&l[i]); > >> + i += j + 1; > > > > Is there any reason that you couldn't just use strtoull here? We > > know that the string is terminated with a \n (by the guarantee of > > state 1), so strtoull will always finish within the buffer. > > I just found it way easier to use atoll. We already know the first > character really is a digit, so why bother with a more cumbersome function? Ah, my mistake for not being clearer, I think: I was talking about the for loop at the head of the state 4 code as well. That only exists in order to find the end of the number read in by atoll, and strtoull would do that for you. [...] > >> + char fsid[37]; > > > > Magic number. is there a #define in libuuid for this value? > > At least the man page of uuid_parse clearly states uuids have 36 bytes > plus a \0 in printf format. uuid/uuid.h doesn't contain such a constant. > But volumes.c, print-tree.c and others do it with 37, too. OK, if that's conventional (and not defined in uuid.h), then go with the magic number. Hugo.
On 11.07.2011 22:45, Hugo Mills wrote: > OK, here's the remainder of my comments for this file. Not much for > this bit -- just one comment about locking, a reminder, and an > observation. Again, I ripped out the bits I simply corrected. My comments below. > [...] > >> +static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, >> + struct scrub_progress* data, int n) >> +{ >> + int ret; >> + int fd; >> + int old; >> + >> + ret = pthread_mutex_lock(m); >> + if (ret) { >> + ret = -errno; >> + goto out; >> + } >> + >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); > > Probably not massively important, but you don't check for the > return value of this call or its counterpart at the end of this > function. pthread_* return values where wrong throughout the code. Good that you pointed at this one. It's all fixed now. > [...] > >> +static void *scrub_one_dev(void *ctx) >> +{ >> + struct scrub_progress *sp = ctx; >> + int ret; >> + struct timeval tv; >> + >> + sp->stats.canceled = 0; >> + sp->stats.duration = 0; >> + sp->stats.finished = 0; >> + >> + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); >> + gettimeofday(&tv, NULL); >> + sp->ret = ret; >> + sp->stats.duration = tv.tv_sec - sp->stats.t_start; >> + sp->stats.canceled = !!ret; >> + sp->ioctl_errno = errno; >> + ret = pthread_mutex_lock(&sp->progress_mutex); >> + if (ret) >> + return ERR_PTR(-errno); >> + sp->stats.finished = 1; >> + ret = pthread_mutex_unlock(&sp->progress_mutex); > > If you downgrade .finished to a plain int, rather than a u64, then > is this locking actually be needed? You have one place where the lock > is held to write a single value (which is atomic for an int, IIRC), > and you have another place where you hold the lock to read and compare > it. I don't see any problem with removing the lock for that. Conclusion first: I want to stick with the mutex. My reasoning: - this is by no means time critical code - the mutexes do the memory barriers required for synchronization - using int instead of u64 would complicate the kvread macros Thanks, -Jan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 11.07.2011 22:57, Hugo Mills wrote: > On Mon, Jul 11, 2011 at 04:29:24PM +0200, Jan Schmidt wrote: >> On 10.07.2011 20:23, Hugo Mills wrote: >>> Yes, this is over three months after the initial posting, but since >>> nobody else has looked at it yet, and the patch is in my integration >>> stack... >> >> ... thanks! >> >>> I've not reviewed the whole thing -- just the "scrub start" code so >>> far. I've removed the bits I've not checked from the file below. >> >> I rebased the old branch I found to your current integration branch and >> fixed up a most of what you mentioned. I'll not send a new version out >> until after your complete review (or your statement that this is it or >> your statement that you would rather going on reviewing the revised >> version). > > Thanks. The other half has just gone out (with few comments). I'm through now, but I'll wait another day for you to protest on my latest comments before sending the new version. >> Things I ripped out are accepted and corrected without resistance. >> Comments follow. > > Only a couple of rejoinders below. > >>> On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: > [...] > >>>> + case 4: /* read dev id */ >>>> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) >>>> + ; >>>> + if (!j || i+j+1 >= avail) >>> >>> j == 0 is clearer than !j here, IMO >>> >>>> + _SCRUB_ILLEGAL; >>>> + p[curr]->devid = atoll(&l[i]); >>>> + i += j + 1; >>> >>> Is there any reason that you couldn't just use strtoull here? We >>> know that the string is terminated with a \n (by the guarantee of >>> state 1), so strtoull will always finish within the buffer. >> >> I just found it way easier to use atoll. We already know the first >> character really is a digit, so why bother with a more cumbersome function? > > Ah, my mistake for not being clearer, I think: I was talking about > the for loop at the head of the state 4 code as well. That only exists > in order to find the end of the number read in by atoll, and strtoull > would do that for you. Alright, now I see your point. However, with strtoull I would have to calculate with character pointers, whereas anything else uses direct character access with i and j here. And I don't need the fancy bits of strtoull, either. So I'd like to stick with atoll. > [...] > >>>> + char fsid[37]; >>> >>> Magic number. is there a #define in libuuid for this value? >> >> At least the man page of uuid_parse clearly states uuids have 36 bytes >> plus a \0 in printf format. uuid/uuid.h doesn't contain such a constant. >> But volumes.c, print-tree.c and others do it with 37, too. > > OK, if that's conventional (and not defined in uuid.h), then go > with the magic number. > > Hugo. > Thanks, -Jan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 12, 2011 at 10:49:59AM +0200, Jan Schmidt wrote: > On 11.07.2011 22:57, Hugo Mills wrote: > > On Mon, Jul 11, 2011 at 04:29:24PM +0200, Jan Schmidt wrote: > >> On 10.07.2011 20:23, Hugo Mills wrote: > >>> Yes, this is over three months after the initial posting, but since > >>> nobody else has looked at it yet, and the patch is in my integration > >>> stack... > >> > >> ... thanks! > >> > >>> I've not reviewed the whole thing -- just the "scrub start" code so > >>> far. I've removed the bits I've not checked from the file below. > >> > >> I rebased the old branch I found to your current integration branch and > >> fixed up a most of what you mentioned. I'll not send a new version out > >> until after your complete review (or your statement that this is it or > >> your statement that you would rather going on reviewing the revised > >> version). > > > > Thanks. The other half has just gone out (with few comments). > > I'm through now, but I'll wait another day for you to protest on my > latest comments before sending the new version. > > >> Things I ripped out are accepted and corrected without resistance. > >> Comments follow. > > > > Only a couple of rejoinders below. > > > >>> On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: > > [...] > > > >>>> + case 4: /* read dev id */ > >>>> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) > >>>> + ; > >>>> + if (!j || i+j+1 >= avail) > >>> > >>> j == 0 is clearer than !j here, IMO > >>> > >>>> + _SCRUB_ILLEGAL; > >>>> + p[curr]->devid = atoll(&l[i]); > >>>> + i += j + 1; > >>> > >>> Is there any reason that you couldn't just use strtoull here? We > >>> know that the string is terminated with a \n (by the guarantee of > >>> state 1), so strtoull will always finish within the buffer. > >> > >> I just found it way easier to use atoll. We already know the first > >> character really is a digit, so why bother with a more cumbersome function? > > > > Ah, my mistake for not being clearer, I think: I was talking about > > the for loop at the head of the state 4 code as well. That only exists > > in order to find the end of the number read in by atoll, and strtoull > > would do that for you. > > Alright, now I see your point. However, with strtoull I would have to > calculate with character pointers, whereas anything else uses direct > character access with i and j here. And I don't need the fancy bits of > strtoull, either. So I'd like to stick with atoll. OK, it's not something I feel massively strongly about. Stick with atoll, then. Hugo.
diff --git a/scrub.c b/scrub.c new file mode 100644 index 0000000..22052ed --- /dev/null +++ b/scrub.c @@ -0,0 +1,1568 @@ + +#include <sys/ioctl.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <poll.h> +#include <sys/file.h> +#include <uuid/uuid.h> +#include <fcntl.h> +#include <unistd.h> +#include <pthread.h> +#include <ctype.h> +#include <signal.h> +#include <stdarg.h> + +#include "ctree.h" +#include "ioctl.h" +#include "btrfs_cmds.h" +#include "utils.h" +#include "volumes.h" +#include "disk-io.h" + +#define SCRUB_DATA_FILE "/var/btrfs/scrub.status" +#define SCRUB_PROGRESS_SOCKET_PATH "/var/btrfs/scrub.progress" +#define SCRUB_FILE_VERSION_PREFIX "scrub status:" +#define SCRUB_FILE_VERSION "1" + +struct scrub_stats { + time_t t_start; + time_t t_resumed; + u64 duration; + u64 finished; + u64 canceled; +}; + +struct scrub_progress { + struct btrfs_ioctl_scrub_args scrub_args; + int fd; + int ret; + int skip; + struct scrub_stats stats; + struct scrub_file_record *resumed; + int ioctl_errno; + pthread_mutex_t progress_mutex; +}; + +struct scrub_file_record { + u8 fsid[BTRFS_FSID_SIZE]; + u64 devid; + struct scrub_stats stats; + struct btrfs_scrub_progress p; +}; + +struct scrub_progress_cycle { + int fdmnt; + int prg_fd; + int do_record; + struct btrfs_ioctl_fs_info_args *fi; + struct scrub_progress *progress; + struct scrub_progress *shared_progress; + pthread_mutex_t *write_mutex; +}; + +struct scrub_fs_stat { + struct btrfs_scrub_progress p; + struct scrub_stats s; + int i; +}; + +static void print_scrub_full(struct btrfs_scrub_progress *sp) +{ + printf("\tdata_extents_scrubbed: %lld\n", sp->data_extents_scrubbed); + printf("\ttree_extents_scrubbed: %lld\n", sp->tree_extents_scrubbed); + printf("\tdata_bytes_scrubbed: %lld\n", sp->data_bytes_scrubbed); + printf("\ttree_bytes_scrubbed: %lld\n", sp->tree_bytes_scrubbed); + printf("\tread_errors: %lld\n", sp->read_errors); + printf("\tcsum_errors: %lld\n", sp->csum_errors); + printf("\tverify_errors: %lld\n", sp->verify_errors); + printf("\tno_csum: %lld\n", sp->no_csum); + printf("\tcsum_discards: %lld\n", sp->csum_discards); + printf("\tsuper_errors: %lld\n", sp->super_errors); + printf("\tmalloc_errors: %lld\n", sp->malloc_errors); + printf("\tuncorrectable_errors: %lld\n", sp->uncorrectable_errors); + printf("\tcorrected_errors: %lld\n", sp->corrected_errors); + printf("\tlast_physical: %lld\n", sp->last_physical); +} + +#define err(test, ...) do { \ + if (test) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +#define PRINT_SCRUB_ERROR(test, desc) do { \ + if (test) \ + printf(" %s=%llu", desc, test); \ +} while (0) +static void print_scrub_summary(struct btrfs_scrub_progress *p) +{ + u64 err_cnt; + u64 err_cnt2; + + err_cnt = p->read_errors + + p->csum_errors + + p->verify_errors + + p->csum_discards + + p->super_errors + + p->malloc_errors; + + err_cnt2 = p->corrected_errors + p->uncorrectable_errors; + + printf("\ttotal bytes scrubbed: %s with %llu errors\n", + pretty_sizes(p->data_bytes_scrubbed + p->tree_bytes_scrubbed), + max(err_cnt, err_cnt2)); + if (err_cnt || err_cnt2) { + printf("\terror details:"); + PRINT_SCRUB_ERROR(p->read_errors, "read"); + PRINT_SCRUB_ERROR(p->super_errors, "super"); + PRINT_SCRUB_ERROR(p->malloc_errors, "malloc"); + PRINT_SCRUB_ERROR(p->verify_errors, "verify"); + PRINT_SCRUB_ERROR(p->csum_errors, "csum"); + PRINT_SCRUB_ERROR(p->csum_discards, "csum-discards"); + printf("\n"); + printf("\tcorrected errors: %llu, uncorrectable errors: %llu\n", + p->corrected_errors, p->uncorrectable_errors); + } +} + +#define _SCRUB_FS_STAT(p, name, fs_stat) fs_stat->p.name += p->name +#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat) \ +do { \ + if (fs_stat->s.name > ss->name) { \ + fs_stat->s.name = ss->name; \ + } \ +} while (0) +#define _SCRUB_FS_STAT_ZMIN(ss, name, fs_stat) \ +do { \ + if (!fs_stat->s.name || fs_stat->s.name > ss->name) { \ + fs_stat->s.name = ss->name; \ + } \ +} while (0) +#define _SCRUB_FS_STAT_MAX(ss, name, fs_stat) \ +do { \ + if (!fs_stat->s.name || fs_stat->s.name < ss->name) { \ + fs_stat->s.name = ss->name; \ + } \ +} while (0) +static void add_to_fs_stat(struct btrfs_scrub_progress *p, + struct scrub_stats *ss, + struct scrub_fs_stat *fs_stat) +{ + _SCRUB_FS_STAT(p, data_extents_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, tree_extents_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, data_bytes_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, tree_bytes_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, read_errors, fs_stat); + _SCRUB_FS_STAT(p, csum_errors, fs_stat); + _SCRUB_FS_STAT(p, verify_errors, fs_stat); + _SCRUB_FS_STAT(p, no_csum, fs_stat); + _SCRUB_FS_STAT(p, csum_discards, fs_stat); + _SCRUB_FS_STAT(p, super_errors, fs_stat); + _SCRUB_FS_STAT(p, malloc_errors, fs_stat); + _SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat); + _SCRUB_FS_STAT(p, corrected_errors, fs_stat); + _SCRUB_FS_STAT(p, last_physical, fs_stat); + _SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat); + _SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat); + _SCRUB_FS_STAT_MAX(ss, duration, fs_stat); + _SCRUB_FS_STAT_MAX(ss, canceled, fs_stat); + _SCRUB_FS_STAT_MIN(ss, finished, fs_stat); +} + +static void init_fs_stat(struct scrub_fs_stat *fs_stat) +{ + memset(fs_stat, 0, sizeof(*fs_stat)); + fs_stat->s.finished = 2; +} + +static void _print_scrub_ss(struct scrub_stats *ss) +{ + char t[BTRFS_PATH_NAME_MAX+1]; + struct tm tm; + + if (!ss || !ss->t_start) { + printf("\tno stats available\n"); + return; + } + if (ss->t_resumed) { + localtime_r(&ss->t_resumed, &tm); + strftime(t, sizeof(t), "%c", &tm); + printf("\tscrub resumed at %s", t); + } else { + localtime_r(&ss->t_start, &tm); + strftime(t, sizeof(t), "%c", &tm); + printf("\tscrub started at %s", t); + } + if (ss->finished && !ss->canceled) { + printf(" and finished after %llu seconds\n", + ss->duration); + } else if (ss->canceled) { + printf(" and was aborted after %llu seconds\n", + ss->duration); + } else { + printf(", running for %llu seconds\n", ss->duration); + } +} + +static void print_scrub_dev(struct btrfs_ioctl_dev_info_args *di, + struct btrfs_scrub_progress *p, int raw, + const char *append, struct scrub_stats *ss) +{ + printf("scrub device %s (id %llu) %s\n", di->path, di->devid, + append ? append : ""); + + _print_scrub_ss(ss); + + if (p) { + if (raw) + print_scrub_full(p); + else + print_scrub_summary(p); + } +} + +static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw) +{ + _print_scrub_ss(&fs_stat->s); + + if (raw) + print_scrub_full(&fs_stat->p); + else + print_scrub_summary(&fs_stat->p); +} + +static void free_history(struct scrub_file_record **last_scrubs) +{ + struct scrub_file_record **l = last_scrubs; + if (!l) + return; + while (*l) + free(*l++); + free(last_scrubs); +} + +static int cancel_fd = -1; +static void scrub_sigint_record_progress(int signal) +{ + ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); +} + +static int scrub_handle_sigint_parent(void) +{ + struct sigaction sa = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + return sigaction(SIGINT, &sa, NULL); +} + +static int scrub_handle_sigint_child(int fd) +{ + struct sigaction sa = { + .sa_handler = fd == -1 ? SIG_DFL : scrub_sigint_record_progress, + }; + + cancel_fd = fd; + return sigaction(SIGINT, &sa, NULL); +} + +static int _scrub_datafile(const char *fn_base, const char *fn_local, + const char *fn_tmp, char *datafile, int max) +{ + int ret; + + strncpy(datafile, fn_base, max); + ret = strlen(datafile); + + if (ret + 1 >= max) + return -EOVERFLOW; + + datafile[ret] = '.'; + strncpy(datafile+ret+1, fn_local, max-ret-1); + ret = strlen(datafile); + + if (ret + 1 >= max) + return -EOVERFLOW; + + if (fn_tmp) { + datafile[ret] = '_'; + strncpy(datafile+ret+1, fn_tmp, max-ret-1); + ret = strlen(datafile); + + if (ret >= max) + return -EOVERFLOW; + } + + return 0; +} + +static int _scrub_open_file(const char *datafile, int m) +{ + int fd; + int ret; + + fd = open(datafile, m, 0600); + if (fd < 0) + return -errno; + + ret = flock(fd, LOCK_EX|LOCK_NB); + if (ret) { + ret = errno; + close(fd); + return -ret; + } + + return fd; +} + +static int scrub_open_file_r(const char *fn_base, const char *fn_local) +{ + int ret; + char datafile[BTRFS_PATH_NAME_MAX+1]; + ret = _scrub_datafile(fn_base, fn_local, NULL, + datafile, sizeof(datafile)); + if (ret < 0) + return ret; + return _scrub_open_file(datafile, O_RDONLY); +} + +static int scrub_open_file_w(const char *fn_base, const char *fn_local, + const char *tmp) +{ + int ret; + char datafile[BTRFS_PATH_NAME_MAX+1]; + ret = _scrub_datafile(fn_base, fn_local, tmp, + datafile, sizeof(datafile)); + if (ret < 0) + return ret; + return _scrub_open_file(datafile, O_WRONLY|O_CREAT); +} + +static int scrub_rename_file(const char *fn_base, const char *fn_local, + const char *tmp) +{ + int ret; + char datafile_old[BTRFS_PATH_NAME_MAX+1]; + char datafile_new[BTRFS_PATH_NAME_MAX+1]; + ret = _scrub_datafile(fn_base, fn_local, tmp, + datafile_old, sizeof(datafile_old)); + if (ret < 0) + return ret; + ret = _scrub_datafile(fn_base, fn_local, NULL, + datafile_new, sizeof(datafile_new)); + if (ret < 0) + return ret; + ret = rename(datafile_old, datafile_new); + return ret ? -errno : 0; +} + +#define _SCRUB_KVREAD(i, name, avail, l, dest) \ + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest.name) +#define _SCRUB_KVREAD_STATS(i, name, avail, l, dest) \ + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest->stats.name) +/* + * returns 0 if the key did not match (nothing was read) + * 1 if the key did match (success) + * -1 if the key did match and an error occured + */ +static int _scrub_kvread(int *i, int len, int avail, const char *buf, + const char *key, u64 *dest) +{ + int j; + + if (*i+len+1 < avail && strncmp(&buf[*i], key, len-1) == 0) { + *i += len-1; + if (buf[*i] != ':') { + return -1; + } + *i += 1; + for (j=0; isdigit(buf[*i+j]) && *i+j < avail; ++j) + ; + if (*i+j >= avail) + return -1; + *dest = atoll(&buf[*i]); + *i += j; + return 1; + } + + return 0; +} + +#define _SCRUB_ILLEGAL do { \ + if (report_errors) { \ + fprintf(stderr, "WARNING: illegal data in line %d pos " \ + "%d state %d (near \"%.*s\") at %s:%d\n", \ + lineno, i, state, 20 > avail ? avail : 20, l+i, \ + __FILE__, __LINE__); \ + } \ + goto skip; \ +} while (0) +static struct scrub_file_record **scrub_read_file(int fd, int report_errors) +{ + int avail = 0; + int old_avail = 0; + char l[512]; + int state = 0; + int curr = -1; + int i = 0; + int j; + int ret; + int eof = 0; + int lineno = 0; + u64 version; + char empty_uuid[BTRFS_FSID_SIZE] = {0}; + struct scrub_file_record **p = NULL; + + if (fd < 0) + return ERR_PTR(-EINVAL); + +again: + old_avail = avail-i; + BUG_ON(old_avail < 0); + if (old_avail) + memmove(l, l+i, old_avail); + avail = read(fd, l+old_avail, sizeof(l)-old_avail); + if (avail == 0) { + eof = 1; + } + if (avail + old_avail == 0) { + if (curr >= 0 && + memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) { + p[curr] = NULL; + } else if (curr == -1) { + p = ERR_PTR(-ENODATA); + } + return p; + } + if (avail == -1) + return ERR_PTR(-errno); + avail += old_avail; + + i = 0; + while (i < avail) { + switch (state) { + case 0: /* start if file */ + ret = _scrub_kvread(&i, + sizeof(SCRUB_FILE_VERSION_PREFIX)-1, avail, l, + SCRUB_FILE_VERSION_PREFIX, &version); + if (ret != 1) + _SCRUB_ILLEGAL; + if (version != atoll(SCRUB_FILE_VERSION)) + return ERR_PTR(-ENOTSUP); + state = 6; + continue; + case 1: /* start of line, alloc */ + if (!eof && !memchr(l+i, '\n', avail-i)) + goto again; + ++lineno; + if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid, + BTRFS_FSID_SIZE) == 0) { + state = 2; + continue; + } + ++curr; + p = realloc(p, (curr+2)*sizeof(*p)); + if (p) + p[curr] = malloc(sizeof(**p)); + if (!p || !p[curr]) + return ERR_PTR(-errno); + memset(p[curr], 0, sizeof(**p)); + p[curr+1] = NULL; + ++state; + case 2: /* start of line, skip space */ + while (isspace(l[i]) && i<avail) { + if (l[i] == '\n') + ++lineno; + ++i; + } + if (i >= avail || (!eof && !memchr(l+i, '\n', avail-i))) + goto again; + ++state; + case 3: /* read fsid */ + if (i == avail) + continue; + for (j=0; l[i+j] != ':' && i+j < avail; ++j) + ; + if (i+j+1 >= avail) + _SCRUB_ILLEGAL; + if (j != 36) + _SCRUB_ILLEGAL; + l[i+j] = '\0'; + ret = uuid_parse(l+i, p[curr]->fsid); + if (ret) + _SCRUB_ILLEGAL; + i += j + 1; + ++state; + case 4: /* read dev id */ + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) + ; + if (!j || i+j+1 >= avail) + _SCRUB_ILLEGAL; + p[curr]->devid = atoll(&l[i]); + i += j + 1; + ++state; + case 5: /* read key/value pair */ + ret = _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, tree_extents_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, data_bytes_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, tree_bytes_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, read_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, csum_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, verify_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, no_csum, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, csum_discards, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, super_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, malloc_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, uncorrectable_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, corrected_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, last_physical, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, finished, avail, + l, &p[curr]->stats) || + _SCRUB_KVREAD(&i, t_start, avail, + l, (u64*)&p[curr]->stats) || + _SCRUB_KVREAD(&i, t_resumed, avail, + l, (u64*)&p[curr]->stats) || + _SCRUB_KVREAD(&i, duration, avail, + l, (u64*)&p[curr]->stats) || + _SCRUB_KVREAD(&i, canceled, avail, + l, &p[curr]->stats); + if (ret != 1) + _SCRUB_ILLEGAL; + ++state; + case 6: /* after number */ + if (l[i] == '|') { + state = 5; + } else if (l[i] == '\n') { + state = 1; + } else { + _SCRUB_ILLEGAL; + } + ++i; + continue; + case 99: /* skip rest of line */ +skip: + state = 99; + do { + ++i; + if (l[i-1] == '\n') { + state = 1; + break; + } + } while (i < avail); + continue; + } + BUG(); + } + goto again; +} +#undef _SCRUB_ILLEGAL + +static int _scrub_write_buf(int fd, const void *data, int len) +{ + int ret; + ret = write(fd, data, len); + return ret - len; +} + +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) + __attribute__ ((format (printf, 4, 5))); +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) +{ + int ret; + va_list args; + + va_start(args, fmt); + ret = vsnprintf(buf, max, fmt, args); + va_end(args); + if (ret >= max) + return ret - max; + return _scrub_write_buf(fd, buf, ret); +} + +#define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name = \ + data->resumed->p.name + data->scrub_args.progress.name +static struct scrub_progress *_scrub_resumed_stats(struct scrub_progress *data, + struct scrub_progress *dest) +{ + if (!data->resumed || data->skip) + return data; + + _SCRUB_SUM(dest, data, data_extents_scrubbed); + _SCRUB_SUM(dest, data, tree_extents_scrubbed); + _SCRUB_SUM(dest, data, data_bytes_scrubbed); + _SCRUB_SUM(dest, data, tree_bytes_scrubbed); + _SCRUB_SUM(dest, data, read_errors); + _SCRUB_SUM(dest, data, csum_errors); + _SCRUB_SUM(dest, data, verify_errors); + _SCRUB_SUM(dest, data, no_csum); + _SCRUB_SUM(dest, data, csum_discards); + _SCRUB_SUM(dest, data, super_errors); + _SCRUB_SUM(dest, data, malloc_errors); + _SCRUB_SUM(dest, data, uncorrectable_errors); + _SCRUB_SUM(dest, data, corrected_errors); + _SCRUB_SUM(dest, data, last_physical); + dest->stats.canceled = data->stats.canceled; + dest->stats.finished = data->stats.finished; + dest->stats.t_resumed = data->stats.t_start; + dest->stats.t_start = data->resumed->stats.t_start; + dest->stats.duration = data->resumed->stats.duration + + data->stats.duration; + dest->scrub_args.devid = data->scrub_args.devid; + return dest; +} + +#define _SCRUB_KVWRITE(fd, buf, name, use) \ + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ + use->scrub_args.progress.name) +#define _SCRUB_KVWRITE_STATS(fd, buf, name, use) \ + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ + use->stats.name) +static int _scrub_kvwrite(int fd, char *buf, int max, + const char *key, u64 val) +{ + return _scrub_writev(fd, buf, max, "|%s:%lld", key, val); +} + +static int scrub_write_file(int fd, const char *fsid, + struct scrub_progress* data, int n) +{ + int ret = 0; + int i; + char buf[1024]; + struct scrub_progress local; + struct scrub_progress *use; + + if (n < 1) { + return -EINVAL; + } + + ret = _scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX SCRUB_FILE_VERSION + "\n", sizeof(SCRUB_FILE_VERSION_PREFIX)-1 + + sizeof(SCRUB_FILE_VERSION)-1 + 1); + if (ret) + return -EOVERFLOW; + + for (i=0; i<n; ++i) { + use = _scrub_resumed_stats(&data[i], &local); + if (_scrub_write_buf(fd, fsid, strlen(fsid)) || + _scrub_write_buf(fd, ":", 1) || + _scrub_writev(fd, buf, sizeof(buf), "%lld", + use->scrub_args.devid) || + _scrub_write_buf(fd, buf, ret) || + _SCRUB_KVWRITE(fd, buf, data_extents_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, tree_extents_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, data_bytes_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, tree_bytes_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, read_errors, use) || + _SCRUB_KVWRITE(fd, buf, csum_errors, use) || + _SCRUB_KVWRITE(fd, buf, verify_errors, use) || + _SCRUB_KVWRITE(fd, buf, no_csum, use) || + _SCRUB_KVWRITE(fd, buf, csum_discards, use) || + _SCRUB_KVWRITE(fd, buf, super_errors, use) || + _SCRUB_KVWRITE(fd, buf, malloc_errors, use) || + _SCRUB_KVWRITE(fd, buf, uncorrectable_errors, use) || + _SCRUB_KVWRITE(fd, buf, corrected_errors, use) || + _SCRUB_KVWRITE(fd, buf, last_physical, use) || + _SCRUB_KVWRITE_STATS(fd, buf, t_start, use) || + _SCRUB_KVWRITE_STATS(fd, buf, t_resumed, use) || + _SCRUB_KVWRITE_STATS(fd, buf, duration, use) || + _SCRUB_KVWRITE_STATS(fd, buf, canceled, use) || + _SCRUB_KVWRITE_STATS(fd, buf, finished, use) || + _scrub_write_buf(fd, "\n", 1)) { + return -EOVERFLOW; + } + } + + return 0; +} +#undef _SCRUB_KVWRITE + +static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, + struct scrub_progress* data, int n) +{ + int ret; + int fd; + int old; + + ret = pthread_mutex_lock(m); + if (ret) { + ret = -errno; + goto out; + } + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); + + fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp"); + if (fd < 0) { + ret = fd; + goto out; + } + ret = scrub_write_file(fd, fsid, data, n); + if (ret) + goto out; + ret = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp"); + if (ret) + goto out; + ret = close(fd); + if (ret) { + ret = -errno; + goto out; + } + +out: + if (ret) { + pthread_mutex_unlock(m); + } else { + ret = pthread_mutex_unlock(m); + if (ret) + ret = -errno; + } + + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old); + + return ret; +} + +static void *scrub_one_dev(void *ctx) +{ + struct scrub_progress *sp = ctx; + int ret; + struct timeval tv; + + sp->stats.canceled = 0; + sp->stats.duration = 0; + sp->stats.finished = 0; + + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); + gettimeofday(&tv, NULL); + sp->ret = ret; + sp->stats.duration = tv.tv_sec - sp->stats.t_start; + sp->stats.canceled = !!ret; + sp->ioctl_errno = errno; + ret = pthread_mutex_lock(&sp->progress_mutex); + if (ret) + return ERR_PTR(-errno); + sp->stats.finished = 1; + ret = pthread_mutex_unlock(&sp->progress_mutex); + if (ret) + return ERR_PTR(-errno); + + + return NULL; +} + +static void *progress_one_dev(void *ctx) +{ + struct scrub_progress *sp = ctx; + + sp->ret = ioctl(sp->fd, BTRFS_IOC_SCRUB_PROGRESS, &sp->scrub_args); + sp->ioctl_errno = errno; + + return NULL; +} + +static void *scrub_progress_cycle(void *ctx) +{ + int ret; + int i; + char fsid[37]; + struct scrub_progress *sp; + struct scrub_progress *sp_last; + struct scrub_progress *sp_shared; + struct timeval tv; + struct scrub_progress_cycle *spc = ctx; + int ndev = spc->fi->num_devices; + int this = 1; + int last = 0; + int peer_fd = -1; + struct pollfd accept_poll_fd = { + .fd = spc->prg_fd, + .events = POLLIN, + .revents = 0, + }; + struct pollfd write_poll_fd = { + .events = POLLOUT, + .revents = 0, + }; + struct sockaddr_un peer; + socklen_t peer_size = sizeof(peer); + + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &ret); + uuid_unparse(spc->fi->fsid, fsid); + + for (i=0; i<ndev; ++i) { + sp = &spc->progress[i]; + sp_last = &spc->progress[i+ndev]; + sp_shared = &spc->shared_progress[i]; + sp->scrub_args.devid = sp_last->scrub_args.devid = + sp_shared->scrub_args.devid; + sp->fd = sp_last->fd = spc->fdmnt; + sp->stats.t_start = sp_last->stats.t_start = + sp_shared->stats.t_start; + sp->resumed = sp_last->resumed = sp_shared->resumed; + sp->skip = sp_last->skip = sp_shared->skip; + sp->stats.finished = sp_last->stats.finished = + sp_shared->stats.finished; + } + + while (1) { + ret = poll(&accept_poll_fd, 1, 5*1000); + if (ret == -1) + return ERR_PTR(-errno); + if (ret) + peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer, + &peer_size); + gettimeofday(&tv, NULL); + this = (this+1)%2; + last = (last+1)%2; + for (i=0; i<ndev; ++i) { + sp = &spc->progress[this*ndev+i]; + sp_last = &spc->progress[last*ndev+i]; + sp_shared = &spc->shared_progress[i]; + if (sp->stats.finished) { + continue; + } + progress_one_dev(sp); + sp->stats.duration = tv.tv_sec - sp->stats.t_start; + if (!sp->ret) + continue; + if (sp->ioctl_errno != ENOTCONN && + sp->ioctl_errno != ENODEV) + return ERR_PTR(-sp->ioctl_errno); + /* + * scrub finished or device removed, check the + * finished flag. if unset, just use the last + * result we got for the current write and go + * on. flag should be set on next cycle, then. + */ + ret = pthread_mutex_lock(&sp_shared->progress_mutex); + if (ret) + return ERR_PTR(-errno); + if (!sp_shared->stats.finished) { + ret = pthread_mutex_unlock( + &sp_shared->progress_mutex); + if (ret) + return ERR_PTR(-errno); + memcpy(sp, sp_last, sizeof(*sp)); + continue; + } + ret = pthread_mutex_unlock(&sp_shared->progress_mutex); + if (ret) + return ERR_PTR(-errno); + memcpy(sp, sp_shared, sizeof(*sp)); + memcpy(sp_last, sp_shared, sizeof(*sp)); + } + if (peer_fd != -1) { + write_poll_fd.fd = peer_fd; + ret = poll(&write_poll_fd, 1, 0); + if (ret == -1) + return ERR_PTR(-errno); + if (ret) { + ret = scrub_write_file( + peer_fd, fsid, + &spc->progress[this*ndev], ndev); + if (ret) + return ERR_PTR(ret); + } + close(peer_fd); + peer_fd = -1; + } + if (!spc->do_record) + continue; + ret = scrub_write_progress(spc->write_mutex, fsid, + &spc->progress[this*ndev], ndev); + if (ret) + return ERR_PTR(ret); + } +} + +static struct scrub_file_record *last_dev_scrub( + struct scrub_file_record *const *const past_scrubs, u64 devid) +{ + int i; + + if (!past_scrubs || IS_ERR(past_scrubs)) + return NULL; + + for (i=0; past_scrubs[i]; ++i) + if (past_scrubs[i]->devid == devid) + return past_scrubs[i]; + + return NULL; +} + +static int scrub_device_info(int fd, u64 devid, + struct btrfs_ioctl_dev_info_args *di_args) +{ + int ret; + + di_args->devid = devid; + memset(&di_args->uuid, '\0', sizeof(di_args->uuid)); + + ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); + return ret ? -errno : 0; +} + +static int scrub_fs_info(int fd, char *path, + struct btrfs_ioctl_fs_info_args *fi_args, + struct btrfs_ioctl_dev_info_args **di_ret) +{ + int ret = 0; + int ndevs = 0; + int i = 1; + struct btrfs_fs_devices* fs_devices_mnt = NULL; + struct btrfs_ioctl_dev_info_args *di_args; + char mp[BTRFS_PATH_NAME_MAX+1]; + + memset(fi_args, 0, sizeof(*fi_args)); + + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); + if (ret && errno == EINVAL) { + /* path is no mounted btrfs. try if it's a device */ + ret = check_mounted_where(fd, path, mp, sizeof(mp), + &fs_devices_mnt); + if (!ret) + return -EINVAL; + fi_args->num_devices = 1; + fi_args->max_id = fs_devices_mnt->latest_devid; + i = fs_devices_mnt->latest_devid; + memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); + close(fd); + fd = open_file_or_dir(mp); + if (fd < 0) + return -errno; + } else if (ret) { + return -errno; + } + + if (!fi_args->num_devices) + return 0; + + di_args = *di_ret = malloc(fi_args->num_devices*sizeof(*di_args)); + if (!di_args) + return -errno; + + for (; i<=fi_args->max_id; ++i) { + BUG_ON(ndevs >= fi_args->num_devices); + ret = scrub_device_info(fd, i, &di_args[ndevs]); + if (ret == -ENODEV) + continue; + if (ret) + return ret; + ++ndevs; + } + + BUG_ON(ndevs == 0); + + return 0; +} + +int mkdir_p(char *path) +{ + int i; + int ret; + + for (i=1; i<strlen(path); ++i) { + if (path[i] != '/') + continue; + path[i] = '\0'; + ret = mkdir(path, 0777); + if (ret && errno != EEXIST) + return 1; + path[i] = '/'; + } + + return 0; +} + +static int scrub_start(int argc, char **argv, int resume) +{ + int fdmnt; + int prg_fd = -1; + int fdres = -1; + int ret; + pid_t pid; + int c; + int i; + int err = 0; + int print_raw = 0; + char *path; + int do_background = 1; + int do_wait = 0; + int do_print = 0; + int do_quiet = 0; + int do_record = 1; + int readonly = 0; + int do_stats_per_dev = 0; + int n_start = 0; + int n_skip = 0; + int n_resume = 0; + struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_dev_info_args *di_args = NULL; + struct scrub_progress *sp = NULL; + struct scrub_fs_stat fs_stat; + struct timeval tv; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + pthread_t *t_devs = NULL; + pthread_t t_prog; + pthread_attr_t t_attr; + struct scrub_file_record **past_scrubs = NULL; + struct scrub_file_record *last_scrub = NULL; + char *datafile = strdup(SCRUB_DATA_FILE); + char fsid[37]; + char sock_path[BTRFS_PATH_NAME_MAX+1] = ""; + struct scrub_progress_cycle spc; + pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER; + void *terr; + u64 devid; + + optind = 1; + while ((c = getopt(argc, argv, "BdqrR")) != -1) { + switch(c) { + case 'B': + do_background = 0; + do_wait = 1; + do_print = 1; + break; + case 'd': + do_stats_per_dev = 1; + break; + case 'q': + do_quiet = 1; + break; + case 'r': + readonly = 1; + break; + case 'R': + print_raw = 1; + break; + case '?': + default: + fprintf(stderr, "ERROR: scrub args invalid.\n" + " -B do not background (implies -W)\n" + " -d stats per device (-B only)\n" + " -q quiet\n" + " -r read only mode\n"); + return 1; + } + } + + /* try to catch most error cases before forking */ + + spc.progress = NULL; + if (do_quiet && do_print) + do_print = 0; + + if (mkdir_p(datafile)) { + err(!do_quiet, "WARNING: cannot create scrub data " + "file, mkdir %s failed: %s. Status recording " + "disabled\n", datafile, strerror(errno)); + do_record = 0; + } + + path = argv[optind]; + + fdmnt = open_file_or_dir(path); + if (fdmnt < 0) { + err(!do_quiet, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); + if (ret) { + err(!do_quiet, "ERROR: getting dev info for scrub failed: " + "%s\n", strerror(-ret)); + err = 1; + goto out; + } + if (!fi_args.num_devices) { + err(!do_quiet, "ERROR: no devices found\n"); + err = 1; + goto out; + } + + uuid_unparse(fi_args.fsid, fsid); + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); + if (fdres < 0 && fdres != -ENOENT) { + err(!do_quiet, "WARNING: failed to open status file: " + "%s\n", strerror(-fdres)); + } else if (fdres >= 0) { + past_scrubs = scrub_read_file(fdres, !do_quiet); + if (IS_ERR(past_scrubs)) + err(!do_quiet, "WARNING: failed to read status file: " + "%s\n", strerror(-PTR_ERR(past_scrubs))); + close(fdres); + } + + t_devs = malloc(fi_args.num_devices*sizeof(*t_devs)); + sp = calloc(1, fi_args.num_devices*sizeof(*sp)); + spc.progress = calloc(1, fi_args.num_devices*2*sizeof(*spc.progress)); + + if (!t_devs || !sp || !spc.progress) { + err(!do_quiet, "ERROR: scrub failed: %s", strerror(errno)); + err = 1; + goto out; + } + + ret = pthread_attr_init(&t_attr); + if (ret) { + err(!do_quiet, "ERROR: pthread_attr_init failed: %s\n", + strerror(ret)); + err = 1; + goto out; + } + + for (i = 0; i < fi_args.num_devices; ++i) { + devid = di_args[i].devid; + ret = pthread_mutex_init(&sp[i].progress_mutex, NULL); + if (ret) { + err(!do_quiet, "ERROR: pthread_mutex_init failed: " + "%s\n", strerror(ret)); + err = 1; + goto out; + } + last_scrub = last_dev_scrub(past_scrubs, devid); + sp[i].scrub_args.devid = devid; + sp[i].fd = fdmnt; + if (resume && last_scrub && (last_scrub->stats.canceled || + !last_scrub->stats.finished)) { + ++n_resume; + sp[i].scrub_args.start = last_scrub->p.last_physical; + sp[i].resumed = last_scrub; + } else if (resume) { + ++n_skip; + sp[i].skip = 1; + sp[i].resumed = last_scrub; + continue; + } else { + ++n_start; + sp[i].scrub_args.start = 0ll; + sp[i].resumed = NULL; + } + sp[i].skip = 0; + sp[i].scrub_args.end = (u64)-1ll; + sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; + } + + if (!n_start && !n_resume) { + if (!do_quiet) + printf("scrub: nothing to resume for %s, fsid %s\n", + path, fsid); + err = 0; + goto out; + } + + ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0); + while (ret != -1) { + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, + NULL, sock_path, sizeof(sock_path)); + /* ignore EOVERFLOW, as strncpy follows anyway */ + strncpy(addr.sun_path, sock_path, + sizeof(addr.sun_path)-1); + ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret != -1 || errno != EADDRINUSE) + break; + ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); + if (!ret || errno != ECONNREFUSED) { + fprintf(stderr, "ERROR: scrub already running\n"); + close(prg_fd); + goto out; + } + ret = unlink(sock_path); + } + if (ret != -1) { + ret = listen(prg_fd, 100); + } + if (ret == -1) { + err(!do_quiet, "WARNING: failed to open the progress status " + "socket at %s: %s. Progress cannot be queried\n", + sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH, + strerror(errno)); + if (prg_fd != -1) { + close(prg_fd); + prg_fd = -1; + if (sock_path[0]) + unlink(sock_path); + } + } + + if (do_record) { + /* write all-zero progress file for a start */ + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, + fi_args.num_devices); + if (ret) { + err(!do_quiet, "WARNING: failed to write the progress " + "status file: %s. Status recording disabled\n", + strerror(-ret)); + do_record = 0; + } + } + + if (do_background) { + pid = fork(); + if (pid == -1) { + err(!do_quiet, "ERROR: cannot scrub, fork failed: " + "%s\n", strerror(errno)); + err = 1; + goto out; + } + + if (pid) { + int stat; + scrub_handle_sigint_parent(); + if (!do_quiet) + printf("scrub %s on %s, fsid %s (pid=%d)\n", + n_start ? "started" : "resumed", + path, fsid, pid); + if (!do_wait) { + err = 0; + goto out; + } + ret = wait(&stat); + if (ret != pid) { + err(!do_quiet, "ERROR: wait failed: (ret=%d) " + "%s\n", ret, strerror(errno)); + err = 1; + goto out; + } + if (!WIFEXITED(stat) || WEXITSTATUS(stat)) { + err(!do_quiet, "ERROR: scrub process failed\n"); + err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1; + goto out; + } + err = 0; + goto out; + } + } + + scrub_handle_sigint_child(fdmnt); + + for (i = 0; i < fi_args.num_devices; ++i) { + if (sp[i].skip) { + sp[i].scrub_args.progress = sp[i].resumed->p; + sp[i].stats = sp[i].resumed->stats; + sp[i].ret = 0; + sp[i].stats.finished = 1; + continue; + } + devid = di_args[i].devid; + gettimeofday(&tv, NULL); + sp[i].stats.t_start = tv.tv_sec; + ret = pthread_create(&t_devs[i], &t_attr, scrub_one_dev,&sp[i]); + if (ret) { + if (do_print) + fprintf(stderr, "ERROR: creating " + "scrub_one_dev[%llu] thread failed: " + "%s\n", devid, strerror(ret)); + err = 1; + goto out; + } + } + + spc.fdmnt = fdmnt; + spc.prg_fd = prg_fd; + spc.do_record = do_record; + spc.write_mutex = &spc_write_mutex; + spc.shared_progress = sp; + spc.fi = &fi_args; + pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc); + + err = 0; + for (i = 0; i < fi_args.num_devices; ++i) { + if (sp[i].skip) + continue; + devid = di_args[i].devid; + ret = pthread_join(t_devs[i], NULL); + if (ret) { + if (do_print) + fprintf(stderr, "ERROR: pthread_join failed " + "for scrub_one_dev[%llu]: %s\n", devid, + strerror(ret)); + err++; + continue; + } + if (sp[i].ret && sp[i].ioctl_errno == ENODEV) { + if (do_print) + fprintf(stderr, "WARNING: device %lld not " + "present\n", devid); + continue; + } + if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) { + err++; + } else if (sp[i].ret) { + if (do_print) + fprintf(stderr, "ERROR: scrubbing %s failed " + "for device id %lld (%s)\n", path, + devid, strerror(sp[i].ioctl_errno)); + err++; + continue; + } + } + + if (do_print) { + const char *append = "done"; + if (!do_stats_per_dev) + init_fs_stat(&fs_stat); + for (i = 0; i < fi_args.num_devices; ++i) { + if (do_stats_per_dev) { + print_scrub_dev(&di_args[i], + &sp[i].scrub_args.progress, + print_raw, + sp[i].ret ? "canceled" : "done", + &sp[i].stats); + } else { + if (sp[i].ret) + append = "canceled"; + add_to_fs_stat(&sp[i].scrub_args.progress, + &sp[i].stats, &fs_stat); + } + } + if (!do_stats_per_dev) { + printf("scrub %s for %s\n", append, fsid); + print_fs_stat(&fs_stat, print_raw); + } + } + + pthread_cancel(t_prog); + ret = pthread_join(t_prog, &terr); + if (do_print && terr && terr != PTHREAD_CANCELED) { + fprintf(stderr, "ERROR: recording progress " + "failed: %s\n", strerror(-PTR_ERR(terr))); + } + + if (do_record) { + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, + fi_args.num_devices); + if (ret && do_print) { + fprintf(stderr, "ERROR: failed to record the result: " + "%s\n", strerror(-ret)); + } + } + + scrub_handle_sigint_child(-1); + +out: + free_history(past_scrubs); + free(di_args); + free(t_devs); + free(sp); + free(spc.progress); + if (prg_fd > -1) { + close(prg_fd); + if (sock_path[0]) + unlink(sock_path); + } + close(fdmnt); + + return !!err; +} + +int do_scrub_start(int argc, char **argv) +{ + return scrub_start(argc, argv, 0); +} + +int do_scrub_resume(int argc, char **argv) +{ + return scrub_start(argc, argv, 1); +} + +int do_scrub_cancel(int argc, char **argv) +{ + char *path = argv[1]; + int ret; + int fdmnt; + int err; + char mp[BTRFS_PATH_NAME_MAX+1]; + struct btrfs_fs_devices* fs_devices_mnt = NULL; + + fdmnt = open_file_or_dir(path); + if (fdmnt < 0) { + fprintf(stderr, "ERROR: scrub cancel failed\n"); + return 12; + } + +again: + ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL); + err = errno; + close(fdmnt); + + if (ret && err == EINVAL) { + /* path is no mounted btrfs. try if it's a device */ + ret = check_mounted_where(fdmnt, path, mp, sizeof(mp), + &fs_devices_mnt); + close(fdmnt); + if (ret) { + fdmnt = open_file_or_dir(mp); + if (fdmnt >= 0) { + path = mp; + goto again; + } + } + } + + if (ret) { + fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path, + err == ENOTCONN ? "not running" : strerror(errno)); + return 1; + } + + printf("scrub cancelled\n"); + + return 0; +} + +int do_scrub_status(int argc, char **argv) +{ + + char *path; + struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_dev_info_args *di_args = NULL; + struct scrub_file_record **past_scrubs = NULL; + struct scrub_file_record *last_scrub; + struct scrub_fs_stat fs_stat; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + int ret; + int fdmnt; + int i; + optind = 1; + int print_raw = 0; + int do_stats_per_dev = 0; + char c; + char fsid[37]; + int fdres = -1; + int err = 0; + + while ((c = getopt(argc, argv, "dR")) != -1) { + switch(c) { + case 'd': + do_stats_per_dev = 1; + break; + case 'R': + print_raw = 1; + break; + case '?': + default: + fprintf(stderr, "ERROR: scrub status args invalid.\n" + " -d stats per device\n"); + return 1; + } + } + + path = argv[optind]; + + fdmnt = open_file_or_dir(path); + if (fdmnt < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); + if (ret) { + fprintf(stderr, "ERROR: getting dev info for scrub failed: " + "%s\n", strerror(-ret)); + err = 1; + goto out; + } + if (!fi_args.num_devices) { + fprintf(stderr, "ERROR: no devices found\n"); + err = 1; + goto out; + } + + uuid_unparse(fi_args.fsid, fsid); + + fdres = socket(AF_UNIX, SOCK_STREAM, 0); + if (fdres == -1) { + fprintf(stderr, "ERROR: failed to create socket to " + "receive progress information: %s\n", + strerror(errno)); + err = 1; + goto out; + } + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, + NULL, addr.sun_path, sizeof(addr.sun_path)-1); + /* ignore EOVERFLOW, just use shorter name and hope for the best */ + ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr)); + if (ret == -1) { + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); + if (fdres < 0 && fdres != -ENOENT) { + fprintf(stderr, "WARNING: failed to open status file: " + "%s\n", strerror(-fdres)); + err = 1; + goto out; + } + } + + if (fdres >= 0) { + past_scrubs = scrub_read_file(fdres, 1); + if (IS_ERR(past_scrubs)) + fprintf(stderr, "WARNING: failed to read status: %s\n", + strerror(-PTR_ERR(past_scrubs))); + } + + printf("scrub status for %s\n", fsid); + + /* + * TODO: rather communicate with scrub process instead of + * dumping the file stats for instant results + */ + if (do_stats_per_dev) { + for (i = 0; i < fi_args.num_devices; ++i) { + last_scrub = last_dev_scrub(past_scrubs, + di_args[i].devid); + if (!last_scrub) { + print_scrub_dev(&di_args[i], NULL, print_raw, + NULL, NULL); + continue; + } + print_scrub_dev(&di_args[i], &last_scrub->p, print_raw, + last_scrub->stats.finished ? + "history" : "status", + &last_scrub->stats); + } + } else { + init_fs_stat(&fs_stat); + for (i = 0; i < fi_args.num_devices; ++i) { + last_scrub = last_dev_scrub(past_scrubs, + di_args[i].devid); + if (!last_scrub) + continue; + add_to_fs_stat(&last_scrub->p, &last_scrub->stats, + &fs_stat); + } + print_fs_stat(&fs_stat, print_raw); + } + +out: + free_history(past_scrubs); + free(di_args); + close(fdmnt); + if (fdres > -1) + close(fdres); + + return err; +}