@@ -109,7 +109,7 @@ hist_free(
* in the long tail of small extents, e.g. 98% of the free space extents are
* larger than 31 blocks.
*/
-static struct histogram_cdf *
+struct histogram_cdf *
hist_cdf(
const struct histogram *hs)
{
@@ -151,7 +151,7 @@ hist_cdf(
}
/* Free all data associated with a histogram cdf. */
-static void
+void
histcdf_free(
struct histogram_cdf *cdf)
{
@@ -68,6 +68,9 @@ static inline unsigned int hist_buckets(const struct histogram *hs)
return hs->nr_buckets;
}
+struct histogram_cdf *hist_cdf(const struct histogram *hs);
+void histcdf_free(struct histogram_cdf *cdf);
+
void hist_import(struct histogram *dest, const struct histogram *src);
void hist_move(struct histogram *dest, struct histogram *src);
@@ -100,6 +100,22 @@ The
supported are:
.RS 1.0i
.TP
+.BI fstrim_pct= percentage
+To constrain the amount of time spent on fstrim activities during phase 8,
+this program tries to balance estimated runtime against completeness of the
+trim.
+In short, the program avoids small trim requests to save time.
+
+During phase 7, a log-scale histogram of free space extents is constructed.
+At the start of phase 8, a CDF is computed in decreasing order of extent
+length from the histogram buckets.
+A point corresponding to the fstrim percentage target is chosen from the CDF
+and mapped back to a histogram bucket.
+Free space extents at least as long as the bucket size are trimmed.
+Smaller extents are ignored.
+
+By default, the percentage threshold is 99%.
+.TP
.BI iwarn
Treat informational messages as warnings.
This will result in a nonzero return code, and a higher logging level.
@@ -11,6 +11,7 @@
#include "list.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
+#include "libfrog/histogram.h"
#include "xfs_scrub.h"
#include "common.h"
#include "progress.h"
@@ -57,10 +58,12 @@ static int
fstrim_fsblocks(
struct scrub_ctx *ctx,
uint64_t start_fsb,
- uint64_t fsbcount)
+ uint64_t fsbcount,
+ uint64_t minlen_fsb)
{
uint64_t start = cvt_off_fsb_to_b(&ctx->mnt, start_fsb);
uint64_t len = cvt_off_fsb_to_b(&ctx->mnt, fsbcount);
+ uint64_t minlen = cvt_off_fsb_to_b(&ctx->mnt, minlen_fsb);
int error;
while (len > 0) {
@@ -68,7 +71,7 @@ fstrim_fsblocks(
run = min(len, FSTRIM_MAX_BYTES);
- error = fstrim(ctx, start, run);
+ error = fstrim(ctx, start, run, minlen);
if (error == EOPNOTSUPP) {
/* Pretend we finished all the work. */
progress_add(len);
@@ -78,9 +81,10 @@ fstrim_fsblocks(
char descr[DESCR_BUFSZ];
snprintf(descr, sizeof(descr) - 1,
- _("fstrim start 0x%llx run 0x%llx"),
+ _("fstrim start 0x%llx run 0x%llx minlen 0x%llx"),
(unsigned long long)start,
- (unsigned long long)run);
+ (unsigned long long)run,
+ (unsigned long long)minlen);
str_liberror(ctx, error, descr);
return error;
}
@@ -93,6 +97,80 @@ fstrim_fsblocks(
return 0;
}
+/*
+ * Walk the decreasing-order CDF of the histogram and pick the largest minlen
+ * that still lets us discard at least blk_threshold free blocks: the low
+ * value of the bucket just before the first bucket whose cumulative sum
+ * falls below blk_threshold.  Returns 0, which means no minlen threshold
+ * for discard, if there are too few samples, if the CDF cannot be computed,
+ * or if no bucket's cumulative sum falls below the threshold.
+ */
+static uint64_t
+minlen_for_threshold(
+	const struct histogram	*hs,
+	uint64_t		blk_threshold)
+{
+	struct histogram_cdf	*cdf;
+	uint64_t		minlen = 0;
+	unsigned int		bucket = 1;
+
+	/* Insufficient samples to make a meaningful histogram */
+	if (hs->tot_obs < hs->nr_buckets * 10)
+		return 0;
+	cdf = hist_cdf(hs);
+	if (!cdf)
+		return 0;
+
+	/* Skip every bucket whose cumulative sum still meets the threshold. */
+	while (bucket < hs->nr_buckets &&
+	       cdf->buckets[bucket].sum >= blk_threshold)
+		bucket++;
+	if (bucket < hs->nr_buckets)
+		minlen = hs->buckets[bucket - 1].low;
+	histcdf_free(cdf);
+	return minlen;
+}
+
+/* Compute a suitable minlen (in fsblocks) for fstrim; 0 means no minimum. */
+static uint64_t
+fstrim_compute_minlen(
+	const struct scrub_ctx		*ctx,
+	const struct histogram		*freesp_hist)
+{
+	uint64_t			ret;
+	double				blk_threshold;
+	unsigned int			ag_max_usable;
+
+	/*
+	 * The kernel will reject a minlen that's larger than m_ag_max_usable.
+	 * We can't calculate or query that value directly, so we guesstimate
+	 * that it's 95% of the AG size.  Widen to 64 bits before multiplying
+	 * so the product cannot wrap if agblocks is a 32-bit quantity.
+	 */
+	ag_max_usable = (uint64_t)ctx->mnt.fsgeom.agblocks * 95 / 100;
+
+	if (debug > 1) {
+		struct histogram_strings hstr = {
+			.sum		= _("free space blocks"),
+			.observations	= _("free space extents"),
+		};
+		hist_print(freesp_hist, &hstr);
+	}
+
+	/* Number of free blocks that the trim is expected to cover. */
+	blk_threshold = freesp_hist->tot_sum * ctx->fstrim_block_pct;
+	ret = minlen_for_threshold(freesp_hist, blk_threshold);
+	if (debug > 1)
+		printf(_("fstrim minlen %llu threshold %llu ag_max_usable %u\n"),
+				(unsigned long long)ret,
+				(unsigned long long)blk_threshold,
+				ag_max_usable);
+	if (ret > ag_max_usable)
+		ret = ag_max_usable;
+	if (ret == 1)
+		ret = 0;	/* a one-block minimum would filter nothing */
+	return ret;
+}
+
/* Trim each AG on the data device. */
static int
fstrim_datadev(
@@ -100,8 +178,11 @@ fstrim_datadev(
{
struct xfs_fsop_geom *geo = &ctx->mnt.fsgeom;
uint64_t fsbno;
+ uint64_t minlen_fsb;
int error;
+ minlen_fsb = fstrim_compute_minlen(ctx, &ctx->datadev_hist);
+
for (fsbno = 0; fsbno < geo->datablocks; fsbno += geo->agblocks) {
uint64_t fsbcount;
@@ -112,7 +193,7 @@ fstrim_datadev(
*/
progress_add(geo->blocksize);
fsbcount = min(geo->datablocks - fsbno, geo->agblocks);
- error = fstrim_fsblocks(ctx, fsbno, fsbcount);
+ error = fstrim_fsblocks(ctx, fsbno, fsbcount, minlen_fsb);
if (error)
return error;
}
@@ -300,11 +300,13 @@ int
fstrim(
struct scrub_ctx *ctx,
uint64_t start,
- uint64_t len)
+ uint64_t len,
+ uint64_t minlen)
{
struct fstrim_range range = {
.start = start,
.len = len,
+ .minlen = minlen,
};
if (ioctl(ctx->mnt.fd, FITRIM, &range) == 0)
@@ -24,6 +24,6 @@ typedef int (*scan_fs_tree_dirent_fn)(struct scrub_ctx *, const char *,
int scan_fs_tree(struct scrub_ctx *ctx, scan_fs_tree_dir_fn dir_fn,
scan_fs_tree_dirent_fn dirent_fn, void *arg);
-int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len);
+int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len, uint64_t minlen);
#endif /* XFS_SCRUB_VFS_H_ */
@@ -622,11 +622,13 @@ report_outcome(
*/
enum o_opt_nums {
IWARN = 0,
+ FSTRIM_PCT,
O_MAX_OPTS,
};
static char *o_opts[] = {
[IWARN] = "iwarn",
+ [FSTRIM_PCT] = "fstrim_pct",
[O_MAX_OPTS] = NULL,
};
@@ -635,8 +637,11 @@ parse_o_opts(
struct scrub_ctx *ctx,
char *p)
{
+ double dval;
+
while (*p != '\0') {
char *val;
+ char *endp;
switch (getsubopt(&p, o_opts, &val)) {
case IWARN:
@@ -647,6 +652,35 @@ parse_o_opts(
}
info_is_warning = true;
break;
+ case FSTRIM_PCT:
+ if (!val) {
+ fprintf(stderr,
+ _("-o fstrim_pct requires a parameter\n"));
+ usage();
+ }
+
+ errno = 0;
+ dval = strtod(val, &endp);
+
+ if (*endp) {
+ fprintf(stderr,
+ _("-o fstrim_pct must be a floating point number\n"));
+ usage();
+ }
+ if (errno) {
+ fprintf(stderr,
+ _("-o fstrim_pct: %s\n"),
+ strerror(errno));
+ usage();
+ }
+ if (dval <= 0 || dval > 100) {
+ fprintf(stderr,
+ _("-o fstrim_pct must be larger than 0 and less than 100\n"));
+ usage();
+ }
+
+ ctx->fstrim_block_pct = dval / 100.0;
+ break;
default:
usage();
break;
@@ -659,7 +693,9 @@ main(
int argc,
char **argv)
{
- struct scrub_ctx ctx = {0};
+ struct scrub_ctx ctx = {
+ .fstrim_block_pct = FSTRIM_BLOCK_PCT_DEFAULT,
+ };
struct phase_rusage all_pi;
char *mtab = NULL;
FILE *progress_fp = NULL;
@@ -90,8 +90,20 @@ struct scrub_ctx {
/* Free space histograms, in fsb */
struct histogram datadev_hist;
+
+ /*
+ * Pick the largest value for fstrim minlen such that we trim at least
+ * this much space per volume.
+ */
+ double fstrim_block_pct;
};
+/*
+ * Trim only enough free space extents (in order of decreasing length) to
+ * ensure that this fraction of the free space (0.99 == 99%) is trimmed.
+ */
+#define FSTRIM_BLOCK_PCT_DEFAULT (99.0 / 100.0)
+
/* Phase helper functions */
void xfs_shutdown_fs(struct scrub_ctx *ctx);
int scrub_cleanup(struct scrub_ctx *ctx);