Message ID | 20240709191028.2329-2-wen.gang.wang@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | introduce defrag to xfs_spaceman | expand |
On Tue, Jul 09, 2024 at 12:10:20PM -0700, Wengang Wang wrote: > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > Non-exclusive defragment > Here we are introducing the non-exclusive manner to defragment a file, > especially for huge files, without blocking IO to it long. > Non-exclusive defragmentation divides the whole file into small segments. > For each segment, we lock the file, defragment the segment and unlock the file. > Defragmenting the small segment doesn’t take long. File IO requests can get > served between defragmenting segments before blocked long. Also we put > (user adjustable) idle time between defragmenting two consecutive segments to > balance the defragmentation and file IOs. > > The first patch in the set checks for valid target files > > Valid target files to defrag must: > 1. be accessible for read/write > 2. be regular files > 3. be in XFS filesystem > 4. the containing XFS has reflink enabled. This is not checked > before starting defragmentation, but error would be reported > later. > > Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> > --- > spaceman/Makefile | 2 +- > spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ > spaceman/init.c | 1 + > spaceman/space.h | 1 + > 4 files changed, 201 insertions(+), 1 deletion(-) > create mode 100644 spaceman/defrag.c > > diff --git a/spaceman/Makefile b/spaceman/Makefile > index 1f048d54..9c00b20a 100644 > --- a/spaceman/Makefile > +++ b/spaceman/Makefile > @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs > > LTCOMMAND = xfs_spaceman > HFILES = init.h space.h > -CFILES = info.c init.c file.c health.c prealloc.c trim.c > +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c > LSRCFILES = xfs_info.sh > > LLDLIBS = $(LIBXCMD) $(LIBFROG) > diff --git a/spaceman/defrag.c b/spaceman/defrag.c > new file mode 100644 > index 00000000..c9732984 > --- /dev/null > +++ b/spaceman/defrag.c > @@ -0,0 +1,198 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (c) 2024 Oracle. > + * All Rights Reserved. > + */ > + > +#include "libxfs.h" > +#include <linux/fiemap.h> > +#include <linux/fsmap.h> > +#include "libfrog/fsgeom.h" > +#include "command.h" > +#include "init.h" > +#include "libfrog/paths.h" > +#include "space.h" > +#include "input.h" > + > +/* defrag segment size limit in units of 512 bytes */ > +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ > +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ > +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; > + > +/* size of the defrag target file */ > +static off_t g_defrag_file_size = 0; > + > +/* stats for the target file extents before defrag */ > +struct ext_stats { > + long nr_ext_total; > + long nr_ext_unwritten; > + long nr_ext_shared; > +}; > +static struct ext_stats g_ext_stats; > + > +/* > + * check if the target is a valid file to defrag > + * also store file size > + * returns: > + * true for yes and false for no > + */ > +static bool > +defrag_check_file(char *path) > +{ > + struct statfs statfs_s; > + struct stat stat_s; > + > + if (access(path, F_OK|W_OK) == -1) { > + if (errno == ENOENT) > + fprintf(stderr, "file \"%s\" doesn't exist\n", path); > + else > + fprintf(stderr, "no access to \"%s\", %s\n", path, > + strerror(errno)); > + return false; > + } > + > + if (stat(path, &stat_s) == -1) { > + fprintf(stderr, "failed to get file info on \"%s\": %s\n", > + path, strerror(errno)); > + return false; > + } > + > + g_defrag_file_size = stat_s.st_size; > + > + if (!S_ISREG(stat_s.st_mode)) { > + fprintf(stderr, "\"%s\" is not a regular file\n", path); > + return false; > + } > + > + if (statfs(path, &statfs_s) == -1) { statfs is deprecated, please use fstatvfs. > + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", > + path, strerror(errno)); > + return false; > + } > + > + if (statfs_s.f_type != XFS_SUPER_MAGIC) { > + fprintf(stderr, "\"%s\" is not a xfs file\n", path); > + return false; > + } > + > + return true; > +} > + > +/* > + * defragment a file > + * return 0 if successfully done, 1 otherwise > + */ > +static int > +defrag_xfs_defrag(char *file_path) { defrag_xfs_path() ? > + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; > + long nr_seg_defrag = 0, nr_ext_defrag = 0; > + int scratch_fd = -1, defrag_fd = -1; > + char tmp_file_path[PATH_MAX+1]; > + char *defrag_dir; > + struct fsxattr fsx; > + int ret = 0; > + > + fsx.fsx_nextents = 0; > + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); > + > + if (!defrag_check_file(file_path)) { > + ret = 1; > + goto out; > + } > + > + defrag_fd = open(file_path, O_RDWR); > + if (defrag_fd == -1) { Not sure why you check the path before opening it -- all those file and statvfs attributes that you collect there can change (or the entire fs gets unmounted) until you've pinned the fs by opening the file. > + fprintf(stderr, "Opening %s failed. %s\n", file_path, > + strerror(errno)); > + ret = 1; > + goto out; > + } > + > + defrag_dir = dirname(file_path); > + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, > + getpid()); > + tmp_file_path[PATH_MAX] = 0; > + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); O_TMPFILE? Then you don't have to do this .xfsdefrag_XXX stuff. > + if (scratch_fd == -1) { > + fprintf(stderr, "Opening temporary file %s failed. %s\n", > + tmp_file_path, strerror(errno)); > + ret = 1; > + goto out; > + } > +out: > + if (scratch_fd != -1) { > + close(scratch_fd); > + unlink(tmp_file_path); > + } > + if (defrag_fd != -1) { > + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); > + close(defrag_fd); > + } > + > + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," > + "%ld are \"shared\"\n", > + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, > + g_ext_stats.nr_ext_shared); > + printf("Tried to defragment %ld extents in %ld segments\n", > + nr_ext_defrag, nr_seg_defrag); > + printf("Time stats(ms): max clone: %d, max unshare: %d," > + " max punch_hole: %d\n", > + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); > + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); > + return ret; > +} > + > + > +static void defrag_help(void) > +{ > + printf(_( > +"\n" > +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" "Defragment" > +"can be served durning the defragmentations.\n" > +"\n" > +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" > +" default is 16\n")); > +} > + > +static cmdinfo_t defrag_cmd; > + > +static int > +defrag_f(int argc, char **argv) > +{ > + int i; > + int c; > + > + while ((c = getopt(argc, argv, "s:")) != EOF) { > + switch(c) { > + case 's': > + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; > + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { > + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; > + printf("Using minimium segment size %d\n", > + g_segment_size_lmt); > + } > + break; > + default: > + command_usage(&defrag_cmd); > + return 1; > + } > + } > + > + for (i = 0; i < filecount; i++) > + defrag_xfs_defrag(filetable[i].name); Pass in the whole filetable[i] and then you've already got an open fd and some validation that it's an xfs filesystem. > + return 0; > +} > +void defrag_init(void) > +{ > + defrag_cmd.name = "defrag"; > + defrag_cmd.altname = "dfg"; > + defrag_cmd.cfunc = defrag_f; > + defrag_cmd.argmin = 0; > + defrag_cmd.argmax = 4; > + defrag_cmd.args = "[-s segment_size]"; > + defrag_cmd.flags = CMD_FLAG_ONESHOT; IIRC if you don't set CMD_FLAG_FOREIGN_OK then the command processor won't let this command get run against a non-xfs file. --D > + defrag_cmd.oneline = _("Defragment XFS files"); > + defrag_cmd.help = defrag_help; > + > + add_command(&defrag_cmd); > +} > diff --git a/spaceman/init.c b/spaceman/init.c > index cf1ff3cb..396f965c 100644 > --- a/spaceman/init.c > +++ b/spaceman/init.c > @@ -35,6 +35,7 @@ init_commands(void) > trim_init(); > freesp_init(); > health_init(); > + defrag_init(); > } > > static int > diff --git a/spaceman/space.h b/spaceman/space.h > index 723209ed..c288aeb9 100644 > --- a/spaceman/space.h > +++ b/spaceman/space.h > @@ -26,6 +26,7 @@ extern void help_init(void); > extern void prealloc_init(void); > extern void quit_init(void); > extern void trim_init(void); > +extern void defrag_init(void); > #ifdef HAVE_GETFSMAP > extern void freesp_init(void); > #else > -- > 2.39.3 (Apple Git-146) > >
Hi Darrick, Thanks for review, pls check in lines. > On Jul 9, 2024, at 2:18 PM, Darrick J. Wong <djwong@kernel.org> wrote: > > On Tue, Jul 09, 2024 at 12:10:20PM -0700, Wengang Wang wrote: >> Content-Type: text/plain; charset=UTF-8 >> Content-Transfer-Encoding: 8bit >> >> Non-exclusive defragment >> Here we are introducing the non-exclusive manner to defragment a file, >> especially for huge files, without blocking IO to it long. >> Non-exclusive defragmentation divides the whole file into small segments. >> For each segment, we lock the file, defragment the segment and unlock the file. >> Defragmenting the small segment doesn’t take long. File IO requests can get >> served between defragmenting segments before blocked long. Also we put >> (user adjustable) idle time between defragmenting two consecutive segments to >> balance the defragmentation and file IOs. >> >> The first patch in the set checks for valid target files >> >> Valid target files to defrag must: >> 1. be accessible for read/write >> 2. be regular files >> 3. be in XFS filesystem >> 4. the containing XFS has reflink enabled. This is not checked >> before starting defragmentation, but error would be reported >> later. >> >> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> >> --- >> spaceman/Makefile | 2 +- >> spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ >> spaceman/init.c | 1 + >> spaceman/space.h | 1 + >> 4 files changed, 201 insertions(+), 1 deletion(-) >> create mode 100644 spaceman/defrag.c >> >> diff --git a/spaceman/Makefile b/spaceman/Makefile >> index 1f048d54..9c00b20a 100644 >> --- a/spaceman/Makefile >> +++ b/spaceman/Makefile >> @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs >> >> LTCOMMAND = xfs_spaceman >> HFILES = init.h space.h >> -CFILES = info.c init.c file.c health.c prealloc.c trim.c >> +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c >> LSRCFILES = xfs_info.sh >> >> LLDLIBS = $(LIBXCMD) $(LIBFROG) >> diff --git a/spaceman/defrag.c b/spaceman/defrag.c >> new file mode 100644 >> index 00000000..c9732984 >> --- /dev/null >> +++ b/spaceman/defrag.c >> @@ -0,0 +1,198 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* >> + * Copyright (c) 2024 Oracle. >> + * All Rights Reserved. >> + */ >> + >> +#include "libxfs.h" >> +#include <linux/fiemap.h> >> +#include <linux/fsmap.h> >> +#include "libfrog/fsgeom.h" >> +#include "command.h" >> +#include "init.h" >> +#include "libfrog/paths.h" >> +#include "space.h" >> +#include "input.h" >> + >> +/* defrag segment size limit in units of 512 bytes */ >> +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ >> +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ >> +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; >> + >> +/* size of the defrag target file */ >> +static off_t g_defrag_file_size = 0; >> + >> +/* stats for the target file extents before defrag */ >> +struct ext_stats { >> + long nr_ext_total; >> + long nr_ext_unwritten; >> + long nr_ext_shared; >> +}; >> +static struct ext_stats g_ext_stats; >> + >> +/* >> + * check if the target is a valid file to defrag >> + * also store file size >> + * returns: >> + * true for yes and false for no >> + */ >> +static bool >> +defrag_check_file(char *path) >> +{ >> + struct statfs statfs_s; >> + struct stat stat_s; >> + >> + if (access(path, F_OK|W_OK) == -1) { >> + if (errno == ENOENT) >> + fprintf(stderr, "file \"%s\" doesn't exist\n", path); >> + else >> + fprintf(stderr, "no access to \"%s\", %s\n", path, >> + strerror(errno)); >> + return false; >> + } >> + >> + if (stat(path, &stat_s) == -1) { >> + fprintf(stderr, "failed to get file info on \"%s\": %s\n", >> + path, strerror(errno)); >> + return false; >> + } >> + >> + g_defrag_file_size = stat_s.st_size; >> + >> + if (!S_ISREG(stat_s.st_mode)) { >> + fprintf(stderr, "\"%s\" is not a regular file\n", path); >> + return false; >> + } >> + >> + if (statfs(path, &statfs_s) == -1) { > > statfs is deprecated, please use fstatvfs. OK, will move to fstatvfs. > >> + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", >> + path, strerror(errno)); >> + return false; >> + } >> + >> + if (statfs_s.f_type != XFS_SUPER_MAGIC) { >> + fprintf(stderr, "\"%s\" is not a xfs file\n", path); >> + return false; >> + } >> + >> + return true; >> +} >> + >> +/* >> + * defragment a file >> + * return 0 if successfully done, 1 otherwise >> + */ >> +static int >> +defrag_xfs_defrag(char *file_path) { > > defrag_xfs_path() ? OK. > >> + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; >> + long nr_seg_defrag = 0, nr_ext_defrag = 0; >> + int scratch_fd = -1, defrag_fd = -1; >> + char tmp_file_path[PATH_MAX+1]; >> + char *defrag_dir; >> + struct fsxattr fsx; >> + int ret = 0; >> + >> + fsx.fsx_nextents = 0; >> + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); >> + >> + if (!defrag_check_file(file_path)) { >> + ret = 1; >> + goto out; >> + } >> + >> + defrag_fd = open(file_path, O_RDWR); >> + if (defrag_fd == -1) { > > Not sure why you check the path before opening it -- all those file and > statvfs attributes that you collect there can change (or the entire fs > gets unmounted) until you've pinned the fs by opening the file. The idea comes from internal reviews hoping some explicit reasons why Defrag failed. Those reasons include: 1) if user has permission to access the target file. 2) if the species path exist (when moving to spaceman, spaceman takes care of it) 3) if the specified is a regular file 4) if the target file is a XFS file Thing might change after checking and opening, but that’s very rare case and user is responsible for that change rather than this tool. > >> + fprintf(stderr, "Opening %s failed. %s\n", file_path, >> + strerror(errno)); >> + ret = 1; >> + goto out; >> + } >> + >> + defrag_dir = dirname(file_path); >> + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, >> + getpid()); >> + tmp_file_path[PATH_MAX] = 0; >> + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); > > O_TMPFILE? Then you don't have to do this .xfsdefrag_XXX stuff. > My first first version was using O_TMPFILE. But clone failed somehow (Don’t remember the details). I retried O_TMPFILE, it’s working now. So will move to use O_TMPFILE. >> + if (scratch_fd == -1) { >> + fprintf(stderr, "Opening temporary file %s failed. %s\n", >> + tmp_file_path, strerror(errno)); >> + ret = 1; >> + goto out; >> + } >> +out: >> + if (scratch_fd != -1) { >> + close(scratch_fd); >> + unlink(tmp_file_path); >> + } >> + if (defrag_fd != -1) { >> + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); >> + close(defrag_fd); >> + } >> + >> + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," >> + "%ld are \"shared\"\n", >> + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, >> + g_ext_stats.nr_ext_shared); >> + printf("Tried to defragment %ld extents in %ld segments\n", >> + nr_ext_defrag, nr_seg_defrag); >> + printf("Time stats(ms): max clone: %d, max unshare: %d," >> + " max punch_hole: %d\n", >> + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); >> + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); >> + return ret; >> +} >> + >> + >> +static void defrag_help(void) >> +{ >> + printf(_( >> +"\n" >> +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" > > "Defragment" OK. > >> +"can be served durning the defragmentations.\n" >> +"\n" >> +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" >> +" default is 16\n")); >> +} >> + >> +static cmdinfo_t defrag_cmd; >> + >> +static int >> +defrag_f(int argc, char **argv) >> +{ >> + int i; >> + int c; >> + >> + while ((c = getopt(argc, argv, "s:")) != EOF) { >> + switch(c) { >> + case 's': >> + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; >> + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { >> + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; >> + printf("Using minimium segment size %d\n", >> + g_segment_size_lmt); >> + } >> + break; >> + default: >> + command_usage(&defrag_cmd); >> + return 1; >> + } >> + } >> + >> + for (i = 0; i < filecount; i++) >> + defrag_xfs_defrag(filetable[i].name); > > Pass in the whole filetable[i] and then you've already got an open fd > and some validation that it's an xfs filesystem. Good to know. > >> + return 0; >> +} >> +void defrag_init(void) >> +{ >> + defrag_cmd.name = "defrag"; >> + defrag_cmd.altname = "dfg"; >> + defrag_cmd.cfunc = defrag_f; >> + defrag_cmd.argmin = 0; >> + defrag_cmd.argmax = 4; >> + defrag_cmd.args = "[-s segment_size]"; >> + defrag_cmd.flags = CMD_FLAG_ONESHOT; > > IIRC if you don't set CMD_FLAG_FOREIGN_OK then the command processor > won't let this command get run against a non-xfs file. > OK. Thanks, Winging > --D > >> + defrag_cmd.oneline = _("Defragment XFS files"); >> + defrag_cmd.help = defrag_help; >> + >> + add_command(&defrag_cmd); >> +} >> diff --git a/spaceman/init.c b/spaceman/init.c >> index cf1ff3cb..396f965c 100644 >> --- a/spaceman/init.c >> +++ b/spaceman/init.c >> @@ -35,6 +35,7 @@ init_commands(void) >> trim_init(); >> freesp_init(); >> health_init(); >> + defrag_init(); >> } >> >> static int >> diff --git a/spaceman/space.h b/spaceman/space.h >> index 723209ed..c288aeb9 100644 >> --- a/spaceman/space.h >> +++ b/spaceman/space.h >> @@ -26,6 +26,7 @@ extern void help_init(void); >> extern void prealloc_init(void); >> extern void quit_init(void); >> extern void trim_init(void); >> +extern void defrag_init(void); >> #ifdef HAVE_GETFSMAP >> extern void freesp_init(void); >> #else >> -- >> 2.39.3 (Apple Git-146)
> On Jul 11, 2024, at 2:54 PM, Wengang Wang <wen.gang.wang@oracle.com> wrote: > > Hi Darrick, > Thanks for review, pls check in lines. > >> On Jul 9, 2024, at 2:18 PM, Darrick J. Wong <djwong@kernel.org> wrote: >> >> On Tue, Jul 09, 2024 at 12:10:20PM -0700, Wengang Wang wrote: >>> Content-Type: text/plain; charset=UTF-8 >>> Content-Transfer-Encoding: 8bit >>> >>> Non-exclusive defragment >>> Here we are introducing the non-exclusive manner to defragment a file, >>> especially for huge files, without blocking IO to it long. >>> Non-exclusive defragmentation divides the whole file into small segments. >>> For each segment, we lock the file, defragment the segment and unlock the file. >>> Defragmenting the small segment doesn’t take long. File IO requests can get >>> served between defragmenting segments before blocked long. Also we put >>> (user adjustable) idle time between defragmenting two consecutive segments to >>> balance the defragmentation and file IOs. >>> >>> The first patch in the set checks for valid target files >>> >>> Valid target files to defrag must: >>> 1. be accessible for read/write >>> 2. be regular files >>> 3. be in XFS filesystem >>> 4. the containing XFS has reflink enabled. This is not checked >>> before starting defragmentation, but error would be reported >>> later. >>> >>> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> >>> --- >>> spaceman/Makefile | 2 +- >>> spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ >>> spaceman/init.c | 1 + >>> spaceman/space.h | 1 + >>> 4 files changed, 201 insertions(+), 1 deletion(-) >>> create mode 100644 spaceman/defrag.c >>> >>> diff --git a/spaceman/Makefile b/spaceman/Makefile >>> index 1f048d54..9c00b20a 100644 >>> --- a/spaceman/Makefile >>> +++ b/spaceman/Makefile >>> @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs >>> >>> LTCOMMAND = xfs_spaceman >>> HFILES = init.h space.h >>> -CFILES = info.c init.c file.c health.c prealloc.c trim.c >>> +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c >>> LSRCFILES = xfs_info.sh >>> >>> LLDLIBS = $(LIBXCMD) $(LIBFROG) >>> diff --git a/spaceman/defrag.c b/spaceman/defrag.c >>> new file mode 100644 >>> index 00000000..c9732984 >>> --- /dev/null >>> +++ b/spaceman/defrag.c >>> @@ -0,0 +1,198 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* >>> + * Copyright (c) 2024 Oracle. >>> + * All Rights Reserved. >>> + */ >>> + >>> +#include "libxfs.h" >>> +#include <linux/fiemap.h> >>> +#include <linux/fsmap.h> >>> +#include "libfrog/fsgeom.h" >>> +#include "command.h" >>> +#include "init.h" >>> +#include "libfrog/paths.h" >>> +#include "space.h" >>> +#include "input.h" >>> + >>> +/* defrag segment size limit in units of 512 bytes */ >>> +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ >>> +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ >>> +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; >>> + >>> +/* size of the defrag target file */ >>> +static off_t g_defrag_file_size = 0; >>> + >>> +/* stats for the target file extents before defrag */ >>> +struct ext_stats { >>> + long nr_ext_total; >>> + long nr_ext_unwritten; >>> + long nr_ext_shared; >>> +}; >>> +static struct ext_stats g_ext_stats; >>> + >>> +/* >>> + * check if the target is a valid file to defrag >>> + * also store file size >>> + * returns: >>> + * true for yes and false for no >>> + */ >>> +static bool >>> +defrag_check_file(char *path) >>> +{ >>> + struct statfs statfs_s; >>> + struct stat stat_s; >>> + >>> + if (access(path, F_OK|W_OK) == -1) { >>> + if (errno == ENOENT) >>> + fprintf(stderr, "file \"%s\" doesn't exist\n", path); >>> + else >>> + fprintf(stderr, "no access to \"%s\", %s\n", path, >>> + strerror(errno)); >>> + return false; >>> + } >>> + >>> + if (stat(path, &stat_s) == -1) { >>> + fprintf(stderr, "failed to get file info on \"%s\": %s\n", >>> + path, strerror(errno)); >>> + return false; >>> + } >>> + >>> + g_defrag_file_size = stat_s.st_size; >>> + >>> + if (!S_ISREG(stat_s.st_mode)) { >>> + fprintf(stderr, "\"%s\" is not a regular file\n", path); >>> + return false; >>> + } >>> + >>> + if (statfs(path, &statfs_s) == -1) { >> >> statfs is deprecated, please use fstatvfs. > > OK, will move to fstatvfs. > >> >>> + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", >>> + path, strerror(errno)); >>> + return false; >>> + } >>> + >>> + if (statfs_s.f_type != XFS_SUPER_MAGIC) { >>> + fprintf(stderr, "\"%s\" is not a xfs file\n", path); >>> + return false; >>> + } >>> + >>> + return true; >>> +} >>> + >>> +/* >>> + * defragment a file >>> + * return 0 if successfully done, 1 otherwise >>> + */ >>> +static int >>> +defrag_xfs_defrag(char *file_path) { >> >> defrag_xfs_path() ? > > OK. >> >>> + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; >>> + long nr_seg_defrag = 0, nr_ext_defrag = 0; >>> + int scratch_fd = -1, defrag_fd = -1; >>> + char tmp_file_path[PATH_MAX+1]; >>> + char *defrag_dir; >>> + struct fsxattr fsx; >>> + int ret = 0; >>> + >>> + fsx.fsx_nextents = 0; >>> + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); >>> + >>> + if (!defrag_check_file(file_path)) { >>> + ret = 1; >>> + goto out; >>> + } >>> + >>> + defrag_fd = open(file_path, O_RDWR); >>> + if (defrag_fd == -1) { >> >> Not sure why you check the path before opening it -- all those file and >> statvfs attributes that you collect there can change (or the entire fs >> gets unmounted) until you've pinned the fs by opening the file. > > The idea comes from internal reviews hoping some explicit reasons why > Defrag failed. Those reasons include: > 1) if user has permission to access the target file. > 2) if the species path exist (when moving to spaceman, spaceman takes care of it) > 3) if the specified is a regular file > 4) if the target file is a XFS file > > Thing might change after checking and opening, but that’s very rare case and user is > responsible for that change rather than this tool. > >> >>> + fprintf(stderr, "Opening %s failed. %s\n", file_path, >>> + strerror(errno)); >>> + ret = 1; >>> + goto out; >>> + } >>> + >>> + defrag_dir = dirname(file_path); >>> + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, >>> + getpid()); >>> + tmp_file_path[PATH_MAX] = 0; >>> + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); >> >> O_TMPFILE? Then you don't have to do this .xfsdefrag_XXX stuff. >> > > My first first version was using O_TMPFILE. But clone failed somehow (Don’t remember the details). > I retried O_TMPFILE, it’s working now. So will move to use O_TMPFILE. > >>> + if (scratch_fd == -1) { >>> + fprintf(stderr, "Opening temporary file %s failed. %s\n", >>> + tmp_file_path, strerror(errno)); >>> + ret = 1; >>> + goto out; >>> + } >>> +out: >>> + if (scratch_fd != -1) { >>> + close(scratch_fd); >>> + unlink(tmp_file_path); >>> + } >>> + if (defrag_fd != -1) { >>> + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); >>> + close(defrag_fd); >>> + } >>> + >>> + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," >>> + "%ld are \"shared\"\n", >>> + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, >>> + g_ext_stats.nr_ext_shared); >>> + printf("Tried to defragment %ld extents in %ld segments\n", >>> + nr_ext_defrag, nr_seg_defrag); >>> + printf("Time stats(ms): max clone: %d, max unshare: %d," >>> + " max punch_hole: %d\n", >>> + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); >>> + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); >>> + return ret; >>> +} >>> + >>> + >>> +static void defrag_help(void) >>> +{ >>> + printf(_( >>> +"\n" >>> +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" >> >> "Defragment" > > OK. > >> >>> +"can be served durning the defragmentations.\n" >>> +"\n" >>> +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" >>> +" default is 16\n")); >>> +} >>> + >>> +static cmdinfo_t defrag_cmd; >>> + >>> +static int >>> +defrag_f(int argc, char **argv) >>> +{ >>> + int i; >>> + int c; >>> + >>> + while ((c = getopt(argc, argv, "s:")) != EOF) { >>> + switch(c) { >>> + case 's': >>> + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; >>> + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { >>> + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; >>> + printf("Using minimium segment size %d\n", >>> + g_segment_size_lmt); >>> + } >>> + break; >>> + default: >>> + command_usage(&defrag_cmd); >>> + return 1; >>> + } >>> + } >>> + >>> + for (i = 0; i < filecount; i++) >>> + defrag_xfs_defrag(filetable[i].name); >> >> Pass in the whole filetable[i] and then you've already got an open fd >> and some validation that it's an xfs filesystem. > Filetable[I].xfd.fd doesn’t work well. UNSHARE returns “Bad file descriptor”, I am suspecting that fd is readonly. So I have to write-open again. Thanks, Wengang > Good to know. >> >>> + return 0; >>> +} >>> +void defrag_init(void) >>> +{ >>> + defrag_cmd.name = "defrag"; >>> + defrag_cmd.altname = "dfg"; >>> + defrag_cmd.cfunc = defrag_f; >>> + defrag_cmd.argmin = 0; >>> + defrag_cmd.argmax = 4; >>> + defrag_cmd.args = "[-s segment_size]"; >>> + defrag_cmd.flags = CMD_FLAG_ONESHOT; >> >> IIRC if you don't set CMD_FLAG_FOREIGN_OK then the command processor >> won't let this command get run against a non-xfs file. >> > > OK. > > Thanks, > Winging > >> --D >> >>> + defrag_cmd.oneline = _("Defragment XFS files"); >>> + defrag_cmd.help = defrag_help; >>> + >>> + add_command(&defrag_cmd); >>> +} >>> diff --git a/spaceman/init.c b/spaceman/init.c >>> index cf1ff3cb..396f965c 100644 >>> --- a/spaceman/init.c >>> +++ b/spaceman/init.c >>> @@ -35,6 +35,7 @@ init_commands(void) >>> trim_init(); >>> freesp_init(); >>> health_init(); >>> + defrag_init(); >>> } >>> >>> static int >>> diff --git a/spaceman/space.h b/spaceman/space.h >>> index 723209ed..c288aeb9 100644 >>> --- a/spaceman/space.h >>> +++ b/spaceman/space.h >>> @@ -26,6 +26,7 @@ extern void help_init(void); >>> extern void prealloc_init(void); >>> extern void quit_init(void); >>> extern void trim_init(void); >>> +extern void defrag_init(void); >>> #ifdef HAVE_GETFSMAP >>> extern void freesp_init(void); >>> #else >>> -- >>> 2.39.3 (Apple Git-146)
On Mon, Jul 15, 2024 at 09:30:42PM +0000, Wengang Wang wrote: > > > > On Jul 11, 2024, at 2:54 PM, Wengang Wang <wen.gang.wang@oracle.com> wrote: > > > > Hi Darrick, > > Thanks for review, pls check in lines. > > > >> On Jul 9, 2024, at 2:18 PM, Darrick J. Wong <djwong@kernel.org> wrote: > >> > >> On Tue, Jul 09, 2024 at 12:10:20PM -0700, Wengang Wang wrote: > >>> Content-Type: text/plain; charset=UTF-8 > >>> Content-Transfer-Encoding: 8bit > >>> > >>> Non-exclusive defragment > >>> Here we are introducing the non-exclusive manner to defragment a file, > >>> especially for huge files, without blocking IO to it long. > >>> Non-exclusive defragmentation divides the whole file into small segments. > >>> For each segment, we lock the file, defragment the segment and unlock the file. > >>> Defragmenting the small segment doesn’t take long. File IO requests can get > >>> served between defragmenting segments before blocked long. Also we put > >>> (user adjustable) idle time between defragmenting two consecutive segments to > >>> balance the defragmentation and file IOs. > >>> > >>> The first patch in the set checks for valid target files > >>> > >>> Valid target files to defrag must: > >>> 1. be accessible for read/write > >>> 2. be regular files > >>> 3. be in XFS filesystem > >>> 4. the containing XFS has reflink enabled. This is not checked > >>> before starting defragmentation, but error would be reported > >>> later. > >>> > >>> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> > >>> --- > >>> spaceman/Makefile | 2 +- > >>> spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ > >>> spaceman/init.c | 1 + > >>> spaceman/space.h | 1 + > >>> 4 files changed, 201 insertions(+), 1 deletion(-) > >>> create mode 100644 spaceman/defrag.c > >>> > >>> diff --git a/spaceman/Makefile b/spaceman/Makefile > >>> index 1f048d54..9c00b20a 100644 > >>> --- a/spaceman/Makefile > >>> +++ b/spaceman/Makefile > >>> @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs > >>> > >>> LTCOMMAND = xfs_spaceman > >>> HFILES = init.h space.h > >>> -CFILES = info.c init.c file.c health.c prealloc.c trim.c > >>> +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c > >>> LSRCFILES = xfs_info.sh > >>> > >>> LLDLIBS = $(LIBXCMD) $(LIBFROG) > >>> diff --git a/spaceman/defrag.c b/spaceman/defrag.c > >>> new file mode 100644 > >>> index 00000000..c9732984 > >>> --- /dev/null > >>> +++ b/spaceman/defrag.c > >>> @@ -0,0 +1,198 @@ > >>> +// SPDX-License-Identifier: GPL-2.0 > >>> +/* > >>> + * Copyright (c) 2024 Oracle. > >>> + * All Rights Reserved. > >>> + */ > >>> + > >>> +#include "libxfs.h" > >>> +#include <linux/fiemap.h> > >>> +#include <linux/fsmap.h> > >>> +#include "libfrog/fsgeom.h" > >>> +#include "command.h" > >>> +#include "init.h" > >>> +#include "libfrog/paths.h" > >>> +#include "space.h" > >>> +#include "input.h" > >>> + > >>> +/* defrag segment size limit in units of 512 bytes */ > >>> +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ > >>> +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ > >>> +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; > >>> + > >>> +/* size of the defrag target file */ > >>> +static off_t g_defrag_file_size = 0; > >>> + > >>> +/* stats for the target file extents before defrag */ > >>> +struct ext_stats { > >>> + long nr_ext_total; > >>> + long nr_ext_unwritten; > >>> + long nr_ext_shared; > >>> +}; > >>> +static struct ext_stats g_ext_stats; > >>> + > >>> +/* > >>> + * check if the target is a valid file to defrag > >>> + * also store file size > >>> + * returns: > >>> + * true for yes and false for no > >>> + */ > >>> +static bool > >>> +defrag_check_file(char *path) > >>> +{ > >>> + struct statfs statfs_s; > >>> + struct stat stat_s; > >>> + > >>> + if (access(path, F_OK|W_OK) == -1) { > >>> + if (errno == ENOENT) > >>> + fprintf(stderr, "file \"%s\" doesn't exist\n", path); > >>> + else > >>> + fprintf(stderr, "no access to \"%s\", %s\n", path, > >>> + strerror(errno)); > >>> + return false; > >>> + } > >>> + > >>> + if (stat(path, &stat_s) == -1) { > >>> + fprintf(stderr, "failed to get file info on \"%s\": %s\n", > >>> + path, strerror(errno)); > >>> + return false; > >>> + } > >>> + > >>> + g_defrag_file_size = stat_s.st_size; > >>> + > >>> + if (!S_ISREG(stat_s.st_mode)) { > >>> + fprintf(stderr, "\"%s\" is not a regular file\n", path); > >>> + return false; > >>> + } > >>> + > >>> + if (statfs(path, &statfs_s) == -1) { > >> > >> statfs is deprecated, please use fstatvfs. > > > > OK, will move to fstatvfs. > > > >> > >>> + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", > >>> + path, strerror(errno)); > >>> + return false; > >>> + } > >>> + > >>> + if (statfs_s.f_type != XFS_SUPER_MAGIC) { > >>> + fprintf(stderr, "\"%s\" is not a xfs file\n", path); > >>> + return false; > >>> + } > >>> + > >>> + return true; > >>> +} > >>> + > >>> +/* > >>> + * defragment a file > >>> + * return 0 if successfully done, 1 otherwise > >>> + */ > >>> +static int > >>> +defrag_xfs_defrag(char *file_path) { > >> > >> defrag_xfs_path() ? > > > > OK. > >> > >>> + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; > >>> + long nr_seg_defrag = 0, nr_ext_defrag = 0; > >>> + int scratch_fd = -1, defrag_fd = -1; > >>> + char tmp_file_path[PATH_MAX+1]; > >>> + char *defrag_dir; > >>> + struct fsxattr fsx; > >>> + int ret = 0; > >>> + > >>> + fsx.fsx_nextents = 0; > >>> + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); > >>> + > >>> + if (!defrag_check_file(file_path)) { > >>> + ret = 1; > >>> + goto out; > >>> + } > >>> + > >>> + defrag_fd = open(file_path, O_RDWR); > >>> + if (defrag_fd == -1) { > >> > >> Not sure why you check the path before opening it -- all those file and > >> statvfs attributes that you collect there can change (or the entire fs > >> gets unmounted) until you've pinned the fs by opening the file. > > > > The idea comes from internal reviews hoping some explicit reasons why > > Defrag failed. Those reasons include: > > 1) if user has permission to access the target file. > > 2) if the species path exist (when moving to spaceman, spaceman takes care of it) > > 3) if the specified is a regular file > > 4) if the target file is a XFS file > > > > Thing might change after checking and opening, but that’s very rare case and user is > > responsible for that change rather than this tool. > > > >> > >>> + fprintf(stderr, "Opening %s failed. %s\n", file_path, > >>> + strerror(errno)); > >>> + ret = 1; > >>> + goto out; > >>> + } > >>> + > >>> + defrag_dir = dirname(file_path); > >>> + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, > >>> + getpid()); > >>> + tmp_file_path[PATH_MAX] = 0; > >>> + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); > >> > >> O_TMPFILE? Then you don't have to do this .xfsdefrag_XXX stuff. > >> > > > > My first first version was using O_TMPFILE. But clone failed somehow (Don’t remember the details). > > I retried O_TMPFILE, it’s working now. So will move to use O_TMPFILE. > > > >>> + if (scratch_fd == -1) { > >>> + fprintf(stderr, "Opening temporary file %s failed. %s\n", > >>> + tmp_file_path, strerror(errno)); > >>> + ret = 1; > >>> + goto out; > >>> + } > >>> +out: > >>> + if (scratch_fd != -1) { > >>> + close(scratch_fd); > >>> + unlink(tmp_file_path); > >>> + } > >>> + if (defrag_fd != -1) { > >>> + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); > >>> + close(defrag_fd); > >>> + } > >>> + > >>> + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," > >>> + "%ld are \"shared\"\n", > >>> + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, > >>> + g_ext_stats.nr_ext_shared); > >>> + printf("Tried to defragment %ld extents in %ld segments\n", > >>> + nr_ext_defrag, nr_seg_defrag); > >>> + printf("Time stats(ms): max clone: %d, max unshare: %d," > >>> + " max punch_hole: %d\n", > >>> + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); > >>> + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); > >>> + return ret; > >>> +} > >>> + > >>> + > >>> +static void defrag_help(void) > >>> +{ > >>> + printf(_( > >>> +"\n" > >>> +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" > >> > >> "Defragment" > > > > OK. > > > >> > >>> +"can be served durning the defragmentations.\n" > >>> +"\n" > >>> +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" > >>> +" default is 16\n")); > >>> +} > >>> + > >>> +static cmdinfo_t defrag_cmd; > >>> + > >>> +static int > >>> +defrag_f(int argc, char **argv) > >>> +{ > >>> + int i; > >>> + int c; > >>> + > >>> + while ((c = getopt(argc, argv, "s:")) != EOF) { > >>> + switch(c) { > >>> + case 's': > >>> + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; > >>> + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { > >>> + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; > >>> + printf("Using minimium segment size %d\n", > >>> + g_segment_size_lmt); > >>> + } > >>> + break; > >>> + default: > >>> + command_usage(&defrag_cmd); > >>> + return 1; > >>> + } > >>> + } > >>> + > >>> + for (i = 0; i < filecount; i++) > >>> + defrag_xfs_defrag(filetable[i].name); > >> > >> Pass in the whole filetable[i] and then you've already got an open fd > >> and some validation that it's an xfs filesystem. > > > > Filetable[I].xfd.fd doesn’t work well. UNSHARE returns “Bad file descriptor”, I am suspecting that fd is readonly. > > So I have to write-open again. Ah, ok. In that case, after you reopen the file, you ought to stat both of them and check that st_dev/st_ino match. (Or just change spaceman to be able to open files O_RDWR?) --D > Thanks, > Wengang > > > Good to know. > >> > >>> + return 0; > >>> +} > >>> +void defrag_init(void) > >>> +{ > >>> + defrag_cmd.name = "defrag"; > >>> + defrag_cmd.altname = "dfg"; > >>> + defrag_cmd.cfunc = defrag_f; > >>> + defrag_cmd.argmin = 0; > >>> + defrag_cmd.argmax = 4; > >>> + defrag_cmd.args = "[-s segment_size]"; > >>> + defrag_cmd.flags = CMD_FLAG_ONESHOT; > >> > >> IIRC if you don't set CMD_FLAG_FOREIGN_OK then the command processor > >> won't let this command get run against a non-xfs file. > >> > > > > OK. > > > > Thanks, > > Winging > > > >> --D > >> > >>> + defrag_cmd.oneline = _("Defragment XFS files"); > >>> + defrag_cmd.help = defrag_help; > >>> + > >>> + add_command(&defrag_cmd); > >>> +} > >>> diff --git a/spaceman/init.c b/spaceman/init.c > >>> index cf1ff3cb..396f965c 100644 > >>> --- a/spaceman/init.c > >>> +++ b/spaceman/init.c > >>> @@ -35,6 +35,7 @@ init_commands(void) > >>> trim_init(); > >>> freesp_init(); > >>> health_init(); > >>> + defrag_init(); > >>> } > >>> > >>> static int > >>> diff --git a/spaceman/space.h b/spaceman/space.h > >>> index 723209ed..c288aeb9 100644 > >>> --- a/spaceman/space.h > >>> +++ b/spaceman/space.h > >>> @@ -26,6 +26,7 @@ extern void help_init(void); > >>> extern void prealloc_init(void); > >>> extern void quit_init(void); > >>> extern void trim_init(void); > >>> +extern void defrag_init(void); > >>> #ifdef HAVE_GETFSMAP > >>> extern void freesp_init(void); > >>> #else > >>> -- > >>> 2.39.3 (Apple Git-146) > >
diff --git a/spaceman/Makefile b/spaceman/Makefile index 1f048d54..9c00b20a 100644 --- a/spaceman/Makefile +++ b/spaceman/Makefile @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs LTCOMMAND = xfs_spaceman HFILES = init.h space.h -CFILES = info.c init.c file.c health.c prealloc.c trim.c +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c LSRCFILES = xfs_info.sh LLDLIBS = $(LIBXCMD) $(LIBFROG) diff --git a/spaceman/defrag.c b/spaceman/defrag.c new file mode 100644 index 00000000..c9732984 --- /dev/null +++ b/spaceman/defrag.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Oracle. + * All Rights Reserved. + */ + +#include "libxfs.h" +#include <linux/fiemap.h> +#include <linux/fsmap.h> +#include "libfrog/fsgeom.h" +#include "command.h" +#include "init.h" +#include "libfrog/paths.h" +#include "space.h" +#include "input.h" + +/* defrag segment size limit in units of 512 bytes */ +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; + +/* size of the defrag target file */ +static off_t g_defrag_file_size = 0; + +/* stats for the target file extents before defrag */ +struct ext_stats { + long nr_ext_total; + long nr_ext_unwritten; + long nr_ext_shared; +}; +static struct ext_stats g_ext_stats; + +/* + * check if the target is a valid file to defrag + * also store file size + * returns: + * true for yes and false for no + */ +static bool +defrag_check_file(char *path) +{ + struct statfs statfs_s; + struct stat stat_s; + + if (access(path, F_OK|W_OK) == -1) { + if (errno == ENOENT) + fprintf(stderr, "file \"%s\" doesn't exist\n", path); + else + fprintf(stderr, "no access to \"%s\", %s\n", path, + strerror(errno)); + return false; + } + + if (stat(path, &stat_s) == -1) { + fprintf(stderr, "failed to get file info on \"%s\": %s\n", + path, strerror(errno)); + return false; + } + + g_defrag_file_size = stat_s.st_size; + + if (!S_ISREG(stat_s.st_mode)) { + fprintf(stderr, "\"%s\" is not a regular file\n", path); + return false; + } + + if (statfs(path, &statfs_s) == -1) { + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", + path, strerror(errno)); + return false; + } + + if (statfs_s.f_type != XFS_SUPER_MAGIC) { + fprintf(stderr, "\"%s\" is not a xfs file\n", path); + return false; + } + + return true; +} + +/* + * defragment a file + * return 0 if successfully done, 1 otherwise + */ +static int +defrag_xfs_defrag(char *file_path) { + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; + long nr_seg_defrag = 0, nr_ext_defrag = 0; + int scratch_fd = -1, defrag_fd = -1; + char tmp_file_path[PATH_MAX+1]; + char *defrag_dir; + struct fsxattr fsx; + int ret = 0; + + fsx.fsx_nextents = 0; + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); + + if (!defrag_check_file(file_path)) { + ret = 1; + goto out; + } + + defrag_fd = open(file_path, O_RDWR); + if (defrag_fd == -1) { + fprintf(stderr, "Opening %s failed. %s\n", file_path, + strerror(errno)); + ret = 1; + goto out; + } + + defrag_dir = dirname(file_path); + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, + getpid()); + tmp_file_path[PATH_MAX] = 0; + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); + if (scratch_fd == -1) { + fprintf(stderr, "Opening temporary file %s failed. %s\n", + tmp_file_path, strerror(errno)); + ret = 1; + goto out; + } +out: + if (scratch_fd != -1) { + close(scratch_fd); + unlink(tmp_file_path); + } + if (defrag_fd != -1) { + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); + close(defrag_fd); + } + + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," + "%ld are \"shared\"\n", + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, + g_ext_stats.nr_ext_shared); + printf("Tried to defragment %ld extents in %ld segments\n", + nr_ext_defrag, nr_seg_defrag); + printf("Time stats(ms): max clone: %d, max unshare: %d," + " max punch_hole: %d\n", + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); + return ret; +} + + +static void defrag_help(void) +{ + printf(_( +"\n" +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" +"can be served durning the defragmentations.\n" +"\n" +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" +" default is 16\n")); +} + +static cmdinfo_t defrag_cmd; + +static int +defrag_f(int argc, char **argv) +{ + int i; + int c; + + while ((c = getopt(argc, argv, "s:")) != EOF) { + switch(c) { + case 's': + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; + printf("Using minimium segment size %d\n", + g_segment_size_lmt); + } + break; + default: + command_usage(&defrag_cmd); + return 1; + } + } + + for (i = 0; i < filecount; i++) + defrag_xfs_defrag(filetable[i].name); + return 0; +} +void defrag_init(void) +{ + defrag_cmd.name = "defrag"; + defrag_cmd.altname = "dfg"; + defrag_cmd.cfunc = defrag_f; + defrag_cmd.argmin = 0; + defrag_cmd.argmax = 4; + defrag_cmd.args = "[-s segment_size]"; + defrag_cmd.flags = CMD_FLAG_ONESHOT; + defrag_cmd.oneline = _("Defragment XFS files"); + defrag_cmd.help = defrag_help; + + add_command(&defrag_cmd); +} diff --git a/spaceman/init.c b/spaceman/init.c index cf1ff3cb..396f965c 100644 --- a/spaceman/init.c +++ b/spaceman/init.c @@ -35,6 +35,7 @@ init_commands(void) trim_init(); freesp_init(); health_init(); + defrag_init(); } static int diff --git a/spaceman/space.h b/spaceman/space.h index 723209ed..c288aeb9 100644 --- a/spaceman/space.h +++ b/spaceman/space.h @@ -26,6 +26,7 @@ extern void help_init(void); extern void prealloc_init(void); extern void quit_init(void); extern void trim_init(void); +extern void defrag_init(void); #ifdef HAVE_GETFSMAP extern void freesp_init(void); #else
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-exclusive defragment Here we are introducing the non-exclusive manner to defragment a file, especially for huge files, without blocking IO to it long. Non-exclusive defragmentation divides the whole file into small segments. For each segment, we lock the file, defragment the segment and unlock the file. Defragmenting the small segment doesn’t take long. File IO requests can get served between defragmenting segments before blocked long. Also we put (user adjustable) idle time between defragmenting two consecutive segments to balance the defragmentation and file IOs. The first patch in the set checks for valid target files Valid target files to defrag must: 1. be accessible for read/write 2. be regular files 3. be in XFS filesystem 4. the containing XFS has reflink enabled. This is not checked before starting defragmentation, but error would be reported later. Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> --- spaceman/Makefile | 2 +- spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ spaceman/init.c | 1 + spaceman/space.h | 1 + 4 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 spaceman/defrag.c