diff mbox series

[2/2] btrfs-progs: defrag: open files RO on new enough kernels or if root

Message ID 20180903101426.14968-2-kilobyte@angband.pl (mailing list archive)
State New, archived
Headers show
Series [1/2] btrfs-progs: fix kernel version parsing on some versions past 3.0 | expand

Commit Message

Adam Borowski Sept. 3, 2018, 10:14 a.m. UTC
Fixes ETXTBSY races.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
---
 cmds-filesystem.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

Nikolay Borisov Sept. 3, 2018, 11:01 a.m. UTC | #1
On  3.09.2018 13:14, Adam Borowski wrote:
> Fixes ETXTBSY races.

You have to be more eloquent than that and explain at least one race
condition.


> 
> Signed-off-by: Adam Borowski <kilobyte@angband.pl>
> ---
>  cmds-filesystem.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/cmds-filesystem.c b/cmds-filesystem.c
> index 06c8311b..4c9df69f 100644
> --- a/cmds-filesystem.c
> +++ b/cmds-filesystem.c
> @@ -26,6 +26,7 @@
>  #include <ftw.h>
>  #include <mntent.h>
>  #include <linux/limits.h>
> +#include <linux/version.h>
>  #include <getopt.h>
>  
>  #include <btrfsutil.h>
> @@ -39,12 +40,14 @@
>  #include "list_sort.h"
>  #include "disk-io.h"
>  #include "help.h"
> +#include "fsfeatures.h"
>  
>  /*
>   * for btrfs fi show, we maintain a hash of fsids we've already printed.
>   * This way we don't print dups if a given FS is mounted more than once.
>   */
>  static struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
> +static mode_t defrag_ro = O_RDONLY;

This brings no value whatsoever; just use O_RDONLY directly.

>  
>  static const char * const filesystem_cmd_group_usage[] = {
>  	"btrfs filesystem [<group>] <command> [<args>]",
> @@ -877,7 +880,7 @@ static int defrag_callback(const char *fpath, const struct stat *sb,
>  	if ((typeflag == FTW_F) && S_ISREG(sb->st_mode)) {
>  		if (defrag_global_verbose)
>  			printf("%s\n", fpath);
> -		fd = open(fpath, O_RDWR);
> +		fd = open(fpath, defrag_ro);
>  		if (fd < 0) {
>  			goto error;
>  		}
> @@ -914,6 +917,9 @@ static int cmd_filesystem_defrag(int argc, char **argv)
>  	int compress_type = BTRFS_COMPRESS_NONE;
>  	DIR *dirstream;
>  
> +	if (get_running_kernel_version() < KERNEL_VERSION(4,19,0) && getuid())
> +		defrag_ro = O_RDWR;
> +
>  	/*
>  	 * Kernel has a different default (256K) that is supposed to be safe,
>  	 * but it does not defragment very well. The 32M will likely lead to
> @@ -1014,7 +1020,7 @@ static int cmd_filesystem_defrag(int argc, char **argv)
>  		int defrag_err = 0;
>  
>  		dirstream = NULL;
> -		fd = open_file_or_dir(argv[i], &dirstream);
> +		fd = open_file_or_dir3(argv[i], &dirstream, defrag_ro);
>  		if (fd < 0) {
>  			error("cannot open %s: %m", argv[i]);
>  			ret = -errno;
>
Nikolay Borisov Sept. 3, 2018, 11:04 a.m. UTC | #2
On  3.09.2018 13:14, Adam Borowski wrote:
> Fixes ETXTBSY races.
> 
> Signed-off-by: Adam Borowski <kilobyte@angband.pl>
> ---
>  cmds-filesystem.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/cmds-filesystem.c b/cmds-filesystem.c
> index 06c8311b..4c9df69f 100644
> --- a/cmds-filesystem.c
> +++ b/cmds-filesystem.c
> @@ -26,6 +26,7 @@
>  #include <ftw.h>
>  #include <mntent.h>
>  #include <linux/limits.h>
> +#include <linux/version.h>
>  #include <getopt.h>
>  
>  #include <btrfsutil.h>
> @@ -39,12 +40,14 @@
>  #include "list_sort.h"
>  #include "disk-io.h"
>  #include "help.h"
> +#include "fsfeatures.h"
>  
>  /*
>   * for btrfs fi show, we maintain a hash of fsids we've already printed.
>   * This way we don't print dups if a given FS is mounted more than once.
>   */
>  static struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
> +static mode_t defrag_ro = O_RDONLY;
>  
>  static const char * const filesystem_cmd_group_usage[] = {
>  	"btrfs filesystem [<group>] <command> [<args>]",
> @@ -877,7 +880,7 @@ static int defrag_callback(const char *fpath, const struct stat *sb,
>  	if ((typeflag == FTW_F) && S_ISREG(sb->st_mode)) {
>  		if (defrag_global_verbose)
>  			printf("%s\n", fpath);
> -		fd = open(fpath, O_RDWR);
> +		fd = open(fpath, defrag_ro);

Looking at the kernel code I think this is in fact incorrect, because in
ioctl.c we have:

                if (!(file->f_mode & FMODE_WRITE)) {
                        ret = -EINVAL;
                        goto out;
                }

So it seems to be a hard requirement that the file be opened RW when you
want to defragment it.

>  		if (fd < 0) {
>  			goto error;
>  		}
> @@ -914,6 +917,9 @@ static int cmd_filesystem_defrag(int argc, char **argv)
>  	int compress_type = BTRFS_COMPRESS_NONE;
>  	DIR *dirstream;
>  
> +	if (get_running_kernel_version() < KERNEL_VERSION(4,19,0) && getuid())
> +		defrag_ro = O_RDWR;
> +
>  	/*
>  	 * Kernel has a different default (256K) that is supposed to be safe,
>  	 * but it does not defragment very well. The 32M will likely lead to
> @@ -1014,7 +1020,7 @@ static int cmd_filesystem_defrag(int argc, char **argv)
>  		int defrag_err = 0;
>  
>  		dirstream = NULL;
> -		fd = open_file_or_dir(argv[i], &dirstream);
> +		fd = open_file_or_dir3(argv[i], &dirstream, defrag_ro);
>  		if (fd < 0) {
>  			error("cannot open %s: %m", argv[i]);
>  			ret = -errno;
>
Adam Borowski Sept. 3, 2018, 11:12 a.m. UTC | #3
On Mon, Sep 03, 2018 at 02:01:21PM +0300, Nikolay Borisov wrote:
> On  3.09.2018 13:14, Adam Borowski wrote:
> > Fixes ETXTBSY races.
> 
> You have to be more eloquent than that and explain at least one race
> condition.

If you try to defrag an executable that's currently running:

ERROR: cannot open XXX: Text file busy
total 1 failures

If you try to run an executable that's being defragged:

-bash: XXX: Text file busy

The former tends to be a long-lasting condition but has only benign fallout
(executables almost never get fragmented, and not recompressing a single file
is not the end of the world); the latter is only a brief window of time but
has potential for data loss.

> > +static mode_t defrag_ro = O_RDONLY;
> 
> This brings no value whatsoever, just use O_RDONLY directly

On old kernels it gets overwritten with:

> > +	if (get_running_kernel_version() < KERNEL_VERSION(4,19,0) && getuid())
> > +		defrag_ro = O_RDWR;


Meow!
Adam Borowski Sept. 3, 2018, 11:28 a.m. UTC | #4
On Mon, Sep 03, 2018 at 02:04:23PM +0300, Nikolay Borisov wrote:
> On  3.09.2018 13:14, Adam Borowski wrote:
> > -		fd = open(fpath, O_RDWR);
> > +		fd = open(fpath, defrag_ro);
> 
> Looking at the kernel code I think this is in fact incorrect, because in
> ioctl.c we have:
> 
>                 if (!(file->f_mode & FMODE_WRITE)) {
> 
>                         ret = -EINVAL;
> 
>                         goto out;
> 
>                 }
> 
> So it seems a hard requirement to have opened a file for RW when you
> want to defragment it.

Oif!  I confused this with dedup, which does allow root to dedup RO even on
old kernels.  Good catch.
diff mbox series

Patch

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 06c8311b..4c9df69f 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -26,6 +26,7 @@ 
 #include <ftw.h>
 #include <mntent.h>
 #include <linux/limits.h>
+#include <linux/version.h>
 #include <getopt.h>
 
 #include <btrfsutil.h>
@@ -39,12 +40,14 @@ 
 #include "list_sort.h"
 #include "disk-io.h"
 #include "help.h"
+#include "fsfeatures.h"
 
 /*
  * for btrfs fi show, we maintain a hash of fsids we've already printed.
  * This way we don't print dups if a given FS is mounted more than once.
  */
 static struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
+static mode_t defrag_ro = O_RDONLY;
 
 static const char * const filesystem_cmd_group_usage[] = {
 	"btrfs filesystem [<group>] <command> [<args>]",
@@ -877,7 +880,7 @@  static int defrag_callback(const char *fpath, const struct stat *sb,
 	if ((typeflag == FTW_F) && S_ISREG(sb->st_mode)) {
 		if (defrag_global_verbose)
 			printf("%s\n", fpath);
-		fd = open(fpath, O_RDWR);
+		fd = open(fpath, defrag_ro);
 		if (fd < 0) {
 			goto error;
 		}
@@ -914,6 +917,9 @@  static int cmd_filesystem_defrag(int argc, char **argv)
 	int compress_type = BTRFS_COMPRESS_NONE;
 	DIR *dirstream;
 
+	if (get_running_kernel_version() < KERNEL_VERSION(4,19,0) && getuid())
+		defrag_ro = O_RDWR;
+
 	/*
 	 * Kernel has a different default (256K) that is supposed to be safe,
 	 * but it does not defragment very well. The 32M will likely lead to
@@ -1014,7 +1020,7 @@  static int cmd_filesystem_defrag(int argc, char **argv)
 		int defrag_err = 0;
 
 		dirstream = NULL;
-		fd = open_file_or_dir(argv[i], &dirstream);
+		fd = open_file_or_dir3(argv[i], &dirstream, defrag_ro);
 		if (fd < 0) {
 			error("cannot open %s: %m", argv[i]);
 			ret = -errno;