diff mbox series

[v2,3/4] fsstress: add EXCHANGE renameat2 support

Message ID 8e8cf5e50bc3c26c90d2677d3194d36346ef0c24.1572057903.git.kaixuxia@tencent.com (mailing list archive)
State New, archived
Headers show
Series xfstests: add deadlock between the AGI and AGF with RENAME_WHITEOUT test | expand

Commit Message

Kaixu Xia Oct. 26, 2019, 11:18 a.m. UTC
Support the EXCHANGE renameat2 syscall in fsstress.

In order to maintain filelist/filename integrity, we restrict
rexchange to files of the same type.

Signed-off-by: kaixuxia <kaixuxia@tencent.com>
---
 ltp/fsstress.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 71 insertions(+), 21 deletions(-)

Comments

Brian Foster Oct. 29, 2019, 1:40 p.m. UTC | #1
On Sat, Oct 26, 2019 at 07:18:37PM +0800, kaixuxia wrote:
> Support the EXCHANGE renameat2 syscall in fsstress.
> 
> In order to maintain filelist/filename integrity, we restrict
> rexchange to files of the same type.
> 
> Signed-off-by: kaixuxia <kaixuxia@tencent.com>
> ---

While this looks pretty good to me at this point, I do notice instances
of the following in a quick test:

0/29: rename(EXCHANGE) d3/d9/dc/dd to d3/d9/dc/dd/df 22
...
0/43: rename(EXCHANGE) d3 to d3/d9/dc/d18 22
...

It looks like we're getting an EINVAL error on rexchange of directories.
That same operation seems to work fine via the ./src/renameat2 tool. Any
idea what's going on there?

Brian

>  ltp/fsstress.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 71 insertions(+), 21 deletions(-)
> 
> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> index ecc1adc..83d6337 100644
> --- a/ltp/fsstress.c
> +++ b/ltp/fsstress.c
> @@ -69,6 +69,9 @@ static int renameat2(int dfd1, const char *path1,
>  #ifndef RENAME_NOREPLACE
>  #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
>  #endif
> +#ifndef RENAME_EXCHANGE
> +#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
> +#endif
>  #ifndef RENAME_WHITEOUT
>  #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
>  #endif
> @@ -115,6 +118,7 @@ typedef enum {
>  	OP_REMOVEFATTR,
>  	OP_RENAME,
>  	OP_RNOREPLACE,
> +	OP_REXCHANGE,
>  	OP_RWHITEOUT,
>  	OP_RESVSP,
>  	OP_RMDIR,
> @@ -235,6 +239,7 @@ void	readv_f(int, long);
>  void	removefattr_f(int, long);
>  void	rename_f(int, long);
>  void	rnoreplace_f(int, long);
> +void	rexchange_f(int, long);
>  void	rwhiteout_f(int, long);
>  void	resvsp_f(int, long);
>  void	rmdir_f(int, long);
> @@ -296,6 +301,7 @@ opdesc_t	ops[] = {
>  	{ OP_REMOVEFATTR, "removefattr", removefattr_f, 1, 1 },
>  	{ OP_RENAME, "rename", rename_f, 2, 1 },
>  	{ OP_RNOREPLACE, "rnoreplace", rnoreplace_f, 2, 1 },
> +	{ OP_REXCHANGE, "rexchange", rexchange_f, 2, 1 },
>  	{ OP_RWHITEOUT, "rwhiteout", rwhiteout_f, 2, 1 },
>  	{ OP_RESVSP, "resvsp", resvsp_f, 1, 1 },
>  	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
> @@ -371,7 +377,7 @@ void	del_from_flist(int, int);
>  int	dirid_to_name(char *, int);
>  void	doproc(void);
>  int	fent_to_name(pathname_t *, flist_t *, fent_t *);
> -void	fix_parent(int, int);
> +void	fix_parent(int, int, bool);
>  void	free_pathname(pathname_t *);
>  int	generate_fname(fent_t *, int, pathname_t *, int *, int *);
>  int	generate_xattr_name(int, char *, int);
> @@ -1118,7 +1124,7 @@ fent_to_name(pathname_t *name, flist_t *flp, fent_t *fep)
>  }
>  
>  void
> -fix_parent(int oldid, int newid)
> +fix_parent(int oldid, int newid, bool swap)
>  {
>  	fent_t	*fep;
>  	flist_t	*flp;
> @@ -1129,6 +1135,8 @@ fix_parent(int oldid, int newid)
>  		for (j = 0, fep = flp->fents; j < flp->nfiles; j++, fep++) {
>  			if (fep->parent == oldid)
>  				fep->parent = newid;
> +			else if (swap && fep->parent == newid)
> +				fep->parent = oldid;
>  		}
>  	}
>  }
> @@ -4256,6 +4264,7 @@ out:
>  
>  struct print_flags renameat2_flags [] = {
>  	{ RENAME_NOREPLACE, "NOREPLACE"},
> +	{ RENAME_EXCHANGE, "EXCHANGE"},
>  	{ RENAME_WHITEOUT, "WHITEOUT"},
>  	{ -1, NULL}
>  };
> @@ -4291,41 +4300,76 @@ do_renameat2(int opno, long r, int mode)
>  		return;
>  	}
>  
> -	/* get an existing directory for the destination parent directory name */
> -	if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
> -		parid = -1;
> -	else
> -		parid = dfep->id;
> -	v |= v1;
> +	/*
> +	 * Both pathnames must exist for the RENAME_EXCHANGE, and in
> +	 * order to maintain filelist/filename integrity, we should
> +	 * restrict exchange operation to files of the same type.
> +	 */
> +	if (mode == RENAME_EXCHANGE) {
> +		which = 1 << (flp - flist);
> +		init_pathname(&newf);
> +		if (!get_fname(which, random(), &newf, NULL, &dfep, &v)) {
> +			if (v)
> +				printf("%d/%d: rename - no target filename\n",
> +					procid, opno);
> +			free_pathname(&newf);
> +			free_pathname(&f);
> +			return;
> +		}
> +		v |= v1;
> +		id = dfep->id;
> +		parid = dfep->parent;
> +	} else {
> +		/*
> +		 * Get an existing directory for the destination parent
> +		 * directory name.
> +		 */
> +		if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
> +			parid = -1;
> +		else
> +			parid = dfep->id;
> +		v |= v1;
>  
> -	/* generate a new path using an existing parent directory in name */
> -	init_pathname(&newf);
> -	e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
> -	v |= v1;
> -	if (!e) {
> -		if (v) {
> -			(void)fent_to_name(&f, &flist[FT_DIR], dfep);
> -			printf("%d/%d: rename - no filename from %s\n",
> -				procid, opno, f.path);
> +		/*
> +		 * Generate a new path using an existing parent directory
> +		 * in name.
> +		 */
> +		init_pathname(&newf);
> +		e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
> +		v |= v1;
> +		if (!e) {
> +			if (v) {
> +				(void)fent_to_name(&f, &flist[FT_DIR], dfep);
> +				printf("%d/%d: rename - no filename from %s\n",
> +					procid, opno, f.path);
> +			}
> +			free_pathname(&newf);
> +			free_pathname(&f);
> +			return;
>  		}
> -		free_pathname(&newf);
> -		free_pathname(&f);
> -		return;
>  	}
>  	e = rename_path(&f, &newf, mode) < 0 ? errno : 0;
>  	check_cwd();
>  	if (e == 0) {
>  		int xattr_counter = fep->xattr_counter;
> +		bool swap = (mode == RENAME_EXCHANGE) ? true : false;
>  
>  		oldid = fep->id;
>  		oldparid = fep->parent;
>  
> +		/*
> +		 * Swap the parent ids for RENAME_EXCHANGE, and replace the
> +		 * old parent id for the others.
> +		 */
>  		if (flp - flist == FT_DIR)
> -			fix_parent(oldid, id);
> +			fix_parent(oldid, id, swap);
>  
>  		if (mode == RENAME_WHITEOUT) {
>  			fep->xattr_counter = 0;
>  			add_to_flist(flp - flist, id, parid, xattr_counter);
> +		} else if (mode == RENAME_EXCHANGE) {
> +			fep->xattr_counter = dfep->xattr_counter;
> +			dfep->xattr_counter = xattr_counter;
>  		} else {
>  			del_from_flist(flp - flist, fep - flp->fents);
>  			add_to_flist(flp - flist, id, parid, xattr_counter);
> @@ -4359,6 +4403,12 @@ rnoreplace_f(int opno, long r)
>  }
>  
>  void
> +rexchange_f(int opno, long r)
> +{
> +	do_renameat2(opno, r, RENAME_EXCHANGE);
> +}
> +
> +void
>  rwhiteout_f(int opno, long r)
>  {
>  	do_renameat2(opno, r, RENAME_WHITEOUT);
> -- 
> 1.8.3.1
>
Kaixu Xia Oct. 30, 2019, 3:17 a.m. UTC | #2
On 2019/10/29 21:40, Brian Foster wrote:
> On Sat, Oct 26, 2019 at 07:18:37PM +0800, kaixuxia wrote:
>> Support the EXCHANGE renameat2 syscall in fsstress.
>>
>> In order to maintain filelist/filename integrity, we restrict
>> rexchange to files of the same type.
>>
>> Signed-off-by: kaixuxia <kaixuxia@tencent.com>
>> ---
> 
> While this looks pretty good to me at this point, I do notice instances
> of the following in a quick test:
> 
> 0/29: rename(EXCHANGE) d3/d9/dc/dd to d3/d9/dc/dd/df 22
> ...
> 0/43: rename(EXCHANGE) d3 to d3/d9/dc/d18 22
> ...
> 
> It looks like we're getting an EINVAL error on rexchange of directories.
> That same operation seems to work fine via the ./src/renameat2 tool. Any
> idea what's going on there?

Hmm.. I am not sure if I understand what your mean. Seems like
this is because the special source and target parameters setting.
There are parameters check for RENAME_EXCHANGE in renameat2() call,

 static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
                         const char __user *newname, unsigned int flags)
 {
  ...
         /* source should not be ancestor of target */
         error = -EINVAL;
         if (old_dentry == trap)
                 goto exit5;
         /* target should not be an ancestor of source */
         if (!(flags & RENAME_EXCHANGE))
                 error = -ENOTEMPTY;
         if (new_dentry == trap)
                 goto exit5;
 ...
 } 

so we get the EINVAL error on rexchange of directories. I also tested it
via the ./src/renameat2 tool, and the strace result as below,

 # src/renameat2 -x /xfs-bufdeadlock/d3 /xfs-bufdeadlock/d3/d9/dc/d18
  Invalid argument

 syscall_316(0xffffff9c, 0x7ffe38930813, 0xffffff9c, 0x7ffe38930827, 0x2, 0) = -1 (errno 22)
 
Exchange looks a bit more tricky here.. Maybe we have two choices,
one is just leave the EINVAL there since the fsstress is stress
test and the EINVAL possibility is low. The other one is we should
do parameters check before invoking the renameat2() call, if the
source and target file fents are not suitable we will try more
until get the right file fents...

> 
> Brian
> 
>>  ltp/fsstress.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--------------
>>  1 file changed, 71 insertions(+), 21 deletions(-)
>>
>> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
>> index ecc1adc..83d6337 100644
>> --- a/ltp/fsstress.c
>> +++ b/ltp/fsstress.c
>> @@ -69,6 +69,9 @@ static int renameat2(int dfd1, const char *path1,
>>  #ifndef RENAME_NOREPLACE
>>  #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
>>  #endif
>> +#ifndef RENAME_EXCHANGE
>> +#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
>> +#endif
>>  #ifndef RENAME_WHITEOUT
>>  #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
>>  #endif
>> @@ -115,6 +118,7 @@ typedef enum {
>>  	OP_REMOVEFATTR,
>>  	OP_RENAME,
>>  	OP_RNOREPLACE,
>> +	OP_REXCHANGE,
>>  	OP_RWHITEOUT,
>>  	OP_RESVSP,
>>  	OP_RMDIR,
>> @@ -235,6 +239,7 @@ void	readv_f(int, long);
>>  void	removefattr_f(int, long);
>>  void	rename_f(int, long);
>>  void	rnoreplace_f(int, long);
>> +void	rexchange_f(int, long);
>>  void	rwhiteout_f(int, long);
>>  void	resvsp_f(int, long);
>>  void	rmdir_f(int, long);
>> @@ -296,6 +301,7 @@ opdesc_t	ops[] = {
>>  	{ OP_REMOVEFATTR, "removefattr", removefattr_f, 1, 1 },
>>  	{ OP_RENAME, "rename", rename_f, 2, 1 },
>>  	{ OP_RNOREPLACE, "rnoreplace", rnoreplace_f, 2, 1 },
>> +	{ OP_REXCHANGE, "rexchange", rexchange_f, 2, 1 },
>>  	{ OP_RWHITEOUT, "rwhiteout", rwhiteout_f, 2, 1 },
>>  	{ OP_RESVSP, "resvsp", resvsp_f, 1, 1 },
>>  	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
>> @@ -371,7 +377,7 @@ void	del_from_flist(int, int);
>>  int	dirid_to_name(char *, int);
>>  void	doproc(void);
>>  int	fent_to_name(pathname_t *, flist_t *, fent_t *);
>> -void	fix_parent(int, int);
>> +void	fix_parent(int, int, bool);
>>  void	free_pathname(pathname_t *);
>>  int	generate_fname(fent_t *, int, pathname_t *, int *, int *);
>>  int	generate_xattr_name(int, char *, int);
>> @@ -1118,7 +1124,7 @@ fent_to_name(pathname_t *name, flist_t *flp, fent_t *fep)
>>  }
>>  
>>  void
>> -fix_parent(int oldid, int newid)
>> +fix_parent(int oldid, int newid, bool swap)
>>  {
>>  	fent_t	*fep;
>>  	flist_t	*flp;
>> @@ -1129,6 +1135,8 @@ fix_parent(int oldid, int newid)
>>  		for (j = 0, fep = flp->fents; j < flp->nfiles; j++, fep++) {
>>  			if (fep->parent == oldid)
>>  				fep->parent = newid;
>> +			else if (swap && fep->parent == newid)
>> +				fep->parent = oldid;
>>  		}
>>  	}
>>  }
>> @@ -4256,6 +4264,7 @@ out:
>>  
>>  struct print_flags renameat2_flags [] = {
>>  	{ RENAME_NOREPLACE, "NOREPLACE"},
>> +	{ RENAME_EXCHANGE, "EXCHANGE"},
>>  	{ RENAME_WHITEOUT, "WHITEOUT"},
>>  	{ -1, NULL}
>>  };
>> @@ -4291,41 +4300,76 @@ do_renameat2(int opno, long r, int mode)
>>  		return;
>>  	}
>>  
>> -	/* get an existing directory for the destination parent directory name */
>> -	if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
>> -		parid = -1;
>> -	else
>> -		parid = dfep->id;
>> -	v |= v1;
>> +	/*
>> +	 * Both pathnames must exist for the RENAME_EXCHANGE, and in
>> +	 * order to maintain filelist/filename integrity, we should
>> +	 * restrict exchange operation to files of the same type.
>> +	 */
>> +	if (mode == RENAME_EXCHANGE) {
>> +		which = 1 << (flp - flist);
>> +		init_pathname(&newf);
>> +		if (!get_fname(which, random(), &newf, NULL, &dfep, &v)) {
>> +			if (v)
>> +				printf("%d/%d: rename - no target filename\n",
>> +					procid, opno);
>> +			free_pathname(&newf);
>> +			free_pathname(&f);
>> +			return;
>> +		}
>> +		v |= v1;
>> +		id = dfep->id;
>> +		parid = dfep->parent;
>> +	} else {
>> +		/*
>> +		 * Get an existing directory for the destination parent
>> +		 * directory name.
>> +		 */
>> +		if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
>> +			parid = -1;
>> +		else
>> +			parid = dfep->id;
>> +		v |= v1;
>>  
>> -	/* generate a new path using an existing parent directory in name */
>> -	init_pathname(&newf);
>> -	e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
>> -	v |= v1;
>> -	if (!e) {
>> -		if (v) {
>> -			(void)fent_to_name(&f, &flist[FT_DIR], dfep);
>> -			printf("%d/%d: rename - no filename from %s\n",
>> -				procid, opno, f.path);
>> +		/*
>> +		 * Generate a new path using an existing parent directory
>> +		 * in name.
>> +		 */
>> +		init_pathname(&newf);
>> +		e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
>> +		v |= v1;
>> +		if (!e) {
>> +			if (v) {
>> +				(void)fent_to_name(&f, &flist[FT_DIR], dfep);
>> +				printf("%d/%d: rename - no filename from %s\n",
>> +					procid, opno, f.path);
>> +			}
>> +			free_pathname(&newf);
>> +			free_pathname(&f);
>> +			return;
>>  		}
>> -		free_pathname(&newf);
>> -		free_pathname(&f);
>> -		return;
>>  	}
>>  	e = rename_path(&f, &newf, mode) < 0 ? errno : 0;
>>  	check_cwd();
>>  	if (e == 0) {
>>  		int xattr_counter = fep->xattr_counter;
>> +		bool swap = (mode == RENAME_EXCHANGE) ? true : false;
>>  
>>  		oldid = fep->id;
>>  		oldparid = fep->parent;
>>  
>> +		/*
>> +		 * Swap the parent ids for RENAME_EXCHANGE, and replace the
>> +		 * old parent id for the others.
>> +		 */
>>  		if (flp - flist == FT_DIR)
>> -			fix_parent(oldid, id);
>> +			fix_parent(oldid, id, swap);
>>  
>>  		if (mode == RENAME_WHITEOUT) {
>>  			fep->xattr_counter = 0;
>>  			add_to_flist(flp - flist, id, parid, xattr_counter);
>> +		} else if (mode == RENAME_EXCHANGE) {
>> +			fep->xattr_counter = dfep->xattr_counter;
>> +			dfep->xattr_counter = xattr_counter;
>>  		} else {
>>  			del_from_flist(flp - flist, fep - flp->fents);
>>  			add_to_flist(flp - flist, id, parid, xattr_counter);
>> @@ -4359,6 +4403,12 @@ rnoreplace_f(int opno, long r)
>>  }
>>  
>>  void
>> +rexchange_f(int opno, long r)
>> +{
>> +	do_renameat2(opno, r, RENAME_EXCHANGE);
>> +}
>> +
>> +void
>>  rwhiteout_f(int opno, long r)
>>  {
>>  	do_renameat2(opno, r, RENAME_WHITEOUT);
>> -- 
>> 1.8.3.1
>>
>
Brian Foster Oct. 30, 2019, 12:40 p.m. UTC | #3
On Wed, Oct 30, 2019 at 11:17:04AM +0800, kaixuxia wrote:
> On 2019/10/29 21:40, Brian Foster wrote:
> > On Sat, Oct 26, 2019 at 07:18:37PM +0800, kaixuxia wrote:
> >> Support the EXCHANGE renameat2 syscall in fsstress.
> >>
> >> In order to maintain filelist/filename integrity, we restrict
> >> rexchange to files of the same type.
> >>
> >> Signed-off-by: kaixuxia <kaixuxia@tencent.com>
> >> ---
> > 
> > While this looks pretty good to me at this point, I do notice instances
> > of the following in a quick test:
> > 
> > 0/29: rename(EXCHANGE) d3/d9/dc/dd to d3/d9/dc/dd/df 22
> > ...
> > 0/43: rename(EXCHANGE) d3 to d3/d9/dc/d18 22
> > ...
> > 
> > It looks like we're getting an EINVAL error on rexchange of directories.
> > That same operation seems to work fine via the ./src/renameat2 tool. Any
> > idea what's going on there?
> 
> Hmm.. I am not sure if I understand what your mean. Seems like
> this is because the special source and target parameters setting.
> There are parameters check for RENAME_EXCHANGE in renameat2() call,
> 
>  static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
>                          const char __user *newname, unsigned int flags)
>  {
>   ...
>          /* source should not be ancestor of target */
>          error = -EINVAL;
>          if (old_dentry == trap)
>                  goto exit5;
>          /* target should not be an ancestor of source */
>          if (!(flags & RENAME_EXCHANGE))
>                  error = -ENOTEMPTY;
>          if (new_dentry == trap)
>                  goto exit5;
>  ...
>  } 
> 
> so we get the EINVAL error on rexchange of directories. I also tested it
> via the ./src/renameat2 tool, and the strace result as below,
> 

Ah, I see. I wasn't aware of the restriction and didn't catch that quirk
of these particular requests, so I thought it was failing arbitrary
directory swaps (which is what I tested with renameat2). This makes
sense, thanks for the explanation.

>  # src/renameat2 -x /xfs-bufdeadlock/d3 /xfs-bufdeadlock/d3/d9/dc/d18
>   Invalid argument
> 
>  syscall_316(0xffffff9c, 0x7ffe38930813, 0xffffff9c, 0x7ffe38930827, 0x2, 0) = -1 (errno 22)
>  
> Exchange looks a bit more tricky here.. Maybe we have two choices,
> one is just leave the EINVAL there since the fsstress is stress
> test and the EINVAL possibility is low. The other one is we should
> do parameters check before invoking the renameat2() call, if the
> source and target file fents are not suitable we will try more
> until get the right file fents...
> 

Hmm.. I think it might be fine to ignore from a functional standpoint if
the complexity is too involved to detect and skip. That said, I'm
wondering if the filelist helps us enough here to implement similar
checks as in the kernel VFS. On a quick look, it appears we walk up the
dentry chain looking to see if one dentry is a parent of the other. See
d_ancestor() (called via do_renameat2() -> lock_rename()) for example:

/*
 * ...
 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 * an ancestor of p2, else NULL.
 */
struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
{
        struct dentry *p;

        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
                if (p->d_parent == p1)
                        return p;
        }
        return NULL;
}

Any reason we couldn't do a couple similar checks on rexchange of two
dirs and skip the rename if necessary?

Brian

> > 
> > Brian
> > 
> >>  ltp/fsstress.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--------------
> >>  1 file changed, 71 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> >> index ecc1adc..83d6337 100644
> >> --- a/ltp/fsstress.c
> >> +++ b/ltp/fsstress.c
> >> @@ -69,6 +69,9 @@ static int renameat2(int dfd1, const char *path1,
> >>  #ifndef RENAME_NOREPLACE
> >>  #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
> >>  #endif
> >> +#ifndef RENAME_EXCHANGE
> >> +#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
> >> +#endif
> >>  #ifndef RENAME_WHITEOUT
> >>  #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
> >>  #endif
> >> @@ -115,6 +118,7 @@ typedef enum {
> >>  	OP_REMOVEFATTR,
> >>  	OP_RENAME,
> >>  	OP_RNOREPLACE,
> >> +	OP_REXCHANGE,
> >>  	OP_RWHITEOUT,
> >>  	OP_RESVSP,
> >>  	OP_RMDIR,
> >> @@ -235,6 +239,7 @@ void	readv_f(int, long);
> >>  void	removefattr_f(int, long);
> >>  void	rename_f(int, long);
> >>  void	rnoreplace_f(int, long);
> >> +void	rexchange_f(int, long);
> >>  void	rwhiteout_f(int, long);
> >>  void	resvsp_f(int, long);
> >>  void	rmdir_f(int, long);
> >> @@ -296,6 +301,7 @@ opdesc_t	ops[] = {
> >>  	{ OP_REMOVEFATTR, "removefattr", removefattr_f, 1, 1 },
> >>  	{ OP_RENAME, "rename", rename_f, 2, 1 },
> >>  	{ OP_RNOREPLACE, "rnoreplace", rnoreplace_f, 2, 1 },
> >> +	{ OP_REXCHANGE, "rexchange", rexchange_f, 2, 1 },
> >>  	{ OP_RWHITEOUT, "rwhiteout", rwhiteout_f, 2, 1 },
> >>  	{ OP_RESVSP, "resvsp", resvsp_f, 1, 1 },
> >>  	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
> >> @@ -371,7 +377,7 @@ void	del_from_flist(int, int);
> >>  int	dirid_to_name(char *, int);
> >>  void	doproc(void);
> >>  int	fent_to_name(pathname_t *, flist_t *, fent_t *);
> >> -void	fix_parent(int, int);
> >> +void	fix_parent(int, int, bool);
> >>  void	free_pathname(pathname_t *);
> >>  int	generate_fname(fent_t *, int, pathname_t *, int *, int *);
> >>  int	generate_xattr_name(int, char *, int);
> >> @@ -1118,7 +1124,7 @@ fent_to_name(pathname_t *name, flist_t *flp, fent_t *fep)
> >>  }
> >>  
> >>  void
> >> -fix_parent(int oldid, int newid)
> >> +fix_parent(int oldid, int newid, bool swap)
> >>  {
> >>  	fent_t	*fep;
> >>  	flist_t	*flp;
> >> @@ -1129,6 +1135,8 @@ fix_parent(int oldid, int newid)
> >>  		for (j = 0, fep = flp->fents; j < flp->nfiles; j++, fep++) {
> >>  			if (fep->parent == oldid)
> >>  				fep->parent = newid;
> >> +			else if (swap && fep->parent == newid)
> >> +				fep->parent = oldid;
> >>  		}
> >>  	}
> >>  }
> >> @@ -4256,6 +4264,7 @@ out:
> >>  
> >>  struct print_flags renameat2_flags [] = {
> >>  	{ RENAME_NOREPLACE, "NOREPLACE"},
> >> +	{ RENAME_EXCHANGE, "EXCHANGE"},
> >>  	{ RENAME_WHITEOUT, "WHITEOUT"},
> >>  	{ -1, NULL}
> >>  };
> >> @@ -4291,41 +4300,76 @@ do_renameat2(int opno, long r, int mode)
> >>  		return;
> >>  	}
> >>  
> >> -	/* get an existing directory for the destination parent directory name */
> >> -	if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
> >> -		parid = -1;
> >> -	else
> >> -		parid = dfep->id;
> >> -	v |= v1;
> >> +	/*
> >> +	 * Both pathnames must exist for the RENAME_EXCHANGE, and in
> >> +	 * order to maintain filelist/filename integrity, we should
> >> +	 * restrict exchange operation to files of the same type.
> >> +	 */
> >> +	if (mode == RENAME_EXCHANGE) {
> >> +		which = 1 << (flp - flist);
> >> +		init_pathname(&newf);
> >> +		if (!get_fname(which, random(), &newf, NULL, &dfep, &v)) {
> >> +			if (v)
> >> +				printf("%d/%d: rename - no target filename\n",
> >> +					procid, opno);
> >> +			free_pathname(&newf);
> >> +			free_pathname(&f);
> >> +			return;
> >> +		}
> >> +		v |= v1;
> >> +		id = dfep->id;
> >> +		parid = dfep->parent;
> >> +	} else {
> >> +		/*
> >> +		 * Get an existing directory for the destination parent
> >> +		 * directory name.
> >> +		 */
> >> +		if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
> >> +			parid = -1;
> >> +		else
> >> +			parid = dfep->id;
> >> +		v |= v1;
> >>  
> >> -	/* generate a new path using an existing parent directory in name */
> >> -	init_pathname(&newf);
> >> -	e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
> >> -	v |= v1;
> >> -	if (!e) {
> >> -		if (v) {
> >> -			(void)fent_to_name(&f, &flist[FT_DIR], dfep);
> >> -			printf("%d/%d: rename - no filename from %s\n",
> >> -				procid, opno, f.path);
> >> +		/*
> >> +		 * Generate a new path using an existing parent directory
> >> +		 * in name.
> >> +		 */
> >> +		init_pathname(&newf);
> >> +		e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
> >> +		v |= v1;
> >> +		if (!e) {
> >> +			if (v) {
> >> +				(void)fent_to_name(&f, &flist[FT_DIR], dfep);
> >> +				printf("%d/%d: rename - no filename from %s\n",
> >> +					procid, opno, f.path);
> >> +			}
> >> +			free_pathname(&newf);
> >> +			free_pathname(&f);
> >> +			return;
> >>  		}
> >> -		free_pathname(&newf);
> >> -		free_pathname(&f);
> >> -		return;
> >>  	}
> >>  	e = rename_path(&f, &newf, mode) < 0 ? errno : 0;
> >>  	check_cwd();
> >>  	if (e == 0) {
> >>  		int xattr_counter = fep->xattr_counter;
> >> +		bool swap = (mode == RENAME_EXCHANGE) ? true : false;
> >>  
> >>  		oldid = fep->id;
> >>  		oldparid = fep->parent;
> >>  
> >> +		/*
> >> +		 * Swap the parent ids for RENAME_EXCHANGE, and replace the
> >> +		 * old parent id for the others.
> >> +		 */
> >>  		if (flp - flist == FT_DIR)
> >> -			fix_parent(oldid, id);
> >> +			fix_parent(oldid, id, swap);
> >>  
> >>  		if (mode == RENAME_WHITEOUT) {
> >>  			fep->xattr_counter = 0;
> >>  			add_to_flist(flp - flist, id, parid, xattr_counter);
> >> +		} else if (mode == RENAME_EXCHANGE) {
> >> +			fep->xattr_counter = dfep->xattr_counter;
> >> +			dfep->xattr_counter = xattr_counter;
> >>  		} else {
> >>  			del_from_flist(flp - flist, fep - flp->fents);
> >>  			add_to_flist(flp - flist, id, parid, xattr_counter);
> >> @@ -4359,6 +4403,12 @@ rnoreplace_f(int opno, long r)
> >>  }
> >>  
> >>  void
> >> +rexchange_f(int opno, long r)
> >> +{
> >> +	do_renameat2(opno, r, RENAME_EXCHANGE);
> >> +}
> >> +
> >> +void
> >>  rwhiteout_f(int opno, long r)
> >>  {
> >>  	do_renameat2(opno, r, RENAME_WHITEOUT);
> >> -- 
> >> 1.8.3.1
> >>
> > 
> 
> -- 
> kaixuxia
Kaixu Xia Oct. 31, 2019, 1:51 a.m. UTC | #4
On 2019/10/30 20:40, Brian Foster wrote:
> On Wed, Oct 30, 2019 at 11:17:04AM +0800, kaixuxia wrote:
>> On 2019/10/29 21:40, Brian Foster wrote:
>>> On Sat, Oct 26, 2019 at 07:18:37PM +0800, kaixuxia wrote:
>>>> Support the EXCHANGE renameat2 syscall in fsstress.
>>>>
>>>> In order to maintain filelist/filename integrity, we restrict
>>>> rexchange to files of the same type.
>>>>
>>>> Signed-off-by: kaixuxia <kaixuxia@tencent.com>
>>>> ---
>>>
>>> While this looks pretty good to me at this point, I do notice instances
>>> of the following in a quick test:
>>>
>>> 0/29: rename(EXCHANGE) d3/d9/dc/dd to d3/d9/dc/dd/df 22
>>> ...
>>> 0/43: rename(EXCHANGE) d3 to d3/d9/dc/d18 22
>>> ...
>>>
>>> It looks like we're getting an EINVAL error on rexchange of directories.
>>> That same operation seems to work fine via the ./src/renameat2 tool. Any
>>> idea what's going on there?
>>
>> Hmm.. I am not sure if I understand what your mean. Seems like
>> this is because the special source and target parameters setting.
>> There are parameters check for RENAME_EXCHANGE in renameat2() call,
>>
>>  static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
>>                          const char __user *newname, unsigned int flags)
>>  {
>>   ...
>>          /* source should not be ancestor of target */
>>          error = -EINVAL;
>>          if (old_dentry == trap)
>>                  goto exit5;
>>          /* target should not be an ancestor of source */
>>          if (!(flags & RENAME_EXCHANGE))
>>                  error = -ENOTEMPTY;
>>          if (new_dentry == trap)
>>                  goto exit5;
>>  ...
>>  } 
>>
>> so we get the EINVAL error on rexchange of directories. I also tested it
>> via the ./src/renameat2 tool, and the strace result as below,
>>
> 
> Ah, I see. I wasn't aware of the restriction and didn't catch that quirk
> of these particular requests, so I thought it was failing arbitrary
> directory swaps (which is what I tested with renameat2). This makes
> sense, thanks for the explanation.
> 
>>  # src/renameat2 -x /xfs-bufdeadlock/d3 /xfs-bufdeadlock/d3/d9/dc/d18
>>   Invalid argument
>>
>>  syscall_316(0xffffff9c, 0x7ffe38930813, 0xffffff9c, 0x7ffe38930827, 0x2, 0) = -1 (errno 22)
>>  
>> Exchange looks a bit more tricky here.. Maybe we have two choices,
>> one is just leave the EINVAL there since the fsstress is stress
>> test and the EINVAL possibility is low. The other one is we should
>> do parameters check before invoking the renameat2() call, if the
>> source and target file fents are not suitable we will try more
>> until get the right file fents...
>>
> 
> Hmm.. I think it might be fine to ignore from a functional standpoint if
> the complexity is too involved to detect and skip. That said, I'm
> wondering if the filelist helps us enough here to implement similar
> checks as in the kernel VFS. On a quick look, it appears we walk up the
> dentry chain looking to see if one dentry is a parent of the other. See
> d_ancestor() (called via do_renameat2() -> lock_rename()) for example:
> 
> /*
>  * ...
>  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
>  * an ancestor of p2, else NULL.
>  */
> struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
> {
>         struct dentry *p;
> 
>         for (p = p2; !IS_ROOT(p); p = p->d_parent) {
>                 if (p->d_parent == p1)
>                         return p;
>         }
>         return NULL;
> }
> 
> Any reason we couldn't do a couple similar checks on rexchange of two
> dirs and skip the rename if necessary?
> 
Yeah, sounds more reasonable, will add the couple checks
in next version.

Kaixu

> Brian
> 
>>>
>>> Brian
>>>
>>>>  ltp/fsstress.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--------------
>>>>  1 file changed, 71 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
>>>> index ecc1adc..83d6337 100644
>>>> --- a/ltp/fsstress.c
>>>> +++ b/ltp/fsstress.c
>>>> @@ -69,6 +69,9 @@ static int renameat2(int dfd1, const char *path1,
>>>>  #ifndef RENAME_NOREPLACE
>>>>  #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
>>>>  #endif
>>>> +#ifndef RENAME_EXCHANGE
>>>> +#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
>>>> +#endif
>>>>  #ifndef RENAME_WHITEOUT
>>>>  #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
>>>>  #endif
>>>> @@ -115,6 +118,7 @@ typedef enum {
>>>>  	OP_REMOVEFATTR,
>>>>  	OP_RENAME,
>>>>  	OP_RNOREPLACE,
>>>> +	OP_REXCHANGE,
>>>>  	OP_RWHITEOUT,
>>>>  	OP_RESVSP,
>>>>  	OP_RMDIR,
>>>> @@ -235,6 +239,7 @@ void	readv_f(int, long);
>>>>  void	removefattr_f(int, long);
>>>>  void	rename_f(int, long);
>>>>  void	rnoreplace_f(int, long);
>>>> +void	rexchange_f(int, long);
>>>>  void	rwhiteout_f(int, long);
>>>>  void	resvsp_f(int, long);
>>>>  void	rmdir_f(int, long);
>>>> @@ -296,6 +301,7 @@ opdesc_t	ops[] = {
>>>>  	{ OP_REMOVEFATTR, "removefattr", removefattr_f, 1, 1 },
>>>>  	{ OP_RENAME, "rename", rename_f, 2, 1 },
>>>>  	{ OP_RNOREPLACE, "rnoreplace", rnoreplace_f, 2, 1 },
>>>> +	{ OP_REXCHANGE, "rexchange", rexchange_f, 2, 1 },
>>>>  	{ OP_RWHITEOUT, "rwhiteout", rwhiteout_f, 2, 1 },
>>>>  	{ OP_RESVSP, "resvsp", resvsp_f, 1, 1 },
>>>>  	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
>>>> @@ -371,7 +377,7 @@ void	del_from_flist(int, int);
>>>>  int	dirid_to_name(char *, int);
>>>>  void	doproc(void);
>>>>  int	fent_to_name(pathname_t *, flist_t *, fent_t *);
>>>> -void	fix_parent(int, int);
>>>> +void	fix_parent(int, int, bool);
>>>>  void	free_pathname(pathname_t *);
>>>>  int	generate_fname(fent_t *, int, pathname_t *, int *, int *);
>>>>  int	generate_xattr_name(int, char *, int);
>>>> @@ -1118,7 +1124,7 @@ fent_to_name(pathname_t *name, flist_t *flp, fent_t *fep)
>>>>  }
>>>>  
>>>>  void
>>>> -fix_parent(int oldid, int newid)
>>>> +fix_parent(int oldid, int newid, bool swap)
>>>>  {
>>>>  	fent_t	*fep;
>>>>  	flist_t	*flp;
>>>> @@ -1129,6 +1135,8 @@ fix_parent(int oldid, int newid)
>>>>  		for (j = 0, fep = flp->fents; j < flp->nfiles; j++, fep++) {
>>>>  			if (fep->parent == oldid)
>>>>  				fep->parent = newid;
>>>> +			else if (swap && fep->parent == newid)
>>>> +				fep->parent = oldid;
>>>>  		}
>>>>  	}
>>>>  }
>>>> @@ -4256,6 +4264,7 @@ out:
>>>>  
>>>>  struct print_flags renameat2_flags [] = {
>>>>  	{ RENAME_NOREPLACE, "NOREPLACE"},
>>>> +	{ RENAME_EXCHANGE, "EXCHANGE"},
>>>>  	{ RENAME_WHITEOUT, "WHITEOUT"},
>>>>  	{ -1, NULL}
>>>>  };
>>>> @@ -4291,41 +4300,76 @@ do_renameat2(int opno, long r, int mode)
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	/* get an existing directory for the destination parent directory name */
>>>> -	if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
>>>> -		parid = -1;
>>>> -	else
>>>> -		parid = dfep->id;
>>>> -	v |= v1;
>>>> +	/*
>>>> +	 * Both pathnames must exist for the RENAME_EXCHANGE, and in
>>>> +	 * order to maintain filelist/filename integrity, we should
>>>> +	 * restrict exchange operation to files of the same type.
>>>> +	 */
>>>> +	if (mode == RENAME_EXCHANGE) {
>>>> +		which = 1 << (flp - flist);
>>>> +		init_pathname(&newf);
>>>> +		if (!get_fname(which, random(), &newf, NULL, &dfep, &v)) {
>>>> +			if (v)
>>>> +				printf("%d/%d: rename - no target filename\n",
>>>> +					procid, opno);
>>>> +			free_pathname(&newf);
>>>> +			free_pathname(&f);
>>>> +			return;
>>>> +		}
>>>> +		v |= v1;
>>>> +		id = dfep->id;
>>>> +		parid = dfep->parent;
>>>> +	} else {
>>>> +		/*
>>>> +		 * Get an existing directory for the destination parent
>>>> +		 * directory name.
>>>> +		 */
>>>> +		if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
>>>> +			parid = -1;
>>>> +		else
>>>> +			parid = dfep->id;
>>>> +		v |= v1;
>>>>  
>>>> -	/* generate a new path using an existing parent directory in name */
>>>> -	init_pathname(&newf);
>>>> -	e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
>>>> -	v |= v1;
>>>> -	if (!e) {
>>>> -		if (v) {
>>>> -			(void)fent_to_name(&f, &flist[FT_DIR], dfep);
>>>> -			printf("%d/%d: rename - no filename from %s\n",
>>>> -				procid, opno, f.path);
>>>> +		/*
>>>> +		 * Generate a new path using an existing parent directory
>>>> +		 * in name.
>>>> +		 */
>>>> +		init_pathname(&newf);
>>>> +		e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
>>>> +		v |= v1;
>>>> +		if (!e) {
>>>> +			if (v) {
>>>> +				(void)fent_to_name(&f, &flist[FT_DIR], dfep);
>>>> +				printf("%d/%d: rename - no filename from %s\n",
>>>> +					procid, opno, f.path);
>>>> +			}
>>>> +			free_pathname(&newf);
>>>> +			free_pathname(&f);
>>>> +			return;
>>>>  		}
>>>> -		free_pathname(&newf);
>>>> -		free_pathname(&f);
>>>> -		return;
>>>>  	}
>>>>  	e = rename_path(&f, &newf, mode) < 0 ? errno : 0;
>>>>  	check_cwd();
>>>>  	if (e == 0) {
>>>>  		int xattr_counter = fep->xattr_counter;
>>>> +		bool swap = (mode == RENAME_EXCHANGE) ? true : false;
>>>>  
>>>>  		oldid = fep->id;
>>>>  		oldparid = fep->parent;
>>>>  
>>>> +		/*
>>>> +		 * Swap the parent ids for RENAME_EXCHANGE, and replace the
>>>> +		 * old parent id for the others.
>>>> +		 */
>>>>  		if (flp - flist == FT_DIR)
>>>> -			fix_parent(oldid, id);
>>>> +			fix_parent(oldid, id, swap);
>>>>  
>>>>  		if (mode == RENAME_WHITEOUT) {
>>>>  			fep->xattr_counter = 0;
>>>>  			add_to_flist(flp - flist, id, parid, xattr_counter);
>>>> +		} else if (mode == RENAME_EXCHANGE) {
>>>> +			fep->xattr_counter = dfep->xattr_counter;
>>>> +			dfep->xattr_counter = xattr_counter;
>>>>  		} else {
>>>>  			del_from_flist(flp - flist, fep - flp->fents);
>>>>  			add_to_flist(flp - flist, id, parid, xattr_counter);
>>>> @@ -4359,6 +4403,12 @@ rnoreplace_f(int opno, long r)
>>>>  }
>>>>  
>>>>  void
>>>> +rexchange_f(int opno, long r)
>>>> +{
>>>> +	do_renameat2(opno, r, RENAME_EXCHANGE);
>>>> +}
>>>> +
>>>> +void
>>>>  rwhiteout_f(int opno, long r)
>>>>  {
>>>>  	do_renameat2(opno, r, RENAME_WHITEOUT);
>>>> -- 
>>>> 1.8.3.1
>>>>
>>>
>>
>> -- 
>> kaixuxia
>
diff mbox series

Patch

diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index ecc1adc..83d6337 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -69,6 +69,9 @@  static int renameat2(int dfd1, const char *path1,
 #ifndef RENAME_NOREPLACE
 #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
 #endif
+#ifndef RENAME_EXCHANGE
+#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
+#endif
 #ifndef RENAME_WHITEOUT
 #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
 #endif
@@ -115,6 +118,7 @@  typedef enum {
 	OP_REMOVEFATTR,
 	OP_RENAME,
 	OP_RNOREPLACE,
+	OP_REXCHANGE,
 	OP_RWHITEOUT,
 	OP_RESVSP,
 	OP_RMDIR,
@@ -235,6 +239,7 @@  void	readv_f(int, long);
 void	removefattr_f(int, long);
 void	rename_f(int, long);
 void	rnoreplace_f(int, long);
+void	rexchange_f(int, long);
 void	rwhiteout_f(int, long);
 void	resvsp_f(int, long);
 void	rmdir_f(int, long);
@@ -296,6 +301,7 @@  opdesc_t	ops[] = {
 	{ OP_REMOVEFATTR, "removefattr", removefattr_f, 1, 1 },
 	{ OP_RENAME, "rename", rename_f, 2, 1 },
 	{ OP_RNOREPLACE, "rnoreplace", rnoreplace_f, 2, 1 },
+	{ OP_REXCHANGE, "rexchange", rexchange_f, 2, 1 },
 	{ OP_RWHITEOUT, "rwhiteout", rwhiteout_f, 2, 1 },
 	{ OP_RESVSP, "resvsp", resvsp_f, 1, 1 },
 	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
@@ -371,7 +377,7 @@  void	del_from_flist(int, int);
 int	dirid_to_name(char *, int);
 void	doproc(void);
 int	fent_to_name(pathname_t *, flist_t *, fent_t *);
-void	fix_parent(int, int);
+void	fix_parent(int, int, bool);
 void	free_pathname(pathname_t *);
 int	generate_fname(fent_t *, int, pathname_t *, int *, int *);
 int	generate_xattr_name(int, char *, int);
@@ -1118,7 +1124,7 @@  fent_to_name(pathname_t *name, flist_t *flp, fent_t *fep)
 }
 
 void
-fix_parent(int oldid, int newid)
+fix_parent(int oldid, int newid, bool swap)
 {
 	fent_t	*fep;
 	flist_t	*flp;
@@ -1129,6 +1135,8 @@  fix_parent(int oldid, int newid)
 		for (j = 0, fep = flp->fents; j < flp->nfiles; j++, fep++) {
 			if (fep->parent == oldid)
 				fep->parent = newid;
+			else if (swap && fep->parent == newid)
+				fep->parent = oldid;
 		}
 	}
 }
@@ -4256,6 +4264,7 @@  out:
 
 struct print_flags renameat2_flags [] = {
 	{ RENAME_NOREPLACE, "NOREPLACE"},
+	{ RENAME_EXCHANGE, "EXCHANGE"},
 	{ RENAME_WHITEOUT, "WHITEOUT"},
 	{ -1, NULL}
 };
@@ -4291,41 +4300,76 @@  do_renameat2(int opno, long r, int mode)
 		return;
 	}
 
-	/* get an existing directory for the destination parent directory name */
-	if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
-		parid = -1;
-	else
-		parid = dfep->id;
-	v |= v1;
+	/*
+	 * Both pathnames must exist for the RENAME_EXCHANGE, and in
+	 * order to maintain filelist/filename integrity, we should
+	 * restrict exchange operation to files of the same type.
+	 */
+	if (mode == RENAME_EXCHANGE) {
+		which = 1 << (flp - flist);
+		init_pathname(&newf);
+		if (!get_fname(which, random(), &newf, NULL, &dfep, &v)) {
+			if (v)
+				printf("%d/%d: rename - no target filename\n",
+					procid, opno);
+			free_pathname(&newf);
+			free_pathname(&f);
+			return;
+		}
+		v |= v1;
+		id = dfep->id;
+		parid = dfep->parent;
+	} else {
+		/*
+		 * Get an existing directory for the destination parent
+		 * directory name.
+		 */
+		if (!get_fname(FT_DIRm, random(), NULL, NULL, &dfep, &v))
+			parid = -1;
+		else
+			parid = dfep->id;
+		v |= v1;
 
-	/* generate a new path using an existing parent directory in name */
-	init_pathname(&newf);
-	e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
-	v |= v1;
-	if (!e) {
-		if (v) {
-			(void)fent_to_name(&f, &flist[FT_DIR], dfep);
-			printf("%d/%d: rename - no filename from %s\n",
-				procid, opno, f.path);
+		/*
+		 * Generate a new path using an existing parent directory
+		 * in name.
+		 */
+		init_pathname(&newf);
+		e = generate_fname(dfep, flp - flist, &newf, &id, &v1);
+		v |= v1;
+		if (!e) {
+			if (v) {
+				(void)fent_to_name(&f, &flist[FT_DIR], dfep);
+				printf("%d/%d: rename - no filename from %s\n",
+					procid, opno, f.path);
+			}
+			free_pathname(&newf);
+			free_pathname(&f);
+			return;
 		}
-		free_pathname(&newf);
-		free_pathname(&f);
-		return;
 	}
 	e = rename_path(&f, &newf, mode) < 0 ? errno : 0;
 	check_cwd();
 	if (e == 0) {
 		int xattr_counter = fep->xattr_counter;
+		bool swap = (mode == RENAME_EXCHANGE) ? true : false;
 
 		oldid = fep->id;
 		oldparid = fep->parent;
 
+		/*
+		 * Swap the parent ids for RENAME_EXCHANGE, and replace the
+		 * old parent id for the others.
+		 */
 		if (flp - flist == FT_DIR)
-			fix_parent(oldid, id);
+			fix_parent(oldid, id, swap);
 
 		if (mode == RENAME_WHITEOUT) {
 			fep->xattr_counter = 0;
 			add_to_flist(flp - flist, id, parid, xattr_counter);
+		} else if (mode == RENAME_EXCHANGE) {
+			fep->xattr_counter = dfep->xattr_counter;
+			dfep->xattr_counter = xattr_counter;
 		} else {
 			del_from_flist(flp - flist, fep - flp->fents);
 			add_to_flist(flp - flist, id, parid, xattr_counter);
@@ -4359,6 +4403,12 @@  rnoreplace_f(int opno, long r)
 }
 
 void
+rexchange_f(int opno, long r)
+{
+	do_renameat2(opno, r, RENAME_EXCHANGE);
+}
+
+void
 rwhiteout_f(int opno, long r)
 {
 	do_renameat2(opno, r, RENAME_WHITEOUT);