diff mbox series

xfs: use inode_set_cached_link()

Message ID 20241123075105.1082661-1-mjguzik@gmail.com (mailing list archive)
State New
Headers show
Series xfs: use inode_set_cached_link() | expand

Commit Message

Mateusz Guzik Nov. 23, 2024, 7:51 a.m. UTC
For cases where caching is applicable this dodges inode locking, memory
allocation and memcpy + strlen.

Throughput of readlink on Saphire Rappids (ops/s):
before:	3641273
after:	4009524 (+10%)

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---

First a minor note that in the stock case strlen is called on the buffer
and I verified that i_disk_size is the value which is computed.

The important note is that I'm assuming the pointed to area is stable
for the duration of the inode's lifetime -- that is if the read off
symlink is fine *or* it was just created and is eligible caching, it
wont get invalidated as long as the inode is in memory. If this does not
hold then this submission is wrong and it would be nice(tm) to remedy
it.

This depends on stuff which landed in vfs-6.14.misc, but is not in next
nor fs-next yet.

For benchmark code see bottom of https://lore.kernel.org/linux-fsdevel/20241120112037.822078-1-mjguzik@gmail.com/

 fs/xfs/xfs_iops.c    |  1 +
 fs/xfs/xfs_symlink.c | 24 ++++++++++++++++++++++++
 fs/xfs/xfs_symlink.h |  1 +
 3 files changed, 26 insertions(+)

Comments

Darrick J. Wong Nov. 23, 2024, 4:19 p.m. UTC | #1
On Sat, Nov 23, 2024 at 08:51:05AM +0100, Mateusz Guzik wrote:
> For cases where caching is applicable this dodges inode locking, memory
> allocation and memcpy + strlen.
> 
> Throughput of readlink on Saphire Rappids (ops/s):
> before:	3641273
> after:	4009524 (+10%)
> 
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
> 
> First a minor note that in the stock case strlen is called on the buffer
> and I verified that i_disk_size is the value which is computed.
> 
> The important note is that I'm assuming the pointed to area is stable
> for the duration of the inode's lifetime -- that is if the read off
> symlink is fine *or* it was just created and is eligible caching, it
> wont get invalidated as long as the inode is in memory. If this does not
> hold then this submission is wrong and it would be nice(tm) to remedy
> it.

It is not stable for the lifetime of the inode.  See commit
7b7820b83f2300 ("xfs: don't expose internal symlink metadata buffers to
the vfs").  With parent pointers' ability to expand the symlink xattr
fork area sufficiently to bump the symlink target into a remote block
and online repair's ability to mess with the inode, direct vfs access of
if_data has only become more difficult.

--D

> This depends on stuff which landed in vfs-6.14.misc, but is not in next
> nor fs-next yet.
> 
> For benchmark code see bottom of https://lore.kernel.org/linux-fsdevel/20241120112037.822078-1-mjguzik@gmail.com/
> 
>  fs/xfs/xfs_iops.c    |  1 +
>  fs/xfs/xfs_symlink.c | 24 ++++++++++++++++++++++++
>  fs/xfs/xfs_symlink.h |  1 +
>  3 files changed, 26 insertions(+)
> 
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 207e0dadffc3..1d0a3797f876 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -1394,6 +1394,7 @@ xfs_setup_iops(
>  		break;
>  	case S_IFLNK:
>  		inode->i_op = &xfs_symlink_inode_operations;
> +		xfs_setup_cached_symlink(ip);
>  		break;
>  	default:
>  		inode->i_op = &xfs_inode_operations;
> diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
> index 4252b07cd251..59bf1b9ccb20 100644
> --- a/fs/xfs/xfs_symlink.c
> +++ b/fs/xfs/xfs_symlink.c
> @@ -28,6 +28,30 @@
>  #include "xfs_parent.h"
>  #include "xfs_defer.h"
>  
> +void
> +xfs_setup_cached_symlink(
> +	struct xfs_inode	*ip)
> +{
> +	struct inode		*inode = &ip->i_vnode;
> +	xfs_fsize_t		pathlen;
> +
> +	/*
> +	 * If we have the symlink readily accessible let the VFS know where to
> +	 * find it. This avoids calls to xfs_readlink().
> +	 */
> +	pathlen = ip->i_disk_size;
> +	if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN)
> +		return;
> +
> +	if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL)
> +		return;
> +
> +	if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_data))
> +		return;
> +
> +	inode_set_cached_link(inode, ip->i_df.if_data, pathlen);
> +}
> +
>  int
>  xfs_readlink(
>  	struct xfs_inode	*ip,
> diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
> index 0d29a50e66fd..0e45a8a33829 100644
> --- a/fs/xfs/xfs_symlink.h
> +++ b/fs/xfs/xfs_symlink.h
> @@ -12,5 +12,6 @@ int xfs_symlink(struct mnt_idmap *idmap, struct xfs_inode *dp,
>  		umode_t mode, struct xfs_inode **ipp);
>  int xfs_readlink(struct xfs_inode *ip, char *link);
>  int xfs_inactive_symlink(struct xfs_inode *ip);
> +void xfs_setup_cached_symlink(struct xfs_inode *ip);
>  
>  #endif /* __XFS_SYMLINK_H */
> -- 
> 2.43.0
>
Mateusz Guzik Nov. 23, 2024, 6:52 p.m. UTC | #2
On Sat, Nov 23, 2024 at 5:19 PM Darrick J. Wong <djwong@kernel.org> wrote:
>
> On Sat, Nov 23, 2024 at 08:51:05AM +0100, Mateusz Guzik wrote:
> > For cases where caching is applicable this dodges inode locking, memory
> > allocation and memcpy + strlen.
> >
> > Throughput of readlink on Saphire Rappids (ops/s):
> > before:       3641273
> > after:        4009524 (+10%)
> >
> > Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> > ---
> >
> > First a minor note that in the stock case strlen is called on the buffer
> > and I verified that i_disk_size is the value which is computed.
> >
> > The important note is that I'm assuming the pointed to area is stable
> > for the duration of the inode's lifetime -- that is if the read off
> > symlink is fine *or* it was just created and is eligible caching, it
> > wont get invalidated as long as the inode is in memory. If this does not
> > hold then this submission is wrong and it would be nice(tm) to remedy
> > it.
>
> It is not stable for the lifetime of the inode.  See commit
> 7b7820b83f2300 ("xfs: don't expose internal symlink metadata buffers to
> the vfs").  With parent pointers' ability to expand the symlink xattr
> fork area sufficiently to bump the symlink target into a remote block
> and online repair's ability to mess with the inode, direct vfs access of
> if_data has only become more difficult.
>

That's a bummer.

Thanks for the review.
kernel test robot Nov. 25, 2024, 1:37 p.m. UTC | #3
Hi Mateusz,

kernel test robot noticed the following build errors:

[auto build test ERROR on xfs-linux/for-next]
[also build test ERROR on linus/master v6.12 next-20241125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mateusz-Guzik/xfs-use-inode_set_cached_link/20241125-115441
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
patch link:    https://lore.kernel.org/r/20241123075105.1082661-1-mjguzik%40gmail.com
patch subject: [PATCH] xfs: use inode_set_cached_link()
config: i386-buildonly-randconfig-003-20241125 (https://download.01.org/0day-ci/archive/20241125/202411252143.IFCZKd2V-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241125/202411252143.IFCZKd2V-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411252143.IFCZKd2V-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from fs/xfs/xfs_symlink.c:7:
   In file included from fs/xfs/xfs.h:26:
   In file included from fs/xfs/xfs_linux.h:25:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     505 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     512 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   include/linux/vmstat.h:524:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     524 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     525 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
>> fs/xfs/xfs_symlink.c:52:2: error: call to undeclared function 'inode_set_cached_link'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
      52 |         inode_set_cached_link(inode, ip->i_df.if_data, pathlen);
         |         ^
   4 warnings and 1 error generated.


vim +/inode_set_cached_link +52 fs/xfs/xfs_symlink.c

    30	
    31	void
    32	xfs_setup_cached_symlink(
    33		struct xfs_inode	*ip)
    34	{
    35		struct inode		*inode = &ip->i_vnode;
    36		xfs_fsize_t		pathlen;
    37	
    38		/*
    39		 * If we have the symlink readily accessible let the VFS know where to
    40		 * find it. This avoids calls to xfs_readlink().
    41		 */
    42		pathlen = ip->i_disk_size;
    43		if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN)
    44			return;
    45	
    46		if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL)
    47			return;
    48	
    49		if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_data))
    50			return;
    51	
  > 52		inode_set_cached_link(inode, ip->i_df.if_data, pathlen);
    53	}
    54
diff mbox series

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 207e0dadffc3..1d0a3797f876 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1394,6 +1394,7 @@  xfs_setup_iops(
 		break;
 	case S_IFLNK:
 		inode->i_op = &xfs_symlink_inode_operations;
+		xfs_setup_cached_symlink(ip);
 		break;
 	default:
 		inode->i_op = &xfs_inode_operations;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 4252b07cd251..59bf1b9ccb20 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -28,6 +28,30 @@ 
 #include "xfs_parent.h"
 #include "xfs_defer.h"
 
+void
+xfs_setup_cached_symlink(
+	struct xfs_inode	*ip)
+{
+	struct inode		*inode = &ip->i_vnode;
+	xfs_fsize_t		pathlen;
+
+	/*
+	 * If we have the symlink readily accessible let the VFS know where to
+	 * find it. This avoids calls to xfs_readlink().
+	 */
+	pathlen = ip->i_disk_size;
+	if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN)
+		return;
+
+	if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL)
+		return;
+
+	if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_data))
+		return;
+
+	inode_set_cached_link(inode, ip->i_df.if_data, pathlen);
+}
+
 int
 xfs_readlink(
 	struct xfs_inode	*ip,
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index 0d29a50e66fd..0e45a8a33829 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -12,5 +12,6 @@  int xfs_symlink(struct mnt_idmap *idmap, struct xfs_inode *dp,
 		umode_t mode, struct xfs_inode **ipp);
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_inactive_symlink(struct xfs_inode *ip);
+void xfs_setup_cached_symlink(struct xfs_inode *ip);
 
 #endif /* __XFS_SYMLINK_H */