diff mbox series

fs: switch f_iocb_flags and f_version

Message ID 20240822-mutig-kurznachrichten-68d154f25f41@brauner (mailing list archive)
State New
Headers show
Series fs: switch f_iocb_flags and f_version | expand

Commit Message

Christian Brauner Aug. 22, 2024, 2:14 p.m. UTC
Now that we shrank struct file by 24 bytes we still have a 4 byte hole.
Move f_version into the union and f_iocb_flags out of the union to fill
that hole and shrink struct file by another 4 bytes. This brings struct
file to 200 bytes down from 232 bytes.

I've tried to audit all codepaths that use f_version and none of them
rely on it in file->f_op->release() and never have since commit
1da177e4c3f4 ("Linux-2.6.12-rc2").

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
struct file {
        union {
                struct callback_head f_task_work;        /*     0    16 */
                struct llist_node  f_llist;              /*     0     8 */
                u64                f_version;            /*     0     8 */
        };                                               /*     0    16 */
        spinlock_t                 f_lock;               /*    16     4 */
        fmode_t                    f_mode;               /*    20     4 */
        atomic_long_t              f_count;              /*    24     8 */
        struct mutex               f_pos_lock;           /*    32    32 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        loff_t                     f_pos;                /*    64     8 */
        unsigned int               f_flags;              /*    72     4 */
        unsigned int               f_iocb_flags;         /*    76     4 */
        struct fown_struct *       f_owner;              /*    80     8 */
        const struct cred  *       f_cred;               /*    88     8 */
        struct file_ra_state       f_ra;                 /*    96    32 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        struct path                f_path;               /*   128    16 */
        struct inode *             f_inode;              /*   144     8 */
        const struct file_operations  * f_op;            /*   152     8 */
        void *                     f_security;           /*   160     8 */
        void *                     private_data;         /*   168     8 */
        struct hlist_head *        f_ep;                 /*   176     8 */
        struct address_space *     f_mapping;            /*   184     8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        errseq_t                   f_wb_err;             /*   192     4 */
        errseq_t                   f_sb_err;             /*   196     4 */

        /* size: 200, cachelines: 4, members: 20 */
        /* last cacheline: 8 bytes */
};
---
 include/linux/fs.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

Comments

Jens Axboe Aug. 22, 2024, 2:55 p.m. UTC | #1
On 8/22/24 8:14 AM, Christian Brauner wrote:
> Now that we shrank struct file by 24 bytes we still have a 4 byte hole.
> Move f_version into the union and f_iocb_flags out of the union to fill
> that hole and shrink struct file by another 4 bytes. This brings struct
> file to 200 bytes down from 232 bytes.

Nice! Now you just need to find 8 more bytes and we'll be down to 3
cachelines for struct file.

> I've tried to audit all codepaths that use f_version and none of them
> rely on it in file->f_op->release() and never have since commit
> 1da177e4c3f4 ("Linux-2.6.12-rc2").

Do we want to add a comment to this effect? I know it's obvious from
sharing with f_task_work, but...
Christian Brauner Aug. 22, 2024, 3:10 p.m. UTC | #2
> Do we want to add a comment to this effect? I know it's obvious from
> sharing with f_task_work, but...

I'll add one.
Jeff Layton Aug. 22, 2024, 3:54 p.m. UTC | #3
On Thu, 2024-08-22 at 16:14 +0200, Christian Brauner wrote:
> Now that we shrank struct file by 24 bytes we still have a 4 byte hole.
> Move f_version into the union and f_iocb_flags out of the union to fill
> that hole and shrink struct file by another 4 bytes. This brings struct
> file to 200 bytes down from 232 bytes.
> 
> I've tried to audit all codepaths that use f_version and none of them
> rely on it in file->f_op->release() and never have since commit
> 1da177e4c3f4 ("Linux-2.6.12-rc2").
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> struct file {
>         union {
>                 struct callback_head f_task_work;        /*     0    16 */
>                 struct llist_node  f_llist;              /*     0     8 */
>                 u64                f_version;            /*     0     8 */
>         };                                               /*     0    16 */
>         spinlock_t                 f_lock;               /*    16     4 */
>         fmode_t                    f_mode;               /*    20     4 */
>         atomic_long_t              f_count;              /*    24     8 */
>         struct mutex               f_pos_lock;           /*    32    32 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         loff_t                     f_pos;                /*    64     8 */
>         unsigned int               f_flags;              /*    72     4 */
>         unsigned int               f_iocb_flags;         /*    76     4 */
>         struct fown_struct *       f_owner;              /*    80     8 */
>         const struct cred  *       f_cred;               /*    88     8 */
>         struct file_ra_state       f_ra;                 /*    96    32 */
>         /* --- cacheline 2 boundary (128 bytes) --- */
>         struct path                f_path;               /*   128    16 */
>         struct inode *             f_inode;              /*   144     8 */
>         const struct file_operations  * f_op;            /*   152     8 */
>         void *                     f_security;           /*   160     8 */
>         void *                     private_data;         /*   168     8 */
>         struct hlist_head *        f_ep;                 /*   176     8 */
>         struct address_space *     f_mapping;            /*   184     8 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         errseq_t                   f_wb_err;             /*   192     4 */
>         errseq_t                   f_sb_err;             /*   196     4 */
> 
>         /* size: 200, cachelines: 4, members: 20 */
>         /* last cacheline: 8 bytes */
> };
> ---
>  include/linux/fs.h | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 7eb4f706d59f..7a2994405e8e 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -998,9 +998,8 @@ struct file {
>  		struct callback_head 	f_task_work;
>  		/* fput() must use workqueue (most kernel threads). */
>  		struct llist_node	f_llist;
> -		unsigned int 		f_iocb_flags;
> +		u64			f_version;
>  	};
> -
>  	/*
>  	 * Protects f_ep, f_flags.
>  	 * Must not be taken from IRQ context.
> @@ -1011,6 +1010,7 @@ struct file {
>  	struct mutex		f_pos_lock;
>  	loff_t			f_pos;
>  	unsigned int		f_flags;
> +	unsigned int 		f_iocb_flags;
>  	struct fown_struct	*f_owner;
>  	const struct cred	*f_cred;
>  	struct file_ra_state	f_ra;
> @@ -1018,7 +1018,6 @@ struct file {
>  	struct inode		*f_inode;	/* cached value */
>  	const struct file_operations	*f_op;
>  
> -	u64			f_version;
>  #ifdef CONFIG_SECURITY
>  	void			*f_security;
>  #endif

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Jens Axboe Aug. 22, 2024, 4:17 p.m. UTC | #4
On 8/22/24 9:10 AM, Christian Brauner wrote:
>> Do we want to add a comment to this effect? I know it's obvious from
>> sharing with f_task_work, but...
> 
> I'll add one.

Sounds good. You can add my:

Reviewed-by: Jens Axboe <axboe@kernel.dk>

as well, forgot to mention that in the original reply.
Christoph Hellwig Aug. 23, 2024, 6:24 a.m. UTC | #5
Given that f_version only has about a dozen users maybe just kill
it and make them use the private data instead?  Many of the users
looks pretty bogus to start with as they either only set or only
compare the value as well.
Al Viro Aug. 23, 2024, 6:34 a.m. UTC | #6
On Thu, Aug 22, 2024 at 11:24:43PM -0700, Christoph Hellwig wrote:
> Given that f_version only has about a dozen users maybe just kill
> it and make them use the private data instead?  Many of the users
> looks pretty bogus to start with as they either only set or only
> compare the value as well.

Take a look at the uses in fs/pipe.c
Christoph Hellwig Aug. 23, 2024, 6:52 a.m. UTC | #7
On Fri, Aug 23, 2024 at 07:34:11AM +0100, Al Viro wrote:
> On Thu, Aug 22, 2024 at 11:24:43PM -0700, Christoph Hellwig wrote:
> > Given that f_version only has about a dozen users maybe just kill
> > it and make them use the private data instead?  Many of the users
> > looks pretty bogus to start with as they either only set or only
> > compare the value as well.
> 
> Take a look at the uses in fs/pipe.c

I did not say "all", but "many" (and maybe should have said "a few").
Al Viro Aug. 23, 2024, 6:59 a.m. UTC | #8
On Thu, Aug 22, 2024 at 11:52:41PM -0700, Christoph Hellwig wrote:
> On Fri, Aug 23, 2024 at 07:34:11AM +0100, Al Viro wrote:
> > On Thu, Aug 22, 2024 at 11:24:43PM -0700, Christoph Hellwig wrote:
> > > Given that f_version only has about a dozen users maybe just kill
> > > it and make them use the private data instead?  Many of the users
> > > looks pretty bogus to start with as they either only set or only
> > > compare the value as well.
> > 
> > Take a look at the uses in fs/pipe.c
> 
> I did not say "all", but "many" (and maybe should have said "a few").

Not the point...  From my (admittedly cursory) reading of that code,
we might want different instances of struct file that share the same
pipe_inode_info (pointed to by ->private_data) to have different values
in ->f_version.  If that is true, there's no hope of pushing it into
->private_data - we definitely do not what an extra level of indirection
for getting to pipe_inode_info for those..
Christian Brauner Aug. 23, 2024, 8:16 a.m. UTC | #9
On Thu, Aug 22, 2024 at 10:17:37AM GMT, Jens Axboe wrote:
> On 8/22/24 9:10 AM, Christian Brauner wrote:
> >> Do we want to add a comment to this effect? I know it's obvious from
> >> sharing with f_task_work, but...
> > 
> > I'll add one.
> 
> Sounds good. You can add my:
> 
> Reviewed-by: Jens Axboe <axboe@kernel.dk>
> 
> as well, forgot to mention that in the original reply.

I think we can deliver 192 bytes aka 3 cachelines.
Afaict we can move struct file_ra_state into the union instead of
f_version. See the appended patch I'm testing now. If that works then
we're down by 40 bytes this cycle.
Christian Brauner Aug. 24, 2024, 9:26 a.m. UTC | #10
On Fri, Aug 23, 2024 at 10:16:28AM GMT, Christian Brauner wrote:
> On Thu, Aug 22, 2024 at 10:17:37AM GMT, Jens Axboe wrote:
> > On 8/22/24 9:10 AM, Christian Brauner wrote:
> > >> Do we want to add a comment to this effect? I know it's obvious from
> > >> sharing with f_task_work, but...
> > > 
> > > I'll add one.
> > 
> > Sounds good. You can add my:
> > 
> > Reviewed-by: Jens Axboe <axboe@kernel.dk>
> > 
> > as well, forgot to mention that in the original reply.
> 
> I think we can deliver 192 bytes aka 3 cachelines.
> Afaict we can move struct file_ra_state into the union instead of
> f_version. See the appended patch I'm testing now. If that works then
> we're down by 40 bytes this cycle.

Seems to hold up. I've reorderd things so that no member crosses a
cacheline. Patch appended and in vfs.misc.
diff mbox series

Patch

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7eb4f706d59f..7a2994405e8e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -998,9 +998,8 @@  struct file {
 		struct callback_head 	f_task_work;
 		/* fput() must use workqueue (most kernel threads). */
 		struct llist_node	f_llist;
-		unsigned int 		f_iocb_flags;
+		u64			f_version;
 	};
-
 	/*
 	 * Protects f_ep, f_flags.
 	 * Must not be taken from IRQ context.
@@ -1011,6 +1010,7 @@  struct file {
 	struct mutex		f_pos_lock;
 	loff_t			f_pos;
 	unsigned int		f_flags;
+	unsigned int 		f_iocb_flags;
 	struct fown_struct	*f_owner;
 	const struct cred	*f_cred;
 	struct file_ra_state	f_ra;
@@ -1018,7 +1018,6 @@  struct file {
 	struct inode		*f_inode;	/* cached value */
 	const struct file_operations	*f_op;
 
-	u64			f_version;
 #ifdef CONFIG_SECURITY
 	void			*f_security;
 #endif