diff mbox series

[v2] 9p: use inode->i_lock to protect i_size_write()

Message ID 20190118111716.60013-1-houtao1@huawei.com (mailing list archive)
State New, archived
Headers show
Series [v2] 9p: use inode->i_lock to protect i_size_write() | expand

Commit Message

Hou Tao Jan. 18, 2019, 11:17 a.m. UTC
Use inode->i_lock to protect i_size_write(), else i_size_read() in
generic_fillattr() may loop infinitely in read_seqcount_begin() when
multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl()
simultaneously under 32-bit SMP environment, and a soft lockup will be
triggered as show below:

  watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217]
  Modules linked in:
  CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
  Hardware name: Generic DT based system
  PC is at generic_fillattr+0x104/0x108
  LR is at 0xec497f00
  pc : [<802b8898>]    lr : [<ec497f00>]    psr: 200c0013
  sp : ec497e20  ip : ed608030  fp : ec497e3c
  r10: 00000000  r9 : ec497f00  r8 : ed608030
  r7 : ec497ebc  r6 : ec497f00  r5 : ee5c1550  r4 : ee005780
  r3 : 0000052d  r2 : 00000000  r1 : ec497f00  r0 : ed608030
  Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
  Control: 10c5387d  Table: ac48006a  DAC: 00000051
  CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
  Hardware name: Generic DT based system
  Backtrace:
  [<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24)
  [<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc)
  [<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20)
  [<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8)
  [<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380)
  [<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0)
  [<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64)
  [<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c)
  [<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc)
  [<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48)
  [<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240)
  [<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44)
  [<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4)
  [<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88)
  [<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98)
  [<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4)
  [<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c)
  [<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48)
  [<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec)
  [<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78)
  [<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28)

Reported-by: Xing Gaopeng <xingaopeng@huawei.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
---
v2:
	* move acquisition of i_lock into v9fs_stat2inode()
    	* rename v9fs_stat2inode() to v9fs_stat2inode_flags() to accomodate
	  v9fs_refresh_inode() which doesn't need to update i_size.
---
 fs/9p/v9fs_vfs.h       | 21 +++++++++++++++++++--
 fs/9p/vfs_inode.c      | 28 ++++++++++++++++------------
 fs/9p/vfs_inode_dotl.c | 28 +++++++++++++++++-----------
 3 files changed, 52 insertions(+), 25 deletions(-)

Comments

Hou Tao Jan. 22, 2019, 2:05 p.m. UTC | #1
ping ?

On 2019/1/18 19:17, Hou Tao wrote:
> Use inode->i_lock to protect i_size_write(), else i_size_read() in
> generic_fillattr() may loop infinitely in read_seqcount_begin() when
> multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl()
> simultaneously under 32-bit SMP environment, and a soft lockup will be
> triggered as show below:
> 
>   watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217]
>   Modules linked in:
>   CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
>   Hardware name: Generic DT based system
>   PC is at generic_fillattr+0x104/0x108
>   LR is at 0xec497f00
>   pc : [<802b8898>]    lr : [<ec497f00>]    psr: 200c0013
>   sp : ec497e20  ip : ed608030  fp : ec497e3c
>   r10: 00000000  r9 : ec497f00  r8 : ed608030
>   r7 : ec497ebc  r6 : ec497f00  r5 : ee5c1550  r4 : ee005780
>   r3 : 0000052d  r2 : 00000000  r1 : ec497f00  r0 : ed608030
>   Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>   Control: 10c5387d  Table: ac48006a  DAC: 00000051
>   CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
>   Hardware name: Generic DT based system
>   Backtrace:
>   [<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24)
>   [<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc)
>   [<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20)
>   [<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8)
>   [<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380)
>   [<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0)
>   [<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64)
>   [<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c)
>   [<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc)
>   [<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48)
>   [<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240)
>   [<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44)
>   [<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4)
>   [<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88)
>   [<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98)
>   [<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4)
>   [<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c)
>   [<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48)
>   [<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec)
>   [<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78)
>   [<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28)
> 
> Reported-by: Xing Gaopeng <xingaopeng@huawei.com>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
> v2:
> 	* move acquisition of i_lock into v9fs_stat2inode()
>     	* rename v9fs_stat2inode() to v9fs_stat2inode_flags() to accomodate
> 	  v9fs_refresh_inode() which doesn't need to update i_size.
> ---
>  fs/9p/v9fs_vfs.h       | 21 +++++++++++++++++++--
>  fs/9p/vfs_inode.c      | 28 ++++++++++++++++------------
>  fs/9p/vfs_inode_dotl.c | 28 +++++++++++++++++-----------
>  3 files changed, 52 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
> index 5a0db6dec8d1..51b3e441b895 100644
> --- a/fs/9p/v9fs_vfs.h
> +++ b/fs/9p/v9fs_vfs.h
> @@ -40,6 +40,9 @@
>   */
>  #define P9_LOCK_TIMEOUT (30*HZ)
>  
> +/* flags for v9fs_stat2inode_flags() & v9fs_stat2inode_dotl_flags() */
> +#define V9FS_STAT2INODE_KEEP_ISIZE 1
> +
>  extern struct file_system_type v9fs_fs_type;
>  extern const struct address_space_operations v9fs_addr_operations;
>  extern const struct file_operations v9fs_file_operations;
> @@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
>  		    struct inode *inode, umode_t mode, dev_t);
>  void v9fs_evict_inode(struct inode *inode);
>  ino_t v9fs_qid2ino(struct p9_qid *qid);
> -void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
> -void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
> +void v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
> +			    struct super_block *sb, unsigned int flags);
> +void v9fs_stat2inode_dotl_flags(struct p9_stat_dotl *stat, struct inode *inode,
> +				 unsigned int flags);
>  int v9fs_dir_release(struct inode *inode, struct file *filp);
>  int v9fs_file_open(struct inode *inode, struct file *file);
>  void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
> @@ -83,4 +88,16 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
>  }
>  
>  int v9fs_open_to_dotl_flags(int flags);
> +
> +static inline void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
> +				    struct super_block *sb)
> +{
> +	v9fs_stat2inode_flags(stat, inode, sb, 0);
> +}
> +
> +static inline void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat,
> +					 struct inode *inode)
> +{
> +	v9fs_stat2inode_dotl_flags(stat, inode, 0);
> +}
>  #endif
> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> index 85ff859d3af5..ca7d93184400 100644
> --- a/fs/9p/vfs_inode.c
> +++ b/fs/9p/vfs_inode.c
> @@ -1166,16 +1166,17 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
>  }
>  
>  /**
> - * v9fs_stat2inode - populate an inode structure with mistat info
> + * v9fs_stat2inode_flags - populate an inode structure with mistat info
>   * @stat: Plan 9 metadata (mistat) structure
>   * @inode: inode to populate
>   * @sb: superblock of filesystem
> + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
>   *
>   */
>  
>  void
> -v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
> -	struct super_block *sb)
> +v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
> +		       struct super_block *sb, unsigned int flags)
>  {
>  	umode_t mode;
>  	char ext[32];
> @@ -1184,6 +1185,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
>  	struct v9fs_session_info *v9ses = sb->s_fs_info;
>  	struct v9fs_inode *v9inode = V9FS_I(inode);
>  
> +	/* Protect the updates of i_size & cache_validity */
> +	spin_lock(&inode->i_lock);
> +
>  	set_nlink(inode, 1);
>  
>  	inode->i_atime.tv_sec = stat->atime;
> @@ -1216,11 +1220,14 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
>  	mode = p9mode2perm(v9ses, stat);
>  	mode |= inode->i_mode & ~S_IALLUGO;
>  	inode->i_mode = mode;
> -	i_size_write(inode, stat->length);
>  
> +	if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
> +		i_size_write(inode, stat->length);
>  	/* not real number of blocks, but 512 byte ones ... */
> -	inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
> +	inode->i_blocks = (stat->length + 512 - 1) >> 9;
>  	v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
> +
> +	spin_unlock(&inode->i_lock);
>  }
>  
>  /**
> @@ -1416,9 +1423,9 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
>  {
>  	int umode;
>  	dev_t rdev;
> -	loff_t i_size;
>  	struct p9_wstat *st;
>  	struct v9fs_session_info *v9ses;
> +	unsigned int flags;
>  
>  	v9ses = v9fs_inode2v9ses(inode);
>  	st = p9_client_stat(fid);
> @@ -1431,16 +1438,13 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
>  	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
>  		goto out;
>  
> -	spin_lock(&inode->i_lock);
>  	/*
>  	 * We don't want to refresh inode->i_size,
>  	 * because we may have cached data
>  	 */
> -	i_size = inode->i_size;
> -	v9fs_stat2inode(st, inode, inode->i_sb);
> -	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
> -		inode->i_size = i_size;
> -	spin_unlock(&inode->i_lock);
> +	flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
> +		V9FS_STAT2INODE_KEEP_ISIZE : 0;
> +	v9fs_stat2inode_flags(st, inode, inode->i_sb, flags);
>  out:
>  	p9stat_free(st);
>  	kfree(st);
> diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
> index 4823e1c46999..5c727d49edc4 100644
> --- a/fs/9p/vfs_inode_dotl.c
> +++ b/fs/9p/vfs_inode_dotl.c
> @@ -604,18 +604,23 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
>  }
>  
>  /**
> - * v9fs_stat2inode_dotl - populate an inode structure with stat info
> + * v9fs_stat2inode_dotl_flags - populate an inode structure with stat info
>   * @stat: stat structure
>   * @inode: inode to populate
> + * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
>   *
>   */
>  
>  void
> -v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
> +v9fs_stat2inode_dotl_flags(struct p9_stat_dotl *stat, struct inode *inode,
> +			    unsigned int flags)
>  {
>  	umode_t mode;
>  	struct v9fs_inode *v9inode = V9FS_I(inode);
>  
> +	/* Protect the updates of i_size & cache_validity */
> +	spin_lock(&inode->i_lock);
> +
>  	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
>  		inode->i_atime.tv_sec = stat->st_atime_sec;
>  		inode->i_atime.tv_nsec = stat->st_atime_nsec;
> @@ -631,7 +636,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
>  		mode |= inode->i_mode & ~S_IALLUGO;
>  		inode->i_mode = mode;
>  
> -		i_size_write(inode, stat->st_size);
> +		if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
> +			i_size_write(inode, stat->st_size);
>  		inode->i_blocks = stat->st_blocks;
>  	} else {
>  		if (stat->st_result_mask & P9_STATS_ATIME) {
> @@ -661,7 +667,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
>  		}
>  		if (stat->st_result_mask & P9_STATS_RDEV)
>  			inode->i_rdev = new_decode_dev(stat->st_rdev);
> -		if (stat->st_result_mask & P9_STATS_SIZE)
> +		if (stat->st_result_mask & P9_STATS_SIZE &&
> +		    !(flags & V9FS_STAT2INODE_KEEP_ISIZE))
>  			i_size_write(inode, stat->st_size);
>  		if (stat->st_result_mask & P9_STATS_BLOCKS)
>  			inode->i_blocks = stat->st_blocks;
> @@ -673,6 +680,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
>  	 * because the inode structure does not have fields for them.
>  	 */
>  	v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
> +
> +	spin_unlock(&inode->i_lock);
>  }
>  
>  static int
> @@ -928,9 +937,9 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry,
>  
>  int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
>  {
> -	loff_t i_size;
>  	struct p9_stat_dotl *st;
>  	struct v9fs_session_info *v9ses;
> +	unsigned int flags;
>  
>  	v9ses = v9fs_inode2v9ses(inode);
>  	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
> @@ -942,16 +951,13 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
>  	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
>  		goto out;
>  
> -	spin_lock(&inode->i_lock);
>  	/*
>  	 * We don't want to refresh inode->i_size,
>  	 * because we may have cached data
>  	 */
> -	i_size = inode->i_size;
> -	v9fs_stat2inode_dotl(st, inode);
> -	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
> -		inode->i_size = i_size;
> -	spin_unlock(&inode->i_lock);
> +	flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
> +		V9FS_STAT2INODE_KEEP_ISIZE : 0;
> +	v9fs_stat2inode_dotl_flags(st, inode, flags);
>  out:
>  	kfree(st);
>  	return 0;
>
Dominique Martinet Jan. 23, 2019, 2:28 a.m. UTC | #2
Sorry for the late reply, I had thought I had but it looks like it
didn't go out.

Hou Tao wrote on Fri, Jan 18, 2019:
> Use inode->i_lock to protect i_size_write(), else i_size_read() in
> generic_fillattr() may loop infinitely in read_seqcount_begin() when
> multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl()
> simultaneously under 32-bit SMP environment, and a soft lockup will be
> triggered as show below:

I didn't notice before, but we have a couple of other places that call
i_size_write() in the 9p code, namely v9fs_write_end() and
v9fs_file_write_iter().
I do not see what's special about these that would require not taking
the inode lock?

write_end() has a comment that i_size cannot change under it because it
has the i_mutex, but it's obviously not sufficient given the stat2inode
code does not have it, so it needs to do the same dance as write_iter.


>   watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217]
>   Modules linked in:
>   CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
>   Hardware name: Generic DT based system
>   PC is at generic_fillattr+0x104/0x108
>   LR is at 0xec497f00
>   pc : [<802b8898>]    lr : [<ec497f00>]    psr: 200c0013
>   sp : ec497e20  ip : ed608030  fp : ec497e3c
>   r10: 00000000  r9 : ec497f00  r8 : ed608030
>   r7 : ec497ebc  r6 : ec497f00  r5 : ee5c1550  r4 : ee005780
>   r3 : 0000052d  r2 : 00000000  r1 : ec497f00  r0 : ed608030
>   Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>   Control: 10c5387d  Table: ac48006a  DAC: 00000051
>   CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
>   Hardware name: Generic DT based system
>   Backtrace:
>   [<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24)
>   [<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc)
>   [<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20)
>   [<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8)
>   [<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380)
>   [<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0)
>   [<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64)
>   [<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c)
>   [<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc)
>   [<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48)
>   [<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240)
>   [<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44)
>   [<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4)
>   [<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88)
>   [<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98)
>   [<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4)
>   [<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c)
>   [<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48)
>   [<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec)
>   [<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78)
>   [<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28)
> 
> Reported-by: Xing Gaopeng <xingaopeng@huawei.com>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
> v2:
> 	* move acquisition of i_lock into v9fs_stat2inode()
>  	* rename v9fs_stat2inode() to v9fs_stat2inode_flags() to accomodate
> 	  v9fs_refresh_inode() which doesn't need to update i_size.

As a nitpick I don't really like foo() vs foo_flags() as
foo-that-takes-extra-flags.
There are a few such examples in the kernel already but I think it does
not really convery information; it's better to have the base function
take flags and just use it, or if you want wrappers then just never
expose the flags but make a static _v9fs_stat2inode take flags,
v9fs_stat2inode behave as the old one and a new
v9fs_stat2inode_keepisize for the update with cache.
I'd personally go with the former are there only are four call sites.

Anyway, no strong feeling there, do as you'd like for this one
(including keeping the current way if you think it's better); the
other comments require another round-trip anyway :)

> ---
>  fs/9p/v9fs_vfs.h       | 21 +++++++++++++++++++--
>  fs/9p/vfs_inode.c      | 28 ++++++++++++++++------------
>  fs/9p/vfs_inode_dotl.c | 28 +++++++++++++++++-----------
>  3 files changed, 52 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
> index 5a0db6dec8d1..51b3e441b895 100644
> --- a/fs/9p/v9fs_vfs.h
> +++ b/fs/9p/v9fs_vfs.h
> @@ -40,6 +40,9 @@
>   */
>  #define P9_LOCK_TIMEOUT (30*HZ)
>  
> +/* flags for v9fs_stat2inode_flags() & v9fs_stat2inode_dotl_flags() */
> +#define V9FS_STAT2INODE_KEEP_ISIZE 1
> +
>  extern struct file_system_type v9fs_fs_type;
>  extern const struct address_space_operations v9fs_addr_operations;
>  extern const struct file_operations v9fs_file_operations;
> @@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
>  		    struct inode *inode, umode_t mode, dev_t);
>  void v9fs_evict_inode(struct inode *inode);
>  ino_t v9fs_qid2ino(struct p9_qid *qid);
> -void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
> -void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
> +void v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
> +			    struct super_block *sb, unsigned int flags);
> +void v9fs_stat2inode_dotl_flags(struct p9_stat_dotl *stat, struct inode *inode,
> +				 unsigned int flags);
>  int v9fs_dir_release(struct inode *inode, struct file *filp);
>  int v9fs_file_open(struct inode *inode, struct file *file);
>  void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
> @@ -83,4 +88,16 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
>  }
>  
>  int v9fs_open_to_dotl_flags(int flags);
> +
> +static inline void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
> +				    struct super_block *sb)
> +{
> +	v9fs_stat2inode_flags(stat, inode, sb, 0);
> +}
> +
> +static inline void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat,
> +					 struct inode *inode)
> +{
> +	v9fs_stat2inode_dotl_flags(stat, inode, 0);
> +}
>  #endif
> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> index 85ff859d3af5..ca7d93184400 100644
> --- a/fs/9p/vfs_inode.c
> +++ b/fs/9p/vfs_inode.c
> @@ -1166,16 +1166,17 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
>  }
>  
>  /**
> - * v9fs_stat2inode - populate an inode structure with mistat info
> + * v9fs_stat2inode_flags - populate an inode structure with mistat info
>   * @stat: Plan 9 metadata (mistat) structure
>   * @inode: inode to populate
>   * @sb: superblock of filesystem
> + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
>   *
>   */
>  
>  void
> -v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
> -	struct super_block *sb)
> +v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
> +		       struct super_block *sb, unsigned int flags)
>  {
>  	umode_t mode;
>  	char ext[32];
> @@ -1184,6 +1185,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
>  	struct v9fs_session_info *v9ses = sb->s_fs_info;
>  	struct v9fs_inode *v9inode = V9FS_I(inode);
>  
> +	/* Protect the updates of i_size & cache_validity */
> +	spin_lock(&inode->i_lock);
> +

You're still locking the whole function, which is what I complained
about in the previous patch.
First of all, in the KEEP_ISIZE path that lock isn't needed at all,
and it does not protect anything other than the i_size_write.


I do see that you wrote in the comment this is meant to protect
cache_validity, but it does not solve the race I mentionned in the v1
patch either - the race timing is from the moment the 9p getattr
request has been sent out, not just setting fields in here.
(e.g. thread1 sends getattr request, thread2 setattrs and invalidate
inode attr after request has been processed by server, thread1 receives
getattr and clears flag while attributes are still obsolete)

Let's fix this in a proper separate patch and not pretend it's resolved
by this trivial locking when it isn't.

For now, just locking around the i_size_write makes most sense.


Thanks,
Hou Tao Jan. 23, 2019, 1:40 p.m. UTC | #3
Hi,

On 2019/1/23 10:28, Dominique Martinet wrote:
> Sorry for the late reply, I had thought I had but it looks like it
> didn't go out.
> 
> Hou Tao wrote on Fri, Jan 18, 2019:
>> Use inode->i_lock to protect i_size_write(), else i_size_read() in
>> generic_fillattr() may loop infinitely in read_seqcount_begin() when
>> multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl()
>> simultaneously under 32-bit SMP environment, and a soft lockup will be
>> triggered as show below:
> 
> I didn't notice before, but we have a couple of other places that call
> i_size_write() in the 9p code, namely v9fs_write_end() and
> v9fs_file_write_iter().
> I do not see what's special about these that would require not taking
> the inode lock?
> 
> write_end() has a comment that i_size cannot change under it because it
> has the i_mutex, but it's obviously not sufficient given the stat2inode
> code does not have it, so it needs to do the same dance as write_iter.
> 
OK, will do that in v3

How about adding a helper as shown in the following lines ?

static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
{
    spin_lock(&inode->i_lock);
    i_size_write(inode, i_size);
    spin_unlock(&inode->i_lock);
}

> 
>>   watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217]
>>   Modules linked in:
>>   CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
>>   Hardware name: Generic DT based system
>>   PC is at generic_fillattr+0x104/0x108
>>   LR is at 0xec497f00
>>   pc : [<802b8898>]    lr : [<ec497f00>]    psr: 200c0013
>>   sp : ec497e20  ip : ed608030  fp : ec497e3c
>>   r10: 00000000  r9 : ec497f00  r8 : ed608030
>>   r7 : ec497ebc  r6 : ec497f00  r5 : ee5c1550  r4 : ee005780
>>   r3 : 0000052d  r2 : 00000000  r1 : ec497f00  r0 : ed608030
>>   Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>   Control: 10c5387d  Table: ac48006a  DAC: 00000051
>>   CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
>>   Hardware name: Generic DT based system
>>   Backtrace:
>>   [<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24)
>>   [<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc)
>>   [<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20)
>>   [<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8)
>>   [<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380)
>>   [<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0)
>>   [<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64)
>>   [<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c)
>>   [<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc)
>>   [<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48)
>>   [<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240)
>>   [<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44)
>>   [<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4)
>>   [<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88)
>>   [<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98)
>>   [<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4)
>>   [<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c)
>>   [<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48)
>>   [<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec)
>>   [<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78)
>>   [<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28)
>>
>> Reported-by: Xing Gaopeng <xingaopeng@huawei.com>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>> v2:
>> 	* move acquisition of i_lock into v9fs_stat2inode()
>>  	* rename v9fs_stat2inode() to v9fs_stat2inode_flags() to accomodate
>> 	  v9fs_refresh_inode() which doesn't need to update i_size.
> 
> As a nitpick I don't really like foo() vs foo_flags() as
> foo-that-takes-extra-flags.
> There are a few such examples in the kernel already but I think it does
> not really convery information; it's better to have the base function
> take flags and just use it, or if you want wrappers then just never
> expose the flags but make a static _v9fs_stat2inode take flags,
> v9fs_stat2inode behave as the old one and a new
> v9fs_stat2inode_keepisize for the update with cache.
> I'd personally go with the former are there only are four call sites.
> 
I agree with you. I will add a new flags parameter to v9fs_stat2inode() and use
it directly instead of creating inline wrappers around it.

> Anyway, no strong feeling there, do as you'd like for this one
> (including keeping the current way if you think it's better); the
> other comments require another round-trip anyway :)
> 


>> ---
>>  fs/9p/v9fs_vfs.h       | 21 +++++++++++++++++++--
>>  fs/9p/vfs_inode.c      | 28 ++++++++++++++++------------
>>  fs/9p/vfs_inode_dotl.c | 28 +++++++++++++++++-----------
>>  3 files changed, 52 insertions(+), 25 deletions(-)
>>
>> diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
>> index 5a0db6dec8d1..51b3e441b895 100644
>> --- a/fs/9p/v9fs_vfs.h
>> +++ b/fs/9p/v9fs_vfs.h
>> @@ -40,6 +40,9 @@
>>   */
>>  #define P9_LOCK_TIMEOUT (30*HZ)
>>  
>> +/* flags for v9fs_stat2inode_flags() & v9fs_stat2inode_dotl_flags() */
>> +#define V9FS_STAT2INODE_KEEP_ISIZE 1
>> +
>>  extern struct file_system_type v9fs_fs_type;
>>  extern const struct address_space_operations v9fs_addr_operations;
>>  extern const struct file_operations v9fs_file_operations;
>> @@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
>>  		    struct inode *inode, umode_t mode, dev_t);
>>  void v9fs_evict_inode(struct inode *inode);
>>  ino_t v9fs_qid2ino(struct p9_qid *qid);
>> -void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
>> -void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
>> +void v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
>> +			    struct super_block *sb, unsigned int flags);
>> +void v9fs_stat2inode_dotl_flags(struct p9_stat_dotl *stat, struct inode *inode,
>> +				 unsigned int flags);
>>  int v9fs_dir_release(struct inode *inode, struct file *filp);
>>  int v9fs_file_open(struct inode *inode, struct file *file);
>>  void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
>> @@ -83,4 +88,16 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
>>  }
>>  
>>  int v9fs_open_to_dotl_flags(int flags);
>> +
>> +static inline void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
>> +				    struct super_block *sb)
>> +{
>> +	v9fs_stat2inode_flags(stat, inode, sb, 0);
>> +}
>> +
>> +static inline void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat,
>> +					 struct inode *inode)
>> +{
>> +	v9fs_stat2inode_dotl_flags(stat, inode, 0);
>> +}
>>  #endif
>> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
>> index 85ff859d3af5..ca7d93184400 100644
>> --- a/fs/9p/vfs_inode.c
>> +++ b/fs/9p/vfs_inode.c
>> @@ -1166,16 +1166,17 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
>>  }
>>  
>>  /**
>> - * v9fs_stat2inode - populate an inode structure with mistat info
>> + * v9fs_stat2inode_flags - populate an inode structure with mistat info
>>   * @stat: Plan 9 metadata (mistat) structure
>>   * @inode: inode to populate
>>   * @sb: superblock of filesystem
>> + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
>>   *
>>   */
>>  
>>  void
>> -v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
>> -	struct super_block *sb)
>> +v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
>> +		       struct super_block *sb, unsigned int flags)
>>  {
>>  	umode_t mode;
>>  	char ext[32];
>> @@ -1184,6 +1185,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
>>  	struct v9fs_session_info *v9ses = sb->s_fs_info;
>>  	struct v9fs_inode *v9inode = V9FS_I(inode);
>>  
>> +	/* Protect the updates of i_size & cache_validity */
>> +	spin_lock(&inode->i_lock);
>> +
> 
> You're still locking the whole function, which is what I complained
> about in the previous patch.
> First of all, in the KEEP_ISIZE path that lock isn't needed at all,
> and it does not protect anything other than the i_size_write.
> 
In my understanding v9fs_refresh_inode() uses i_lock to protect
the update of cache_validity, so I lock the whole function to
keep up with with the old behavior, but as you have commented below,
the lock doesn't do any help, so I will only use i_lock to protect
i_size_write().

> 
> I do see that you wrote in the comment this is meant to protect
> cache_validity, but it does not solve the race I mentionned in the v1
> patch either - the race timing is from the moment the 9p getattr
> request has been sent out, not just setting fields in here.
> (e.g. thread1 sends getattr request, thread2 setattrs and invalidate
> inode attr after request has been processed by server, thread1 receives
> getattr and clears flag while attributes are still obsolete)
>> Let's fix this in a proper separate patch and not pretend it's resolved
> by this trivial locking when it isn't.

I see. Thanks for you comments.

> 
> For now, just locking around the i_size_write makes most sense.
>>
> Thanks,
>
Dominique Martinet Jan. 23, 2019, 1:50 p.m. UTC | #4
Hou Tao wrote on Wed, Jan 23, 2019:
> > write_end() has a comment that i_size cannot change under it because it
> > has the i_mutex, but it's obviously not sufficient given the stat2inode
> > code does not have it, so it needs to do the same dance as write_iter.
>
> OK, will do that in v3
> 
> How about adding a helper as shown in the following lines ?
>
> static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
> {
>     spin_lock(&inode->i_lock);
>     i_size_write(inode, i_size);
>     spin_unlock(&inode->i_lock);
> }

Sure. I'm actually surprise no other part of the kernel has such helper,
cifs seems to be using that pattern a lot too.
Actually, looking a bit deeper fs/stack.c has this code:
        /*
         * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
         * fsstack_copy_inode_size() to hold some lock around
         * i_size_write(), otherwise i_size_read() may spin forever (see
         * include/linux/fs.h).  We don't necessarily hold i_mutex when this
         * is called, so take i_lock for that case.
         *
         * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the
         * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
         * for that case too, and do both at once by combining the tests.
         *
         * There is none of this locking overhead in the 64-bit case.
         */
        if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
                spin_lock(&dst->i_lock);
        i_size_write(dst, i_size);
        dst->i_blocks = i_blocks;
        if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
                spin_unlock(&dst->i_lock);

It might make sense to do the same in our little helper ?

(it looks like i_blocks has the same problem? speaking of which we
probably do not want to update i_blocks either in the KEEP_SIZE
case...?)

> > As a nitpick I don't really like foo() vs foo_flags() as
> > foo-that-takes-extra-flags.
> > There are a few such examples in the kernel already but I think it does
> > not really convery information; it's better to have the base function
> > take flags and just use it, or if you want wrappers then just never
> > expose the flags but make a static _v9fs_stat2inode take flags,
> > v9fs_stat2inode behave as the old one and a new
> > v9fs_stat2inode_keepisize for the update with cache.
> > I'd personally go with the former are there only are four call sites.
>
> I agree with you. I will add a new flags parameter to v9fs_stat2inode() and use
> it directly instead of creating inline wrappers around it.

Thanks.
Hou Tao Jan. 24, 2019, 3:21 a.m. UTC | #5
Hi,

On 2019/1/23 21:50, Dominique Martinet wrote:
> Hou Tao wrote on Wed, Jan 23, 2019:
>>> write_end() has a comment that i_size cannot change under it because it
>>> has the i_mutex, but it's obviously not sufficient given the stat2inode
>>> code does not have it, so it needs to do the same dance as write_iter.
>>
>> OK, will do that in v3

After checking the code, i think v9fs_write_begin() truly doesn't need i_lock.
Because it is used for LOOSE or FSCACHE cache mode and under these two
modes the i_size will not updated at all (neither in v9fs_refresh_inode()
nor v9fs_vfs_getattr()). And for v9fs_file_write_iter(), the i_lock is needed.

>>
>> How about adding a helper as shown in the following lines ?
>>
>> static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
>> {
>>     spin_lock(&inode->i_lock);
>>     i_size_write(inode, i_size);
>>     spin_unlock(&inode->i_lock);
>> }
> 
> Sure. I'm actually surprise no other part of the kernel has such helper,
> cifs seems to be using that pattern a lot too.
> Actually, looking a bit deeper fs/stack.c has this code:
>         /*
>          * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
>          * fsstack_copy_inode_size() to hold some lock around
>          * i_size_write(), otherwise i_size_read() may spin forever (see
>          * include/linux/fs.h).  We don't necessarily hold i_mutex when this
>          * is called, so take i_lock for that case.
>          *
>          * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the
>          * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
>          * for that case too, and do both at once by combining the tests.
>          *
>          * There is none of this locking overhead in the 64-bit case.
>          */
>         if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
>                 spin_lock(&dst->i_lock);
>         i_size_write(dst, i_size);
>         dst->i_blocks = i_blocks;
>         if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
>                 spin_unlock(&dst->i_lock);
> 
> It might make sense to do the same in our little helper ?
OK. And there will be no performance loss in 64-bit case.

> 
> (it looks like i_blocks has the same problem? speaking of which we
> probably do not want to update i_blocks either in the KEEP_SIZE
> case...?)
> 
Yes, i_blocks may be corrupted under 32-bit case, and maybe it's appropriate
to fix it in a separated patch.

>>> As a nitpick I don't really like foo() vs foo_flags() as
>>> foo-that-takes-extra-flags.
>>> There are a few such examples in the kernel already but I think it does
>>> not really convery information; it's better to have the base function
>>> take flags and just use it, or if you want wrappers then just never
>>> expose the flags but make a static _v9fs_stat2inode take flags,
>>> v9fs_stat2inode behave as the old one and a new
>>> v9fs_stat2inode_keepisize for the update with cache.
>>> I'd personally go with the former are there only are four call sites.
>>
>> I agree with you. I will add a new flags parameter to v9fs_stat2inode() and use
>> it directly instead of creating inline wrappers around it.
> 
> Thanks.
>
diff mbox series

Patch

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 5a0db6dec8d1..51b3e441b895 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,6 +40,9 @@ 
  */
 #define P9_LOCK_TIMEOUT (30*HZ)
 
+/* flags for v9fs_stat2inode_flags() & v9fs_stat2inode_dotl_flags() */
+#define V9FS_STAT2INODE_KEEP_ISIZE 1
+
 extern struct file_system_type v9fs_fs_type;
 extern const struct address_space_operations v9fs_addr_operations;
 extern const struct file_operations v9fs_file_operations;
@@ -61,8 +64,10 @@  int v9fs_init_inode(struct v9fs_session_info *v9ses,
 		    struct inode *inode, umode_t mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
-void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
+void v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
+			    struct super_block *sb, unsigned int flags);
+void v9fs_stat2inode_dotl_flags(struct p9_stat_dotl *stat, struct inode *inode,
+				 unsigned int flags);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
 void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
@@ -83,4 +88,16 @@  static inline void v9fs_invalidate_inode_attr(struct inode *inode)
 }
 
 int v9fs_open_to_dotl_flags(int flags);
+
+static inline void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
+				    struct super_block *sb)
+{
+	v9fs_stat2inode_flags(stat, inode, sb, 0);
+}
+
+static inline void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat,
+					 struct inode *inode)
+{
+	v9fs_stat2inode_dotl_flags(stat, inode, 0);
+}
 #endif
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 85ff859d3af5..ca7d93184400 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1166,16 +1166,17 @@  static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 }
 
 /**
- * v9fs_stat2inode - populate an inode structure with mistat info
+ * v9fs_stat2inode_flags - populate an inode structure with mistat info
  * @stat: Plan 9 metadata (mistat) structure
  * @inode: inode to populate
  * @sb: superblock of filesystem
+ * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
  *
  */
 
 void
-v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
-	struct super_block *sb)
+v9fs_stat2inode_flags(struct p9_wstat *stat, struct inode *inode,
+		       struct super_block *sb, unsigned int flags)
 {
 	umode_t mode;
 	char ext[32];
@@ -1184,6 +1185,9 @@  v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
+	/* Protect the updates of i_size & cache_validity */
+	spin_lock(&inode->i_lock);
+
 	set_nlink(inode, 1);
 
 	inode->i_atime.tv_sec = stat->atime;
@@ -1216,11 +1220,14 @@  v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	mode = p9mode2perm(v9ses, stat);
 	mode |= inode->i_mode & ~S_IALLUGO;
 	inode->i_mode = mode;
-	i_size_write(inode, stat->length);
 
+	if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+		i_size_write(inode, stat->length);
 	/* not real number of blocks, but 512 byte ones ... */
-	inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+	inode->i_blocks = (stat->length + 512 - 1) >> 9;
 	v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
+
+	spin_unlock(&inode->i_lock);
 }
 
 /**
@@ -1416,9 +1423,9 @@  int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 {
 	int umode;
 	dev_t rdev;
-	loff_t i_size;
 	struct p9_wstat *st;
 	struct v9fs_session_info *v9ses;
+	unsigned int flags;
 
 	v9ses = v9fs_inode2v9ses(inode);
 	st = p9_client_stat(fid);
@@ -1431,16 +1438,13 @@  int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
 		goto out;
 
-	spin_lock(&inode->i_lock);
 	/*
 	 * We don't want to refresh inode->i_size,
 	 * because we may have cached data
 	 */
-	i_size = inode->i_size;
-	v9fs_stat2inode(st, inode, inode->i_sb);
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-		inode->i_size = i_size;
-	spin_unlock(&inode->i_lock);
+	flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+		V9FS_STAT2INODE_KEEP_ISIZE : 0;
+	v9fs_stat2inode_flags(st, inode, inode->i_sb, flags);
 out:
 	p9stat_free(st);
 	kfree(st);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 4823e1c46999..5c727d49edc4 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -604,18 +604,23 @@  int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 }
 
 /**
- * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * v9fs_stat2inode_dotl_flags - populate an inode structure with stat info
  * @stat: stat structure
  * @inode: inode to populate
+ * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
  *
  */
 
 void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+v9fs_stat2inode_dotl_flags(struct p9_stat_dotl *stat, struct inode *inode,
+			    unsigned int flags)
 {
 	umode_t mode;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
+	/* Protect the updates of i_size & cache_validity */
+	spin_lock(&inode->i_lock);
+
 	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
 		inode->i_atime.tv_sec = stat->st_atime_sec;
 		inode->i_atime.tv_nsec = stat->st_atime_nsec;
@@ -631,7 +636,8 @@  v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		mode |= inode->i_mode & ~S_IALLUGO;
 		inode->i_mode = mode;
 
-		i_size_write(inode, stat->st_size);
+		if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+			i_size_write(inode, stat->st_size);
 		inode->i_blocks = stat->st_blocks;
 	} else {
 		if (stat->st_result_mask & P9_STATS_ATIME) {
@@ -661,7 +667,8 @@  v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		}
 		if (stat->st_result_mask & P9_STATS_RDEV)
 			inode->i_rdev = new_decode_dev(stat->st_rdev);
-		if (stat->st_result_mask & P9_STATS_SIZE)
+		if (stat->st_result_mask & P9_STATS_SIZE &&
+		    !(flags & V9FS_STAT2INODE_KEEP_ISIZE))
 			i_size_write(inode, stat->st_size);
 		if (stat->st_result_mask & P9_STATS_BLOCKS)
 			inode->i_blocks = stat->st_blocks;
@@ -673,6 +680,8 @@  v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 	 * because the inode structure does not have fields for them.
 	 */
 	v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
+
+	spin_unlock(&inode->i_lock);
 }
 
 static int
@@ -928,9 +937,9 @@  v9fs_vfs_get_link_dotl(struct dentry *dentry,
 
 int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 {
-	loff_t i_size;
 	struct p9_stat_dotl *st;
 	struct v9fs_session_info *v9ses;
+	unsigned int flags;
 
 	v9ses = v9fs_inode2v9ses(inode);
 	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
@@ -942,16 +951,13 @@  int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
 		goto out;
 
-	spin_lock(&inode->i_lock);
 	/*
 	 * We don't want to refresh inode->i_size,
 	 * because we may have cached data
 	 */
-	i_size = inode->i_size;
-	v9fs_stat2inode_dotl(st, inode);
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-		inode->i_size = i_size;
-	spin_unlock(&inode->i_lock);
+	flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+		V9FS_STAT2INODE_KEEP_ISIZE : 0;
+	v9fs_stat2inode_dotl_flags(st, inode, flags);
 out:
 	kfree(st);
 	return 0;