diff mbox series

[1/3] quota: Check next/prev free block number after reading from quota file

Message ID 20220820110514.881373-2-chengzhihao1@huawei.com (mailing list archive)
State New, archived
Headers show
Series Check content after reading from quota file | expand

Commit Message

Zhihao Cheng Aug. 20, 2022, 11:05 a.m. UTC
Following process:
 Init: v2_read_file_info: <3> dqi_free_blk 0 dqi_free_entry 5 dqi_blks 6

 Step 1. chown bin f_a -> dquot_acquire -> v2_write_dquot:
  qtree_write_dquot
   do_insert_tree
    find_free_dqentry
     get_free_dqblk
      write_blk(info->dqi_blocks) // info->dqi_blocks = 6, failure. The
	   content in physical block (corresponding to blk 6) is random.

 Step 2. chown root f_a -> dquot_transfer -> dqput_all -> dqput ->
         ext4_release_dquot -> v2_release_dquot -> qtree_delete_dquot:
  dquot_release
   remove_tree
    free_dqentry
     put_free_dqblk(6)
      info->dqi_free_blk = blk    // info->dqi_free_blk = 6

 Step 3. drop cache (buffer head for block 6 is released)

 Step 4. chown bin f_b -> dquot_acquire -> commit_dqblk -> v2_write_dquot:
  qtree_write_dquot
   do_insert_tree
    find_free_dqentry
     get_free_dqblk
      dh = (struct qt_disk_dqdbheader *)buf
      blk = info->dqi_free_blk     // 6
      ret = read_blk(info, blk, buf)  // The content of buf is random
      info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free)  // random blk

 Step 5. chown bin f_c -> notify_change -> ext4_setattr -> dquot_transfer:
  dquot = dqget -> acquire_dquot -> ext4_acquire_dquot -> dquot_acquire ->
          commit_dqblk -> v2_write_dquot -> dq_insert_tree:
   do_insert_tree
    find_free_dqentry
     get_free_dqblk
      blk = info->dqi_free_blk    // If blk < 0 and blk is not an error
				     code, it will be returned as dquot

  transfer_to[USRQUOTA] = dquot  // A random negative value
  __dquot_transfer(transfer_to)
   dquot_add_inodes(transfer_to[cnt])
    spin_lock(&dquot->dq_dqb_lock)  // page fault

, which will lead to kernel page fault:
 Quota error (device sda): qtree_write_dquot: Error -8000 occurred
 while creating quota
 BUG: unable to handle page fault for address: ffffffffffffe120
 #PF: supervisor write access in kernel mode
 #PF: error_code(0x0002) - not-present page
 Oops: 0002 [#1] PREEMPT SMP
 CPU: 0 PID: 5974 Comm: chown Not tainted 6.0.0-rc1-00004
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
 RIP: 0010:_raw_spin_lock+0x3a/0x90
 Call Trace:
  dquot_add_inodes+0x28/0x270
  __dquot_transfer+0x377/0x840
  dquot_transfer+0xde/0x540
  ext4_setattr+0x405/0x14d0
  notify_change+0x68e/0x9f0
  chown_common+0x300/0x430
  __x64_sys_fchownat+0x29/0x40

In order to avoid accessing invalid quota memory address, this patch adds
block number checking of next/prev free block read from quota file.

Fetch a reproducer in [Link].

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216372
Fixes: 1da177e4c3f4152 ("Linux-2.6.12-rc2")
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
---
 fs/quota/quota_tree.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Jan Kara Sept. 21, 2022, 1:37 p.m. UTC | #1
On Sat 20-08-22 19:05:12, Zhihao Cheng wrote:
> Following process:
>  Init: v2_read_file_info: <3> dqi_free_blk 0 dqi_free_entry 5 dqi_blks 6
> 
>  Step 1. chown bin f_a -> dquot_acquire -> v2_write_dquot:
>   qtree_write_dquot
>    do_insert_tree
>     find_free_dqentry
>      get_free_dqblk
>       write_blk(info->dqi_blocks) // info->dqi_blocks = 6, failure. The
> 	   content in physical block (corresponding to blk 6) is random.
> 
>  Step 2. chown root f_a -> dquot_transfer -> dqput_all -> dqput ->
>          ext4_release_dquot -> v2_release_dquot -> qtree_delete_dquot:
>   dquot_release
>    remove_tree
>     free_dqentry
>      put_free_dqblk(6)
>       info->dqi_free_blk = blk    // info->dqi_free_blk = 6
> 
>  Step 3. drop cache (buffer head for block 6 is released)
> 
>  Step 4. chown bin f_b -> dquot_acquire -> commit_dqblk -> v2_write_dquot:
>   qtree_write_dquot
>    do_insert_tree
>     find_free_dqentry
>      get_free_dqblk
>       dh = (struct qt_disk_dqdbheader *)buf
>       blk = info->dqi_free_blk     // 6
>       ret = read_blk(info, blk, buf)  // The content of buf is random
>       info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free)  // random blk
> 
>  Step 5. chown bin f_c -> notify_change -> ext4_setattr -> dquot_transfer:
>   dquot = dqget -> acquire_dquot -> ext4_acquire_dquot -> dquot_acquire ->
>           commit_dqblk -> v2_write_dquot -> dq_insert_tree:
>    do_insert_tree
>     find_free_dqentry
>      get_free_dqblk
>       blk = info->dqi_free_blk    // If blk < 0 and blk is not an error
> 				     code, it will be returned as dquot
> 
>   transfer_to[USRQUOTA] = dquot  // A random negative value
>   __dquot_transfer(transfer_to)
>    dquot_add_inodes(transfer_to[cnt])
>     spin_lock(&dquot->dq_dqb_lock)  // page fault
> 
> , which will lead to kernel page fault:
>  Quota error (device sda): qtree_write_dquot: Error -8000 occurred
>  while creating quota
>  BUG: unable to handle page fault for address: ffffffffffffe120
>  #PF: supervisor write access in kernel mode
>  #PF: error_code(0x0002) - not-present page
>  Oops: 0002 [#1] PREEMPT SMP
>  CPU: 0 PID: 5974 Comm: chown Not tainted 6.0.0-rc1-00004
>  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
>  RIP: 0010:_raw_spin_lock+0x3a/0x90
>  Call Trace:
>   dquot_add_inodes+0x28/0x270
>   __dquot_transfer+0x377/0x840
>   dquot_transfer+0xde/0x540
>   ext4_setattr+0x405/0x14d0
>   notify_change+0x68e/0x9f0
>   chown_common+0x300/0x430
>   __x64_sys_fchownat+0x29/0x40
> 
> In order to avoid accessing invalid quota memory address, this patch adds
> block number checking of next/prev free block read from quota file.
> 
> Fetch a reproducer in [Link].
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=216372
> Fixes: 1da177e4c3f4152 ("Linux-2.6.12-rc2")

It's better to just have:

CC: stable@vger.kernel.org

here. Fixes tag pointing to kernel release is not very useful.

> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -71,6 +71,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
>  	return ret;
>  }
>  
> +static inline int do_check_range(struct super_block *sb, uint val, uint max_val)
> +{
> +	if (val >= max_val) {
> +		quota_error(sb, "Getting block too big (%u >= %u)",
> +			    val, max_val);
> +		return -EUCLEAN;
> +	}
> +
> +	return 0;
> +}

I'd already provide min_val and the string for the message here as well (as
you do in patch 2). It is less churn in the next patch and free blocks
checking actually needs that as well. See below.

> +
> +static int check_free_block(struct qtree_mem_dqinfo *info,
> +			    struct qt_disk_dqdbheader *dh)
> +{
> +	int err = 0;
> +	uint nextblk, prevblk;
> +
> +	nextblk = le32_to_cpu(dh->dqdh_next_free);
> +	err = do_check_range(info->dqi_sb, nextblk, info->dqi_blocks);
> +	if (err)
> +		return err;
> +	prevblk = le32_to_cpu(dh->dqdh_prev_free);
> +	err = do_check_range(info->dqi_sb, prevblk, info->dqi_blocks);
> +	if (err)
> +		return err;

The free block should actually be > QT_TREEOFF so I'd add the check to
do_check_range().

Also rather than having check_free_block(), I'd provide a helper function
like check_dquot_block_header() which will check only free blocks pointers
now and in later patches you can add other checks there.

								Honza
Zhihao Cheng Sept. 22, 2022, 8:13 a.m. UTC | #2
在 2022/9/21 21:37, Jan Kara 写道:
Hi Jan,
> On Sat 20-08-22 19:05:12, Zhihao Cheng wrote:
>> Following process:
[...]
>>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=216372
>> Fixes: 1da177e4c3f4152 ("Linux-2.6.12-rc2")
> 
> It's better to just have:
> 
> CC: stable@vger.kernel.org
> 
> here. Fixes tag pointing to kernel release is not very useful.
Will add in v2.
> 
>> --- a/fs/quota/quota_tree.c
>> +++ b/fs/quota/quota_tree.c
>> @@ -71,6 +71,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
>>   	return ret;
>>   }
>>   
>> +static inline int do_check_range(struct super_block *sb, uint val, uint max_val)
>> +{
>> +	if (val >= max_val) {
>> +		quota_error(sb, "Getting block too big (%u >= %u)",
>> +			    val, max_val);
>> +		return -EUCLEAN;
>> +	}
>> +
>> +	return 0;
>> +}
> 
> I'd already provide min_val and the string for the message here as well (as
> you do in patch 2). It is less churn in the next patch and free blocks
> checking actually needs that as well. See below.
> 
>> +
>> +static int check_free_block(struct qtree_mem_dqinfo *info,
>> +			    struct qt_disk_dqdbheader *dh)
>> +{
>> +	int err = 0;
>> +	uint nextblk, prevblk;
>> +
>> +	nextblk = le32_to_cpu(dh->dqdh_next_free);
>> +	err = do_check_range(info->dqi_sb, nextblk, info->dqi_blocks);
>> +	if (err)
>> +		return err;
>> +	prevblk = le32_to_cpu(dh->dqdh_prev_free);
>> +	err = do_check_range(info->dqi_sb, prevblk, info->dqi_blocks);
>> +	if (err)
>> +		return err;
> 
> The free block should actually be > QT_TREEOFF so I'd add the check to
> do_check_range().

'dh->dqdh_next_free' may be updated when quota entry removed, 
'dh->dqdh_next_free' can be used for next new quota entris.
Before sending v2, I found 'dh->dqdh_next_free' and 'dh->dqdh_prev_free' 
can easily be zero in newly allocated blocks when continually creating 
files onwed by different users:
find_free_dqentry
   get_free_dqblk
     write_blk(info, info->dqi_blocks, buf)  // zero'd qt_disk_dqdbheader
     blk = info->dqi_blocks++   // allocate new one block
   info->dqi_free_entry = blk   // will be used for new quota entries

find_free_dqentry
   if (info->dqi_free_entry)
     blk = info->dqi_free_entry
     read_blk(info, blk, buf)   // dh->dqdh_next_free = 
dh->dqdh_prev_free = 0

I think it's normal when 'dh->dqdh_next_free' or 'dh->dqdh_prev_free' 
equals to 0.
> 
> Also rather than having check_free_block(), I'd provide a helper function
> like check_dquot_block_header() which will check only free blocks pointers
> now and in later patches you can add other checks there.
OK, will be updated in v2.
> 
> 								Honza
>
Jan Kara Sept. 22, 2022, 11:39 a.m. UTC | #3
On Thu 22-09-22 16:13:59, Zhihao Cheng wrote:
> 在 2022/9/21 21:37, Jan Kara 写道:
> > > --- a/fs/quota/quota_tree.c
> > > +++ b/fs/quota/quota_tree.c
> > > @@ -71,6 +71,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
> > >   	return ret;
> > >   }
> > > +static inline int do_check_range(struct super_block *sb, uint val, uint max_val)
> > > +{
> > > +	if (val >= max_val) {
> > > +		quota_error(sb, "Getting block too big (%u >= %u)",
> > > +			    val, max_val);
> > > +		return -EUCLEAN;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > 
> > I'd already provide min_val and the string for the message here as well (as
> > you do in patch 2). It is less churn in the next patch and free blocks
> > checking actually needs that as well. See below.
> > 
> > > +
> > > +static int check_free_block(struct qtree_mem_dqinfo *info,
> > > +			    struct qt_disk_dqdbheader *dh)
> > > +{
> > > +	int err = 0;
> > > +	uint nextblk, prevblk;
> > > +
> > > +	nextblk = le32_to_cpu(dh->dqdh_next_free);
> > > +	err = do_check_range(info->dqi_sb, nextblk, info->dqi_blocks);
> > > +	if (err)
> > > +		return err;
> > > +	prevblk = le32_to_cpu(dh->dqdh_prev_free);
> > > +	err = do_check_range(info->dqi_sb, prevblk, info->dqi_blocks);
> > > +	if (err)
> > > +		return err;
> > 
> > The free block should actually be > QT_TREEOFF so I'd add the check to
> > do_check_range().
> 
> 'dh->dqdh_next_free' may be updated when quota entry removed,
> 'dh->dqdh_next_free' can be used for next new quota entris.
> Before sending v2, I found 'dh->dqdh_next_free' and 'dh->dqdh_prev_free' can
> easily be zero in newly allocated blocks when continually creating files
> onwed by different users:
> find_free_dqentry
>   get_free_dqblk
>     write_blk(info, info->dqi_blocks, buf)  // zero'd qt_disk_dqdbheader
>     blk = info->dqi_blocks++   // allocate new one block
>   info->dqi_free_entry = blk   // will be used for new quota entries
> 
> find_free_dqentry
>   if (info->dqi_free_entry)
>     blk = info->dqi_free_entry
>     read_blk(info, blk, buf)   // dh->dqdh_next_free = dh->dqdh_prev_free =
> 0
> 
> I think it's normal when 'dh->dqdh_next_free' or 'dh->dqdh_prev_free' equals
> to 0.

Good point! 0 means "not present". So any block number (either in free list
or pointed from the quota tree) should be either 0 or > QT_TREEOFF.

								Honza
Zhihao Cheng Sept. 22, 2022, 12:58 p.m. UTC | #4
在 2022/9/22 19:39, Jan Kara 写道:
> On Thu 22-09-22 16:13:59, Zhihao Cheng wrote:
[...]
>>>
>>> The free block should actually be > QT_TREEOFF so I'd add the check to
>>> do_check_range().
>>
>> 'dh->dqdh_next_free' may be updated when quota entry removed,
>> 'dh->dqdh_next_free' can be used for next new quota entris.
>> Before sending v2, I found 'dh->dqdh_next_free' and 'dh->dqdh_prev_free' can
>> easily be zero in newly allocated blocks when continually creating files
>> onwed by different users:
>> find_free_dqentry
>>    get_free_dqblk
>>      write_blk(info, info->dqi_blocks, buf)  // zero'd qt_disk_dqdbheader
>>      blk = info->dqi_blocks++   // allocate new one block
>>    info->dqi_free_entry = blk   // will be used for new quota entries
>>
>> find_free_dqentry
>>    if (info->dqi_free_entry)
>>      blk = info->dqi_free_entry
>>      read_blk(info, blk, buf)   // dh->dqdh_next_free = dh->dqdh_prev_free =
>> 0
>>
>> I think it's normal when 'dh->dqdh_next_free' or 'dh->dqdh_prev_free' equals
>> to 0.
> 
> Good point! 0 means "not present". So any block number (either in free list
> or pointed from the quota tree) should be either 0 or > QT_TREEOFF.
> 
> 								Honza
> 

In case my emails being filtered agagin, this is notification of v2:
https://lore.kernel.org/linux-ext4/20220922130401.1792256-1-chengzhihao1@huawei.com/T/#m9676d64e8f7cdd7b7decdd0d6b725ec658110b3e
diff mbox series

Patch

diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 5f2405994280..f2e7f5b869a4 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -71,6 +71,35 @@  static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
 	return ret;
 }
 
+static inline int do_check_range(struct super_block *sb, uint val, uint max_val)
+{
+	if (val >= max_val) {
+		quota_error(sb, "Getting block too big (%u >= %u)",
+			    val, max_val);
+		return -EUCLEAN;
+	}
+
+	return 0;
+}
+
+static int check_free_block(struct qtree_mem_dqinfo *info,
+			    struct qt_disk_dqdbheader *dh)
+{
+	int err = 0;
+	uint nextblk, prevblk;
+
+	nextblk = le32_to_cpu(dh->dqdh_next_free);
+	err = do_check_range(info->dqi_sb, nextblk, info->dqi_blocks);
+	if (err)
+		return err;
+	prevblk = le32_to_cpu(dh->dqdh_prev_free);
+	err = do_check_range(info->dqi_sb, prevblk, info->dqi_blocks);
+	if (err)
+		return err;
+
+	return err;
+}
+
 /* Remove empty block from list and return it */
 static int get_free_dqblk(struct qtree_mem_dqinfo *info)
 {
@@ -85,6 +114,9 @@  static int get_free_dqblk(struct qtree_mem_dqinfo *info)
 		ret = read_blk(info, blk, buf);
 		if (ret < 0)
 			goto out_buf;
+		ret = check_free_block(info, dh);
+		if (ret)
+			goto out_buf;
 		info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
 	}
 	else {
@@ -232,6 +264,9 @@  static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
 		*err = read_blk(info, blk, buf);
 		if (*err < 0)
 			goto out_buf;
+		*err = check_free_block(info, dh);
+		if (*err)
+			goto out_buf;
 	} else {
 		blk = get_free_dqblk(info);
 		if ((int)blk < 0) {
@@ -424,6 +459,9 @@  static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		goto out_buf;
 	}
 	dh = (struct qt_disk_dqdbheader *)buf;
+	ret = check_free_block(info, dh);
+	if (ret)
+		goto out_buf;
 	le16_add_cpu(&dh->dqdh_entries, -1);
 	if (!le16_to_cpu(dh->dqdh_entries)) {	/* Block got free? */
 		ret = remove_free_dqentry(info, buf, blk);