diff mbox series

nfsd: do not use async mode when not in rcu context

Message ID 20241214134916.422488-1-yangerkun@huaweicloud.com (mailing list archive)
State Superseded
Headers show
Series nfsd: do not use async mode when not in rcu context | expand

Commit Message

yangerkun Dec. 14, 2024, 1:49 p.m. UTC
From: Yang Erkun <yangerkun@huawei.com>

shell:

mkfs.xfs -f /dev/sda
echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports
echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports
exportfs -ra
service nfs-server start
mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1
mount /dev/sda /mnt/sda
touch /mnt1/sda/file
exportfs -r
umount /mnt/sda

Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
describe that when we call e_show/c_show, the last reference can down to
0, and we will call expkey_put/svc_export_put with rcu context, this may
lead uaf or sleep in atomic bug. Finally, we introduce async mode to the
release and fix the bug. However, some other command may also finally call
expkey_put/svc_export_put without rcu context, but expect that the sync
mode for the resource release. Like upper shell, before that commit,
exportfs -r will remove all entry with sync mode, and the last umount
/mnt/sda will always success. But after this commit, the umount will always
fail, after we add some delay, they will success again. Personally, I think
is actually a bug, and need be fixed.

Use rcu_read_lock_any_held to distinguish does we really under rcu context,
and if no, release resource with sync mode.

Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
---
 fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

Comments

yangerkun Dec. 16, 2024, 3:18 a.m. UTC | #1
在 2024/12/14 21:49, Yang Erkun 写道:
> From: Yang Erkun <yangerkun@huawei.com>
> 
> shell:
> 
> mkfs.xfs -f /dev/sda
> echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports
> echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports
> exportfs -ra
> service nfs-server start
> mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1
> mount /dev/sda /mnt/sda
> touch /mnt1/sda/file
> exportfs -r
> umount /mnt/sda
> 
> Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
> describe that when we call e_show/c_show, the last reference can down to
> 0, and we will call expkey_put/svc_export_put with rcu context, this may
> lead uaf or sleep in atomic bug. Finally, we introduce async mode to the
> release and fix the bug. However, some other command may also finally call
> expkey_put/svc_export_put without rcu context, but expect that the sync
> mode for the resource release. Like upper shell, before that commit,
> exportfs -r will remove all entry with sync mode, and the last umount
> /mnt/sda will always success. But after this commit, the umount will always
> fail, after we add some delay, they will success again. Personally, I think
> is actually a bug, and need be fixed.
> 
> Use rcu_read_lock_any_held to distinguish does we really under rcu context,
> and if no, release resource with sync mode.
> 
> Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
> ---
>   fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------
>   1 file changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
> index eacafe46e3b6..25f13e877c2f 100644
> --- a/fs/nfsd/export.c
> +++ b/fs/nfsd/export.c
> @@ -40,11 +40,8 @@
>   #define	EXPKEY_HASHMAX		(1 << EXPKEY_HASHBITS)
>   #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
>   
> -static void expkey_put_work(struct work_struct *work)
> +static void expkey_release(struct svc_expkey *key)
>   {
> -	struct svc_expkey *key =
> -		container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
> -
>   	if (test_bit(CACHE_VALID, &key->h.flags) &&
>   	    !test_bit(CACHE_NEGATIVE, &key->h.flags))
>   		path_put(&key->ek_path);
> @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct *work)
>   	kfree(key);
>   }
>   
> +static void expkey_put_work(struct work_struct *work)
> +{
> +	struct svc_expkey *key =
> +		container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
> +
> +	expkey_release(key);
> +}
> +
>   static void expkey_put(struct kref *ref)
>   {
>   	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
>   
> -	INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
> -	queue_rcu_work(system_wq, &key->ek_rcu_work);
> +	if (rcu_read_lock_any_held()) {

Emm...

This api won't work when we disable CONFIG_PREEMPT_COUNT...

> +		INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
> +		queue_rcu_work(system_wq, &key->ek_rcu_work);
> +	} else {
> +		synchronize_rcu();
> +		expkey_release(key);
> +	}
>   }
>   
>   static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
> @@ -364,11 +374,8 @@ static void export_stats_destroy(struct export_stats *stats)
>   					    EXP_STATS_COUNTERS_NUM);
>   }
>   
> -static void svc_export_put_work(struct work_struct *work)
> +static void svc_export_release(struct svc_export *exp)
>   {
> -	struct svc_export *exp =
> -		container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
> -
>   	path_put(&exp->ex_path);
>   	auth_domain_put(exp->ex_client);
>   	nfsd4_fslocs_free(&exp->ex_fslocs);
> @@ -378,12 +385,25 @@ static void svc_export_put_work(struct work_struct *work)
>   	kfree(exp);
>   }
>   
> +static void svc_export_put_work(struct work_struct *work)
> +{
> +	struct svc_export *exp =
> +		container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
> +
> +	svc_export_release(exp);
> +}
> +
>   static void svc_export_put(struct kref *ref)
>   {
>   	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
>   
> -	INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
> -	queue_rcu_work(system_wq, &exp->ex_rcu_work);
> +	if (rcu_read_lock_any_held()) {
> +		INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
> +		queue_rcu_work(system_wq, &exp->ex_rcu_work);
> +	} else {
> +		synchronize_rcu();
> +		svc_export_release(exp);
> +	}
>   }
>   
>   static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
Chuck Lever Dec. 16, 2024, 1:48 p.m. UTC | #2
On 12/15/24 10:18 PM, yangerkun wrote:
> 
> 
> 在 2024/12/14 21:49, Yang Erkun 写道:
>> From: Yang Erkun <yangerkun@huawei.com>
>>
>> shell:
>>
>> mkfs.xfs -f /dev/sda
>> echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports
>> echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports
>> exportfs -ra
>> service nfs-server start
>> mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1
>> mount /dev/sda /mnt/sda
>> touch /mnt1/sda/file
>> exportfs -r
>> umount /mnt/sda
>>
>> Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
>> describe that when we call e_show/c_show, the last reference can down to
>> 0, and we will call expkey_put/svc_export_put with rcu context, this may
>> lead uaf or sleep in atomic bug. Finally, we introduce async mode to the
>> release and fix the bug. However, some other command may also finally 
>> call
>> expkey_put/svc_export_put without rcu context, but expect that the sync
>> mode for the resource release. Like upper shell, before that commit,
>> exportfs -r will remove all entry with sync mode, and the last umount
>> /mnt/sda will always success. But after this commit, the umount will 
>> always
>> fail, after we add some delay, they will success again. Personally, I 
>> think
>> is actually a bug, and need be fixed.
>>
>> Use rcu_read_lock_any_held to distinguish does we really under rcu 
>> context,
>> and if no, release resource with sync mode.
>>
>> Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
>> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
>> ---
>>   fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 32 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
>> index eacafe46e3b6..25f13e877c2f 100644
>> --- a/fs/nfsd/export.c
>> +++ b/fs/nfsd/export.c
>> @@ -40,11 +40,8 @@
>>   #define    EXPKEY_HASHMAX        (1 << EXPKEY_HASHBITS)
>>   #define    EXPKEY_HASHMASK        (EXPKEY_HASHMAX -1)
>> -static void expkey_put_work(struct work_struct *work)
>> +static void expkey_release(struct svc_expkey *key)
>>   {
>> -    struct svc_expkey *key =
>> -        container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
>> -
>>       if (test_bit(CACHE_VALID, &key->h.flags) &&
>>           !test_bit(CACHE_NEGATIVE, &key->h.flags))
>>           path_put(&key->ek_path);
>> @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct *work)
>>       kfree(key);
>>   }
>> +static void expkey_put_work(struct work_struct *work)
>> +{
>> +    struct svc_expkey *key =
>> +        container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
>> +
>> +    expkey_release(key);
>> +}
>> +
>>   static void expkey_put(struct kref *ref)
>>   {
>>       struct svc_expkey *key = container_of(ref, struct svc_expkey, 
>> h.ref);
>> -    INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
>> -    queue_rcu_work(system_wq, &key->ek_rcu_work);
>> +    if (rcu_read_lock_any_held()) {
> 
> Emm...
> 
> This api won't work when we disable CONFIG_PREEMPT_COUNT...

OK, I assume you will send a v2 :-)

Can you include the reviewers listed in MAINTAINERS on the To: line?

R:      Neil Brown <neilb@suse.de>
R:      Olga Kornievskaia <okorniev@redhat.com>
R:      Dai Ngo <Dai.Ngo@oracle.com>
R:      Tom Talpey <tom@talpey.com>

Thanks!


>> +        INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
>> +        queue_rcu_work(system_wq, &key->ek_rcu_work);
>> +    } else {
>> +        synchronize_rcu();
>> +        expkey_release(key);
>> +    }
>>   }
>>   static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
>> @@ -364,11 +374,8 @@ static void export_stats_destroy(struct 
>> export_stats *stats)
>>                           EXP_STATS_COUNTERS_NUM);
>>   }
>> -static void svc_export_put_work(struct work_struct *work)
>> +static void svc_export_release(struct svc_export *exp)
>>   {
>> -    struct svc_export *exp =
>> -        container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
>> -
>>       path_put(&exp->ex_path);
>>       auth_domain_put(exp->ex_client);
>>       nfsd4_fslocs_free(&exp->ex_fslocs);
>> @@ -378,12 +385,25 @@ static void svc_export_put_work(struct 
>> work_struct *work)
>>       kfree(exp);
>>   }
>> +static void svc_export_put_work(struct work_struct *work)
>> +{
>> +    struct svc_export *exp =
>> +        container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
>> +
>> +    svc_export_release(exp);
>> +}
>> +
>>   static void svc_export_put(struct kref *ref)
>>   {
>>       struct svc_export *exp = container_of(ref, struct svc_export, 
>> h.ref);
>> -    INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
>> -    queue_rcu_work(system_wq, &exp->ex_rcu_work);
>> +    if (rcu_read_lock_any_held()) {
>> +        INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
>> +        queue_rcu_work(system_wq, &exp->ex_rcu_work);
>> +    } else {
>> +        synchronize_rcu();
>> +        svc_export_release(exp);
>> +    }
>>   }
>>   static int svc_export_upcall(struct cache_detail *cd, struct 
>> cache_head *h)
>
yangerkun Dec. 16, 2024, 1:58 p.m. UTC | #3
在 2024/12/16 21:48, Chuck Lever 写道:
> On 12/15/24 10:18 PM, yangerkun wrote:
>>
>>
>> 在 2024/12/14 21:49, Yang Erkun 写道:
>>> From: Yang Erkun <yangerkun@huawei.com>
>>>
>>> shell:
>>>
>>> mkfs.xfs -f /dev/sda
>>> echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports
>>> echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports
>>> exportfs -ra
>>> service nfs-server start
>>> mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1
>>> mount /dev/sda /mnt/sda
>>> touch /mnt1/sda/file
>>> exportfs -r
>>> umount /mnt/sda
>>>
>>> Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with 
>>> rcu_work")
>>> describe that when we call e_show/c_show, the last reference can down to
>>> 0, and we will call expkey_put/svc_export_put with rcu context, this may
>>> lead uaf or sleep in atomic bug. Finally, we introduce async mode to the
>>> release and fix the bug. However, some other command may also finally 
>>> call
>>> expkey_put/svc_export_put without rcu context, but expect that the sync
>>> mode for the resource release. Like upper shell, before that commit,
>>> exportfs -r will remove all entry with sync mode, and the last umount
>>> /mnt/sda will always success. But after this commit, the umount will 
>>> always
>>> fail, after we add some delay, they will success again. Personally, I 
>>> think
>>> is actually a bug, and need be fixed.
>>>
>>> Use rcu_read_lock_any_held to distinguish does we really under rcu 
>>> context,
>>> and if no, release resource with sync mode.
>>>
>>> Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with 
>>> rcu_work")
>>> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
>>> ---
>>>   fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------
>>>   1 file changed, 32 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
>>> index eacafe46e3b6..25f13e877c2f 100644
>>> --- a/fs/nfsd/export.c
>>> +++ b/fs/nfsd/export.c
>>> @@ -40,11 +40,8 @@
>>>   #define    EXPKEY_HASHMAX        (1 << EXPKEY_HASHBITS)
>>>   #define    EXPKEY_HASHMASK        (EXPKEY_HASHMAX -1)
>>> -static void expkey_put_work(struct work_struct *work)
>>> +static void expkey_release(struct svc_expkey *key)
>>>   {
>>> -    struct svc_expkey *key =
>>> -        container_of(to_rcu_work(work), struct svc_expkey, 
>>> ek_rcu_work);
>>> -
>>>       if (test_bit(CACHE_VALID, &key->h.flags) &&
>>>           !test_bit(CACHE_NEGATIVE, &key->h.flags))
>>>           path_put(&key->ek_path);
>>> @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct 
>>> *work)
>>>       kfree(key);
>>>   }
>>> +static void expkey_put_work(struct work_struct *work)
>>> +{
>>> +    struct svc_expkey *key =
>>> +        container_of(to_rcu_work(work), struct svc_expkey, 
>>> ek_rcu_work);
>>> +
>>> +    expkey_release(key);
>>> +}
>>> +
>>>   static void expkey_put(struct kref *ref)
>>>   {
>>>       struct svc_expkey *key = container_of(ref, struct svc_expkey, 
>>> h.ref);
>>> -    INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
>>> -    queue_rcu_work(system_wq, &key->ek_rcu_work);
>>> +    if (rcu_read_lock_any_held()) {
>>
>> Emm...
>>
>> This api won't work when we disable CONFIG_PREEMPT_COUNT...
> 
> OK, I assume you will send a v2 :-)

Yes~ V2 will be posted soon.

> 
> Can you include the reviewers listed in MAINTAINERS on the To: line?
> 
> R:      Neil Brown <neilb@suse.de>
> R:      Olga Kornievskaia <okorniev@redhat.com>
> R:      Dai Ngo <Dai.Ngo@oracle.com>
> R:      Tom Talpey <tom@talpey.com>
> 
> Thanks!

Thanks for your reminder!

> 
> 
>>> +        INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
>>> +        queue_rcu_work(system_wq, &key->ek_rcu_work);
>>> +    } else {
>>> +        synchronize_rcu();
>>> +        expkey_release(key);
>>> +    }
>>>   }
>>>   static int expkey_upcall(struct cache_detail *cd, struct cache_head 
>>> *h)
>>> @@ -364,11 +374,8 @@ static void export_stats_destroy(struct 
>>> export_stats *stats)
>>>                           EXP_STATS_COUNTERS_NUM);
>>>   }
>>> -static void svc_export_put_work(struct work_struct *work)
>>> +static void svc_export_release(struct svc_export *exp)
>>>   {
>>> -    struct svc_export *exp =
>>> -        container_of(to_rcu_work(work), struct svc_export, 
>>> ex_rcu_work);
>>> -
>>>       path_put(&exp->ex_path);
>>>       auth_domain_put(exp->ex_client);
>>>       nfsd4_fslocs_free(&exp->ex_fslocs);
>>> @@ -378,12 +385,25 @@ static void svc_export_put_work(struct 
>>> work_struct *work)
>>>       kfree(exp);
>>>   }
>>> +static void svc_export_put_work(struct work_struct *work)
>>> +{
>>> +    struct svc_export *exp =
>>> +        container_of(to_rcu_work(work), struct svc_export, 
>>> ex_rcu_work);
>>> +
>>> +    svc_export_release(exp);
>>> +}
>>> +
>>>   static void svc_export_put(struct kref *ref)
>>>   {
>>>       struct svc_export *exp = container_of(ref, struct svc_export, 
>>> h.ref);
>>> -    INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
>>> -    queue_rcu_work(system_wq, &exp->ex_rcu_work);
>>> +    if (rcu_read_lock_any_held()) {
>>> +        INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
>>> +        queue_rcu_work(system_wq, &exp->ex_rcu_work);
>>> +    } else {
>>> +        synchronize_rcu();
>>> +        svc_export_release(exp);
>>> +    }
>>>   }
>>>   static int svc_export_upcall(struct cache_detail *cd, struct 
>>> cache_head *h)
>>
> 
>
diff mbox series

Patch

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index eacafe46e3b6..25f13e877c2f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -40,11 +40,8 @@ 
 #define	EXPKEY_HASHMAX		(1 << EXPKEY_HASHBITS)
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 
-static void expkey_put_work(struct work_struct *work)
+static void expkey_release(struct svc_expkey *key)
 {
-	struct svc_expkey *key =
-		container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
-
 	if (test_bit(CACHE_VALID, &key->h.flags) &&
 	    !test_bit(CACHE_NEGATIVE, &key->h.flags))
 		path_put(&key->ek_path);
@@ -52,12 +49,25 @@  static void expkey_put_work(struct work_struct *work)
 	kfree(key);
 }
 
+static void expkey_put_work(struct work_struct *work)
+{
+	struct svc_expkey *key =
+		container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
+
+	expkey_release(key);
+}
+
 static void expkey_put(struct kref *ref)
 {
 	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
 
-	INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
-	queue_rcu_work(system_wq, &key->ek_rcu_work);
+	if (rcu_read_lock_any_held()) {
+		INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
+		queue_rcu_work(system_wq, &key->ek_rcu_work);
+	} else {
+		synchronize_rcu();
+		expkey_release(key);
+	}
 }
 
 static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -364,11 +374,8 @@  static void export_stats_destroy(struct export_stats *stats)
 					    EXP_STATS_COUNTERS_NUM);
 }
 
-static void svc_export_put_work(struct work_struct *work)
+static void svc_export_release(struct svc_export *exp)
 {
-	struct svc_export *exp =
-		container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
-
 	path_put(&exp->ex_path);
 	auth_domain_put(exp->ex_client);
 	nfsd4_fslocs_free(&exp->ex_fslocs);
@@ -378,12 +385,25 @@  static void svc_export_put_work(struct work_struct *work)
 	kfree(exp);
 }
 
+static void svc_export_put_work(struct work_struct *work)
+{
+	struct svc_export *exp =
+		container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
+
+	svc_export_release(exp);
+}
+
 static void svc_export_put(struct kref *ref)
 {
 	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
 
-	INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
-	queue_rcu_work(system_wq, &exp->ex_rcu_work);
+	if (rcu_read_lock_any_held()) {
+		INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
+		queue_rcu_work(system_wq, &exp->ex_rcu_work);
+	} else {
+		synchronize_rcu();
+		svc_export_release(exp);
+	}
 }
 
 static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)