Message ID | 20241214134916.422488-1-yangerkun@huaweicloud.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | nfsd: do not use async mode when not in rcu context | expand |
在 2024/12/14 21:49, Yang Erkun 写道: > From: Yang Erkun <yangerkun@huawei.com> > > shell: > > mkfs.xfs -f /dev/sda > echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports > echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports > exportfs -ra > service nfs-server start > mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1 > mount /dev/sda /mnt/sda > touch /mnt1/sda/file > exportfs -r > umount /mnt/sda > > Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work") > describe that when we call e_show/c_show, the last reference can down to > 0, and we will call expkey_put/svc_export_put with rcu context, this may > lead uaf or sleep in atomic bug. Finally, we introduce async mode to the > release and fix the bug. However, some other command may also finally call > expkey_put/svc_export_put without rcu context, but expect that the sync > mode for the resource release. Like upper shell, before that commit, > exportfs -r will remove all entry with sync mode, and the last umount > /mnt/sda will always success. But after this commit, the umount will always > fail, after we add some delay, they will success again. Personally, I think > is actually a bug, and need be fixed. > > Use rcu_read_lock_any_held to distinguish does we really under rcu context, > and if no, release resource with sync mode. > > Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work") > Signed-off-by: Yang Erkun <yangerkun@huawei.com> > --- > fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------ > 1 file changed, 32 insertions(+), 12 deletions(-) > > diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c > index eacafe46e3b6..25f13e877c2f 100644 > --- a/fs/nfsd/export.c > +++ b/fs/nfsd/export.c > @@ -40,11 +40,8 @@ > #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) > #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) > > -static void expkey_put_work(struct work_struct *work) > +static void expkey_release(struct svc_expkey *key) > { > - struct svc_expkey *key = > - container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); > - > if (test_bit(CACHE_VALID, &key->h.flags) && > !test_bit(CACHE_NEGATIVE, &key->h.flags)) > path_put(&key->ek_path); > @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct *work) > kfree(key); > } > > +static void expkey_put_work(struct work_struct *work) > +{ > + struct svc_expkey *key = > + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); > + > + expkey_release(key); > +} > + > static void expkey_put(struct kref *ref) > { > struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); > > - INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); > - queue_rcu_work(system_wq, &key->ek_rcu_work); > + if (rcu_read_lock_any_held()) { Emm... This api won't work when we disable CONFIG_PREEMPT_COUNT... > + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); > + queue_rcu_work(system_wq, &key->ek_rcu_work); > + } else { > + synchronize_rcu(); > + expkey_release(key); > + } > } > > static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) > @@ -364,11 +374,8 @@ static void export_stats_destroy(struct export_stats *stats) > EXP_STATS_COUNTERS_NUM); > } > > -static void svc_export_put_work(struct work_struct *work) > +static void svc_export_release(struct svc_export *exp) > { > - struct svc_export *exp = > - container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); > - > path_put(&exp->ex_path); > auth_domain_put(exp->ex_client); > nfsd4_fslocs_free(&exp->ex_fslocs); > @@ -378,12 +385,25 @@ static void svc_export_put_work(struct work_struct *work) > kfree(exp); > } > > +static void svc_export_put_work(struct work_struct *work) > +{ > + struct svc_export *exp = > + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); > + > + svc_export_release(exp); > +} > + > static void svc_export_put(struct kref *ref) > { > struct svc_export *exp = container_of(ref, struct svc_export, h.ref); > > - INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); > - queue_rcu_work(system_wq, &exp->ex_rcu_work); > + if (rcu_read_lock_any_held()) { > + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); > + queue_rcu_work(system_wq, &exp->ex_rcu_work); > + } else { > + synchronize_rcu(); > + svc_export_release(exp); > + } > } > > static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
On 12/15/24 10:18 PM, yangerkun wrote: > > > 在 2024/12/14 21:49, Yang Erkun 写道: >> From: Yang Erkun <yangerkun@huawei.com> >> >> shell: >> >> mkfs.xfs -f /dev/sda >> echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports >> echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports >> exportfs -ra >> service nfs-server start >> mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1 >> mount /dev/sda /mnt/sda >> touch /mnt1/sda/file >> exportfs -r >> umount /mnt/sda >> >> Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work") >> describe that when we call e_show/c_show, the last reference can down to >> 0, and we will call expkey_put/svc_export_put with rcu context, this may >> lead uaf or sleep in atomic bug. Finally, we introduce async mode to the >> release and fix the bug. However, some other command may also finally >> call >> expkey_put/svc_export_put without rcu context, but expect that the sync >> mode for the resource release. Like upper shell, before that commit, >> exportfs -r will remove all entry with sync mode, and the last umount >> /mnt/sda will always success. But after this commit, the umount will >> always >> fail, after we add some delay, they will success again. Personally, I >> think >> is actually a bug, and need be fixed. >> >> Use rcu_read_lock_any_held to distinguish does we really under rcu >> context, >> and if no, release resource with sync mode. >> >> Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work") >> Signed-off-by: Yang Erkun <yangerkun@huawei.com> >> --- >> fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------ >> 1 file changed, 32 insertions(+), 12 deletions(-) >> >> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c >> index eacafe46e3b6..25f13e877c2f 100644 >> --- a/fs/nfsd/export.c >> +++ b/fs/nfsd/export.c >> @@ -40,11 +40,8 @@ >> #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) >> #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) >> -static void expkey_put_work(struct work_struct *work) >> +static void expkey_release(struct svc_expkey *key) >> { >> - struct svc_expkey *key = >> - container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); >> - >> if (test_bit(CACHE_VALID, &key->h.flags) && >> !test_bit(CACHE_NEGATIVE, &key->h.flags)) >> path_put(&key->ek_path); >> @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct *work) >> kfree(key); >> } >> +static void expkey_put_work(struct work_struct *work) >> +{ >> + struct svc_expkey *key = >> + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); >> + >> + expkey_release(key); >> +} >> + >> static void expkey_put(struct kref *ref) >> { >> struct svc_expkey *key = container_of(ref, struct svc_expkey, >> h.ref); >> - INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); >> - queue_rcu_work(system_wq, &key->ek_rcu_work); >> + if (rcu_read_lock_any_held()) { > > Emm... > > This api won't work when we disable CONFIG_PREEMPT_COUNT... OK, I assume you will send a v2 :-) Can you include the reviewers listed in MAINTAINERS on the To: line? R: Neil Brown <neilb@suse.de> R: Olga Kornievskaia <okorniev@redhat.com> R: Dai Ngo <Dai.Ngo@oracle.com> R: Tom Talpey <tom@talpey.com> Thanks! >> + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); >> + queue_rcu_work(system_wq, &key->ek_rcu_work); >> + } else { >> + synchronize_rcu(); >> + expkey_release(key); >> + } >> } >> static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) >> @@ -364,11 +374,8 @@ static void export_stats_destroy(struct >> export_stats *stats) >> EXP_STATS_COUNTERS_NUM); >> } >> -static void svc_export_put_work(struct work_struct *work) >> +static void svc_export_release(struct svc_export *exp) >> { >> - struct svc_export *exp = >> - container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); >> - >> path_put(&exp->ex_path); >> auth_domain_put(exp->ex_client); >> nfsd4_fslocs_free(&exp->ex_fslocs); >> @@ -378,12 +385,25 @@ static void svc_export_put_work(struct >> work_struct *work) >> kfree(exp); >> } >> +static void svc_export_put_work(struct work_struct *work) >> +{ >> + struct svc_export *exp = >> + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); >> + >> + svc_export_release(exp); >> +} >> + >> static void svc_export_put(struct kref *ref) >> { >> struct svc_export *exp = container_of(ref, struct svc_export, >> h.ref); >> - INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); >> - queue_rcu_work(system_wq, &exp->ex_rcu_work); >> + if (rcu_read_lock_any_held()) { >> + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); >> + queue_rcu_work(system_wq, &exp->ex_rcu_work); >> + } else { >> + synchronize_rcu(); >> + svc_export_release(exp); >> + } >> } >> static int svc_export_upcall(struct cache_detail *cd, struct >> cache_head *h) >
在 2024/12/16 21:48, Chuck Lever 写道: > On 12/15/24 10:18 PM, yangerkun wrote: >> >> >> 在 2024/12/14 21:49, Yang Erkun 写道: >>> From: Yang Erkun <yangerkun@huawei.com> >>> >>> shell: >>> >>> mkfs.xfs -f /dev/sda >>> echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports >>> echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports >>> exportfs -ra >>> service nfs-server start >>> mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1 >>> mount /dev/sda /mnt/sda >>> touch /mnt1/sda/file >>> exportfs -r >>> umount /mnt/sda >>> >>> Commit f8c989a0c89a ("nfsd: release svc_expkey/svc_export with >>> rcu_work") >>> describe that when we call e_show/c_show, the last reference can down to >>> 0, and we will call expkey_put/svc_export_put with rcu context, this may >>> lead uaf or sleep in atomic bug. Finally, we introduce async mode to the >>> release and fix the bug. However, some other command may also finally >>> call >>> expkey_put/svc_export_put without rcu context, but expect that the sync >>> mode for the resource release. Like upper shell, before that commit, >>> exportfs -r will remove all entry with sync mode, and the last umount >>> /mnt/sda will always success. But after this commit, the umount will >>> always >>> fail, after we add some delay, they will success again. Personally, I >>> think >>> is actually a bug, and need be fixed. >>> >>> Use rcu_read_lock_any_held to distinguish does we really under rcu >>> context, >>> and if no, release resource with sync mode. >>> >>> Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with >>> rcu_work") >>> Signed-off-by: Yang Erkun <yangerkun@huawei.com> >>> --- >>> fs/nfsd/export.c | 44 ++++++++++++++++++++++++++++++++------------ >>> 1 file changed, 32 insertions(+), 12 deletions(-) >>> >>> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c >>> index eacafe46e3b6..25f13e877c2f 100644 >>> --- a/fs/nfsd/export.c >>> +++ b/fs/nfsd/export.c >>> @@ -40,11 +40,8 @@ >>> #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) >>> #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) >>> -static void expkey_put_work(struct work_struct *work) >>> +static void expkey_release(struct svc_expkey *key) >>> { >>> - struct svc_expkey *key = >>> - container_of(to_rcu_work(work), struct svc_expkey, >>> ek_rcu_work); >>> - >>> if (test_bit(CACHE_VALID, &key->h.flags) && >>> !test_bit(CACHE_NEGATIVE, &key->h.flags)) >>> path_put(&key->ek_path); >>> @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct >>> *work) >>> kfree(key); >>> } >>> +static void expkey_put_work(struct work_struct *work) >>> +{ >>> + struct svc_expkey *key = >>> + container_of(to_rcu_work(work), struct svc_expkey, >>> ek_rcu_work); >>> + >>> + expkey_release(key); >>> +} >>> + >>> static void expkey_put(struct kref *ref) >>> { >>> struct svc_expkey *key = container_of(ref, struct svc_expkey, >>> h.ref); >>> - INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); >>> - queue_rcu_work(system_wq, &key->ek_rcu_work); >>> + if (rcu_read_lock_any_held()) { >> >> Emm... >> >> This api won't work when we disable CONFIG_PREEMPT_COUNT... > > OK, I assume you will send a v2 :-) Yes~ V2 will be posted soon. > > Can you include the reviewers listed in MAINTAINERS on the To: line? > > R: Neil Brown <neilb@suse.de> > R: Olga Kornievskaia <okorniev@redhat.com> > R: Dai Ngo <Dai.Ngo@oracle.com> > R: Tom Talpey <tom@talpey.com> > > Thanks! Thanks for your reminder! > > >>> + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); >>> + queue_rcu_work(system_wq, &key->ek_rcu_work); >>> + } else { >>> + synchronize_rcu(); >>> + expkey_release(key); >>> + } >>> } >>> static int expkey_upcall(struct cache_detail *cd, struct cache_head >>> *h) >>> @@ -364,11 +374,8 @@ static void export_stats_destroy(struct >>> export_stats *stats) >>> EXP_STATS_COUNTERS_NUM); >>> } >>> -static void svc_export_put_work(struct work_struct *work) >>> +static void svc_export_release(struct svc_export *exp) >>> { >>> - struct svc_export *exp = >>> - container_of(to_rcu_work(work), struct svc_export, >>> ex_rcu_work); >>> - >>> path_put(&exp->ex_path); >>> auth_domain_put(exp->ex_client); >>> nfsd4_fslocs_free(&exp->ex_fslocs); >>> @@ -378,12 +385,25 @@ static void svc_export_put_work(struct >>> work_struct *work) >>> kfree(exp); >>> } >>> +static void svc_export_put_work(struct work_struct *work) >>> +{ >>> + struct svc_export *exp = >>> + container_of(to_rcu_work(work), struct svc_export, >>> ex_rcu_work); >>> + >>> + svc_export_release(exp); >>> +} >>> + >>> static void svc_export_put(struct kref *ref) >>> { >>> struct svc_export *exp = container_of(ref, struct svc_export, >>> h.ref); >>> - INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); >>> - queue_rcu_work(system_wq, &exp->ex_rcu_work); >>> + if (rcu_read_lock_any_held()) { >>> + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); >>> + queue_rcu_work(system_wq, &exp->ex_rcu_work); >>> + } else { >>> + synchronize_rcu(); >>> + svc_export_release(exp); >>> + } >>> } >>> static int svc_export_upcall(struct cache_detail *cd, struct >>> cache_head *h) >> > >
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index eacafe46e3b6..25f13e877c2f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -40,11 +40,8 @@ #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put_work(struct work_struct *work) +static void expkey_release(struct svc_expkey *key) { - struct svc_expkey *key = - container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); - if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); @@ -52,12 +49,25 @@ static void expkey_put_work(struct work_struct *work) kfree(key); } +static void expkey_put_work(struct work_struct *work) +{ + struct svc_expkey *key = + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); + + expkey_release(key); +} + static void expkey_put(struct kref *ref) { struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); - INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); - queue_rcu_work(system_wq, &key->ek_rcu_work); + if (rcu_read_lock_any_held()) { + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); + queue_rcu_work(system_wq, &key->ek_rcu_work); + } else { + synchronize_rcu(); + expkey_release(key); + } } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -364,11 +374,8 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_put_work(struct work_struct *work) +static void svc_export_release(struct svc_export *exp) { - struct svc_export *exp = - container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); - path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); @@ -378,12 +385,25 @@ static void svc_export_put_work(struct work_struct *work) kfree(exp); } +static void svc_export_put_work(struct work_struct *work) +{ + struct svc_export *exp = + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); + + svc_export_release(exp); +} + static void svc_export_put(struct kref *ref) { struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); - queue_rcu_work(system_wq, &exp->ex_rcu_work); + if (rcu_read_lock_any_held()) { + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); + queue_rcu_work(system_wq, &exp->ex_rcu_work); + } else { + synchronize_rcu(); + svc_export_release(exp); + } } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)