Message ID | 20211206175942.47326-2-dai.ngo@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nfsd: Initial implementation of NFSv4 Courteous Server | expand |
> On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@oracle.com> wrote: > > Add new callback, lm_expire_lock, to lock_manager_operations to allow > the lock manager to take appropriate action to resolve the lock conflict > if possible. The callback takes 2 arguments, file_lock of the blocker > and a testonly flag: > > testonly = 1 check and return true if lock conflict can be resolved > else return false. > testonly = 0 resolve the conflict if possible, return true if conflict > was resolved esle return false. > > Lock manager, such as NFSv4 courteous server, uses this callback to > resolve conflict by destroying lock owner, or the NFSv4 courtesy client > (client that has expired but allowed to maintains its states) that owns > the lock. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> Al, Jeff, as co-maintainers of record for fs/locks.c, can you give an Ack or Reviewed-by? I'd like to take this patch through the nfsd tree for v5.17. Thanks for your time! > --- > fs/locks.c | 28 +++++++++++++++++++++++++--- > include/linux/fs.h | 1 + > 2 files changed, 26 insertions(+), 3 deletions(-) > > diff --git a/fs/locks.c b/fs/locks.c > index 3d6fb4ae847b..0fef0a6322c7 100644 > --- a/fs/locks.c > +++ b/fs/locks.c > @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) > struct file_lock *cfl; > struct file_lock_context *ctx; > struct inode *inode = locks_inode(filp); > + bool ret; > > ctx = smp_load_acquire(&inode->i_flctx); > if (!ctx || list_empty_careful(&ctx->flc_posix)) { > @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, struct file_lock *fl) > } > > spin_lock(&ctx->flc_lock); > +retry: > list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { > - if (posix_locks_conflict(fl, cfl)) { > - locks_copy_conflock(fl, cfl); > - goto out; > + if (!posix_locks_conflict(fl, cfl)) > + continue; > + if (cfl->fl_lmops && cfl->fl_lmops->lm_expire_lock && > + cfl->fl_lmops->lm_expire_lock(cfl, 1)) { > + spin_unlock(&ctx->flc_lock); > + ret = cfl->fl_lmops->lm_expire_lock(cfl, 0); > + spin_lock(&ctx->flc_lock); > + if (ret) > + goto retry; > } > + locks_copy_conflock(fl, cfl); > + goto out; > } > fl->fl_type = F_UNLCK; > out: > @@ -1140,6 +1150,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, > int error; > bool added = false; > LIST_HEAD(dispose); > + bool ret; > > ctx = locks_get_lock_context(inode, request->fl_type); > if (!ctx) > @@ -1166,9 +1177,20 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, > * blocker's list of waiters and the global blocked_hash. > */ > if (request->fl_type != F_UNLCK) { > +retry: > list_for_each_entry(fl, &ctx->flc_posix, fl_list) { > if (!posix_locks_conflict(request, fl)) > continue; > + if (fl->fl_lmops && fl->fl_lmops->lm_expire_lock && > + fl->fl_lmops->lm_expire_lock(fl, 1)) { > + spin_unlock(&ctx->flc_lock); > + percpu_up_read(&file_rwsem); > + ret = fl->fl_lmops->lm_expire_lock(fl, 0); > + percpu_down_read(&file_rwsem); > + spin_lock(&ctx->flc_lock); > + if (ret) > + goto retry; > + } > if (conflock) > locks_copy_conflock(conflock, fl); > error = -EAGAIN; > diff --git a/include/linux/fs.h b/include/linux/fs.h > index e7a633353fd2..1a76b6451398 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1071,6 +1071,7 @@ struct lock_manager_operations { > int (*lm_change)(struct file_lock *, int, struct list_head *); > void (*lm_setup)(struct file_lock *, void **); > bool (*lm_breaker_owns_lease)(struct file_lock *); > + bool (*lm_expire_lock)(struct file_lock *fl, bool testonly); > }; > > struct lock_manager { > -- > 2.9.5 > -- Chuck Lever
On Mon, 2021-12-06 at 18:39 +0000, Chuck Lever III wrote: > > > > On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@oracle.com> wrote: > > > > Add new callback, lm_expire_lock, to lock_manager_operations to > > allow > > the lock manager to take appropriate action to resolve the lock > > conflict > > if possible. The callback takes 2 arguments, file_lock of the > > blocker > > and a testonly flag: > > > > testonly = 1 check and return true if lock conflict can be > > resolved > > else return false. > > testonly = 0 resolve the conflict if possible, return true if > > conflict > > was resolved esle return false. > > > > Lock manager, such as NFSv4 courteous server, uses this callback to > > resolve conflict by destroying lock owner, or the NFSv4 courtesy > > client > > (client that has expired but allowed to maintains its states) that > > owns > > the lock. > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > Al, Jeff, as co-maintainers of record for fs/locks.c, can you give > an Ack or Reviewed-by? I'd like to take this patch through the nfsd > tree for v5.17. Thanks for your time! > > > > --- > > fs/locks.c | 28 +++++++++++++++++++++++++--- > > include/linux/fs.h | 1 + > > 2 files changed, 26 insertions(+), 3 deletions(-) > > > > diff --git a/fs/locks.c b/fs/locks.c > > index 3d6fb4ae847b..0fef0a6322c7 100644 > > --- a/fs/locks.c > > +++ b/fs/locks.c > > @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct > > file_lock *fl) > > struct file_lock *cfl; > > struct file_lock_context *ctx; > > struct inode *inode = locks_inode(filp); > > + bool ret; > > > > ctx = smp_load_acquire(&inode->i_flctx); > > if (!ctx || list_empty_careful(&ctx->flc_posix)) { > > @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, struct > > file_lock *fl) > > } > > > > spin_lock(&ctx->flc_lock); > > +retry: > > list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { > > - if (posix_locks_conflict(fl, cfl)) { > > - locks_copy_conflock(fl, cfl); > > - goto out; > > + if (!posix_locks_conflict(fl, cfl)) > > + continue; > > + if (cfl->fl_lmops && cfl->fl_lmops->lm_expire_lock > > && > > + cfl->fl_lmops->lm_expire_lock(cfl, > > 1)) { > > + spin_unlock(&ctx->flc_lock); > > + ret = cfl->fl_lmops->lm_expire_lock(cfl, > > 0); > > + spin_lock(&ctx->flc_lock); > > + if (ret) > > + goto retry; > > } > > + locks_copy_conflock(fl, cfl); How do you know 'cfl' still points to a valid object after you've dropped the spin lock that was protecting the list? > > + goto out; > > } > > fl->fl_type = F_UNLCK; > > out: > > @@ -1140,6 +1150,7 @@ static int posix_lock_inode(struct inode > > *inode, struct file_lock *request, > > int error; > > bool added = false; > > LIST_HEAD(dispose); > > + bool ret; > > > > ctx = locks_get_lock_context(inode, request->fl_type); > > if (!ctx) > > @@ -1166,9 +1177,20 @@ static int posix_lock_inode(struct inode > > *inode, struct file_lock *request, > > * blocker's list of waiters and the global blocked_hash. > > */ > > if (request->fl_type != F_UNLCK) { > > +retry: > > list_for_each_entry(fl, &ctx->flc_posix, fl_list) { > > if (!posix_locks_conflict(request, fl)) > > continue; > > + if (fl->fl_lmops && fl->fl_lmops- > > >lm_expire_lock && > > + fl->fl_lmops- > > >lm_expire_lock(fl, 1)) { > > + spin_unlock(&ctx->flc_lock); > > + percpu_up_read(&file_rwsem); > > + ret = fl->fl_lmops- > > >lm_expire_lock(fl, 0); > > + percpu_down_read(&file_rwsem); > > + spin_lock(&ctx->flc_lock); > > + if (ret) > > + goto retry; > > + } ditto. > > if (conflock) > > locks_copy_conflock(conflock, fl); > > error = -EAGAIN; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > index e7a633353fd2..1a76b6451398 100644 > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -1071,6 +1071,7 @@ struct lock_manager_operations { > > int (*lm_change)(struct file_lock *, int, struct list_head > > *); > > void (*lm_setup)(struct file_lock *, void **); > > bool (*lm_breaker_owns_lease)(struct file_lock *); > > + bool (*lm_expire_lock)(struct file_lock *fl, bool > > testonly); > > }; > > > > struct lock_manager { > > -- > > 2.9.5 > > > > -- > Chuck Lever > > >
On Mon, Dec 06, 2021 at 07:52:29PM +0000, Trond Myklebust wrote: > On Mon, 2021-12-06 at 18:39 +0000, Chuck Lever III wrote: > > > > > > > On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@oracle.com> wrote: > > > > > > Add new callback, lm_expire_lock, to lock_manager_operations to > > > allow > > > the lock manager to take appropriate action to resolve the lock > > > conflict > > > if possible. The callback takes 2 arguments, file_lock of the > > > blocker > > > and a testonly flag: > > > > > > testonly = 1 check and return true if lock conflict can be > > > resolved > > > else return false. > > > testonly = 0 resolve the conflict if possible, return true if > > > conflict > > > was resolved esle return false. > > > > > > Lock manager, such as NFSv4 courteous server, uses this callback to > > > resolve conflict by destroying lock owner, or the NFSv4 courtesy > > > client > > > (client that has expired but allowed to maintains its states) that > > > owns > > > the lock. > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > > > Al, Jeff, as co-maintainers of record for fs/locks.c, can you give > > an Ack or Reviewed-by? I'd like to take this patch through the nfsd > > tree for v5.17. Thanks for your time! > > > > > > > --- > > > fs/locks.c | 28 +++++++++++++++++++++++++--- > > > include/linux/fs.h | 1 + > > > 2 files changed, 26 insertions(+), 3 deletions(-) > > > > > > diff --git a/fs/locks.c b/fs/locks.c > > > index 3d6fb4ae847b..0fef0a6322c7 100644 > > > --- a/fs/locks.c > > > +++ b/fs/locks.c > > > @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct > > > file_lock *fl) > > > struct file_lock *cfl; > > > struct file_lock_context *ctx; > > > struct inode *inode = locks_inode(filp); > > > + bool ret; > > > > > > ctx = smp_load_acquire(&inode->i_flctx); > > > if (!ctx || list_empty_careful(&ctx->flc_posix)) { > > > @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, struct > > > file_lock *fl) > > > } > > > > > > spin_lock(&ctx->flc_lock); > > > +retry: > > > list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { > > > - if (posix_locks_conflict(fl, cfl)) { > > > - locks_copy_conflock(fl, cfl); > > > - goto out; > > > + if (!posix_locks_conflict(fl, cfl)) > > > + continue; > > > + if (cfl->fl_lmops && cfl->fl_lmops->lm_expire_lock > > > && > > > + cfl->fl_lmops->lm_expire_lock(cfl, > > > 1)) { > > > + spin_unlock(&ctx->flc_lock); > > > + ret = cfl->fl_lmops->lm_expire_lock(cfl, > > > 0); > > > + spin_lock(&ctx->flc_lock); > > > + if (ret) > > > + goto retry; > > > } > > > + locks_copy_conflock(fl, cfl); > > How do you know 'cfl' still points to a valid object after you've > dropped the spin lock that was protecting the list? Ugh, good point, I should have noticed that when I suggested this approach.... Maybe the first call could instead return return some reference-counted object that a second call could wait on. Better, maybe it could add itself to a list of such things and then we could do this in one pass. --b. > > > > + goto out; > > > } > > > fl->fl_type = F_UNLCK; > > > out: > > > @@ -1140,6 +1150,7 @@ static int posix_lock_inode(struct inode > > > *inode, struct file_lock *request, > > > int error; > > > bool added = false; > > > LIST_HEAD(dispose); > > > + bool ret; > > > > > > ctx = locks_get_lock_context(inode, request->fl_type); > > > if (!ctx) > > > @@ -1166,9 +1177,20 @@ static int posix_lock_inode(struct inode > > > *inode, struct file_lock *request, > > > * blocker's list of waiters and the global blocked_hash. > > > */ > > > if (request->fl_type != F_UNLCK) { > > > +retry: > > > list_for_each_entry(fl, &ctx->flc_posix, fl_list) { > > > if (!posix_locks_conflict(request, fl)) > > > continue; > > > + if (fl->fl_lmops && fl->fl_lmops- > > > >lm_expire_lock && > > > + fl->fl_lmops- > > > >lm_expire_lock(fl, 1)) { > > > + spin_unlock(&ctx->flc_lock); > > > + percpu_up_read(&file_rwsem); > > > + ret = fl->fl_lmops- > > > >lm_expire_lock(fl, 0); > > > + percpu_down_read(&file_rwsem); > > > + spin_lock(&ctx->flc_lock); > > > + if (ret) > > > + goto retry; > > > + } > > ditto. > > > > if (conflock) > > > locks_copy_conflock(conflock, fl); > > > error = -EAGAIN; > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > index e7a633353fd2..1a76b6451398 100644 > > > --- a/include/linux/fs.h > > > +++ b/include/linux/fs.h > > > @@ -1071,6 +1071,7 @@ struct lock_manager_operations { > > > int (*lm_change)(struct file_lock *, int, struct list_head > > > *); > > > void (*lm_setup)(struct file_lock *, void **); > > > bool (*lm_breaker_owns_lease)(struct file_lock *); > > > + bool (*lm_expire_lock)(struct file_lock *fl, bool > > > testonly); > > > }; > > > > > > struct lock_manager { > > > -- > > > 2.9.5 > > > > > > > -- > > Chuck Lever > > > > > > > > -- > Trond Myklebust > Linux NFS client maintainer, Hammerspace > trond.myklebust@hammerspace.com > >
On 12/6/21 12:05 PM, bfields@fieldses.org wrote: > On Mon, Dec 06, 2021 at 07:52:29PM +0000, Trond Myklebust wrote: >> On Mon, 2021-12-06 at 18:39 +0000, Chuck Lever III wrote: >>> >>>> On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@oracle.com> wrote: >>>> >>>> Add new callback, lm_expire_lock, to lock_manager_operations to >>>> allow >>>> the lock manager to take appropriate action to resolve the lock >>>> conflict >>>> if possible. The callback takes 2 arguments, file_lock of the >>>> blocker >>>> and a testonly flag: >>>> >>>> testonly = 1 check and return true if lock conflict can be >>>> resolved >>>> else return false. >>>> testonly = 0 resolve the conflict if possible, return true if >>>> conflict >>>> was resolved esle return false. >>>> >>>> Lock manager, such as NFSv4 courteous server, uses this callback to >>>> resolve conflict by destroying lock owner, or the NFSv4 courtesy >>>> client >>>> (client that has expired but allowed to maintains its states) that >>>> owns >>>> the lock. >>>> >>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>> Al, Jeff, as co-maintainers of record for fs/locks.c, can you give >>> an Ack or Reviewed-by? I'd like to take this patch through the nfsd >>> tree for v5.17. Thanks for your time! >>> >>> >>>> --- >>>> fs/locks.c | 28 +++++++++++++++++++++++++--- >>>> include/linux/fs.h | 1 + >>>> 2 files changed, 26 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/fs/locks.c b/fs/locks.c >>>> index 3d6fb4ae847b..0fef0a6322c7 100644 >>>> --- a/fs/locks.c >>>> +++ b/fs/locks.c >>>> @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct >>>> file_lock *fl) >>>> struct file_lock *cfl; >>>> struct file_lock_context *ctx; >>>> struct inode *inode = locks_inode(filp); >>>> + bool ret; >>>> >>>> ctx = smp_load_acquire(&inode->i_flctx); >>>> if (!ctx || list_empty_careful(&ctx->flc_posix)) { >>>> @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, struct >>>> file_lock *fl) >>>> } >>>> >>>> spin_lock(&ctx->flc_lock); >>>> +retry: >>>> list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { >>>> - if (posix_locks_conflict(fl, cfl)) { >>>> - locks_copy_conflock(fl, cfl); >>>> - goto out; >>>> + if (!posix_locks_conflict(fl, cfl)) >>>> + continue; >>>> + if (cfl->fl_lmops && cfl->fl_lmops->lm_expire_lock >>>> && >>>> + cfl->fl_lmops->lm_expire_lock(cfl, >>>> 1)) { >>>> + spin_unlock(&ctx->flc_lock); >>>> + ret = cfl->fl_lmops->lm_expire_lock(cfl, >>>> 0); >>>> + spin_lock(&ctx->flc_lock); >>>> + if (ret) >>>> + goto retry; >>>> } >>>> + locks_copy_conflock(fl, cfl); >> How do you know 'cfl' still points to a valid object after you've >> dropped the spin lock that was protecting the list? > Ugh, good point, I should have noticed that when I suggested this > approach.... > > Maybe the first call could instead return return some reference-counted > object that a second call could wait on. > > Better, maybe it could add itself to a list of such things and then we > could do this in one pass. I think we adjust this logic a little bit to cover race condition: The 1st call to lm_expire_lock returns the client needs to be expired. Before we make the 2nd call, we save the 'lm_expire_lock' into a local variable then drop the spinlock, and use the local variable to make the 2nd call so that we do not reference 'cfl'. The argument of the second is the opaque return value from the 1st call. nfsd4_fl_expire_lock also needs some adjustment to support the above. -Dai > > --b. > >>>> + goto out; >>>> } >>>> fl->fl_type = F_UNLCK; >>>> out: >>>> @@ -1140,6 +1150,7 @@ static int posix_lock_inode(struct inode >>>> *inode, struct file_lock *request, >>>> int error; >>>> bool added = false; >>>> LIST_HEAD(dispose); >>>> + bool ret; >>>> >>>> ctx = locks_get_lock_context(inode, request->fl_type); >>>> if (!ctx) >>>> @@ -1166,9 +1177,20 @@ static int posix_lock_inode(struct inode >>>> *inode, struct file_lock *request, >>>> * blocker's list of waiters and the global blocked_hash. >>>> */ >>>> if (request->fl_type != F_UNLCK) { >>>> +retry: >>>> list_for_each_entry(fl, &ctx->flc_posix, fl_list) { >>>> if (!posix_locks_conflict(request, fl)) >>>> continue; >>>> + if (fl->fl_lmops && fl->fl_lmops- >>>>> lm_expire_lock && >>>> + fl->fl_lmops- >>>>> lm_expire_lock(fl, 1)) { >>>> + spin_unlock(&ctx->flc_lock); >>>> + percpu_up_read(&file_rwsem); >>>> + ret = fl->fl_lmops- >>>>> lm_expire_lock(fl, 0); >>>> + percpu_down_read(&file_rwsem); >>>> + spin_lock(&ctx->flc_lock); >>>> + if (ret) >>>> + goto retry; >>>> + } >> ditto. >> >>>> if (conflock) >>>> locks_copy_conflock(conflock, fl); >>>> error = -EAGAIN; >>>> diff --git a/include/linux/fs.h b/include/linux/fs.h >>>> index e7a633353fd2..1a76b6451398 100644 >>>> --- a/include/linux/fs.h >>>> +++ b/include/linux/fs.h >>>> @@ -1071,6 +1071,7 @@ struct lock_manager_operations { >>>> int (*lm_change)(struct file_lock *, int, struct list_head >>>> *); >>>> void (*lm_setup)(struct file_lock *, void **); >>>> bool (*lm_breaker_owns_lease)(struct file_lock *); >>>> + bool (*lm_expire_lock)(struct file_lock *fl, bool >>>> testonly); >>>> }; >>>> >>>> struct lock_manager { >>>> -- >>>> 2.9.5 >>>> >>> -- >>> Chuck Lever >>> >>> >>> >> -- >> Trond Myklebust >> Linux NFS client maintainer, Hammerspace >> trond.myklebust@hammerspace.com >> >>
On Mon, 2021-12-06 at 12:36 -0800, dai.ngo@oracle.com wrote: > > On 12/6/21 12:05 PM, bfields@fieldses.org wrote: > > On Mon, Dec 06, 2021 at 07:52:29PM +0000, Trond Myklebust wrote: > > > On Mon, 2021-12-06 at 18:39 +0000, Chuck Lever III wrote: > > > > > > > > > On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@oracle.com> > > > > > wrote: > > > > > > > > > > Add new callback, lm_expire_lock, to lock_manager_operations > > > > > to > > > > > allow > > > > > the lock manager to take appropriate action to resolve the > > > > > lock > > > > > conflict > > > > > if possible. The callback takes 2 arguments, file_lock of the > > > > > blocker > > > > > and a testonly flag: > > > > > > > > > > testonly = 1 check and return true if lock conflict can be > > > > > resolved > > > > > else return false. > > > > > testonly = 0 resolve the conflict if possible, return true > > > > > if > > > > > conflict > > > > > was resolved esle return false. > > > > > > > > > > Lock manager, such as NFSv4 courteous server, uses this > > > > > callback to > > > > > resolve conflict by destroying lock owner, or the NFSv4 > > > > > courtesy > > > > > client > > > > > (client that has expired but allowed to maintains its states) > > > > > that > > > > > owns > > > > > the lock. > > > > > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > > > Al, Jeff, as co-maintainers of record for fs/locks.c, can you > > > > give > > > > an Ack or Reviewed-by? I'd like to take this patch through the > > > > nfsd > > > > tree for v5.17. Thanks for your time! > > > > > > > > > > > > > --- > > > > > fs/locks.c | 28 +++++++++++++++++++++++++--- > > > > > include/linux/fs.h | 1 + > > > > > 2 files changed, 26 insertions(+), 3 deletions(-) > > > > > > > > > > diff --git a/fs/locks.c b/fs/locks.c > > > > > index 3d6fb4ae847b..0fef0a6322c7 100644 > > > > > --- a/fs/locks.c > > > > > +++ b/fs/locks.c > > > > > @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct > > > > > file_lock *fl) > > > > > struct file_lock *cfl; > > > > > struct file_lock_context *ctx; > > > > > struct inode *inode = locks_inode(filp); > > > > > + bool ret; > > > > > > > > > > ctx = smp_load_acquire(&inode->i_flctx); > > > > > if (!ctx || list_empty_careful(&ctx->flc_posix)) { > > > > > @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, > > > > > struct > > > > > file_lock *fl) > > > > > } > > > > > > > > > > spin_lock(&ctx->flc_lock); > > > > > +retry: > > > > > list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { > > > > > - if (posix_locks_conflict(fl, cfl)) { > > > > > - locks_copy_conflock(fl, cfl); > > > > > - goto out; > > > > > + if (!posix_locks_conflict(fl, cfl)) > > > > > + continue; > > > > > + if (cfl->fl_lmops && cfl->fl_lmops- > > > > > >lm_expire_lock > > > > > && > > > > > + cfl->fl_lmops- > > > > > >lm_expire_lock(cfl, > > > > > 1)) { > > > > > + spin_unlock(&ctx->flc_lock); > > > > > + ret = cfl->fl_lmops- > > > > > >lm_expire_lock(cfl, > > > > > 0); > > > > > + spin_lock(&ctx->flc_lock); > > > > > + if (ret) > > > > > + goto retry; > > > > > } > > > > > + locks_copy_conflock(fl, cfl); > > > How do you know 'cfl' still points to a valid object after you've > > > dropped the spin lock that was protecting the list? > > Ugh, good point, I should have noticed that when I suggested this > > approach.... > > > > Maybe the first call could instead return return some reference- > > counted > > object that a second call could wait on. > > > > Better, maybe it could add itself to a list of such things and then > > we > > could do this in one pass. > > I think we adjust this logic a little bit to cover race condition: > > The 1st call to lm_expire_lock returns the client needs to be > expired. > > Before we make the 2nd call, we save the 'lm_expire_lock' into a > local > variable then drop the spinlock, and use the local variable to make > the > 2nd call so that we do not reference 'cfl'. The argument of the > second > is the opaque return value from the 1st call. > > nfsd4_fl_expire_lock also needs some adjustment to support the above. > It's not just the fact that you're using 'cfl' in the actual call to lm_expire_lock(), but you're also using it after retaking the spinlock.
On 12/6/21 2:05 PM, Trond Myklebust wrote: > On Mon, 2021-12-06 at 12:36 -0800, dai.ngo@oracle.com wrote: >> On 12/6/21 12:05 PM, bfields@fieldses.org wrote: >>> On Mon, Dec 06, 2021 at 07:52:29PM +0000, Trond Myklebust wrote: >>>> On Mon, 2021-12-06 at 18:39 +0000, Chuck Lever III wrote: >>>>>> On Dec 6, 2021, at 12:59 PM, Dai Ngo <dai.ngo@oracle.com> >>>>>> wrote: >>>>>> >>>>>> Add new callback, lm_expire_lock, to lock_manager_operations >>>>>> to >>>>>> allow >>>>>> the lock manager to take appropriate action to resolve the >>>>>> lock >>>>>> conflict >>>>>> if possible. The callback takes 2 arguments, file_lock of the >>>>>> blocker >>>>>> and a testonly flag: >>>>>> >>>>>> testonly = 1 check and return true if lock conflict can be >>>>>> resolved >>>>>> else return false. >>>>>> testonly = 0 resolve the conflict if possible, return true >>>>>> if >>>>>> conflict >>>>>> was resolved esle return false. >>>>>> >>>>>> Lock manager, such as NFSv4 courteous server, uses this >>>>>> callback to >>>>>> resolve conflict by destroying lock owner, or the NFSv4 >>>>>> courtesy >>>>>> client >>>>>> (client that has expired but allowed to maintains its states) >>>>>> that >>>>>> owns >>>>>> the lock. >>>>>> >>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>>>> Al, Jeff, as co-maintainers of record for fs/locks.c, can you >>>>> give >>>>> an Ack or Reviewed-by? I'd like to take this patch through the >>>>> nfsd >>>>> tree for v5.17. Thanks for your time! >>>>> >>>>> >>>>>> --- >>>>>> fs/locks.c | 28 +++++++++++++++++++++++++--- >>>>>> include/linux/fs.h | 1 + >>>>>> 2 files changed, 26 insertions(+), 3 deletions(-) >>>>>> >>>>>> diff --git a/fs/locks.c b/fs/locks.c >>>>>> index 3d6fb4ae847b..0fef0a6322c7 100644 >>>>>> --- a/fs/locks.c >>>>>> +++ b/fs/locks.c >>>>>> @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct >>>>>> file_lock *fl) >>>>>> struct file_lock *cfl; >>>>>> struct file_lock_context *ctx; >>>>>> struct inode *inode = locks_inode(filp); >>>>>> + bool ret; >>>>>> >>>>>> ctx = smp_load_acquire(&inode->i_flctx); >>>>>> if (!ctx || list_empty_careful(&ctx->flc_posix)) { >>>>>> @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, >>>>>> struct >>>>>> file_lock *fl) >>>>>> } >>>>>> >>>>>> spin_lock(&ctx->flc_lock); >>>>>> +retry: >>>>>> list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { >>>>>> - if (posix_locks_conflict(fl, cfl)) { >>>>>> - locks_copy_conflock(fl, cfl); >>>>>> - goto out; >>>>>> + if (!posix_locks_conflict(fl, cfl)) >>>>>> + continue; >>>>>> + if (cfl->fl_lmops && cfl->fl_lmops- >>>>>>> lm_expire_lock >>>>>> && >>>>>> + cfl->fl_lmops- >>>>>>> lm_expire_lock(cfl, >>>>>> 1)) { >>>>>> + spin_unlock(&ctx->flc_lock); >>>>>> + ret = cfl->fl_lmops- >>>>>>> lm_expire_lock(cfl, >>>>>> 0); >>>>>> + spin_lock(&ctx->flc_lock); >>>>>> + if (ret) >>>>>> + goto retry; >>>>>> } >>>>>> + locks_copy_conflock(fl, cfl); >>>> How do you know 'cfl' still points to a valid object after you've >>>> dropped the spin lock that was protecting the list? >>> Ugh, good point, I should have noticed that when I suggested this >>> approach.... >>> >>> Maybe the first call could instead return return some reference- >>> counted >>> object that a second call could wait on. >>> >>> Better, maybe it could add itself to a list of such things and then >>> we >>> could do this in one pass. >> I think we adjust this logic a little bit to cover race condition: >> >> The 1st call to lm_expire_lock returns the client needs to be >> expired. >> >> Before we make the 2nd call, we save the 'lm_expire_lock' into a >> local >> variable then drop the spinlock, and use the local variable to make >> the >> 2nd call so that we do not reference 'cfl'. The argument of the >> second >> is the opaque return value from the 1st call. >> >> nfsd4_fl_expire_lock also needs some adjustment to support the above. >> > It's not just the fact that you're using 'cfl' in the actual call to > lm_expire_lock(), but you're also using it after retaking the spinlock. I plan to do this: checked_cfl = NULL; spin_lock(&ctx->flc_lock); retry: list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { if (!posix_locks_conflict(fl, cfl)) continue; if (check_cfl != cfg && cfl->fl_lmops && cfl->fl_lmops->lm_expire_lock) { void *res_data; res_data = cfl->fl_lmops->lm_expire_lock(cfl, 1); if (res_data) { func = cfl->fl_lmops->lm_expire_lock; spin_unlock(&ctx->flc_lock); func(res_data, 0); spin_lock(&ctx->flc_lock); checked_cfl = cfl; goto retry; } locks_copy_conflock(fl, cfl); goto out; } fl->fl_type = F_UNLCK; Do you still see problems with this? -Dai
diff --git a/fs/locks.c b/fs/locks.c index 3d6fb4ae847b..0fef0a6322c7 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -954,6 +954,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) struct file_lock *cfl; struct file_lock_context *ctx; struct inode *inode = locks_inode(filp); + bool ret; ctx = smp_load_acquire(&inode->i_flctx); if (!ctx || list_empty_careful(&ctx->flc_posix)) { @@ -962,11 +963,20 @@ posix_test_lock(struct file *filp, struct file_lock *fl) } spin_lock(&ctx->flc_lock); +retry: list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { - if (posix_locks_conflict(fl, cfl)) { - locks_copy_conflock(fl, cfl); - goto out; + if (!posix_locks_conflict(fl, cfl)) + continue; + if (cfl->fl_lmops && cfl->fl_lmops->lm_expire_lock && + cfl->fl_lmops->lm_expire_lock(cfl, 1)) { + spin_unlock(&ctx->flc_lock); + ret = cfl->fl_lmops->lm_expire_lock(cfl, 0); + spin_lock(&ctx->flc_lock); + if (ret) + goto retry; } + locks_copy_conflock(fl, cfl); + goto out; } fl->fl_type = F_UNLCK; out: @@ -1140,6 +1150,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, int error; bool added = false; LIST_HEAD(dispose); + bool ret; ctx = locks_get_lock_context(inode, request->fl_type); if (!ctx) @@ -1166,9 +1177,20 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, * blocker's list of waiters and the global blocked_hash. */ if (request->fl_type != F_UNLCK) { +retry: list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (!posix_locks_conflict(request, fl)) continue; + if (fl->fl_lmops && fl->fl_lmops->lm_expire_lock && + fl->fl_lmops->lm_expire_lock(fl, 1)) { + spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); + ret = fl->fl_lmops->lm_expire_lock(fl, 0); + percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + if (ret) + goto retry; + } if (conflock) locks_copy_conflock(conflock, fl); error = -EAGAIN; diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd2..1a76b6451398 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1071,6 +1071,7 @@ struct lock_manager_operations { int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); bool (*lm_breaker_owns_lease)(struct file_lock *); + bool (*lm_expire_lock)(struct file_lock *fl, bool testonly); }; struct lock_manager {
Add new callback, lm_expire_lock, to lock_manager_operations to allow the lock manager to take appropriate action to resolve the lock conflict if possible. The callback takes 2 arguments, file_lock of the blocker and a testonly flag: testonly = 1 check and return true if lock conflict can be resolved else return false. testonly = 0 resolve the conflict if possible, return true if conflict was resolved esle return false. Lock manager, such as NFSv4 courteous server, uses this callback to resolve conflict by destroying lock owner, or the NFSv4 courtesy client (client that has expired but allowed to maintains its states) that owns the lock. Signed-off-by: Dai Ngo <dai.ngo@oracle.com> --- fs/locks.c | 28 +++++++++++++++++++++++++--- include/linux/fs.h | 1 + 2 files changed, 26 insertions(+), 3 deletions(-)