Message ID | 20121206153447.30693.54128.stgit@localhost.localdomain (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: > NFSd does lookup. Lookup is done starting from current->fs->root. > NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely > unshared) root. > So we have to swap root to those, which process, started NFSd, has. Because > that process can be in a container with it's own root. This doesn't sound right to me. Which lookups exactly do you see being done relative to current->fs->root ? --b. > > Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> > --- > fs/nfsd/netns.h | 1 + > fs/nfsd/nfssvc.c | 33 ++++++++++++++++++++++++++++++++- > 2 files changed, 33 insertions(+), 1 deletions(-) > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > index abfc97c..5c423c6 100644 > --- a/fs/nfsd/netns.h > +++ b/fs/nfsd/netns.h > @@ -101,6 +101,7 @@ struct nfsd_net { > struct timeval nfssvc_boot; > > struct svc_serv *nfsd_serv; > + struct path root; > }; > > extern int nfsd_net_id; > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index cee62ab..177bb60 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net) > > set_max_drc(); > do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ > + get_fs_root(current->fs, &nn->root); > return 0; > } > > @@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net) > if (destroy) > svc_shutdown_net(nn->nfsd_serv, net); > svc_destroy(nn->nfsd_serv); > - if (destroy) > + if (destroy) { > + path_put(&nn->root); > nn->nfsd_serv = NULL; > + } > } > > int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) > @@ -533,6 +536,25 @@ out: > return error; > } > > +/* > + * This function is actually slightly modified set_fs_root()<F9> > + */ > +static void nfsd_swap_root(struct net *net) > +{ > + struct nfsd_net *nn = net_generic(net, nfsd_net_id); > + struct fs_struct *fs = current->fs; > + struct path old_root; > + > + path_get(&nn->root); > + spin_lock(&fs->lock); > + write_seqcount_begin(&fs->seq); > + old_root = fs->root; > + fs->root = nn->root; > + write_seqcount_end(&fs->seq); > + spin_unlock(&fs->lock); > + if (old_root.dentry) > + path_put(&old_root); > +} > > /* > * This is the NFS server kernel thread > @@ -559,6 +581,15 @@ nfsd(void *vrqstp) > current->fs->umask = 0; > > /* > + * We have to swap NFSd kthread's fs->root. > + * Why so? Because NFSd can be started in container, which has it's own > + * root. > + * And so what? NFSd lookup files, and lookup start from > + * current->fs->root. > + */ > + nfsd_swap_root(net); > + > + /* > * thread is spawned with all signals set to SIG_IGN, re-enable > * the ones that will bring down the thread > */ > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
11.12.2012 00:28, J. Bruce Fields ?????: > On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: >> NFSd does lookup. Lookup is done starting from current->fs->root. >> NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely >> unshared) root. >> So we have to swap root to those, which process, started NFSd, has. Because >> that process can be in a container with it's own root. > > This doesn't sound right to me. > > Which lookups exactly do you see being done relative to > current->fs->root ? > Ok, you are right. I was mistaken here. This is not a exactly lookup, but d_path() problem in svc_export_request(). I.e. without root swapping, d_path() will give not local export path (like "/export") but something like this "/root/containers_root/export". > --b. > >> >> Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> >> --- >> fs/nfsd/netns.h | 1 + >> fs/nfsd/nfssvc.c | 33 ++++++++++++++++++++++++++++++++- >> 2 files changed, 33 insertions(+), 1 deletions(-) >> >> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >> index abfc97c..5c423c6 100644 >> --- a/fs/nfsd/netns.h >> +++ b/fs/nfsd/netns.h >> @@ -101,6 +101,7 @@ struct nfsd_net { >> struct timeval nfssvc_boot; >> >> struct svc_serv *nfsd_serv; >> + struct path root; >> }; >> >> extern int nfsd_net_id; >> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c >> index cee62ab..177bb60 100644 >> --- a/fs/nfsd/nfssvc.c >> +++ b/fs/nfsd/nfssvc.c >> @@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net) >> >> set_max_drc(); >> do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ >> + get_fs_root(current->fs, &nn->root); >> return 0; >> } >> >> @@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net) >> if (destroy) >> svc_shutdown_net(nn->nfsd_serv, net); >> svc_destroy(nn->nfsd_serv); >> - if (destroy) >> + if (destroy) { >> + path_put(&nn->root); >> nn->nfsd_serv = NULL; >> + } >> } >> >> int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) >> @@ -533,6 +536,25 @@ out: >> return error; >> } >> >> +/* >> + * This function is actually slightly modified set_fs_root()<F9> >> + */ >> +static void nfsd_swap_root(struct net *net) >> +{ >> + struct nfsd_net *nn = net_generic(net, nfsd_net_id); >> + struct fs_struct *fs = current->fs; >> + struct path old_root; >> + >> + path_get(&nn->root); >> + spin_lock(&fs->lock); >> + write_seqcount_begin(&fs->seq); >> + old_root = fs->root; >> + fs->root = nn->root; >> + write_seqcount_end(&fs->seq); >> + spin_unlock(&fs->lock); >> + if (old_root.dentry) >> + path_put(&old_root); >> +} >> >> /* >> * This is the NFS server kernel thread >> @@ -559,6 +581,15 @@ nfsd(void *vrqstp) >> current->fs->umask = 0; >> >> /* >> + * We have to swap NFSd kthread's fs->root. >> + * Why so? Because NFSd can be started in container, which has it's own >> + * root. >> + * And so what? NFSd lookup files, and lookup start from >> + * current->fs->root. >> + */ >> + nfsd_swap_root(net); >> + >> + /* >> * thread is spawned with all signals set to SIG_IGN, re-enable >> * the ones that will bring down the thread >> */ >>
11.12.2012 18:00, Stanislav Kinsbursky ?????: > 11.12.2012 00:28, J. Bruce Fields ?????: >> On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: >>> NFSd does lookup. Lookup is done starting from current->fs->root. >>> NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely >>> unshared) root. >>> So we have to swap root to those, which process, started NFSd, has. Because >>> that process can be in a container with it's own root. >> >> This doesn't sound right to me. >> >> Which lookups exactly do you see being done relative to >> current->fs->root ? >> > > Ok, you are right. I was mistaken here. > This is not a exactly lookup, but d_path() problem in svc_export_request(). > I.e. without root swapping, d_path() will give not local export path (like "/export") > but something like this "/root/containers_root/export". > We, actually, can do it less in less aggressive way. I.e. instead root swap and current svc_export_request() implementation: void svc_export_request(...) { <snip> pth = d_path(&exp->ex_path, *bpp, *blen); <snip> } we can do something like this: void svc_export_request(...) { struct nfsd_net *nn = ... <snip> spin_lock(&dcache_lock); pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen); spin_unlock(&dcache_lock); <snip> } >> --b. >> >>> >>> Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> >>> --- >>> fs/nfsd/netns.h | 1 + >>> fs/nfsd/nfssvc.c | 33 ++++++++++++++++++++++++++++++++- >>> 2 files changed, 33 insertions(+), 1 deletions(-) >>> >>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >>> index abfc97c..5c423c6 100644 >>> --- a/fs/nfsd/netns.h >>> +++ b/fs/nfsd/netns.h >>> @@ -101,6 +101,7 @@ struct nfsd_net { >>> struct timeval nfssvc_boot; >>> >>> struct svc_serv *nfsd_serv; >>> + struct path root; >>> }; >>> >>> extern int nfsd_net_id; >>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c >>> index cee62ab..177bb60 100644 >>> --- a/fs/nfsd/nfssvc.c >>> +++ b/fs/nfsd/nfssvc.c >>> @@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net) >>> >>> set_max_drc(); >>> do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ >>> + get_fs_root(current->fs, &nn->root); >>> return 0; >>> } >>> >>> @@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net) >>> if (destroy) >>> svc_shutdown_net(nn->nfsd_serv, net); >>> svc_destroy(nn->nfsd_serv); >>> - if (destroy) >>> + if (destroy) { >>> + path_put(&nn->root); >>> nn->nfsd_serv = NULL; >>> + } >>> } >>> >>> int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) >>> @@ -533,6 +536,25 @@ out: >>> return error; >>> } >>> >>> +/* >>> + * This function is actually slightly modified set_fs_root()<F9> >>> + */ >>> +static void nfsd_swap_root(struct net *net) >>> +{ >>> + struct nfsd_net *nn = net_generic(net, nfsd_net_id); >>> + struct fs_struct *fs = current->fs; >>> + struct path old_root; >>> + >>> + path_get(&nn->root); >>> + spin_lock(&fs->lock); >>> + write_seqcount_begin(&fs->seq); >>> + old_root = fs->root; >>> + fs->root = nn->root; >>> + write_seqcount_end(&fs->seq); >>> + spin_unlock(&fs->lock); >>> + if (old_root.dentry) >>> + path_put(&old_root); >>> +} >>> >>> /* >>> * This is the NFS server kernel thread >>> @@ -559,6 +581,15 @@ nfsd(void *vrqstp) >>> current->fs->umask = 0; >>> >>> /* >>> + * We have to swap NFSd kthread's fs->root. >>> + * Why so? Because NFSd can be started in container, which has it's own >>> + * root. >>> + * And so what? NFSd lookup files, and lookup start from >>> + * current->fs->root. >>> + */ >>> + nfsd_swap_root(net); >>> + >>> + /* >>> * thread is spawned with all signals set to SIG_IGN, re-enable >>> * the ones that will bring down the thread >>> */ >>> > >
11.12.2012 18:12, Stanislav Kinsbursky ?????: > 11.12.2012 18:00, Stanislav Kinsbursky ?????: >> 11.12.2012 00:28, J. Bruce Fields ?????: >>> On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: >>>> NFSd does lookup. Lookup is done starting from current->fs->root. >>>> NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely >>>> unshared) root. >>>> So we have to swap root to those, which process, started NFSd, has. Because >>>> that process can be in a container with it's own root. >>> >>> This doesn't sound right to me. >>> >>> Which lookups exactly do you see being done relative to >>> current->fs->root ? >>> >> >> Ok, you are right. I was mistaken here. >> This is not a exactly lookup, but d_path() problem in svc_export_request(). >> I.e. without root swapping, d_path() will give not local export path (like "/export") >> but something like this "/root/containers_root/export". >> > > We, actually, can do it less in less aggressive way. > I.e. instead root swap and current svc_export_request() implementation: > > void svc_export_request(...) > { > <snip> > pth = d_path(&exp->ex_path, *bpp, *blen); > <snip> > } > > we can do something like this: > > void svc_export_request(...) > { > struct nfsd_net *nn = ... > <snip> > spin_lock(&dcache_lock); > pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen); > spin_unlock(&dcache_lock); > <snip> > } > No, this won't work. Sorry for noise. >>> --b. >>> >>>> >>>> Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> >>>> --- >>>> fs/nfsd/netns.h | 1 + >>>> fs/nfsd/nfssvc.c | 33 ++++++++++++++++++++++++++++++++- >>>> 2 files changed, 33 insertions(+), 1 deletions(-) >>>> >>>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >>>> index abfc97c..5c423c6 100644 >>>> --- a/fs/nfsd/netns.h >>>> +++ b/fs/nfsd/netns.h >>>> @@ -101,6 +101,7 @@ struct nfsd_net { >>>> struct timeval nfssvc_boot; >>>> >>>> struct svc_serv *nfsd_serv; >>>> + struct path root; >>>> }; >>>> >>>> extern int nfsd_net_id; >>>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c >>>> index cee62ab..177bb60 100644 >>>> --- a/fs/nfsd/nfssvc.c >>>> +++ b/fs/nfsd/nfssvc.c >>>> @@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net) >>>> >>>> set_max_drc(); >>>> do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ >>>> + get_fs_root(current->fs, &nn->root); >>>> return 0; >>>> } >>>> >>>> @@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net) >>>> if (destroy) >>>> svc_shutdown_net(nn->nfsd_serv, net); >>>> svc_destroy(nn->nfsd_serv); >>>> - if (destroy) >>>> + if (destroy) { >>>> + path_put(&nn->root); >>>> nn->nfsd_serv = NULL; >>>> + } >>>> } >>>> >>>> int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) >>>> @@ -533,6 +536,25 @@ out: >>>> return error; >>>> } >>>> >>>> +/* >>>> + * This function is actually slightly modified set_fs_root()<F9> >>>> + */ >>>> +static void nfsd_swap_root(struct net *net) >>>> +{ >>>> + struct nfsd_net *nn = net_generic(net, nfsd_net_id); >>>> + struct fs_struct *fs = current->fs; >>>> + struct path old_root; >>>> + >>>> + path_get(&nn->root); >>>> + spin_lock(&fs->lock); >>>> + write_seqcount_begin(&fs->seq); >>>> + old_root = fs->root; >>>> + fs->root = nn->root; >>>> + write_seqcount_end(&fs->seq); >>>> + spin_unlock(&fs->lock); >>>> + if (old_root.dentry) >>>> + path_put(&old_root); >>>> +} >>>> >>>> /* >>>> * This is the NFS server kernel thread >>>> @@ -559,6 +581,15 @@ nfsd(void *vrqstp) >>>> current->fs->umask = 0; >>>> >>>> /* >>>> + * We have to swap NFSd kthread's fs->root. >>>> + * Why so? Because NFSd can be started in container, which has it's own >>>> + * root. >>>> + * And so what? NFSd lookup files, and lookup start from >>>> + * current->fs->root. >>>> + */ >>>> + nfsd_swap_root(net); >>>> + >>>> + /* >>>> * thread is spawned with all signals set to SIG_IGN, re-enable >>>> * the ones that will bring down the thread >>>> */ >>>> >> >> > >
On Tue, Dec 11, 2012 at 06:00:00PM +0400, Stanislav Kinsbursky wrote: > 11.12.2012 00:28, J. Bruce Fields ??????????: > >On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: > >>NFSd does lookup. Lookup is done starting from current->fs->root. > >>NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely > >>unshared) root. > >>So we have to swap root to those, which process, started NFSd, has. Because > >>that process can be in a container with it's own root. > > > >This doesn't sound right to me. > > > >Which lookups exactly do you see being done relative to > >current->fs->root ? > > > > Ok, you are right. I was mistaken here. > This is not a exactly lookup, but d_path() problem in svc_export_request(). > I.e. without root swapping, d_path() will give not local export path (like "/export") > but something like this "/root/containers_root/export". Now, *that* is a different story (and makes some sense). Take a look at __d_path(), please. You don't need to set ->fs->root to get d_path() equivalent relative to given point. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Dec 11, 2012 at 06:12:40PM +0400, Stanislav Kinsbursky wrote: > UID: 9899 > > 11.12.2012 18:00, Stanislav Kinsbursky ?????: > >11.12.2012 00:28, J. Bruce Fields ?????: > >>On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: > >>>NFSd does lookup. Lookup is done starting from current->fs->root. > >>>NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely > >>>unshared) root. > >>>So we have to swap root to those, which process, started NFSd, has. Because > >>>that process can be in a container with it's own root. > >> > >>This doesn't sound right to me. > >> > >>Which lookups exactly do you see being done relative to > >>current->fs->root ? > >> > > > >Ok, you are right. I was mistaken here. > >This is not a exactly lookup, but d_path() problem in svc_export_request(). > >I.e. without root swapping, d_path() will give not local export path (like "/export") > >but something like this "/root/containers_root/export". > > > > We, actually, can do it less in less aggressive way. > I.e. instead root swap and current svc_export_request() implementation: > > void svc_export_request(...) > { > <snip> > pth = d_path(&exp->ex_path, *bpp, *blen); > <snip> > } > > we can do something like this: > > void svc_export_request(...) > { > struct nfsd_net *nn = ... > <snip> > spin_lock(&dcache_lock); > pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen); > spin_unlock(&dcache_lock); > <snip> > } That looks simpler, but I still don't understand why we need it. I'm confused about how d_path works; I would have thought that filesystem namespaces would have their own vfsmount trees and hence that the (vfsmount, dentry) would be enough to specify the path. Is the root argument for the case of chroot? Do we care about that? Also, svc_export_request is called from mountd's read of /proc/net/rpc/nfsd.export/channel. If mountd's root is wrong, then nothing's going to work anyway. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
11.12.2012 18:54, Al Viro ?????: > On Tue, Dec 11, 2012 at 06:00:00PM +0400, Stanislav Kinsbursky wrote: >> 11.12.2012 00:28, J. Bruce Fields ??????????: >>> On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: >>>> NFSd does lookup. Lookup is done starting from current->fs->root. >>>> NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely >>>> unshared) root. >>>> So we have to swap root to those, which process, started NFSd, has. Because >>>> that process can be in a container with it's own root. >>> >>> This doesn't sound right to me. >>> >>> Which lookups exactly do you see being done relative to >>> current->fs->root ? >>> >> >> Ok, you are right. I was mistaken here. >> This is not a exactly lookup, but d_path() problem in svc_export_request(). >> I.e. without root swapping, d_path() will give not local export path (like "/export") >> but something like this "/root/containers_root/export". > > Now, *that* is a different story (and makes some sense). Take a look > at __d_path(), please. You don't need to set ->fs->root to get d_path() > equivalent relative to given point. > Thanks, Al. But __d_path() is not exported and this code is called from NFSd module. Is it suitable for you to export __d_path()?
On Tue, Dec 11, 2012 at 09:56:21AM -0500, J. Bruce Fields wrote: > That looks simpler, but I still don't understand why we need it. > > I'm confused about how d_path works; I would have thought that > filesystem namespaces would have their own vfsmount trees and hence that > the (vfsmount, dentry) would be enough to specify the path. Is the root > argument for the case of chroot? Do we care about that? __d_path() is relative pathname from here to there. Whether (and what for) is it wanted in case of nfsd patches is a separate question... > Also, svc_export_request is called from mountd's read of > /proc/net/rpc/nfsd.export/channel. If mountd's root is wrong, then > nothing's going to work anyway. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
11.12.2012 18:56, J. Bruce Fields ?????: > On Tue, Dec 11, 2012 at 06:12:40PM +0400, Stanislav Kinsbursky wrote: >> UID: 9899 >> >> 11.12.2012 18:00, Stanislav Kinsbursky ?????: >>> 11.12.2012 00:28, J. Bruce Fields ?????: >>>> On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: >>>>> NFSd does lookup. Lookup is done starting from current->fs->root. >>>>> NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely >>>>> unshared) root. >>>>> So we have to swap root to those, which process, started NFSd, has. Because >>>>> that process can be in a container with it's own root. >>>> >>>> This doesn't sound right to me. >>>> >>>> Which lookups exactly do you see being done relative to >>>> current->fs->root ? >>>> >>> >>> Ok, you are right. I was mistaken here. >>> This is not a exactly lookup, but d_path() problem in svc_export_request(). >>> I.e. without root swapping, d_path() will give not local export path (like "/export") >>> but something like this "/root/containers_root/export". >>> >> >> We, actually, can do it less in less aggressive way. >> I.e. instead root swap and current svc_export_request() implementation: >> >> void svc_export_request(...) >> { >> <snip> >> pth = d_path(&exp->ex_path, *bpp, *blen); >> <snip> >> } >> >> we can do something like this: >> >> void svc_export_request(...) >> { >> struct nfsd_net *nn = ... >> <snip> >> spin_lock(&dcache_lock); >> pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen); >> spin_unlock(&dcache_lock); >> <snip> >> } > > That looks simpler, but I still don't understand why we need it. > > I'm confused about how d_path works; I would have thought that > filesystem namespaces would have their own vfsmount trees and hence that > the (vfsmount, dentry) would be enough to specify the path. Is the root > argument for the case of chroot? Do we care about that? > It works very simple: just traverse the tree from specified dentry up to current->fs->root.dentry. Having container in some fully separated mount point is great, of course. But: 1) this is a limitation we really want to avoid. I.e. container can be chrooted into some path like "/root/containers_root/" as in example above. 2) NFSd kthread works in init root environment. But we anyway want to get proper path string in container root, but not in kthreads root. > Also, svc_export_request is called from mountd's read of > /proc/net/rpc/nfsd.export/channel. If mountd's root is wrong, then > nothing's going to work anyway. > I don't really understand, how mountd's root can be wrong. I.e. its' always right as I see it. NFSd kthreads have to swap/use relative path/whatever to communicate with proper mountd. Or I'm missing something? > --b. >
On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote: > 11.12.2012 18:56, J. Bruce Fields ?????: > >On Tue, Dec 11, 2012 at 06:12:40PM +0400, Stanislav Kinsbursky wrote: > >>UID: 9899 > >> > >>11.12.2012 18:00, Stanislav Kinsbursky ?????: > >>>11.12.2012 00:28, J. Bruce Fields ?????: > >>>>On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote: > >>>>>NFSd does lookup. Lookup is done starting from current->fs->root. > >>>>>NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely > >>>>>unshared) root. > >>>>>So we have to swap root to those, which process, started NFSd, has. Because > >>>>>that process can be in a container with it's own root. > >>>> > >>>>This doesn't sound right to me. > >>>> > >>>>Which lookups exactly do you see being done relative to > >>>>current->fs->root ? > >>>> > >>> > >>>Ok, you are right. I was mistaken here. > >>>This is not a exactly lookup, but d_path() problem in svc_export_request(). > >>>I.e. without root swapping, d_path() will give not local export path (like "/export") > >>>but something like this "/root/containers_root/export". > >>> > >> > >>We, actually, can do it less in less aggressive way. > >>I.e. instead root swap and current svc_export_request() implementation: > >> > >>void svc_export_request(...) > >>{ > >> <snip> > >> pth = d_path(&exp->ex_path, *bpp, *blen); > >> <snip> > >>} > >> > >>we can do something like this: > >> > >>void svc_export_request(...) > >>{ > >> struct nfsd_net *nn = ... > >> <snip> > >> spin_lock(&dcache_lock); > >> pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen); > >> spin_unlock(&dcache_lock); > >> <snip> > >>} > > > >That looks simpler, but I still don't understand why we need it. > > > >I'm confused about how d_path works; I would have thought that > >filesystem namespaces would have their own vfsmount trees and hence that > >the (vfsmount, dentry) would be enough to specify the path. Is the root > >argument for the case of chroot? Do we care about that? > > > > It works very simple: just traverse the tree from specified dentry up to current->fs->root.dentry. > Having container in some fully separated mount point is great, of course. But: > 1) this is a limitation we really want to avoid. I.e. container can be chrooted into some path like "/root/containers_root/" as in example above. > 2) NFSd kthread works in init root environment. But we anyway want to get proper path string in container root, but not in kthreads root. > > >Also, svc_export_request is called from mountd's read of > >/proc/net/rpc/nfsd.export/channel. If mountd's root is wrong, then > >nothing's going to work anyway. > > > > I don't really understand, how mountd's root can be wrong. I.e. > its' always right as I see it. NFSd kthreads have to swap/use > relative path/whatever to communicate with proper mountd. > Or I'm missing something? Ugh, I see the problem: I thought svc_export_request was called at the time mountd does the read, but instead its done at the time nfsd does the upcall. I suspect that's wrong, and we really want this done in the context of the mountd process when it does the read call. If d_path is called there then we have no problem. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote: > On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote: > > I don't really understand, how mountd's root can be wrong. I.e. > > its' always right as I see it. NFSd kthreads have to swap/use > > relative path/whatever to communicate with proper mountd. > > Or I'm missing something? > > Ugh, I see the problem: I thought svc_export_request was called at the > time mountd does the read, but instead its done at the time nfsd does > the upcall. > > I suspect that's wrong, and we really want this done in the context of > the mountd process when it does the read call. If d_path is called > there then we have no problem. Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to skip calling cache_request and instead delay that until cache_read(). I think that should be possible. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
11.12.2012 19:35, J. Bruce Fields ?????: > On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote: >> On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote: >>> I don't really understand, how mountd's root can be wrong. I.e. >>> its' always right as I see it. NFSd kthreads have to swap/use >>> relative path/whatever to communicate with proper mountd. >>> Or I'm missing something? >> >> Ugh, I see the problem: I thought svc_export_request was called at the >> time mountd does the read, but instead its done at the time nfsd does >> the upcall. >> >> I suspect that's wrong, and we really want this done in the context of >> the mountd process when it does the read call. If d_path is called >> there then we have no problem. > > Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to > skip calling cache_request and instead delay that until cache_read(). I > think that should be possible. > Hmmm... I'm not familiar with mountd to kernel exchange protocol. It looks like kernel passes that path string to mountd on upcall. > --b. >
11.12.2012 19:35, J. Bruce Fields ?????: > On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote: >> On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote: >>> I don't really understand, how mountd's root can be wrong. I.e. >>> its' always right as I see it. NFSd kthreads have to swap/use >>> relative path/whatever to communicate with proper mountd. >>> Or I'm missing something? >> >> Ugh, I see the problem: I thought svc_export_request was called at the >> time mountd does the read, but instead its done at the time nfsd does >> the upcall. >> >> I suspect that's wrong, and we really want this done in the context of >> the mountd process when it does the read call. If d_path is called >> there then we have no problem. > > Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to > skip calling cache_request and instead delay that until cache_read(). I > think that should be possible. > So, Bruce, what we going to do (or what you want me to do) with the rest of NFSd changes? I.e. how I should solve this d_path() problem? I.e. I don't understand what did you mean by "I'd be happier if we could modify sunrpc_cache_pipe_upcall to skip calling cache_request and instead delay that until cache_read()". Could you give me a hint? > --b. >
On Fri, Jan 11, 2013 at 06:56:58PM +0400, Stanislav Kinsbursky wrote: > 11.12.2012 19:35, J. Bruce Fields ?????: > >On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote: > >>On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote: > >>>I don't really understand, how mountd's root can be wrong. I.e. > >>>its' always right as I see it. NFSd kthreads have to swap/use > >>>relative path/whatever to communicate with proper mountd. > >>>Or I'm missing something? > >> > >>Ugh, I see the problem: I thought svc_export_request was called at the > >>time mountd does the read, but instead its done at the time nfsd does > >>the upcall. > >> > >>I suspect that's wrong, and we really want this done in the context of > >>the mountd process when it does the read call. If d_path is called > >>there then we have no problem. > > > >Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to > >skip calling cache_request and instead delay that until cache_read(). I > >think that should be possible. > > > > So, Bruce, what we going to do (or what you want me to do) with the rest of NFSd changes? > I.e. how I should solve this d_path() problem? > I.e. I don't understand what did you mean by "I'd be happier if we could modify sunrpc_cache_pipe_upcall to > skip calling cache_request and instead delay that until cache_read()". > Could you give me a hint? Definitely. So normally the way these upcalls happen are: 1. the kernel does a cache lookup, finds no matching item, and calls sunrpc_cache_pipe_upcall(). 2. sunrpc_cache_pipe_upcall() formats the upcall: it allocates a struct cache_request crq and fills crq->buf with the upcall data by calling the cache's ->cache_request() method. 3. Then rpc.mountd realizes there's data available in /proc/net/rpc/nfsd.fh/content, so it does a read on that file. 4. cache_read copies the formatted upcall from crq->buf to to userspace. So all I'm suggesting is that instead of calling ->cache_request() at step 2, we do it at step 4. Then cache_request will be called from rpc.mountd's read. So we'll know which container rpc.mountd is in. Does that make sense? --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
11.01.2013 21:03, J. Bruce Fields ?????: > On Fri, Jan 11, 2013 at 06:56:58PM +0400, Stanislav Kinsbursky wrote: >> 11.12.2012 19:35, J. Bruce Fields ?????: >>> On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote: >>>> On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote: >>>>> I don't really understand, how mountd's root can be wrong. I.e. >>>>> its' always right as I see it. NFSd kthreads have to swap/use >>>>> relative path/whatever to communicate with proper mountd. >>>>> Or I'm missing something? >>>> >>>> Ugh, I see the problem: I thought svc_export_request was called at the >>>> time mountd does the read, but instead its done at the time nfsd does >>>> the upcall. >>>> >>>> I suspect that's wrong, and we really want this done in the context of >>>> the mountd process when it does the read call. If d_path is called >>>> there then we have no problem. >>> >>> Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to >>> skip calling cache_request and instead delay that until cache_read(). I >>> think that should be possible. >>> >> >> So, Bruce, what we going to do (or what you want me to do) with the rest of NFSd changes? >> I.e. how I should solve this d_path() problem? >> I.e. I don't understand what did you mean by "I'd be happier if we could modify sunrpc_cache_pipe_upcall to >> skip calling cache_request and instead delay that until cache_read()". >> Could you give me a hint? > > Definitely. So normally the way these upcalls happen are: > > 1. the kernel does a cache lookup, finds no matching item, and > calls sunrpc_cache_pipe_upcall(). > 2. sunrpc_cache_pipe_upcall() formats the upcall: it allocates a > struct cache_request crq and fills crq->buf with the upcall > data by calling the cache's ->cache_request() method. > 3. Then rpc.mountd realizes there's data available in > /proc/net/rpc/nfsd.fh/content, so it does a read on that file. > 4. cache_read copies the formatted upcall from crq->buf to > to userspace. > > So all I'm suggesting is that instead of calling ->cache_request() at > step 2, we do it at step 4. > > Then cache_request will be called from rpc.mountd's read. So we'll know > which container rpc.mountd is in. > > Does that make sense? > Ok, now I understand. If the upcall is just the way to inform rpc.mound, that there are some data to read and process, then it your proposing changes should work. I'll prepare initial patch set. Thanks! > --b. >
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index abfc97c..5c423c6 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -101,6 +101,7 @@ struct nfsd_net { struct timeval nfssvc_boot; struct svc_serv *nfsd_serv; + struct path root; }; extern int nfsd_net_id; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cee62ab..177bb60 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net) set_max_drc(); do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ + get_fs_root(current->fs, &nn->root); return 0; } @@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net) if (destroy) svc_shutdown_net(nn->nfsd_serv, net); svc_destroy(nn->nfsd_serv); - if (destroy) + if (destroy) { + path_put(&nn->root); nn->nfsd_serv = NULL; + } } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) @@ -533,6 +536,25 @@ out: return error; } +/* + * This function is actually slightly modified set_fs_root()<F9> + */ +static void nfsd_swap_root(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct fs_struct *fs = current->fs; + struct path old_root; + + path_get(&nn->root); + spin_lock(&fs->lock); + write_seqcount_begin(&fs->seq); + old_root = fs->root; + fs->root = nn->root; + write_seqcount_end(&fs->seq); + spin_unlock(&fs->lock); + if (old_root.dentry) + path_put(&old_root); +} /* * This is the NFS server kernel thread @@ -559,6 +581,15 @@ nfsd(void *vrqstp) current->fs->umask = 0; /* + * We have to swap NFSd kthread's fs->root. + * Why so? Because NFSd can be started in container, which has it's own + * root. + * And so what? NFSd lookup files, and lookup start from + * current->fs->root. + */ + nfsd_swap_root(net); + + /* * thread is spawned with all signals set to SIG_IGN, re-enable * the ones that will bring down the thread */
NFSd does lookup. Lookup is done starting from current->fs->root. NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely unshared) root. So we have to swap root to those, which process, started NFSd, has. Because that process can be in a container with it's own root. Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> --- fs/nfsd/netns.h | 1 + fs/nfsd/nfssvc.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html