diff mbox series

[v1] NFSv4.1 provide mount option to toggle trunking discovery

Message ID 20220223174041.77887-1-olga.kornievskaia@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v1] NFSv4.1 provide mount option to toggle trunking discovery | expand

Commit Message

Olga Kornievskaia Feb. 23, 2022, 5:40 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
toggle whether or not the client will engage in actively discovery
of trunking locations.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/client.c           | 3 ++-
 fs/nfs/fs_context.c       | 8 ++++++++
 include/linux/nfs_fs_sb.h | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

Comments

Olga Kornievskaia Feb. 23, 2022, 11:55 p.m. UTC | #1
I have forgotten to cc Kurt Garloff to the post.

On Wed, Feb 23, 2022 at 12:40 PM Olga Kornievskaia
<olga.kornievskaia@gmail.com> wrote:
>
> From: Olga Kornievskaia <kolga@netapp.com>
>
> Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
> toggle whether or not the client will engage in actively discovery
> of trunking locations.
>
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfs/client.c           | 3 ++-
>  fs/nfs/fs_context.c       | 8 ++++++++
>  include/linux/nfs_fs_sb.h | 1 +
>  3 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index d1f34229e11a..84c080ddfd01 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -857,7 +857,8 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
>         }
>
>         if (clp->rpc_ops->discover_trunking != NULL &&
> -                       (server->caps & NFS_CAP_FS_LOCATIONS)) {
> +                       (server->caps & NFS_CAP_FS_LOCATIONS &&
> +                        !(server->flags & NFS_MOUNT_NOTRUNK_DISCOVERY))) {
>                 error = clp->rpc_ops->discover_trunking(server, mntfh);
>                 if (error < 0)
>                         return error;
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index ea17fa1f31ec..ad1448a63aa0 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -80,6 +80,7 @@ enum nfs_param {
>         Opt_source,
>         Opt_tcp,
>         Opt_timeo,
> +       Opt_trunkdiscovery,
>         Opt_udp,
>         Opt_v,
>         Opt_vers,
> @@ -180,6 +181,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
>         fsparam_string("source",        Opt_source),
>         fsparam_flag  ("tcp",           Opt_tcp),
>         fsparam_u32   ("timeo",         Opt_timeo),
> +       fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
>         fsparam_flag  ("udp",           Opt_udp),
>         fsparam_flag  ("v2",            Opt_v),
>         fsparam_flag  ("v3",            Opt_v),
> @@ -529,6 +531,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
>                 else
>                         ctx->flags &= ~NFS_MOUNT_NOCTO;
>                 break;
> +       case Opt_trunkdiscovery:
> +               if (result.negated)
> +                       ctx->flags |= NFS_MOUNT_NOTRUNK_DISCOVERY;
> +               else
> +                       ctx->flags &= ~NFS_MOUNT_NOTRUNK_DISCOVERY;
> +               break;
>         case Opt_ac:
>                 if (result.negated)
>                         ctx->flags |= NFS_MOUNT_NOAC;
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index ca0959e51e81..d0920d7f5f9e 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -151,6 +151,7 @@ struct nfs_server {
>  #define NFS_MOUNT_SOFTREVAL            0x800000
>  #define NFS_MOUNT_WRITE_EAGER          0x01000000
>  #define NFS_MOUNT_WRITE_WAIT           0x02000000
> +#define NFS_MOUNT_NOTRUNK_DISCOVERY    0x04000000
>
>         unsigned int            fattr_valid;    /* Valid attributes */
>         unsigned int            caps;           /* server capabilities */
> --
> 2.27.0
>
Kurt Garloff Feb. 24, 2022, 1:42 p.m. UTC | #2
Hi Olga,

thanks!

I can confirm that after applying your patch (against 5.15.24),
passing the mount option notrunkdiscovery to NFS mount does
indeed make the NFS mounts from the Qnap server (with knfsd from
3.4.6 kernel) work again.

Sorry to say, but I don't think this is the final solution yet:
* The option notrunkdiscovery is not tolerated by older kernels,
   so there is no way to pass a setting that works with <= 5.15.23
   and the kernels with your latest patch. This is a problem for
   me who keeps automount maps in LDAP.
* From a user perspective, the 5.15.24+/5.6.10+/5.17-rc behavior
   is still a regression, as the Qnap NFS mounts all break. With
   your patch, there is a way to recover, but it needs to be well
   documented and then be found by an admin who then needs to do
   the right change. Not acceptable for -stable IMVHO and according
   to Greg's explanations not for mainline either.

I see a number of possibilities to resolve this:
(0) We pretend it's not a problem that's serious enough to take
     action (and ensure that we do document this new option well).
(1) We revert the patch that does FS_LOCATIONS discovery.
     Assuming that FS_LOCATIONS does provide a useful feature, this
     would not be our preferred solution, I guess ...
(2) We prevent NFS v4.1 servers to use FS_LOCATIONS (like my patch
     implements) and additionally allow for the opt-out with
     notrunkdiscovery mount option. This fixes the known regression
     with Qnap knfsd (without requiring user intervention) and still
     allows for FS_LOCATIONS to be useful with NFSv4.2 servers that
     support this. The disadvantage is that we won't use the feature
     on NFSv4.1 servers which might support this feature perfectly
     (and there's no opt-in possibility). And the risk is that there
     might be NFSv4.2 servers out there that also misreport
     FS_LOCATIONS support and still need manual intervention (which
     at least is possible with notrunkdiscovery).
(3) We make this feature an opt-in thing and require users to
     pass trunkdiscovery to take advantage of the feature.
     This has zero risk of breakage (unless we screw up the patch),
     but always requires user intervention to take advantage of
     the FS_LOCATIONS feature.
(4) Identify a way to recover from the mount with FS_LOCATIONS
     against the broken server, so instead of hanging we do just
     turn this off if we find it not to work. Disadavantage is that
     this adds complexity and might stall the mounting for a while
     until the recovery worked. The complexity bears the risk for
     us screwing up.

I personally find solutions 2 -- 4 acceptable.

If the experts see (4) as simple enough, it might be worth a try.
Otherwise (2) or (3) would seem the way to go from my perspective.

Thanks!
Chuck Lever Feb. 24, 2022, 3:30 p.m. UTC | #3
> On Feb 23, 2022, at 12:40 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> 
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
> toggle whether or not the client will engage in actively discovery
> of trunking locations.

An alternative solution might be to change the client's
probe to treat NFS4ERR_DELAY as "no trunking information
available" and then allow operation to proceed on the
known good transport.

I can't think of a reason why normal operation needs to
stop until this request succeeds...?


> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
> fs/nfs/client.c           | 3 ++-
> fs/nfs/fs_context.c       | 8 ++++++++
> include/linux/nfs_fs_sb.h | 1 +
> 3 files changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index d1f34229e11a..84c080ddfd01 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -857,7 +857,8 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
> 	}
> 
> 	if (clp->rpc_ops->discover_trunking != NULL &&
> -			(server->caps & NFS_CAP_FS_LOCATIONS)) {
> +			(server->caps & NFS_CAP_FS_LOCATIONS &&
> +			 !(server->flags & NFS_MOUNT_NOTRUNK_DISCOVERY))) {
> 		error = clp->rpc_ops->discover_trunking(server, mntfh);
> 		if (error < 0)
> 			return error;
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index ea17fa1f31ec..ad1448a63aa0 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -80,6 +80,7 @@ enum nfs_param {
> 	Opt_source,
> 	Opt_tcp,
> 	Opt_timeo,
> +	Opt_trunkdiscovery,
> 	Opt_udp,
> 	Opt_v,
> 	Opt_vers,
> @@ -180,6 +181,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
> 	fsparam_string("source",	Opt_source),
> 	fsparam_flag  ("tcp",		Opt_tcp),
> 	fsparam_u32   ("timeo",		Opt_timeo),
> +	fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
> 	fsparam_flag  ("udp",		Opt_udp),
> 	fsparam_flag  ("v2",		Opt_v),
> 	fsparam_flag  ("v3",		Opt_v),
> @@ -529,6 +531,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
> 		else
> 			ctx->flags &= ~NFS_MOUNT_NOCTO;
> 		break;
> +	case Opt_trunkdiscovery:
> +		if (result.negated)
> +			ctx->flags |= NFS_MOUNT_NOTRUNK_DISCOVERY;
> +		else
> +			ctx->flags &= ~NFS_MOUNT_NOTRUNK_DISCOVERY;
> +		break;
> 	case Opt_ac:
> 		if (result.negated)
> 			ctx->flags |= NFS_MOUNT_NOAC;
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index ca0959e51e81..d0920d7f5f9e 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -151,6 +151,7 @@ struct nfs_server {
> #define NFS_MOUNT_SOFTREVAL		0x800000
> #define NFS_MOUNT_WRITE_EAGER		0x01000000
> #define NFS_MOUNT_WRITE_WAIT		0x02000000
> +#define NFS_MOUNT_NOTRUNK_DISCOVERY	0x04000000
> 
> 	unsigned int		fattr_valid;	/* Valid attributes */
> 	unsigned int		caps;		/* server capabilities */
> -- 
> 2.27.0
> 

--
Chuck Lever
Olga Kornievskaia Feb. 24, 2022, 5:55 p.m. UTC | #4
On Thu, Feb 24, 2022 at 10:30 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>
>
> > On Feb 23, 2022, at 12:40 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> >
> > From: Olga Kornievskaia <kolga@netapp.com>
> >
> > Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
> > toggle whether or not the client will engage in actively discovery
> > of trunking locations.
>
> An alternative solution might be to change the client's
> probe to treat NFS4ERR_DELAY as "no trunking information
> available" and then allow operation to proceed on the
> known good transport.

I'm not sure what you mean about "the known good transport". I don't
think the ERR_DELAY is associated with a transport. Btw, if you saw a
previous patch which restricts fs_location query to the main transport
makes your statement even more confusing as it would mean there is no
good transport. Or do you mean to say we should have trunking
discovery done asynchronous to mount by a separate kernel thread and
therefore not impact mount steps?

I do object to treating a single ERR_DELAY during discovery as a
permanent error as there are legitimate reasons to a delay in looking
up the information that can be resolved in time by the server.
However, I don't object to putting a time limit or number of tries on
ERR_DELAY as safety wheels.

Lastly, I think perhaps we can do both have a mount option to toggle
discovery as well as safeguard the discovery from broken servers?

> I can't think of a reason why normal operation needs to
> stop until this request succeeds...?
>
>
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> > fs/nfs/client.c           | 3 ++-
> > fs/nfs/fs_context.c       | 8 ++++++++
> > include/linux/nfs_fs_sb.h | 1 +
> > 3 files changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index d1f34229e11a..84c080ddfd01 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -857,7 +857,8 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
> >       }
> >
> >       if (clp->rpc_ops->discover_trunking != NULL &&
> > -                     (server->caps & NFS_CAP_FS_LOCATIONS)) {
> > +                     (server->caps & NFS_CAP_FS_LOCATIONS &&
> > +                      !(server->flags & NFS_MOUNT_NOTRUNK_DISCOVERY))) {
> >               error = clp->rpc_ops->discover_trunking(server, mntfh);
> >               if (error < 0)
> >                       return error;
> > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > index ea17fa1f31ec..ad1448a63aa0 100644
> > --- a/fs/nfs/fs_context.c
> > +++ b/fs/nfs/fs_context.c
> > @@ -80,6 +80,7 @@ enum nfs_param {
> >       Opt_source,
> >       Opt_tcp,
> >       Opt_timeo,
> > +     Opt_trunkdiscovery,
> >       Opt_udp,
> >       Opt_v,
> >       Opt_vers,
> > @@ -180,6 +181,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
> >       fsparam_string("source",        Opt_source),
> >       fsparam_flag  ("tcp",           Opt_tcp),
> >       fsparam_u32   ("timeo",         Opt_timeo),
> > +     fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
> >       fsparam_flag  ("udp",           Opt_udp),
> >       fsparam_flag  ("v2",            Opt_v),
> >       fsparam_flag  ("v3",            Opt_v),
> > @@ -529,6 +531,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
> >               else
> >                       ctx->flags &= ~NFS_MOUNT_NOCTO;
> >               break;
> > +     case Opt_trunkdiscovery:
> > +             if (result.negated)
> > +                     ctx->flags |= NFS_MOUNT_NOTRUNK_DISCOVERY;
> > +             else
> > +                     ctx->flags &= ~NFS_MOUNT_NOTRUNK_DISCOVERY;
> > +             break;
> >       case Opt_ac:
> >               if (result.negated)
> >                       ctx->flags |= NFS_MOUNT_NOAC;
> > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> > index ca0959e51e81..d0920d7f5f9e 100644
> > --- a/include/linux/nfs_fs_sb.h
> > +++ b/include/linux/nfs_fs_sb.h
> > @@ -151,6 +151,7 @@ struct nfs_server {
> > #define NFS_MOUNT_SOFTREVAL           0x800000
> > #define NFS_MOUNT_WRITE_EAGER         0x01000000
> > #define NFS_MOUNT_WRITE_WAIT          0x02000000
> > +#define NFS_MOUNT_NOTRUNK_DISCOVERY  0x04000000
> >
> >       unsigned int            fattr_valid;    /* Valid attributes */
> >       unsigned int            caps;           /* server capabilities */
> > --
> > 2.27.0
> >
>
> --
> Chuck Lever
>
>
>
Chuck Lever Feb. 24, 2022, 6:20 p.m. UTC | #5
> On Feb 24, 2022, at 12:55 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> 
> On Thu, Feb 24, 2022 at 10:30 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>> 
>>> On Feb 23, 2022, at 12:40 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
>>> 
>>> From: Olga Kornievskaia <kolga@netapp.com>
>>> 
>>> Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
>>> toggle whether or not the client will engage in actively discovery
>>> of trunking locations.
>> 
>> An alternative solution might be to change the client's
>> probe to treat NFS4ERR_DELAY as "no trunking information
>> available" and then allow operation to proceed on the
>> known good transport.
> 
> I'm not sure what you mean about "the known good transport".

The transport on which the client sent the
GETATTR(fs_locations).

The NFS4ERR_DELAY response means the server has no other
trunks available "at this time."


> I don't
> think the ERR_DELAY is associated with a transport. Btw, if you saw a
> previous patch which restricts fs_location query to the main transport
> makes your statement even more confusing as it would mean there is no
> good transport. Or do you mean to say we should have trunking
> discovery done asynchronous to mount by a separate kernel thread and
> therefore not impact mount steps?

Yes, something like that.

Trunking discovery that is independent of the NFS mount
process should be the goal. In fact, trunking discovery
really ought to be done in user space.

- There is now a user/kernel API for managing transports

- The trunking configuration on the server might change
  during the lifetime of the mount, so periodic checking
  is needed

- Adding an extra round trip, especially one that might
  be slowed by one or more NFS4ERR_DELAY replies, is
  going to be a problem during a mount storm

- There might be local policies that affect which network
  paths to choose for trunking

- The choice of transports might be made automatically
  by an orchestrator

- Tying this setting to a mount option is not appropriate
  because the transports are shared amount multiple NFS
  mounts


> I do object to treating a single ERR_DELAY during discovery as a
> permanent error as there are legitimate reasons to a delay in looking
> up the information that can be resolved in time by the server.
> However, I don't object to putting a time limit or number of tries on
> ERR_DELAY as safety wheels.

In the past, some have objected to /any/ delay added to
the NFS mount process.

There's no reason to hold up the mount process -- the
client can try the trunking discovery probe again in a
few moments while the mount proceeds, can't it?

If that means handing the probe to a work queue or
leaving it to user space, that seems like a more
flexible choice.


> Lastly, I think perhaps we can do both have a mount option to toggle
> discovery as well as safeguard the discovery from broken servers?

I'd really rather not add a mount option for this
purpose unless you know of another reason why trunking
discovery needs to be disabled.

The best solution is to fix the server implementations.
If that's not possible then the second best is to have
the client manage the situation without needing any
human intervention.

Adding an administrative tunable is, to my mind, an
option of the very last resort.


--
Chuck Lever
Olga Kornievskaia Feb. 24, 2022, 9:25 p.m. UTC | #6
On Thu, Feb 24, 2022 at 1:20 PM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>
> > On Feb 24, 2022, at 12:55 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> >
> > On Thu, Feb 24, 2022 at 10:30 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
> >>
> >>> On Feb 23, 2022, at 12:40 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> >>>
> >>> From: Olga Kornievskaia <kolga@netapp.com>
> >>>
> >>> Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
> >>> toggle whether or not the client will engage in actively discovery
> >>> of trunking locations.
> >>
> >> An alternative solution might be to change the client's
> >> probe to treat NFS4ERR_DELAY as "no trunking information
> >> available" and then allow operation to proceed on the
> >> known good transport.
> >
> > I'm not sure what you mean about "the known good transport".
>
> The transport on which the client sent the
> GETATTR(fs_locations).
>
> The NFS4ERR_DELAY response means the server has no other
> trunks available "at this time."

But GETATTR(fs_locations) isn't only used for trunking query, it's
used for filesystem location (migration) as well. Are we redefining
what ERR_DELAY means in the context of trunking vs migration?

> > I don't
> > think the ERR_DELAY is associated with a transport. Btw, if you saw a
> > previous patch which restricts fs_location query to the main transport
> > makes your statement even more confusing as it would mean there is no
> > good transport. Or do you mean to say we should have trunking
> > discovery done asynchronous to mount by a separate kernel thread and
> > therefore not impact mount steps?
>
> Yes, something like that.
>
> Trunking discovery that is independent of the NFS mount
> process should be the goal. In fact, trunking discovery
> really ought to be done in user space.

I agree it should be a goal of continuous management of trunking but
the initial setup is a part of file system attributes discovery.
fs_location is a file system attribute which is queried along with
other attributes upon discovery of a file system. Thus I maintain that
the current treatment of trunking discovery is valid.

What is being described below is a set of goals for trunking that we
have discussed before and are important.

> - There is now a user/kernel API for managing transports
>
> - The trunking configuration on the server might change
>   during the lifetime of the mount, so periodic checking
>   is needed
>
> - Adding an extra round trip, especially one that might
>   be slowed by one or more NFS4ERR_DELAY replies, is
>   going to be a problem during a mount storm
>
> - There might be local policies that affect which network
>   paths to choose for trunking
>
> - The choice of transports might be made automatically
>   by an orchestrator
>
> - Tying this setting to a mount option is not appropriate
>   because the transports are shared amount multiple NFS
>   mounts
>
>
> > I do object to treating a single ERR_DELAY during discovery as a
> > permanent error as there are legitimate reasons to a delay in looking
> > up the information that can be resolved in time by the server.
> > However, I don't object to putting a time limit or number of tries on
> > ERR_DELAY as safety wheels.
>
> In the past, some have objected to /any/ delay added to
> the NFS mount process.

I again would like to note that fs_locations is a file system
attribute thus I would argue has to be treated as other file system
attributes.

> There's no reason to hold up the mount process -- the
> client can try the trunking discovery probe again in a
> few moments while the mount proceeds, can't it?

Given that I suggested doing it asynchronous means I consider it a
possible design though I think it increases the complexity of the
system greatly (I'm not convinced that it's the right call to be
done).

> If that means handing the probe to a work queue or
> leaving it to user space, that seems like a more
> flexible choice.
>
>
> > Lastly, I think perhaps we can do both have a mount option to toggle
> > discovery as well as safeguard the discovery from broken servers?
>
> I'd really rather not add a mount option for this
> purpose unless you know of another reason why trunking
> discovery needs to be disabled.

I don't offhand. I thought it is the simplest and most appropriate
solution and perhaps inline with "migration/nomigration" option but I
must be mistaken there.

> The best solution is to fix the server implementations.
> If that's not possible then the second best is to have
> the client manage the situation without needing any
> human intervention.
>
> Adding an administrative tunable is, to my mind, an
> option of the very last resort.
>
>
> --
> Chuck Lever
>
>
>
Chuck Lever Feb. 24, 2022, 9:53 p.m. UTC | #7
> On Feb 24, 2022, at 4:25 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> 
> On Thu, Feb 24, 2022 at 1:20 PM Chuck Lever III <chuck.lever@oracle.com> wrote:
>> 
>> 
>>> On Feb 24, 2022, at 12:55 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
>>> 
>>> On Thu, Feb 24, 2022 at 10:30 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>>>> 
>>>>> On Feb 23, 2022, at 12:40 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
>>>>> 
>>>>> From: Olga Kornievskaia <kolga@netapp.com>
>>>>> 
>>>>> Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
>>>>> toggle whether or not the client will engage in actively discovery
>>>>> of trunking locations.
>>>> 
>>>> An alternative solution might be to change the client's
>>>> probe to treat NFS4ERR_DELAY as "no trunking information
>>>> available" and then allow operation to proceed on the
>>>> known good transport.
>>> 
>>> I'm not sure what you mean about "the known good transport".
>> 
>> The transport on which the client sent the
>> GETATTR(fs_locations).
>> 
>> The NFS4ERR_DELAY response means the server has no other
>> trunks available "at this time."
> 
> But GETATTR(fs_locations) isn't only used for trunking query, it's
> used for filesystem location (migration) as well. Are we redefining
> what ERR_DELAY means in the context of trunking vs migration?

I don't think I'm redefining what is described in RFC 8881
Section 15.1.1.3. The meaning of that status code is still
the same; it's the client's recovery action that can be
made to be different.

During migration, NFS4ERR_DELAY holds off the client until
open and lock state has been transitioned to the destination
server. In that case DELAY has to serialize further operations
from the client, and waiting and retrying is the correct
response.

I mean, the client won't know the hostname of the destination
until the GETATTR(fs_locations) returns a successful result.

For trunking discovery, DELAY still means roughly -EAGAIN.
But it's up to the caller whether and when to try the
operation again. I'm suggesting that in the context of
trunking discovery, there's no need to halt progress
until trunking discovery succeeds. The discovery probe
can be dropped or retried in the background.


>>> I do object to treating a single ERR_DELAY during discovery as a
>>> permanent error as there are legitimate reasons to a delay in looking
>>> up the information that can be resolved in time by the server.
>>> However, I don't object to putting a time limit or number of tries on
>>> ERR_DELAY as safety wheels.
>> 
>> In the past, some have objected to /any/ delay added to
>> the NFS mount process.
> 
> I again would like to note that fs_locations is a file system
> attribute thus I would argue has to be treated as other file system
> attributes.

True, fs_locations, as it was originally defined, is a
per-filesystem attribute.

But I don't see how that is relevant to this issue. The
client doesn't have to wait for trunking information to
start its operation using the main transport.


>>> Lastly, I think perhaps we can do both have a mount option to toggle
>>> discovery as well as safeguard the discovery from broken servers?
>> 
>> I'd really rather not add a mount option for this
>> purpose unless you know of another reason why trunking
>> discovery needs to be disabled.
> 
> I don't offhand. I thought it is the simplest and most appropriate
> solution and perhaps inline with "migration/nomigration" option but I
> must be mistaken there.

The "migration" option was a last resort. There were
really no other options to deal with servers that depend
on non-uniform client IDs.

There is an argument to be made that we shouldn't have
added that mount option because it controls the behavior
of all the mounts on that client.

IMO you shouldn't use "migration" as any kind of
precedent.


--
Chuck Lever
Kurt Garloff Feb. 25, 2022, 10:48 p.m. UTC | #8
Hi, 

Am 24. Februar 2022 14:42:41 MEZ schrieb Kurt Garloff <kurt@garloff.de>:
>Hi Olga,
>[...] 
>
>I see a number of possibilities to resolve this:
>(0) We pretend it's not a problem that's serious enough to take
>     action (and ensure that we do document this new option well).
>(1) We revert the patch that does FS_LOCATIONS discovery.
>     Assuming that FS_LOCATIONS does provide a useful feature, this
>     would not be our preferred solution, I guess ...
>(2) We prevent NFS v4.1 servers to use FS_LOCATIONS (like my patch
>     implements) and additionally allow for the opt-out with
>     notrunkdiscovery mount option. This fixes the known regression
>     with Qnap knfsd (without requiring user intervention) and still
>     allows for FS_LOCATIONS to be useful with NFSv4.2 servers that
>     support this. The disadvantage is that we won't use the feature
>     on NFSv4.1 servers which might support this feature perfectly
>     (and there's no opt-in possibility). And the risk is that there
>     might be NFSv4.2 servers out there that also misreport
>     FS_LOCATIONS support and still need manual intervention (which
>     at least is possible with notrunkdiscovery).
>(3) We make this feature an opt-in thing and require users to
>     pass trunkdiscovery to take advantage of the feature.
>     This has zero risk of breakage (unless we screw up the patch),
>     but always requires user intervention to take advantage of
>     the FS_LOCATIONS feature.
>(4) Identify a way to recover from the mount with FS_LOCATIONS
>     against the broken server, so instead of hanging we do just
>     turn this off if we find it not to work. Disadavantage is that
>     this adds complexity and might stall the mounting for a while
>     until the recovery worked. The complexity bears the risk for
>     us screwing up.
>
>I personally find solutions 2 -- 4 acceptable.
>
>If the experts see (4) as simple enough, it might be worth a try.
>Otherwise (2) or (3) would seem the way to go from my perspective.

Any thought ls? 

Thanks,
Benjamin Coddington March 16, 2022, 12:39 p.m. UTC | #9
On 25 Feb 2022, at 17:48, Kurt Garloff wrote:

> Hi,
>
> Am 24. Februar 2022 14:42:41 MEZ schrieb Kurt Garloff <kurt@garloff.de>:
>> Hi Olga,
>> [...]
>>
>> I see a number of possibilities to resolve this:
>> (0) We pretend it's not a problem that's serious enough to take
>>     action (and ensure that we do document this new option well).
>> (1) We revert the patch that does FS_LOCATIONS discovery.
>>     Assuming that FS_LOCATIONS does provide a useful feature, this
>>     would not be our preferred solution, I guess ...
>> (2) We prevent NFS v4.1 servers to use FS_LOCATIONS (like my patch
>>     implements) and additionally allow for the opt-out with
>>     notrunkdiscovery mount option. This fixes the known regression
>>     with Qnap knfsd (without requiring user intervention) and still
>>     allows for FS_LOCATIONS to be useful with NFSv4.2 servers that
>>     support this. The disadvantage is that we won't use the feature
>>     on NFSv4.1 servers which might support this feature perfectly
>>     (and there's no opt-in possibility). And the risk is that there
>>     might be NFSv4.2 servers out there that also misreport
>>     FS_LOCATIONS support and still need manual intervention (which
>>     at least is possible with notrunkdiscovery).
>> (3) We make this feature an opt-in thing and require users to
>>     pass trunkdiscovery to take advantage of the feature.
>>     This has zero risk of breakage (unless we screw up the patch),
>>     but always requires user intervention to take advantage of
>>     the FS_LOCATIONS feature.
>> (4) Identify a way to recover from the mount with FS_LOCATIONS
>>     against the broken server, so instead of hanging we do just
>>     turn this off if we find it not to work. Disadavantage is that
>>     this adds complexity and might stall the mounting for a while
>>     until the recovery worked. The complexity bears the risk for
>>     us screwing up.
>>
>> I personally find solutions 2 -- 4 acceptable.
>>
>> If the experts see (4) as simple enough, it might be worth a try.
>> Otherwise (2) or (3) would seem the way to go from my perspective.
>
> Any thought ls?

I think (3) is the best way, and perhaps using sysfs to toggle it would
be a solution to the problems presented by a mount option.

I'm worried that this issue is being ignored because that's usually what
happens when requests/patches are proposed that violate the policy of "we do
not fix the client for server bugs".  In this case that policy conficts with
"no user visible regressions".

Can anyone declare which policy takes precedent?

Ben
Chuck Lever March 16, 2022, 2:40 p.m. UTC | #10
> On Mar 16, 2022, at 8:39 AM, Benjamin Coddington <bcodding@redhat.com> wrote:
> 
> On 25 Feb 2022, at 17:48, Kurt Garloff wrote:
> 
>> Hi,
>> 
>> Am 24. Februar 2022 14:42:41 MEZ schrieb Kurt Garloff <kurt@garloff.de>:
>>> Hi Olga,
>>> [...]
>>> 
>>> I see a number of possibilities to resolve this:
>>> (0) We pretend it's not a problem that's serious enough to take
>>>     action (and ensure that we do document this new option well).
>>> (1) We revert the patch that does FS_LOCATIONS discovery.
>>>     Assuming that FS_LOCATIONS does provide a useful feature, this
>>>     would not be our preferred solution, I guess ...
>>> (2) We prevent NFS v4.1 servers to use FS_LOCATIONS (like my patch
>>>     implements) and additionally allow for the opt-out with
>>>     notrunkdiscovery mount option. This fixes the known regression
>>>     with Qnap knfsd (without requiring user intervention) and still
>>>     allows for FS_LOCATIONS to be useful with NFSv4.2 servers that
>>>     support this. The disadvantage is that we won't use the feature
>>>     on NFSv4.1 servers which might support this feature perfectly
>>>     (and there's no opt-in possibility). And the risk is that there
>>>     might be NFSv4.2 servers out there that also misreport
>>>     FS_LOCATIONS support and still need manual intervention (which
>>>     at least is possible with notrunkdiscovery).
>>> (3) We make this feature an opt-in thing and require users to
>>>     pass trunkdiscovery to take advantage of the feature.
>>>     This has zero risk of breakage (unless we screw up the patch),
>>>     but always requires user intervention to take advantage of
>>>     the FS_LOCATIONS feature.
>>> (4) Identify a way to recover from the mount with FS_LOCATIONS
>>>     against the broken server, so instead of hanging we do just
>>>     turn this off if we find it not to work. Disadavantage is that
>>>     this adds complexity and might stall the mounting for a while
>>>     until the recovery worked. The complexity bears the risk for
>>>     us screwing up.
>>> 
>>> I personally find solutions 2 -- 4 acceptable.
>>> 
>>> If the experts see (4) as simple enough, it might be worth a try.
>>> Otherwise (2) or (3) would seem the way to go from my perspective.
>> 
>> Any thought ls?
> 
> I think (3) is the best way, and perhaps using sysfs to toggle it would
> be a solution to the problems presented by a mount option.
> 
> I'm worried that this issue is being ignored because that's usually what
> happens when requests/patches are proposed that violate the policy of "we do
> not fix the client for server bugs".  In this case that policy conficts with
> "no user visible regressions".
> 
> Can anyone declare which policy takes precedent?

"No regressions" probably takes precedent. IMO 1) should be done
immediately.

This is not a server bug, necessarily. The server is responding
within the realm of what is allowed by specification and I can
see cases where a server might have a good reason to DELAY an
fs_locations request for a while.

So I think once 1) has been done and backported to stable, the
client functionality should be restored via 4) to ensure that a
server can't delay a client mount indefinitely. (Although I think
I've stated that preference before).

I don't see any need to involve a human in making the decision
to try fs_locations. The client should try fs_locations and if
it doesn't work, just move on as quickly as it can. As always,
I don't relish adding more administrative controls if we can
avoid it. Such controls add to our test matrix and our long
term technical debt. Easy to add, but very difficult to change
or remove once merged. I can't see an upside here.


--
Chuck Lever
Olga Kornievskaia March 16, 2022, 4:09 p.m. UTC | #11
On Wed, Mar 16, 2022 at 11:14 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>
>
> > On Mar 16, 2022, at 8:39 AM, Benjamin Coddington <bcodding@redhat.com> wrote:
> >
> > On 25 Feb 2022, at 17:48, Kurt Garloff wrote:
> >
> >> Hi,
> >>
> >> Am 24. Februar 2022 14:42:41 MEZ schrieb Kurt Garloff <kurt@garloff.de>:
> >>> Hi Olga,
> >>> [...]
> >>>
> >>> I see a number of possibilities to resolve this:
> >>> (0) We pretend it's not a problem that's serious enough to take
> >>>     action (and ensure that we do document this new option well).
> >>> (1) We revert the patch that does FS_LOCATIONS discovery.
> >>>     Assuming that FS_LOCATIONS does provide a useful feature, this
> >>>     would not be our preferred solution, I guess ...
> >>> (2) We prevent NFS v4.1 servers to use FS_LOCATIONS (like my patch
> >>>     implements) and additionally allow for the opt-out with
> >>>     notrunkdiscovery mount option. This fixes the known regression
> >>>     with Qnap knfsd (without requiring user intervention) and still
> >>>     allows for FS_LOCATIONS to be useful with NFSv4.2 servers that
> >>>     support this. The disadvantage is that we won't use the feature
> >>>     on NFSv4.1 servers which might support this feature perfectly
> >>>     (and there's no opt-in possibility). And the risk is that there
> >>>     might be NFSv4.2 servers out there that also misreport
> >>>     FS_LOCATIONS support and still need manual intervention (which
> >>>     at least is possible with notrunkdiscovery).
> >>> (3) We make this feature an opt-in thing and require users to
> >>>     pass trunkdiscovery to take advantage of the feature.
> >>>     This has zero risk of breakage (unless we screw up the patch),
> >>>     but always requires user intervention to take advantage of
> >>>     the FS_LOCATIONS feature.
> >>> (4) Identify a way to recover from the mount with FS_LOCATIONS
> >>>     against the broken server, so instead of hanging we do just
> >>>     turn this off if we find it not to work. Disadavantage is that
> >>>     this adds complexity and might stall the mounting for a while
> >>>     until the recovery worked. The complexity bears the risk for
> >>>     us screwing up.
> >>>
> >>> I personally find solutions 2 -- 4 acceptable.
> >>>
> >>> If the experts see (4) as simple enough, it might be worth a try.
> >>> Otherwise (2) or (3) would seem the way to go from my perspective.
> >>
> >> Any thought ls?
> >
> > I think (3) is the best way, and perhaps using sysfs to toggle it would
> > be a solution to the problems presented by a mount option.
> >
> > I'm worried that this issue is being ignored because that's usually what
> > happens when requests/patches are proposed that violate the policy of "we do
> > not fix the client for server bugs".  In this case that policy conficts with
> > "no user visible regressions".
> >
> > Can anyone declare which policy takes precedent?
>
> "No regressions" probably takes precedent. IMO 1) should be done
> immediately.
>
> This is not a server bug, necessarily. The server is responding
> within the realm of what is allowed by specification and I can
> see cases where a server might have a good reason to DELAY an
> fs_locations request for a while.
>
> So I think once 1) has been done and backported to stable, the
> client functionality should be restored via 4) to ensure that a
> server can't delay a client mount indefinitely. (Although I think
> I've stated that preference before).

Reverting the patch is equivalent to introducing a mount option (with
what I'm hearing a preference of notrunking being the default) but
possibly better. It solves the problems of the broken servers and it
allows servers that want this functionality to use it.

> I don't see any need to involve a human in making the decision
> to try fs_locations. The client should try fs_locations and if
> it doesn't work, just move on as quickly as it can. As always,
> I don't relish adding more administrative controls if we can
> avoid it. Such controls add to our test matrix and our long
> term technical debt. Easy to add, but very difficult to change
> or remove once merged. I can't see an upside here.
>
>
> --
> Chuck Lever
>
>
>
Kurt Garloff March 31, 2022, 9:47 p.m. UTC | #12
Hi Olga, Chuck,

On 16/03/2022 17:09, Olga Kornievskaia wrote:
> On Wed, Mar 16, 2022 at 11:14 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>>> On Mar 16, 2022, at 8:39 AM, Benjamin Coddington <bcodding@redhat.com> wrote:
>>>
>>> On 25 Feb 2022, at 17:48, Kurt Garloff wrote:
>>>
>>>> Hi,
>>>>
>>>> Am 24. Februar 2022 14:42:41 MEZ schrieb Kurt Garloff <kurt@garloff.de>:
>>>>> Hi Olga,
>>>>> [...]
>>>>>
>>>>> I see a number of possibilities to resolve this:
>>>>> (0) We pretend it's not a problem that's serious enough to take
>>>>>      action (and ensure that we do document this new option well).
>>>>> (1) We revert the patch that does FS_LOCATIONS discovery.
>>>>>      Assuming that FS_LOCATIONS does provide a useful feature, this
>>>>>      would not be our preferred solution, I guess ...
>>>>> (2) We prevent NFS v4.1 servers to use FS_LOCATIONS (like my patch
>>>>>      implements) and additionally allow for the opt-out with
>>>>>      notrunkdiscovery mount option. This fixes the known regression
>>>>>      with Qnap knfsd (without requiring user intervention) and still
>>>>>      allows for FS_LOCATIONS to be useful with NFSv4.2 servers that
>>>>>      support this. The disadvantage is that we won't use the feature
>>>>>      on NFSv4.1 servers which might support this feature perfectly
>>>>>      (and there's no opt-in possibility). And the risk is that there
>>>>>      might be NFSv4.2 servers out there that also misreport
>>>>>      FS_LOCATIONS support and still need manual intervention (which
>>>>>      at least is possible with notrunkdiscovery).
>>>>> (3) We make this feature an opt-in thing and require users to
>>>>>      pass trunkdiscovery to take advantage of the feature.
>>>>>      This has zero risk of breakage (unless we screw up the patch),
>>>>>      but always requires user intervention to take advantage of
>>>>>      the FS_LOCATIONS feature.
>>>>> (4) Identify a way to recover from the mount with FS_LOCATIONS
>>>>>      against the broken server, so instead of hanging we do just
>>>>>      turn this off if we find it not to work. Disadavantage is that
>>>>>      this adds complexity and might stall the mounting for a while
>>>>>      until the recovery worked. The complexity bears the risk for
>>>>>      us screwing up.
>>>>>
>>>>> I personally find solutions 2 -- 4 acceptable.
>>>>>
>>>>> If the experts see (4) as simple enough, it might be worth a try.
>>>>> Otherwise (2) or (3) would seem the way to go from my perspective.
>>>> Any thought ls?
>>> I think (3) is the best way, and perhaps using sysfs to toggle it would
>>> be a solution to the problems presented by a mount option.
>>>
>>> I'm worried that this issue is being ignored because that's usually what
>>> happens when requests/patches are proposed that violate the policy of "we do
>>> not fix the client for server bugs".  In this case that policy conficts with
>>> "no user visible regressions".
>>>
>>> Can anyone declare which policy takes precedent?
>> "No regressions" probably takes precedent. IMO 1) should be done
>> immediately.
>>
>> This is not a server bug, necessarily. The server is responding
>> within the realm of what is allowed by specification and I can
>> see cases where a server might have a good reason to DELAY an
>> fs_locations request for a while.
>>
>> So I think once 1) has been done and backported to stable, the
>> client functionality should be restored via 4) to ensure that a
>> server can't delay a client mount indefinitely. (Although I think
>> I've stated that preference before).
> Reverting the patch is equivalent to introducing a mount option (with
> what I'm hearing a preference of notrunking being the default) but
> possibly better. It solves the problems of the broken servers and it
> allows servers that want this functionality to use it.

I agree with reverting and then introducing and opt-in setting instead.

I have not seen this submitted yet (though I have to admit that
I may have missed it).

So who is going to code this. If there is noone who has capacity
to do so, I can certainly jump in, though I am probably the least
skilled person in this conversation.

>> I don't see any need to involve a human in making the decision
>> to try fs_locations. The client should try fs_locations and if
>> it doesn't work, just move on as quickly as it can. As always,
>> I don't relish adding more administrative controls if we can
>> avoid it. Such controls add to our test matrix and our long
>> term technical debt. Easy to add, but very difficult to change
>> or remove once merged. I can't see an upside here.

That would be the better approach, but it also certainly is less
straight-forward to implement.
I'm happy to test, if we get some patch to look at short-term.

Otherwise, I'd suggest to go for reversion first and keep this
plan in the back of our minds for later execution.

Thanks.
diff mbox series

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d1f34229e11a..84c080ddfd01 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -857,7 +857,8 @@  static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
 	}
 
 	if (clp->rpc_ops->discover_trunking != NULL &&
-			(server->caps & NFS_CAP_FS_LOCATIONS)) {
+			(server->caps & NFS_CAP_FS_LOCATIONS &&
+			 !(server->flags & NFS_MOUNT_NOTRUNK_DISCOVERY))) {
 		error = clp->rpc_ops->discover_trunking(server, mntfh);
 		if (error < 0)
 			return error;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index ea17fa1f31ec..ad1448a63aa0 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -80,6 +80,7 @@  enum nfs_param {
 	Opt_source,
 	Opt_tcp,
 	Opt_timeo,
+	Opt_trunkdiscovery,
 	Opt_udp,
 	Opt_v,
 	Opt_vers,
@@ -180,6 +181,7 @@  static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_string("source",	Opt_source),
 	fsparam_flag  ("tcp",		Opt_tcp),
 	fsparam_u32   ("timeo",		Opt_timeo),
+	fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
 	fsparam_flag  ("udp",		Opt_udp),
 	fsparam_flag  ("v2",		Opt_v),
 	fsparam_flag  ("v3",		Opt_v),
@@ -529,6 +531,12 @@  static int nfs_fs_context_parse_param(struct fs_context *fc,
 		else
 			ctx->flags &= ~NFS_MOUNT_NOCTO;
 		break;
+	case Opt_trunkdiscovery:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NOTRUNK_DISCOVERY;
+		else
+			ctx->flags &= ~NFS_MOUNT_NOTRUNK_DISCOVERY;
+		break;
 	case Opt_ac:
 		if (result.negated)
 			ctx->flags |= NFS_MOUNT_NOAC;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index ca0959e51e81..d0920d7f5f9e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -151,6 +151,7 @@  struct nfs_server {
 #define NFS_MOUNT_SOFTREVAL		0x800000
 #define NFS_MOUNT_WRITE_EAGER		0x01000000
 #define NFS_MOUNT_WRITE_WAIT		0x02000000
+#define NFS_MOUNT_NOTRUNK_DISCOVERY	0x04000000
 
 	unsigned int		fattr_valid;	/* Valid attributes */
 	unsigned int		caps;		/* server capabilities */