synchronous AF_LOCAL connect

Message ID 20130220154751.GH14606@fieldses.org (mailing list archive)
State New, archived

Commit Message

J. Bruce Fields Feb. 20, 2013, 3:47 p.m. UTC
On Mon, Feb 18, 2013 at 05:54:25PM -0500, bfields wrote:
> The rpc code expects all connects to be done asynchronously by a
> workqueue.  But that doesn't seem necessary in the AF_LOCAL case.  The
> only user (rpcbind) actually wants the connect done in the context of a
> process with the right namespace.  (And that will be true of gss proxy
> too, which also wants to use AF_LOCAL.)
> 
> But maybe I'm missing something.
> 
> Also, I haven't really tried to understand this code--I just assumed I
> could basically call xs_local_setup_socket from ->setup instead of the
> workqueue, and that seems to work based on a very superficial test.  At
> a minimum I guess the PF_FSTRANS fiddling shouldn't be there.

Here it is with that and the other extraneous xprt stuff gone.

See any problem with doing this?

(This is basically my attempt to implement Trond's solution to the
AF_LOCAL-connect-in-a-namespace problem:

	http://mid.gmane.org/<4FA345DA4F4AE44899BD2B03EEEC2FA9092E0A40@SACEXCMBX04-PRD.hq.netapp.com>

)

--b.

commit 49413f389f7b38b4cbeb2d1c7f25ebcbf00f25f6
Author: J. Bruce Fields <bfields@redhat.com>
Date:   Fri Feb 15 17:54:26 2013 -0500

    rpc: simplify AF_LOCAL connect
    
    It doesn't appear that anyone actually needs to connect asynchronously.
    
    Also, using a workqueue for the connect means we lose the namespace
    information from the original process.  This is a problem since there's
    no way to explicitly pass in a filesystem namespace for resolution of an
    AF_LOCAL address.
    
    Signed-off-by: J. Bruce Fields <bfields@redhat.com>


Comments

Chuck Lever Feb. 20, 2013, 3:56 p.m. UTC | #1
On Feb 20, 2013, at 10:47 AM, "J. Bruce Fields" <bfields@fieldses.org> wrote:

> On Mon, Feb 18, 2013 at 05:54:25PM -0500, bfields wrote:
>> The rpc code expects all connects to be done asynchronously by a
>> workqueue.  But that doesn't seem necessary in the AF_LOCAL case.  The
>> only user (rpcbind) actually wants the connect done in the context of a
>> process with the right namespace.  (And that will be true of gss proxy
>> too, which also wants to use AF_LOCAL.)
>> 
>> But maybe I'm missing something.
>> 
>> Also, I haven't really tried to understand this code--I just assumed I
>> could basically call xs_local_setup_socket from ->setup instead of the
>> workqueue, and that seems to work based on a very superficial test.  At
>> a minimum I guess the PF_FSTRANS fiddling shouldn't be there.
> 
> Here it is with that and the other extraneous xprt stuff gone.
> 
> See any problem with doing this?

Nothing is screaming at me.  As long as an AF_LOCAL connect operation doesn't ever sleep, this should be safe, I think.

How did you test it?


> (This is basically my attempt to implement Trond's solution to the
> AF_LOCAL-connect-in-a-namespace problem:
> 
> 	http://mid.gmane.org/<4FA345DA4F4AE44899BD2B03EEEC2FA9092E0A40@SACEXCMBX04-PRD.hq.netapp.com>
> 
> )
> 
> --b.
> 
> commit 49413f389f7b38b4cbeb2d1c7f25ebcbf00f25f6
> Author: J. Bruce Fields <bfields@redhat.com>
> Date:   Fri Feb 15 17:54:26 2013 -0500
> 
>    rpc: simplify AF_LOCAL connect
> 
>    It doesn't appear that anyone actually needs to connect asynchronously.
> 
>    Also, using a workqueue for the connect means we lose the namespace
>    information from the original process.  This is a problem since there's
>    no way to explicitly pass in a filesystem namespace for resolution of an
>    AF_LOCAL address.
> 
>    Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> 
> [... quoted patch trimmed; the full patch appears at the bottom of this page ...]
J. Bruce Fields Feb. 20, 2013, 4:03 p.m. UTC | #2
On Wed, Feb 20, 2013 at 10:56:54AM -0500, Chuck Lever wrote:
> 
> On Feb 20, 2013, at 10:47 AM, "J. Bruce Fields" <bfields@fieldses.org>
> wrote:
> 
> > On Mon, Feb 18, 2013 at 05:54:25PM -0500, bfields wrote:
> >> [... original posting trimmed ...]
> > 
> > Here it is with that and the other extraneous xprt stuff gone.
> > 
> > See any problem with doing this?
> 
> Nothing is screaming at me.  As long as an AF_LOCAL connect operation
> doesn't ever sleep, this should be safe, I think.

I'm sure it must sleep.  Why would that make any difference?

> How did you test it?

I'm just doing my usual set of connectathon runs, and assuming mounts
would fail if the server's rpcbind registration failed.

--b.

Chuck Lever Feb. 20, 2013, 4:20 p.m. UTC | #3
On Feb 20, 2013, at 11:03 AM, J. Bruce Fields <bfields@fieldses.org> wrote:

> On Wed, Feb 20, 2013 at 10:56:54AM -0500, Chuck Lever wrote:
>> 
>> On Feb 20, 2013, at 10:47 AM, "J. Bruce Fields" <bfields@fieldses.org>
>> wrote:
>> 
>>> On Mon, Feb 18, 2013 at 05:54:25PM -0500, bfields wrote:
>>>> [... original posting trimmed ...]
>>> 
>>> Here it is with that and the other extraneous xprt stuff gone.
>>> 
>>> See any problem with doing this?
>> 
>> Nothing is screaming at me.  As long as an AF_LOCAL connect operation
>> doesn't ever sleep, this should be safe, I think.
> 
> I'm sure it must sleep.  Why would that make any difference?

As I understand it, sometimes an ASYNC RPC task is driving the connect, and such a task must never sleep when calling outside of rpciod.  rpciod must be allowed to put that task on a wait queue and go do other work if the connect operation doesn't succeed immediately, otherwise all ASYNC RPC operations hang (or worse, an oops occurs).
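For context, a simplified sketch of the pattern the other transports
follow (names from the 3.8-era net/sunrpc/xprt.c and xprtsock.c; error
and state handling elided).  The task never blocks in the connect path
itself: it is parked on a wait queue while the real connect runs later
on rpciod's workqueue.

	/* xprt_connect(): park the task until the connect completes */
	rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
	xprt->ops->connect(task);

	/* xs_connect() (the ->connect op): defer the real work */
	queue_delayed_work(rpciod_workqueue,
			   &transport->connect_worker, 0);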


>> How did you test it?
> 
> I'm just doing my usual set of connectathon runs, and assuming mounts
> would fail if the server's rpcbind registration failed.

Have you tried killing rpcbind first to see how the error cases are handled?

Does rpcbind get the registration's "owner" field correct when namespaces are involved?
J. Bruce Fields Feb. 20, 2013, 4:34 p.m. UTC | #4
On Wed, Feb 20, 2013 at 11:20:05AM -0500, Chuck Lever wrote:
> 
> On Feb 20, 2013, at 11:03 AM, J. Bruce Fields <bfields@fieldses.org>
> wrote:
> 
> > On Wed, Feb 20, 2013 at 10:56:54AM -0500, Chuck Lever wrote:
> >> 
> >> On Feb 20, 2013, at 10:47 AM, "J. Bruce Fields"
> >> <bfields@fieldses.org> wrote:
> >> 
> >>> On Mon, Feb 18, 2013 at 05:54:25PM -0500, bfields wrote:
> >>>> [... original posting trimmed ...]
> >>> 
> >>> Here it is with that and the other extraneous xprt stuff gone.
> >>> 
> >>> See any problem with doing this?
> >> 
> >> Nothing is screaming at me.  As long as an AF_LOCAL connect
> >> operation doesn't ever sleep, this should be safe, I think.
> > 
> > I'm sure it must sleep.  Why would that make any difference?
> 
> As I understand it, sometimes an ASYNC RPC task is driving the
> connect, and such a task must never sleep when calling outside of
> rpciod.

AF_LOCAL is currently only used to register rpc services.  I can't see
any case when it's called asynchronously.

(And the same will be true of the gss-proxy calls, which also plan to
use AF_LOCAL.)

> rpciod must be allowed to put that task on a wait queue and
> go do other work if the connect operation doesn't succeed immediately,
> otherwise all ASYNC RPC operations hang (or worse, an oops occurs).
> 
> >> How did you test it?
> > 
> > I'm just doing my usual set of connectathon runs, and assuming mounts
> > would fail if the server's rpcbind registration failed.
> 
> Have you tried killing rpcbind first to see how the error cases are handled?

No, thanks for the suggestion, I'll check.

> Does rpcbind get the registration's "owner" field correct when
> namespaces are involved?

Looking at rpcb_clnt.c....  I only ever see r_owner set to "" or "0".  I
can't see why that would need to change in a container.

--b.
Chuck Lever Feb. 20, 2013, 5:04 p.m. UTC | #5
On Feb 20, 2013, at 11:34 AM, "J. Bruce Fields" <bfields@fieldses.org> wrote:

> On Wed, Feb 20, 2013 at 11:20:05AM -0500, Chuck Lever wrote:
>> 
>> On Feb 20, 2013, at 11:03 AM, J. Bruce Fields <bfields@fieldses.org>
>> wrote:
>> 
>>> On Wed, Feb 20, 2013 at 10:56:54AM -0500, Chuck Lever wrote:
>>>> 
>>>> On Feb 20, 2013, at 10:47 AM, "J. Bruce Fields"
>>>> <bfields@fieldses.org> wrote:
>>>> 
>>>>> On Mon, Feb 18, 2013 at 05:54:25PM -0500, bfields wrote:
>>>>>> [... original posting trimmed ...]
>>>>> 
>>>>> Here it is with that and the other extraneous xprt stuff gone.
>>>>> 
>>>>> See any problem with doing this?
>>>> 
>>>> Nothing is screaming at me.  As long as an AF_LOCAL connect
>>>> operation doesn't ever sleep, this should be safe, I think.
>>> 
>>> I'm sure it must sleep.  Why would that make any difference?
>> 
>> As I understand it, sometimes an ASYNC RPC task is driving the
>> connect, and such a task must never sleep when calling outside of
>> rpciod.
> 
> AF_LOCAL is currently only used to register rpc services.  I can't see
> any case when it's called asynchronously.
> 
> (And the same will be true of the gss-proxy calls, which also plan to
> use AF_LOCAL.)

Yes, but AF_LOCAL is supposed to be a generic transport for RPC.  This is not a feature we just made up, it's actually a well-defined API that exists on other platforms (it's even specified in RFCs).  Right now I would hesitate to restrict the use of AF_LOCAL upcalls to only synchronous contexts, because eventually we may want to use the transport in asynchronous contexts.

If we were to go with using a synchronous connect, however, I think there should be some kind of safety check to make sure the xs connect function is not being invoked from an asynchronous context.  This is a restriction that does not exist for other transports supported by the kernel RPC client, so it should be underscored in the code.

Alternately, perhaps your new code could continue to invoke the kernel connect operation in a non-blocking mode, and simply have the local transport setup function fail if the non-blocking connect operation does not return with success.  That would be an interesting test to see if an AF_LOCAL connect operation actually does try to sleep in common code paths.
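One possible shape for that alternative (a sketch only:
xs_local_try_connect is a hypothetical name, kernel_connect() and
xs_addr() are the existing helpers, and treating -EAGAIN/-EINPROGRESS
as "the connect would have blocked" is an assumption about non-blocking
AF_UNIX stream connects):

	static int xs_local_try_connect(struct rpc_xprt *xprt,
					struct socket *sock)
	{
		int status;

		/* O_NONBLOCK: fail immediately rather than sleep */
		status = kernel_connect(sock, xs_addr(xprt), xprt->addrlen,
					O_NONBLOCK);
		if (status == -EAGAIN || status == -EINPROGRESS)
			status = -ENOTCONN;	/* would have slept */
		return status;
	}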


> 
>> rpciod must be allowed to put that task on a wait queue and
>> go do other work if the connect operation doesn't succeed immediately,
>> otherwise all ASYNC RPC operations hang (or worse, an oops occurs).
>> 
>>>> How did you test it?
>>> 
>>> I'm just doing my usual set of connectathon runs, and assuming mounts
>>> would fail if the server's rpcbind registration failed.
>> 
>> Have you tried killing rpcbind first to see how the error cases are handled?
> 
> No, thanks for the suggestion, I'll check.
> 
>> Does rpcbind get the registration's "owner" field correct when
>> namespaces are involved?
> 
> Looking at rpcb_clnt.c....  I only ever see r_owner set to "" or "0".  I
> can't see why that would need to change in a container.

rpcbind scrapes the remote end of the socket to see who's calling.  IIRC the r_owner field is ignored if it's not root who is making the registration request.

This is the raison d'être to register RPC services via AF_LOCAL transports.  Thus it should be explicitly tested that rpcbind gets registration correct after changes are made to AF_LOCAL support in the kernel's RPC client.
Trond Myklebust Feb. 20, 2013, 5:27 p.m. UTC | #6
On Wed, 2013-02-20 at 12:04 -0500, Chuck Lever wrote:

> Yes, but AF_LOCAL is supposed to be a generic transport for RPC.  This is not a feature we just made up, it's actually a well-defined API that exists on other platforms (it's even specified in RFCs).  Right now I would hesitate to restrict the use of AF_LOCAL upcalls to only synchronous contexts, because eventually we may want to use the transport in asynchronous contexts.

The whole problem is that it is a piss-poorly defined feature in an
asynchronous kernel context.

Sockets carry around a well-defined net namespace context that allows
them to resolve IP addresses. However they carry none of the file
namespace context information that is required to make sense of AF_LOCAL
"addresses".

IOW we have 3 options:

     1. Drop AF_LOCAL support altogether
     2. Add file namespace context to the RPC or socket layers
     3. Drop asynchronous support, so that we have a reliable
        userspace-defined context.

1) involves a user space api change, which will bring down the iron fist
of the Finn.
2) involves cooperation from the VFS and socket folks which doesn't seem
to be happening.

so that leaves (3), which is perfectly doable since we do _not_ want to
expose the rpc layer to anything outside the kernel. It's not intended
as a generic libtirpc...

> If we were to go with using a synchronous connect, however, I think there should be some kind of safety check to make sure the xs connect function is not being invoked from an asynchronous context.  This is a restriction that does not exist for other transports supported by the kernel RPC client, so it should be underscored in the code.

void xs_connect_local(struct rpc_task *task)
{
	if (RPC_IS_ASYNC(task))
		rpc_exit(task, -ENOTCONN);
.....
}

...done.
Simo Sorce Feb. 20, 2013, 5:32 p.m. UTC | #7
On Wed, 2013-02-20 at 17:27 +0000, Myklebust, Trond wrote:
> [... Trond's reply quoted in full above; trimmed ...]

This seems the most reasonable approach to me too, and makes the code
simpler for now.

Simo.
Chuck Lever Feb. 20, 2013, 5:39 p.m. UTC | #8
On Feb 20, 2013, at 12:27 PM, "Myklebust, Trond" <Trond.Myklebust@netapp.com> wrote:

> On Wed, 2013-02-20 at 12:04 -0500, Chuck Lever wrote:
> 
>> Yes, but AF_LOCAL is supposed to be a generic transport for RPC.  This is not a feature we just made up, it's actually a well-defined API that exists on other platforms (it's even specified in RFCs).  Right now I would hesitate to restrict the use of AF_LOCAL upcalls to only synchronous contexts, because eventually we may want to use the transport in asynchronous contexts.
> 
> The whole problem is that it is a piss-poorly defined feature in an
> asynchronous kernel context.
> 
> Sockets carry around a well-defined net namespace context that allows
> them to resolve IP addresses. However they carry none of the file
> namespace context information that is required to make sense of AF_LOCAL
> "addresses".

I recognize this problem, but I fail to see how it is connected to asynchronicity in general.  The issue seems to be specifically how rpciod implements asynchronicity.

> IOW we have 3 options:
> 
>     1. Drop AF_LOCAL support altogether
>     2. Add file namespace context to the RPC or socket layers
>     3. Drop asynchronous support, so that we have a reliable
>        userspace-defined context.
> 
> 1) involves a user space api change, which will bring down the iron fist
> of the Finn.

The problem with 1) is that rpcbind uses a special feature of AF_LOCAL to protect registrations from being removed by a malicious or ignorant registrant.  That's why I added AF_LOCAL.  Somehow we would have to replace that feature.

> 2) involves cooperation from the VFS and socket folks which doesn't seem
> to be happening.

Yep, I'm aware of that.

> so that leaves (3), which is perfectly doable since we do _not_ want to
> expose the rpc layer to anything outside the kernel. It's not intended
> as a generic libtirpc...

I hoped for a better alternative, but I see that folks do not have the patience for that.  ;-)

> 
>> If we were to go with using a synchronous connect, however, I think there should be some kind of safety check to make sure the xs connect function is not being invoked from an asynchronous context.  This is a restriction that does not exist for other transports supported by the kernel RPC client, so it should be underscored in the code.
> 
> void xs_connect_local(struct rpc_task *task)
> {
> 	if (RPC_IS_ASYNC(task))
> 		rpc_exit(task, -ENOTCONN);
> .....
> }
> 
> ...done.

That's what I had in mind.  I might even add a WARN_ON_ONCE().
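For illustration, the combined check might look like this (a sketch;
the function name and the exact exit status are assumptions, and the
socket setup itself is elided):

	static void xs_local_connect(struct rpc_task *task)
	{
		/* AF_LOCAL connects are synchronous-only: warn (once)
		 * if an asynchronous task ever reaches this point */
		if (WARN_ON_ONCE(RPC_IS_ASYNC(task))) {
			rpc_exit(task, -ENOTCONN);
			return;
		}
		/* ... synchronous socket setup as before ... */
	}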
Trond Myklebust Feb. 20, 2013, 6:02 p.m. UTC | #9
On Wed, 2013-02-20 at 12:39 -0500, Chuck Lever wrote:
> On Feb 20, 2013, at 12:27 PM, "Myklebust, Trond" <Trond.Myklebust@netapp.com> wrote:
> 
> > On Wed, 2013-02-20 at 12:04 -0500, Chuck Lever wrote:
> > 
> >> Yes, but AF_LOCAL is supposed to be a generic transport for RPC.  This is not a feature we just made up, it's actually a well-defined API that exists on other platforms (it's even specified in RFCs).  Right now I would hesitate to restrict the use of AF_LOCAL upcalls to only synchronous contexts, because eventually we may want to use the transport in asynchronous contexts.
> > 
> > The whole problem is that it is a piss-poorly defined feature in an
> > asynchronous kernel context.
> > 
> > Sockets carry around a well-defined net namespace context that allows
> > them to resolve IP addresses. However they carry none of the file
> > namespace context information that is required to make sense of AF_LOCAL
> > "addresses".
> 
> I recognize this problem, but I fail to see how it is connected to asynchronicity in general.  The issue seems to be specifically how rpciod implements asynchronicity.

The problem is connected to asynchronicity in general because there is
no userspace context to fall back on. Asynchronous contexts are pretty
much by definition required to drop their connection to the original
context in order to be useful.

As for adding more userspace context information to rpciod, I refuse
point blank to even start that discussion until someone has a valid use
case.

> > IOW we have 3 options:
> > 
> >     1. Drop AF_LOCAL support altogether
> >     2. Add file namespace context to the RPC or socket layers
> >     3. Drop asynchronous support, so that we have a reliable
> >        userspace-defined context.
> > 
> > 1) involves a user space api change, which will bring down the iron fist
> > of the Finn.
> 
> The problem with 1) is that rpcbind uses a special feature of AF_LOCAL to protect registrations from being removed by a malicious or ignorant registrant.  That's why I added AF_LOCAL.  Somehow we would have to replace that feature.
> 
> > 2) involves cooperation from the VFS and socket folks which doesn't seem
> > to be happening.
> 
> Yep, I'm aware of that.
> 
> > so that leaves (3), which is perfectly doable since we do _not_ want to
> > expose the rpc layer to anything outside the kernel. It's not intended
> > as a generic libtirpc...
> 
> I hoped for a better alternative, but I see that folks do not have the patience for that.  ;-)
> 
> > 
> >> If we were to go with using a synchronous connect, however, I think there should be some kind of safety check to make sure the xs connect function is not being invoked from an asynchronous context.  This is a restriction that does not exist for other transports supported by the kernel RPC client, so it should be underscored in the code.
> > 
> > void xs_connect_local(struct rpc_task *task)
> > {
> > 	if (RPC_IS_ASYNC(task))
> > 		rpc_exit(task, -ENOTCONN);
> > .....
> > }
> > 
> > ...done.
> 
> That's what I had in mind.  I might even add a WARN_ON_ONCE().
>
J. Bruce Fields Feb. 20, 2013, 11:03 p.m. UTC | #10
On Wed, Feb 20, 2013 at 12:32:41PM -0500, Simo Sorce wrote:
> On Wed, 2013-02-20 at 17:27 +0000, Myklebust, Trond wrote:
> > [... Trond's reply quoted in full above; trimmed ...]
> 
> This seems the most reasonable approach to me too, and makes the code
> simpler for now.

OK, I've added that check and fixed some other bugs (thanks to Chuck for
some help in IRC).

I think that gets rpcbind working in containers fine.

gss-proxy has one more problem: it needs to do upcalls from nfsd threads
which won't have the right filesystem namespace.

I get a write from gss-proxy when it starts, and can do an initial
connect at that point using its context.  But if we disconnect after
that I'm stuck.

Does it cause any problems if I just set the idle_timeout to 0 for
AF_LOCAL?

--b.

Patch

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bbc0915..b7d60e4 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1866,57 +1866,14 @@  static int xs_local_finish_connecting(struct rpc_xprt *xprt,
  * @xprt: RPC transport to connect
  * @transport: socket transport to connect
  * @create_sock: function to create a socket of the correct type
- *
- * Invoked by a work queue tasklet.
  */
 static void xs_local_setup_socket(struct work_struct *work)
 {
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, connect_worker.work);
 	struct rpc_xprt *xprt = &transport->xprt;
-	struct socket *sock;
-	int status = -EIO;
-
-	current->flags |= PF_FSTRANS;
-
-	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
-	status = __sock_create(xprt->xprt_net, AF_LOCAL,
-					SOCK_STREAM, 0, &sock, 1);
-	if (status < 0) {
-		dprintk("RPC:       can't create AF_LOCAL "
-			"transport socket (%d).\n", -status);
-		goto out;
-	}
-	xs_reclassify_socketu(sock);
 
-	dprintk("RPC:       worker connecting xprt %p via AF_LOCAL to %s\n",
-			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
-
-	status = xs_local_finish_connecting(xprt, sock);
-	switch (status) {
-	case 0:
-		dprintk("RPC:       xprt %p connected to %s\n",
-				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
-		xprt_set_connected(xprt);
-		break;
-	case -ENOENT:
-		dprintk("RPC:       xprt %p: socket %s does not exist\n",
-				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
-		break;
-	case -ECONNREFUSED:
-		dprintk("RPC:       xprt %p: connection refused for %s\n",
-				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
-		break;
-	default:
-		printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
-				__func__, -status,
-				xprt->address_strings[RPC_DISPLAY_ADDR]);
-	}
-
-out:
-	xprt_clear_connecting(xprt);
-	xprt_wake_pending_tasks(xprt, status);
-	current->flags &= ~PF_FSTRANS;
+	xprt_wake_pending_tasks(xprt, -EIO);
 }
 
 #ifdef CONFIG_SUNRPC_SWAP
@@ -2588,6 +2545,48 @@  static const struct rpc_timeout xs_local_default_timeout = {
 	.to_retries = 2,
 };
 
+
+static int xs_local_connect(struct sock_xprt *transport)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock;
+	int status = -EIO;
+
+	status = __sock_create(xprt->xprt_net, AF_LOCAL,
+					SOCK_STREAM, 0, &sock, 1);
+	if (status < 0) {
+		dprintk("RPC:       can't create AF_LOCAL "
+			"transport socket (%d).\n", -status);
+		return status;
+	}
+	xs_reclassify_socketu(sock);
+
+	dprintk("RPC:       worker connecting xprt %p via AF_LOCAL to %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+
+	status = xs_local_finish_connecting(xprt, sock);
+	switch (status) {
+	case 0:
+		dprintk("RPC:       xprt %p connected to %s\n",
+				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		xprt_set_connected(xprt);
+		break;
+	case -ENOENT:
+		dprintk("RPC:       xprt %p: socket %s does not exist\n",
+				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		break;
+	case -ECONNREFUSED:
+		dprintk("RPC:       xprt %p: connection refused for %s\n",
+				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		break;
+	default:
+		printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
+				__func__, -status,
+				xprt->address_strings[RPC_DISPLAY_ADDR]);
+	}
+	return status;
+}
+
 /**
  * xs_setup_local - Set up transport to use an AF_LOCAL socket
  * @args: rpc transport creation arguments
@@ -2630,6 +2629,9 @@  static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
 		INIT_DELAYED_WORK(&transport->connect_worker,
 					xs_local_setup_socket);
 		xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
+		ret = ERR_PTR(xs_local_connect(transport));
+		if (ret)
+			goto out_err;
 		break;
 	default:
 		ret = ERR_PTR(-EAFNOSUPPORT);