[RFC,04/10] SUNRPC: Add common byte-swapped RPC header constants
diff mbox series

Message ID 20190201195747.11389.75164.stgit@manet.1015granger.net
State New
Headers show
Series
  • SUNRPC GSS overhaul
Related show

Commit Message

Chuck Lever Feb. 1, 2019, 7:57 p.m. UTC
Byte-swapping causes a CPU pipeline bubble on some processors. When
a decoder is comparing an on-the-wire value for equality, byte-
swapping can be avoided by comparing it directly to a pre-byte-
swapped constant value.

The current set of pre-xdr'd constants is missing some common values
used in the RPC header. Fill those out.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/auth_gss.h |    5 ++-
 include/linux/sunrpc/xdr.h      |   66 ++++++++++++++++++++++++---------------
 2 files changed, 45 insertions(+), 26 deletions(-)

Comments

Tom Talpey Feb. 2, 2019, 2:30 a.m. UTC | #1
On 2/1/2019 2:57 PM, Chuck Lever wrote:
> Byte-swapping causes a CPU pipeline bubble on some processors. When
> a decoder is comparing an on-the-wire value for equality, byte-
> swapping can be avoided by comparing it directly to a pre-byte-
> swapped constant value.
> 
> The current set of pre-xdr'd constants is missing some common values
> used in the RPC header. Fill those out.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>   include/linux/sunrpc/auth_gss.h |    5 ++-
>   include/linux/sunrpc/xdr.h      |   66 ++++++++++++++++++++++++---------------
>   2 files changed, 45 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
> index 30427b7..adc4be2 100644
> --- a/include/linux/sunrpc/auth_gss.h
> +++ b/include/linux/sunrpc/auth_gss.h
> @@ -19,7 +19,10 @@
>   #include <linux/sunrpc/svc.h>
>   #include <linux/sunrpc/gss_api.h>
>   
> -#define RPC_GSS_VERSION		1
> +enum {
> +	RPC_GSS_VERSION = 1,
> +	rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION)
> +};
>   
>   #define MAXSEQ 0x80000000 /* maximum legal sequence number, from rfc 2203 */
>   
> diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
> index 787939d..69161cb 100644
> --- a/include/linux/sunrpc/xdr.h
> +++ b/include/linux/sunrpc/xdr.h
> @@ -17,6 +17,7 @@
>   #include <asm/byteorder.h>
>   #include <asm/unaligned.h>
>   #include <linux/scatterlist.h>
> +#include <linux/sunrpc/msg_prot.h>
>   
>   struct bio_vec;
>   struct rpc_rqst;
> @@ -79,31 +80,46 @@ struct xdr_buf {
>   	buf->buflen = len;
>   }
>   
> -/*
> - * pre-xdr'ed macros.
> - */
> -
> -#define	xdr_zero	cpu_to_be32(0)
> -#define	xdr_one		cpu_to_be32(1)
> -#define	xdr_two		cpu_to_be32(2)
> -
> -#define	rpc_success		cpu_to_be32(RPC_SUCCESS)
> -#define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
> -#define	rpc_prog_mismatch	cpu_to_be32(RPC_PROG_MISMATCH)
> -#define	rpc_proc_unavail	cpu_to_be32(RPC_PROC_UNAVAIL)
> -#define	rpc_garbage_args	cpu_to_be32(RPC_GARBAGE_ARGS)
> -#define	rpc_system_err		cpu_to_be32(RPC_SYSTEM_ERR)
> -#define	rpc_drop_reply		cpu_to_be32(RPC_DROP_REPLY)
> -
> -#define	rpc_auth_ok		cpu_to_be32(RPC_AUTH_OK)
> -#define	rpc_autherr_badcred	cpu_to_be32(RPC_AUTH_BADCRED)
> -#define	rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
> -#define	rpc_autherr_badverf	cpu_to_be32(RPC_AUTH_BADVERF)
> -#define	rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF)
> -#define	rpc_autherr_tooweak	cpu_to_be32(RPC_AUTH_TOOWEAK)
> -#define	rpcsec_gsserr_credproblem	cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
> -#define	rpcsec_gsserr_ctxproblem	cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
> -#define	rpc_autherr_oldseqnum	cpu_to_be32(101)
> +enum xdr_be32_equivalents {
> +	xdr_zero		= cpu_to_be32(0),
> +	xdr_one			= cpu_to_be32(1),
> +	xdr_two			= cpu_to_be32(2),

It is clever to use an enum to pre-compute these values, but
it becomes a concern that the type (and size) of an enum may
not be the same as values they may be compared to.

Commonly, code may compare them to int32, uint32, etc. What
guarantees are there that such comparisons will yield the
appropriate result, especially if a < or > test is performed?

Tom.

> +
> +	rpc_version		= cpu_to_be32(RPC_VERSION),
> +
> +	rpc_auth_null		= cpu_to_be32(RPC_AUTH_NULL),
> +	rpc_auth_unix		= cpu_to_be32(RPC_AUTH_UNIX),
> +	rpc_auth_short		= cpu_to_be32(RPC_AUTH_SHORT),
> +	rpc_auth_des		= cpu_to_be32(RPC_AUTH_DES),
> +	rpc_auth_krb		= cpu_to_be32(RPC_AUTH_KRB),
> +	rpc_auth_gss		= cpu_to_be32(RPC_AUTH_GSS),
> +
> +	rpc_call		= cpu_to_be32(RPC_CALL),
> +	rpc_reply		= cpu_to_be32(RPC_REPLY),
> +
> +	rpc_msg_accepted	= cpu_to_be32(RPC_MSG_ACCEPTED),
> +	rpc_msg_denied		= cpu_to_be32(RPC_MSG_DENIED),
> +
> +	rpc_success		= cpu_to_be32(RPC_SUCCESS),
> +	rpc_prog_unavail	= cpu_to_be32(RPC_PROG_UNAVAIL),
> +	rpc_prog_mismatch	= cpu_to_be32(RPC_PROG_MISMATCH),
> +	rpc_proc_unavail	= cpu_to_be32(RPC_PROC_UNAVAIL),
> +	rpc_garbage_args	= cpu_to_be32(RPC_GARBAGE_ARGS),
> +	rpc_system_err		= cpu_to_be32(RPC_SYSTEM_ERR),
> +	rpc_drop_reply		= cpu_to_be32(RPC_DROP_REPLY),
> +
> +	rpc_mismatch		= cpu_to_be32(RPC_MISMATCH),
> +	rpc_auth_error		= cpu_to_be32(RPC_AUTH_ERROR),
> +
> +	rpc_auth_ok		= cpu_to_be32(RPC_AUTH_OK),
> +	rpc_autherr_badcred	= cpu_to_be32(RPC_AUTH_BADCRED),
> +	rpc_autherr_rejectedcred = cpu_to_be32(RPC_AUTH_REJECTEDCRED),
> +	rpc_autherr_badverf	= cpu_to_be32(RPC_AUTH_BADVERF),
> +	rpc_autherr_rejectedverf = cpu_to_be32(RPC_AUTH_REJECTEDVERF),
> +	rpc_autherr_tooweak	= cpu_to_be32(RPC_AUTH_TOOWEAK),
> +	rpcsec_gsserr_credproblem = cpu_to_be32(RPCSEC_GSS_CREDPROBLEM),
> +	rpcsec_gsserr_ctxproblem = cpu_to_be32(RPCSEC_GSS_CTXPROBLEM),
> +};
>   
>   /*
>    * Miscellaneous XDR helper functions
> 
> 
>
Christoph Hellwig Feb. 2, 2019, 5:02 p.m. UTC | #2
On Fri, Feb 01, 2019 at 02:57:47PM -0500, Chuck Lever wrote:
> Byte-swapping causes a CPU pipeline bubble on some processors. When
> a decoder is comparing an on-the-wire value for equality, byte-
> swapping can be avoided by comparing it directly to a pre-byte-
> swapped constant value.

Which ones?
Chuck Lever Feb. 2, 2019, 10:46 p.m. UTC | #3
> On Feb 1, 2019, at 9:30 PM, Tom Talpey <tom@talpey.com> wrote:
> 
> On 2/1/2019 2:57 PM, Chuck Lever wrote:
>> Byte-swapping causes a CPU pipeline bubble on some processors. When
>> a decoder is comparing an on-the-wire value for equality, byte-
>> swapping can be avoided by comparing it directly to a pre-byte-
>> swapped constant value.
>> The current set of pre-xdr'd constants is missing some common values
>> used in the RPC header. Fill those out.
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>>  include/linux/sunrpc/auth_gss.h |    5 ++-
>>  include/linux/sunrpc/xdr.h      |   66 ++++++++++++++++++++++++---------------
>>  2 files changed, 45 insertions(+), 26 deletions(-)
>> diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
>> index 30427b7..adc4be2 100644
>> --- a/include/linux/sunrpc/auth_gss.h
>> +++ b/include/linux/sunrpc/auth_gss.h
>> @@ -19,7 +19,10 @@
>>  #include <linux/sunrpc/svc.h>
>>  #include <linux/sunrpc/gss_api.h>
>>  -#define RPC_GSS_VERSION		1
>> +enum {
>> +	RPC_GSS_VERSION = 1,
>> +	rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION)
>> +};
>>    #define MAXSEQ 0x80000000 /* maximum legal sequence number, from rfc 2203 */
>>  diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
>> index 787939d..69161cb 100644
>> --- a/include/linux/sunrpc/xdr.h
>> +++ b/include/linux/sunrpc/xdr.h
>> @@ -17,6 +17,7 @@
>>  #include <asm/byteorder.h>
>>  #include <asm/unaligned.h>
>>  #include <linux/scatterlist.h>
>> +#include <linux/sunrpc/msg_prot.h>
>>    struct bio_vec;
>>  struct rpc_rqst;
>> @@ -79,31 +80,46 @@ struct xdr_buf {
>>  	buf->buflen = len;
>>  }
>>  -/*
>> - * pre-xdr'ed macros.
>> - */
>> -
>> -#define	xdr_zero	cpu_to_be32(0)
>> -#define	xdr_one		cpu_to_be32(1)
>> -#define	xdr_two		cpu_to_be32(2)
>> -
>> -#define	rpc_success		cpu_to_be32(RPC_SUCCESS)
>> -#define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
>> -#define	rpc_prog_mismatch	cpu_to_be32(RPC_PROG_MISMATCH)
>> -#define	rpc_proc_unavail	cpu_to_be32(RPC_PROC_UNAVAIL)
>> -#define	rpc_garbage_args	cpu_to_be32(RPC_GARBAGE_ARGS)
>> -#define	rpc_system_err		cpu_to_be32(RPC_SYSTEM_ERR)
>> -#define	rpc_drop_reply		cpu_to_be32(RPC_DROP_REPLY)
>> -
>> -#define	rpc_auth_ok		cpu_to_be32(RPC_AUTH_OK)
>> -#define	rpc_autherr_badcred	cpu_to_be32(RPC_AUTH_BADCRED)
>> -#define	rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
>> -#define	rpc_autherr_badverf	cpu_to_be32(RPC_AUTH_BADVERF)
>> -#define	rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF)
>> -#define	rpc_autherr_tooweak	cpu_to_be32(RPC_AUTH_TOOWEAK)
>> -#define	rpcsec_gsserr_credproblem	cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
>> -#define	rpcsec_gsserr_ctxproblem	cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
>> -#define	rpc_autherr_oldseqnum	cpu_to_be32(101)
>> +enum xdr_be32_equivalents {
>> +	xdr_zero		= cpu_to_be32(0),
>> +	xdr_one			= cpu_to_be32(1),
>> +	xdr_two			= cpu_to_be32(2),
> 
> It is clever to use an enum to pre-compute these values, but

Perhaps not clever; it is a current Linux kernel coding
practice to use an enum in favor of a C macro for constants.


> it becomes a concern that the type (and size) of an enum may
> not be the same as values they may be compared to.

Indeed, an enum is a variably-sized signed integer, IIUC.


> Commonly, code may compare them to int32, uint32, etc. What
> guarantees are there that such comparisons will yield the
> appropriate result, especially if a < or > test is performed?

I believe for the purposes of assignment and equality comparison
the compiler will promote these to the size and sign of the
variable. We would never perform a greater or less than test with
these values, obviously.

However, they probably should have an obvious and well-defined
type, and I should leave the already-defined macros as they
are.


> Tom.
> 
>> +
>> +	rpc_version		= cpu_to_be32(RPC_VERSION),
>> +
>> +	rpc_auth_null		= cpu_to_be32(RPC_AUTH_NULL),
>> +	rpc_auth_unix		= cpu_to_be32(RPC_AUTH_UNIX),
>> +	rpc_auth_short		= cpu_to_be32(RPC_AUTH_SHORT),
>> +	rpc_auth_des		= cpu_to_be32(RPC_AUTH_DES),
>> +	rpc_auth_krb		= cpu_to_be32(RPC_AUTH_KRB),
>> +	rpc_auth_gss		= cpu_to_be32(RPC_AUTH_GSS),
>> +
>> +	rpc_call		= cpu_to_be32(RPC_CALL),
>> +	rpc_reply		= cpu_to_be32(RPC_REPLY),
>> +
>> +	rpc_msg_accepted	= cpu_to_be32(RPC_MSG_ACCEPTED),
>> +	rpc_msg_denied		= cpu_to_be32(RPC_MSG_DENIED),
>> +
>> +	rpc_success		= cpu_to_be32(RPC_SUCCESS),
>> +	rpc_prog_unavail	= cpu_to_be32(RPC_PROG_UNAVAIL),
>> +	rpc_prog_mismatch	= cpu_to_be32(RPC_PROG_MISMATCH),
>> +	rpc_proc_unavail	= cpu_to_be32(RPC_PROC_UNAVAIL),
>> +	rpc_garbage_args	= cpu_to_be32(RPC_GARBAGE_ARGS),
>> +	rpc_system_err		= cpu_to_be32(RPC_SYSTEM_ERR),
>> +	rpc_drop_reply		= cpu_to_be32(RPC_DROP_REPLY),
>> +
>> +	rpc_mismatch		= cpu_to_be32(RPC_MISMATCH),
>> +	rpc_auth_error		= cpu_to_be32(RPC_AUTH_ERROR),
>> +
>> +	rpc_auth_ok		= cpu_to_be32(RPC_AUTH_OK),
>> +	rpc_autherr_badcred	= cpu_to_be32(RPC_AUTH_BADCRED),
>> +	rpc_autherr_rejectedcred = cpu_to_be32(RPC_AUTH_REJECTEDCRED),
>> +	rpc_autherr_badverf	= cpu_to_be32(RPC_AUTH_BADVERF),
>> +	rpc_autherr_rejectedverf = cpu_to_be32(RPC_AUTH_REJECTEDVERF),
>> +	rpc_autherr_tooweak	= cpu_to_be32(RPC_AUTH_TOOWEAK),
>> +	rpcsec_gsserr_credproblem = cpu_to_be32(RPCSEC_GSS_CREDPROBLEM),
>> +	rpcsec_gsserr_ctxproblem = cpu_to_be32(RPCSEC_GSS_CTXPROBLEM),
>> +};
>>    /*
>>   * Miscellaneous XDR helper functions

--
Chuck Lever
Chuck Lever Feb. 2, 2019, 10:49 p.m. UTC | #4
> On Feb 2, 2019, at 12:02 PM, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Fri, Feb 01, 2019 at 02:57:47PM -0500, Chuck Lever wrote:
>> Byte-swapping causes a CPU pipeline bubble on some processors. When
>> a decoder is comparing an on-the-wire value for equality, byte-
>> swapping can be avoided by comparing it directly to a pre-byte-
>> swapped constant value.
> 
> Which ones?

I assume you mean on which processors have I observed CPU cycle
spikes around bswap instructions. I've seen this behavior only
on Intel processors of various families.

Would you prefer a different justification for this clean-up?


--
Chuck Lever
Trond Myklebust Feb. 3, 2019, 3 p.m. UTC | #5
On Sat, 2019-02-02 at 17:46 -0500, Chuck Lever wrote:
> > On Feb 1, 2019, at 9:30 PM, Tom Talpey <tom@talpey.com> wrote:
> > 
> > On 2/1/2019 2:57 PM, Chuck Lever wrote:
> > > Byte-swapping causes a CPU pipeline bubble on some processors.
> > > When
> > > a decoder is comparing an on-the-wire value for equality, byte-
> > > swapping can be avoided by comparing it directly to a pre-byte-
> > > swapped constant value.
> > > The current set of pre-xdr'd constants is missing some common
> > > values
> > > used in the RPC header. Fill those out.
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > >  include/linux/sunrpc/auth_gss.h |    5 ++-
> > >  include/linux/sunrpc/xdr.h      |   66 ++++++++++++++++++++++++-
> > > --------------
> > >  2 files changed, 45 insertions(+), 26 deletions(-)
> > > diff --git a/include/linux/sunrpc/auth_gss.h
> > > b/include/linux/sunrpc/auth_gss.h
> > > index 30427b7..adc4be2 100644
> > > --- a/include/linux/sunrpc/auth_gss.h
> > > +++ b/include/linux/sunrpc/auth_gss.h
> > > @@ -19,7 +19,10 @@
> > >  #include <linux/sunrpc/svc.h>
> > >  #include <linux/sunrpc/gss_api.h>
> > >  -#define RPC_GSS_VERSION		1
> > > +enum {
> > > +	RPC_GSS_VERSION = 1,
> > > +	rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION)
> > > +};
> > >    #define MAXSEQ 0x80000000 /* maximum legal sequence number,
> > > from rfc 2203 */
> > >  diff --git a/include/linux/sunrpc/xdr.h
> > > b/include/linux/sunrpc/xdr.h
> > > index 787939d..69161cb 100644
> > > --- a/include/linux/sunrpc/xdr.h
> > > +++ b/include/linux/sunrpc/xdr.h
> > > @@ -17,6 +17,7 @@
> > >  #include <asm/byteorder.h>
> > >  #include <asm/unaligned.h>
> > >  #include <linux/scatterlist.h>
> > > +#include <linux/sunrpc/msg_prot.h>
> > >    struct bio_vec;
> > >  struct rpc_rqst;
> > > @@ -79,31 +80,46 @@ struct xdr_buf {
> > >  	buf->buflen = len;
> > >  }
> > >  -/*
> > > - * pre-xdr'ed macros.
> > > - */
> > > -
> > > -#define	xdr_zero	cpu_to_be32(0)
> > > -#define	xdr_one		cpu_to_be32(1)
> > > -#define	xdr_two		cpu_to_be32(2)
> > > -
> > > -#define	rpc_success		cpu_to_be32(RPC_SUCCESS)
> > > -#define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
> > > -#define	rpc_prog_mismatch	cpu_to_be32(RPC_PROG_MISMATCH)
> > > -#define	rpc_proc_unavail	cpu_to_be32(RPC_PROC_UNAVAIL)
> > > -#define	rpc_garbage_args	cpu_to_be32(RPC_GARBAGE_ARGS)
> > > -#define	rpc_system_err		cpu_to_be32(RPC_SYSTEM_ER
> > > R)
> > > -#define	rpc_drop_reply		cpu_to_be32(RPC_DROP_REPL
> > > Y)
> > > -
> > > -#define	rpc_auth_ok		cpu_to_be32(RPC_AUTH_OK)
> > > -#define	rpc_autherr_badcred	cpu_to_be32(RPC_AUTH_BADCRED)
> > > -#define	rpc_autherr_rejectedcred
> > > cpu_to_be32(RPC_AUTH_REJECTEDCRED)
> > > -#define	rpc_autherr_badverf	cpu_to_be32(RPC_AUTH_BADVERF)
> > > -#define	rpc_autherr_rejectedverf
> > > cpu_to_be32(RPC_AUTH_REJECTEDVERF)
> > > -#define	rpc_autherr_tooweak	cpu_to_be32(RPC_AUTH_TOOWEAK)
> > > -#define	rpcsec_gsserr_credproblem	cpu_to_be32(RPCSEC_GSS_CR
> > > EDPROBLEM)
> > > -#define	rpcsec_gsserr_ctxproblem	cpu_to_be32(RPCSEC_GSS_CT
> > > XPROBLEM)
> > > -#define	rpc_autherr_oldseqnum	cpu_to_be32(101)
> > > +enum xdr_be32_equivalents {
> > > +	xdr_zero		= cpu_to_be32(0),
> > > +	xdr_one			= cpu_to_be32(1),
> > > +	xdr_two			= cpu_to_be32(2),
> > 
> > It is clever to use an enum to pre-compute these values, but
> 
> Perhaps not clever; it is a current Linux kernel coding
> practice to use an enum in favor of a C macro for constants.

Sure, but won't that confuse 'sparse' static checking? These constants
will no longer appear as being big endian.

Doesn't gcc's __builtin_bswap32() already compute the result at compile
time when you feed it a constant value? AFAICS it is supposed to, which
is why we use it directly in include/uapi/linux/swab.h instead of using
a special cased __builtin_constant_p().
Chuck Lever Feb. 3, 2019, 4:49 p.m. UTC | #6
> On Feb 3, 2019, at 10:00 AM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Sat, 2019-02-02 at 17:46 -0500, Chuck Lever wrote:
>>> On Feb 1, 2019, at 9:30 PM, Tom Talpey <tom@talpey.com> wrote:
>>> 
>>> On 2/1/2019 2:57 PM, Chuck Lever wrote:
>>>> Byte-swapping causes a CPU pipeline bubble on some processors.
>>>> When
>>>> a decoder is comparing an on-the-wire value for equality, byte-
>>>> swapping can be avoided by comparing it directly to a pre-byte-
>>>> swapped constant value.
>>>> The current set of pre-xdr'd constants is missing some common
>>>> values
>>>> used in the RPC header. Fill those out.
>>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>>> ---
>>>> include/linux/sunrpc/auth_gss.h |    5 ++-
>>>> include/linux/sunrpc/xdr.h      |   66 ++++++++++++++++++++++++-
>>>> --------------
>>>> 2 files changed, 45 insertions(+), 26 deletions(-)
>>>> diff --git a/include/linux/sunrpc/auth_gss.h
>>>> b/include/linux/sunrpc/auth_gss.h
>>>> index 30427b7..adc4be2 100644
>>>> --- a/include/linux/sunrpc/auth_gss.h
>>>> +++ b/include/linux/sunrpc/auth_gss.h
>>>> @@ -19,7 +19,10 @@
>>>> #include <linux/sunrpc/svc.h>
>>>> #include <linux/sunrpc/gss_api.h>
>>>> -#define RPC_GSS_VERSION		1
>>>> +enum {
>>>> +	RPC_GSS_VERSION = 1,
>>>> +	rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION)
>>>> +};
>>>>   #define MAXSEQ 0x80000000 /* maximum legal sequence number,
>>>> from rfc 2203 */
>>>> diff --git a/include/linux/sunrpc/xdr.h
>>>> b/include/linux/sunrpc/xdr.h
>>>> index 787939d..69161cb 100644
>>>> --- a/include/linux/sunrpc/xdr.h
>>>> +++ b/include/linux/sunrpc/xdr.h
>>>> @@ -17,6 +17,7 @@
>>>> #include <asm/byteorder.h>
>>>> #include <asm/unaligned.h>
>>>> #include <linux/scatterlist.h>
>>>> +#include <linux/sunrpc/msg_prot.h>
>>>>   struct bio_vec;
>>>> struct rpc_rqst;
>>>> @@ -79,31 +80,46 @@ struct xdr_buf {
>>>> 	buf->buflen = len;
>>>> }
>>>> -/*
>>>> - * pre-xdr'ed macros.
>>>> - */
>>>> -
>>>> -#define	xdr_zero	cpu_to_be32(0)
>>>> -#define	xdr_one		cpu_to_be32(1)
>>>> -#define	xdr_two		cpu_to_be32(2)
>>>> -
>>>> -#define	rpc_success		cpu_to_be32(RPC_SUCCESS)
>>>> -#define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
>>>> -#define	rpc_prog_mismatch	cpu_to_be32(RPC_PROG_MISMATCH)
>>>> -#define	rpc_proc_unavail	cpu_to_be32(RPC_PROC_UNAVAIL)
>>>> -#define	rpc_garbage_args	cpu_to_be32(RPC_GARBAGE_ARGS)
>>>> -#define	rpc_system_err		cpu_to_be32(RPC_SYSTEM_ER
>>>> R)
>>>> -#define	rpc_drop_reply		cpu_to_be32(RPC_DROP_REPL
>>>> Y)
>>>> -
>>>> -#define	rpc_auth_ok		cpu_to_be32(RPC_AUTH_OK)
>>>> -#define	rpc_autherr_badcred	cpu_to_be32(RPC_AUTH_BADCRED)
>>>> -#define	rpc_autherr_rejectedcred
>>>> cpu_to_be32(RPC_AUTH_REJECTEDCRED)
>>>> -#define	rpc_autherr_badverf	cpu_to_be32(RPC_AUTH_BADVERF)
>>>> -#define	rpc_autherr_rejectedverf
>>>> cpu_to_be32(RPC_AUTH_REJECTEDVERF)
>>>> -#define	rpc_autherr_tooweak	cpu_to_be32(RPC_AUTH_TOOWEAK)
>>>> -#define	rpcsec_gsserr_credproblem	cpu_to_be32(RPCSEC_GSS_CR
>>>> EDPROBLEM)
>>>> -#define	rpcsec_gsserr_ctxproblem	cpu_to_be32(RPCSEC_GSS_CT
>>>> XPROBLEM)
>>>> -#define	rpc_autherr_oldseqnum	cpu_to_be32(101)
>>>> +enum xdr_be32_equivalents {
>>>> +	xdr_zero		= cpu_to_be32(0),
>>>> +	xdr_one			= cpu_to_be32(1),
>>>> +	xdr_two			= cpu_to_be32(2),
>>> 
>>> It is clever to use an enum to pre-compute these values, but
>> 
>> Perhaps not clever; it is a current Linux kernel coding
>> practice to use an enum in favor of a C macro for constants.
> 
> Sure, but won't that confuse 'sparse' static checking? These constants
> will no longer appear as being big endian.

Agreed, that is not a desirable situation for these symbolic constants.


> Doesn't gcc's __builtin_bswap32() already compute the result at compile
> time when you feed it a constant value? AFAICS it is supposed to, which
> is why we use it directly in include/uapi/linux/swab.h instead of using
> a special cased __builtin_constant_p().

The return type of __builtin_bswap32 is uint32_t, not __be32.

I will stick with cpu_to_be32(), and simply add the missing constants.


--
Chuck Lever
Trond Myklebust Feb. 3, 2019, 6:58 p.m. UTC | #7
On Sun, 2019-02-03 at 11:49 -0500, Chuck Lever wrote:
> > On Feb 3, 2019, at 10:00 AM, Trond Myklebust <
> > trondmy@hammerspace.com> wrote:
> > 
> > Doesn't gcc's __builtin_bswap32() already compute the result at
> > compile
> > time when you feed it a constant value? AFAICS it is supposed to,
> > which
> > is why we use it directly in include/uapi/linux/swab.h instead of
> > using
> > a special cased __builtin_constant_p().
> 
> The return type of __builtin_bswap32 is uint32_t, not __be32.
> 
> I will stick with cpu_to_be32(), and simply add the missing
> constants.
> 

cpu_to_be32() is a macro that expands to __builtin_bswap32() plus a
cast for most architectures on gcc.

What I'm saying is that as far as I know, the existing #defines should
compile into be32 constants by default, as should any call of the form
cpu_to_b32(<const expression>) and htonl(<const expression>).
Christoph Hellwig Feb. 4, 2019, 7:53 a.m. UTC | #8
On Sat, Feb 02, 2019 at 05:49:35PM -0500, Chuck Lever wrote:
> >> Byte-swapping causes a CPU pipeline bubble on some processors. When
> >> a decoder is comparing an on-the-wire value for equality, byte-
> >> swapping can be avoided by comparing it directly to a pre-byte-
> >> swapped constant value.
> > 
> > Which ones?
> 
> I assume you mean on which processors have I observed CPU cycle
> spikes around bswap instructions.

Yes.

> I've seen this behavior only
> on Intel processors of various families.

Interesting. In general we should not do separate byte swap instructions
on x86, as MOVBE can be used to do a load or store with an included
byteswap, and I thought the whole point for that was that they could
be handled in the same cycle.

In fact https://www.agner.org/optimize/instruction_tables.pdf
says that movbe is generally a single cycle instruction.

> Would you prefer a different justification for this clean-up?

I don't really care about the cleanup, it is just that the explanation
goes against conventional wisdom, which is why I was a little surpised.

And that is not just the cycles, but also as Trond pointed out
that the Linux byte swapping macro on constants should usually be
optimized away at compile time anyway.
Chuck Lever Feb. 4, 2019, 2:16 p.m. UTC | #9
> On Feb 4, 2019, at 2:53 AM, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Sat, Feb 02, 2019 at 05:49:35PM -0500, Chuck Lever wrote:
>>>> Byte-swapping causes a CPU pipeline bubble on some processors. When
>>>> a decoder is comparing an on-the-wire value for equality, byte-
>>>> swapping can be avoided by comparing it directly to a pre-byte-
>>>> swapped constant value.
>>> 
>>> Which ones?
>> 
>> I assume you mean on which processors have I observed CPU cycle
>> spikes around bswap instructions.
> 
> Yes.
> 
>> I've seen this behavior only
>> on Intel processors of various families.
> 
> Interesting. In general we should not do separate byte swap instructions
> on x86, as MOVBE can be used to do a load or store with an included
> byteswap, and I thought the whole point for that was that they could
> be handled in the same cycle.
> 
> In fact https://www.agner.org/optimize/instruction_tables.pdf
> says that movbe is generally a single cycle instruction.
> 
>> Would you prefer a different justification for this clean-up?
> 
> I don't really care about the cleanup, it is just that the explanation
> goes against conventional wisdom, which is why I was a little surpised.
> 
> And that is not just the cycles, but also as Trond pointed out
> that the Linux byte swapping macro on constants should usually be
> optimized away at compile time anyway.

They are. The problem is that we are byte-swapping the incoming wire
data and then comparing to CPU-endian constants in some hot paths.
It's better to leave the incoming data alone and compare to a pre-
byte-swapped constant. This patch adds some of these constants that
were missing, in preparation for fixing the hot paths.

That is apparently not clear from the patch description, so I will
endeavor to improve it.


--
Chuck Lever
Christoph Hellwig Feb. 4, 2019, 2:32 p.m. UTC | #10
On Mon, Feb 04, 2019 at 09:16:54AM -0500, Chuck Lever wrote:
> They are. The problem is that we are byte-swapping the incoming wire
> data and then comparing to CPU-endian constants in some hot paths.
> It's better to leave the incoming data alone and compare to a pre-
> byte-swapped constant. This patch adds some of these constants that
> were missing, in preparation for fixing the hot paths.
> 
> That is apparently not clear from the patch description, so I will
> endeavor to improve it.

Why do we need new enums / #defines for that?

Just replace:

	if (beXX_to_cpu(pkt->field) == SOME_CONSTANT)

with

	if (pkt->field == cpu_to_be32(SOME_CONSTANT))

and we are done.  The latter is a pretty common pattern through
the kernel.
Chuck Lever Feb. 4, 2019, 2:56 p.m. UTC | #11
> On Feb 4, 2019, at 9:32 AM, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Mon, Feb 04, 2019 at 09:16:54AM -0500, Chuck Lever wrote:
>> They are. The problem is that we are byte-swapping the incoming wire
>> data and then comparing to CPU-endian constants in some hot paths.
>> It's better to leave the incoming data alone and compare to a pre-
>> byte-swapped constant. This patch adds some of these constants that
>> were missing, in preparation for fixing the hot paths.
>> 
>> That is apparently not clear from the patch description, so I will
>> endeavor to improve it.
> 
> Why do we need new enums / #defines for that?
> 
> Just replace:
> 
> 	if (beXX_to_cpu(pkt->field) == SOME_CONSTANT)
> 
> with
> 
> 	if (pkt->field == cpu_to_be32(SOME_CONSTANT))
> 
> and we are done.

True.


> The latter is a pretty common pattern through the kernel.

However the pattern in the NFS server and lockd is to define a
lower-case version of the same macro that is pre-byte-swapped.
I'm attempting to follow existing precedent in this area.

We already have, for example, in various sunrpc headers:

enum rpc_accept_stat {
        RPC_SUCCESS = 0,
        RPC_PROG_UNAVAIL = 1,
        RPC_PROG_MISMATCH = 2,
  ...
};

#define rpc_success             cpu_to_be32(RPC_SUCCESS)
#define rpc_prog_unavail        cpu_to_be32(RPC_PROG_UNAVAIL)
#define rpc_prog_mismatch       cpu_to_be32(RPC_PROG_MISMATCH)

Or, for NFS:

enum nfs_stat {
   ...
   NFSERR_SHARE_DENIED = 10015,
   ...
};

#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)

There are some missing lower-case macros, which I am trying to
add to our existing collection before I rewrite the RPC header
encoding and decoding functions. So I'm adding:

+       rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION),

+       rpc_call                = cpu_to_be32(RPC_CALL),
+       rpc_reply               = cpu_to_be32(RPC_REPLY),
+
+       rpc_msg_accepted        = cpu_to_be32(RPC_MSG_ACCEPTED),
+       rpc_msg_denied          = cpu_to_be32(RPC_MSG_DENIED),

Actually since we have decided not to use enum for these, this
smaller addition can simply be squashed into the later patches,
and I can drop this patch, which was intended as a clean-up but
now appears to be unnecessary.

--
Chuck Lever
J. Bruce Fields Feb. 4, 2019, 7:37 p.m. UTC | #12
On Mon, Feb 04, 2019 at 09:56:20AM -0500, Chuck Lever wrote:
> 
> 
> > On Feb 4, 2019, at 9:32 AM, Christoph Hellwig <hch@infradead.org> wrote:
> > 
> > On Mon, Feb 04, 2019 at 09:16:54AM -0500, Chuck Lever wrote:
> >> They are. The problem is that we are byte-swapping the incoming wire
> >> data and then comparing to CPU-endian constants in some hot paths.
> >> It's better to leave the incoming data alone and compare to a pre-
> >> byte-swapped constant. This patch adds some of these constants that
> >> were missing, in preparation for fixing the hot paths.
> >> 
> >> That is apparently not clear from the patch description, so I will
> >> endeavor to improve it.
> > 
> > Why do we need new enums / #defines for that?
> > 
> > Just replace:
> > 
> > 	if (beXX_to_cpu(pkt->field) == SOME_CONSTANT)
> > 
> > with
> > 
> > 	if (pkt->field == cpu_to_be32(SOME_CONSTANT))
> > 
> > and we are done.
> 
> True.

I'm a little surprised the compiler couldn't do that automatically.
(Disclaimer: I know nothing about compilers.)

> > The latter is a pretty common pattern through the kernel.
> 
> However the pattern in the NFS server and lockd is to define a
> lower-case version of the same macro that is pre-byte-swapped.
> I'm attempting to follow existing precedent in this area.

I've never really understood why that was done.  (Not saying it was
wrong, just that I don't understand it.)  As long as you're reading the
value, how could byte-swapping it actually add a significant expense?

The one thing I do like is that I can look at e.g.:

	int nfsd4_get_nfs4_acl	...
	__be32 nfsd4_set_nfs4_acl ...

and tell that the former returns a linux errno, the latter an NFS error.

--b.

> 
> We already have, for example, in various sunrpc headers:
> 
> enum rpc_accept_stat {
>         RPC_SUCCESS = 0,
>         RPC_PROG_UNAVAIL = 1,
>         RPC_PROG_MISMATCH = 2,
>   ...
> };
> 
> #define rpc_success             cpu_to_be32(RPC_SUCCESS)
> #define rpc_prog_unavail        cpu_to_be32(RPC_PROG_UNAVAIL)
> #define rpc_prog_mismatch       cpu_to_be32(RPC_PROG_MISMATCH)
> 
> Or, for NFS:
> 
> enum nfs_stat {
>    ...
>    NFSERR_SHARE_DENIED = 10015,
>    ...
> };
> 
> #define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
> 
> There are some missing lower-case macros, which I am trying to
> add to our existing collection before I rewrite the RPC header
> encoding and decoding functions. So I'm adding:
> 
> +       rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION),
> 
> +       rpc_call                = cpu_to_be32(RPC_CALL),
> +       rpc_reply               = cpu_to_be32(RPC_REPLY),
> +
> +       rpc_msg_accepted        = cpu_to_be32(RPC_MSG_ACCEPTED),
> +       rpc_msg_denied          = cpu_to_be32(RPC_MSG_DENIED),
> 
> Actually since we have decided not to use enum for these, this
> smaller addition can simply be squashed into the later patches,
> and I can drop this patch, which was intended as a clean-up but
> now appears to be unnecessary.
> 
> --
> Chuck Lever
> 
>
Tom Talpey Feb. 5, 2019, 1:57 a.m. UTC | #13
On 2/4/2019 2:37 PM, J. Bruce Fields wrote:
> On Mon, Feb 04, 2019 at 09:56:20AM -0500, Chuck Lever wrote:
>>
>>
>>> On Feb 4, 2019, at 9:32 AM, Christoph Hellwig <hch@infradead.org> wrote:
>>>
>>> On Mon, Feb 04, 2019 at 09:16:54AM -0500, Chuck Lever wrote:
>>>> They are. The problem is that we are byte-swapping the incoming wire
>>>> data and then comparing to CPU-endian constants in some hot paths.
>>>> It's better to leave the incoming data alone and compare to a pre-
>>>> byte-swapped constant. This patch adds some of these constants that
>>>> were missing, in preparation for fixing the hot paths.
>>>>
>>>> That is apparently not clear from the patch description, so I will
>>>> endeavor to improve it.
>>>
>>> Why do we need new enums / #defines for that?
>>>
>>> Just replace:
>>>
>>> 	if (beXX_to_cpu(pkt->field) == SOME_CONSTANT)
>>>
>>> with
>>>
>>> 	if (pkt->field == cpu_to_be32(SOME_CONSTANT))
>>>
>>> and we are done.
>>
>> True.
> 
> I'm a little surprised the compiler couldn't do that automatically.
> (Disclaimer: I know nothing about compilers.)
> 
>>> The latter is a pretty common pattern through the kernel.
>>
>> However the pattern in the NFS server and lockd is to define a
>> lower-case version of the same macro that is pre-byte-swapped.
>> I'm attempting to follow existing precedent in this area.
> 
> I've never really understood why that was done.  (Not saying it was
> wrong, just that I don't understand it.)  As long as you're reading the
> value, how could byte-swapping it actually add a significant expense?

Shifts and Rotates are expensive on many processors, and hard to
pipeline for byteawap because each step needs to wait for the result
of the previous.

Maybe this is less of an issue with modern processors, but I'd
certainly not be surprised that embedded processors "did it the
hard way".

Tom.


> 
> The one thing I do like is that I can look at e.g.:
> 
> 	int nfsd4_get_nfs4_acl	...
> 	__be32 nfsd4_set_nfs4_acl ...
> 
> and tell that the former returns a linux errno, the latter an NFS error.
> 
> --b.
> 
>>
>> We already have, for example, in various sunrpc headers:
>>
>> enum rpc_accept_stat {
>>          RPC_SUCCESS = 0,
>>          RPC_PROG_UNAVAIL = 1,
>>          RPC_PROG_MISMATCH = 2,
>>    ...
>> };
>>
>> #define rpc_success             cpu_to_be32(RPC_SUCCESS)
>> #define rpc_prog_unavail        cpu_to_be32(RPC_PROG_UNAVAIL)
>> #define rpc_prog_mismatch       cpu_to_be32(RPC_PROG_MISMATCH)
>>
>> Or, for NFS:
>>
>> enum nfs_stat {
>>     ...
>>     NFSERR_SHARE_DENIED = 10015,
>>     ...
>> };
>>
>> #define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
>>
>> There are some missing lower-case macros, which I am trying to
>> add to our existing collection before I rewrite the RPC header
>> encoding and decoding functions. So I'm adding:
>>
>> +       rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION),
>>
>> +       rpc_call                = cpu_to_be32(RPC_CALL),
>> +       rpc_reply               = cpu_to_be32(RPC_REPLY),
>> +
>> +       rpc_msg_accepted        = cpu_to_be32(RPC_MSG_ACCEPTED),
>> +       rpc_msg_denied          = cpu_to_be32(RPC_MSG_DENIED),
>>
>> Actually since we have decided not to use enum for these, this
>> smaller addition can simply be squashed into the later patches,
>> and I can drop this patch, which was intended as a clean-up but
>> now appears to be unnecessary.
>>
>> --
>> Chuck Lever
>>
>>
> 
>

Patch
diff mbox series

diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 30427b7..adc4be2 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -19,7 +19,10 @@ 
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/gss_api.h>
 
-#define RPC_GSS_VERSION		1
+enum {
+	RPC_GSS_VERSION = 1,
+	rpc_gss_version = cpu_to_be32(RPC_GSS_VERSION)
+};
 
 #define MAXSEQ 0x80000000 /* maximum legal sequence number, from rfc 2203 */
 
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 787939d..69161cb 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -17,6 +17,7 @@ 
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
+#include <linux/sunrpc/msg_prot.h>
 
 struct bio_vec;
 struct rpc_rqst;
@@ -79,31 +80,46 @@  struct xdr_buf {
 	buf->buflen = len;
 }
 
-/*
- * pre-xdr'ed macros.
- */
-
-#define	xdr_zero	cpu_to_be32(0)
-#define	xdr_one		cpu_to_be32(1)
-#define	xdr_two		cpu_to_be32(2)
-
-#define	rpc_success		cpu_to_be32(RPC_SUCCESS)
-#define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
-#define	rpc_prog_mismatch	cpu_to_be32(RPC_PROG_MISMATCH)
-#define	rpc_proc_unavail	cpu_to_be32(RPC_PROC_UNAVAIL)
-#define	rpc_garbage_args	cpu_to_be32(RPC_GARBAGE_ARGS)
-#define	rpc_system_err		cpu_to_be32(RPC_SYSTEM_ERR)
-#define	rpc_drop_reply		cpu_to_be32(RPC_DROP_REPLY)
-
-#define	rpc_auth_ok		cpu_to_be32(RPC_AUTH_OK)
-#define	rpc_autherr_badcred	cpu_to_be32(RPC_AUTH_BADCRED)
-#define	rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
-#define	rpc_autherr_badverf	cpu_to_be32(RPC_AUTH_BADVERF)
-#define	rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF)
-#define	rpc_autherr_tooweak	cpu_to_be32(RPC_AUTH_TOOWEAK)
-#define	rpcsec_gsserr_credproblem	cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
-#define	rpcsec_gsserr_ctxproblem	cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
-#define	rpc_autherr_oldseqnum	cpu_to_be32(101)
+enum xdr_be32_equivalents {
+	xdr_zero		= cpu_to_be32(0),
+	xdr_one			= cpu_to_be32(1),
+	xdr_two			= cpu_to_be32(2),
+
+	rpc_version		= cpu_to_be32(RPC_VERSION),
+
+	rpc_auth_null		= cpu_to_be32(RPC_AUTH_NULL),
+	rpc_auth_unix		= cpu_to_be32(RPC_AUTH_UNIX),
+	rpc_auth_short		= cpu_to_be32(RPC_AUTH_SHORT),
+	rpc_auth_des		= cpu_to_be32(RPC_AUTH_DES),
+	rpc_auth_krb		= cpu_to_be32(RPC_AUTH_KRB),
+	rpc_auth_gss		= cpu_to_be32(RPC_AUTH_GSS),
+
+	rpc_call		= cpu_to_be32(RPC_CALL),
+	rpc_reply		= cpu_to_be32(RPC_REPLY),
+
+	rpc_msg_accepted	= cpu_to_be32(RPC_MSG_ACCEPTED),
+	rpc_msg_denied		= cpu_to_be32(RPC_MSG_DENIED),
+
+	rpc_success		= cpu_to_be32(RPC_SUCCESS),
+	rpc_prog_unavail	= cpu_to_be32(RPC_PROG_UNAVAIL),
+	rpc_prog_mismatch	= cpu_to_be32(RPC_PROG_MISMATCH),
+	rpc_proc_unavail	= cpu_to_be32(RPC_PROC_UNAVAIL),
+	rpc_garbage_args	= cpu_to_be32(RPC_GARBAGE_ARGS),
+	rpc_system_err		= cpu_to_be32(RPC_SYSTEM_ERR),
+	rpc_drop_reply		= cpu_to_be32(RPC_DROP_REPLY),
+
+	rpc_mismatch		= cpu_to_be32(RPC_MISMATCH),
+	rpc_auth_error		= cpu_to_be32(RPC_AUTH_ERROR),
+
+	rpc_auth_ok		= cpu_to_be32(RPC_AUTH_OK),
+	rpc_autherr_badcred	= cpu_to_be32(RPC_AUTH_BADCRED),
+	rpc_autherr_rejectedcred = cpu_to_be32(RPC_AUTH_REJECTEDCRED),
+	rpc_autherr_badverf	= cpu_to_be32(RPC_AUTH_BADVERF),
+	rpc_autherr_rejectedverf = cpu_to_be32(RPC_AUTH_REJECTEDVERF),
+	rpc_autherr_tooweak	= cpu_to_be32(RPC_AUTH_TOOWEAK),
+	rpcsec_gsserr_credproblem = cpu_to_be32(RPCSEC_GSS_CREDPROBLEM),
+	rpcsec_gsserr_ctxproblem = cpu_to_be32(RPCSEC_GSS_CTXPROBLEM),
+};
 
 /*
  * Miscellaneous XDR helper functions