diff mbox

[V2] libibverbs: Allow arbitrary int values for MTU

Message ID 1372768306-6786-1-git-send-email-jsquyres@cisco.com (mailing list archive)
State Rejected
Headers show

Commit Message

Jeff Squyres July 2, 2013, 12:31 p.m. UTC
(Previous patch did not include updates for the man pages)

Keep IBV_MTU_* enums values as they are, but pass MTU values around as
a struct containing a single int.  

Per lengthy discusson on the linux-rdma list, this patch introdces a
source code incompatibility.  Although legacy applications can
continue to use the enum values, they will need to be updated to use
the struct.  Newer applications are encouraged to use arbitrary int
values, not the MTU enums (e.g., 1024, 1500, 9000).

Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
---
 Makefile.am                |  3 +-
 examples/devinfo.c         | 20 +++----------
 examples/pingpong.c        | 12 --------
 examples/pingpong.h        |  1 -
 examples/rc_pingpong.c     | 10 +++----
 examples/srq_pingpong.c    | 10 +++----
 examples/uc_pingpong.c     | 10 +++----
 examples/ud_pingpong.c     |  2 +-
 include/infiniband/verbs.h | 61 +++++++++++++++++++++++++++++++++++++--
 man/ibv_modify_qp.3        |  2 +-
 man/ibv_mtu_to_num.3       | 71 ++++++++++++++++++++++++++++++++++++++++++++++
 man/ibv_query_port.3       |  4 +--
 man/ibv_query_qp.3         |  2 +-
 src/cmd.c                  |  8 +++---
 src/marshall.c             |  2 +-
 15 files changed, 160 insertions(+), 58 deletions(-)
 create mode 100644 man/ibv_mtu_to_num.3

Comments

Jeff Squyres July 4, 2013, 12:36 a.m. UTC | #1
Bump.

On Jul 2, 2013, at 8:31 AM, Jeff Squyres <jsquyres@cisco.com> wrote:

> (Previous patch did not include updates for the man pages)
> 
> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
> a struct containing a single int.  
> 
> Per lengthy discusson on the linux-rdma list, this patch introdces a
> source code incompatibility.  Although legacy applications can
> continue to use the enum values, they will need to be updated to use
> the struct.  Newer applications are encouraged to use arbitrary int
> values, not the MTU enums (e.g., 1024, 1500, 9000).
> 
> Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
> ---
> Makefile.am                |  3 +-
> examples/devinfo.c         | 20 +++----------
> examples/pingpong.c        | 12 --------
> examples/pingpong.h        |  1 -
> examples/rc_pingpong.c     | 10 +++----
> examples/srq_pingpong.c    | 10 +++----
> examples/uc_pingpong.c     | 10 +++----
> examples/ud_pingpong.c     |  2 +-
> include/infiniband/verbs.h | 61 +++++++++++++++++++++++++++++++++++++--
> man/ibv_modify_qp.3        |  2 +-
> man/ibv_mtu_to_num.3       | 71 ++++++++++++++++++++++++++++++++++++++++++++++
> man/ibv_query_port.3       |  4 +--
> man/ibv_query_qp.3         |  2 +-
> src/cmd.c                  |  8 +++---
> src/marshall.c             |  2 +-
> 15 files changed, 160 insertions(+), 58 deletions(-)
> create mode 100644 man/ibv_mtu_to_num.3
> 
> diff --git a/Makefile.am b/Makefile.am
> index 40e83be..1159e55 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -54,7 +54,8 @@ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1	\
>     man/ibv_post_srq_recv.3 man/ibv_query_device.3 man/ibv_query_gid.3	\
>     man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3	\
>     man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3		\
> -    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3
> +    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3  \
> +    man/ibv_mtu_to_num.3
> 
> DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
>     debian/ibverbs-utils.install debian/libibverbs1.install \
> diff --git a/examples/devinfo.c b/examples/devinfo.c
> index ff078e4..e8fb27e 100644
> --- a/examples/devinfo.c
> +++ b/examples/devinfo.c
> @@ -111,18 +111,6 @@ static const char *atomic_cap_str(enum ibv_atomic_cap atom_cap)
> 	}
> }
> 
> -static const char *mtu_str(enum ibv_mtu max_mtu)
> -{
> -	switch (max_mtu) {
> -	case IBV_MTU_256:  return "256";
> -	case IBV_MTU_512:  return "512";
> -	case IBV_MTU_1024: return "1024";
> -	case IBV_MTU_2048: return "2048";
> -	case IBV_MTU_4096: return "4096";
> -	default:           return "invalid MTU";
> -	}
> -}
> -
> static const char *width_str(uint8_t width)
> {
> 	switch (width) {
> @@ -301,10 +289,10 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
> 		printf("\t\tport:\t%d\n", port);
> 		printf("\t\t\tstate:\t\t\t%s (%d)\n",
> 		       port_state_str(port_attr.state), port_attr.state);
> -		printf("\t\t\tmax_mtu:\t\t%s (%d)\n",
> -		       mtu_str(port_attr.max_mtu), port_attr.max_mtu);
> -		printf("\t\t\tactive_mtu:\t\t%s (%d)\n",
> -		       mtu_str(port_attr.active_mtu), port_attr.active_mtu);
> +		printf("\t\t\tmax_mtu:\t\t%d (%d)\n",
> +		       ibv_mtu_to_num(port_attr.max_mtu), port_attr.max_mtu.mtu);
> +		printf("\t\t\tactive_mtu:\t\t%d (%d)\n",
> +			ibv_mtu_to_num(port_attr.active_mtu), port_attr.active_mtu.mtu);
> 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
> 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
> 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
> diff --git a/examples/pingpong.c b/examples/pingpong.c
> index 90732ef..d1c22c9 100644
> --- a/examples/pingpong.c
> +++ b/examples/pingpong.c
> @@ -36,18 +36,6 @@
> #include <stdio.h>
> #include <string.h>
> 
> -enum ibv_mtu pp_mtu_to_enum(int mtu)
> -{
> -	switch (mtu) {
> -	case 256:  return IBV_MTU_256;
> -	case 512:  return IBV_MTU_512;
> -	case 1024: return IBV_MTU_1024;
> -	case 2048: return IBV_MTU_2048;
> -	case 4096: return IBV_MTU_4096;
> -	default:   return -1;
> -	}
> -}
> -
> uint16_t pp_get_local_lid(struct ibv_context *context, int port)
> {
> 	struct ibv_port_attr attr;
> diff --git a/examples/pingpong.h b/examples/pingpong.h
> index 9cdc03e..91d217b 100644
> --- a/examples/pingpong.h
> +++ b/examples/pingpong.h
> @@ -35,7 +35,6 @@
> 
> #include <infiniband/verbs.h>
> 
> -enum ibv_mtu pp_mtu_to_enum(int mtu);
> uint16_t pp_get_local_lid(struct ibv_context *context, int port);
> int pp_get_port_info(struct ibv_context *context, int port,
> 		     struct ibv_port_attr *attr);
> diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
> index 15494a1..a7e1836 100644
> --- a/examples/rc_pingpong.c
> +++ b/examples/rc_pingpong.c
> @@ -78,7 +78,7 @@ struct pingpong_dest {
> };
> 
> static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
> -			  enum ibv_mtu mtu, int sl,
> +			  struct ibv_mtu_t mtu, int sl,
> 			  struct pingpong_dest *dest, int sgid_idx)
> {
> 	struct ibv_qp_attr attr = {
> @@ -209,7 +209,7 @@ out:
> }
> 
> static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
> -						 int ib_port, enum ibv_mtu mtu,
> +						 int ib_port, struct ibv_mtu_t mtu,
> 						 int port, int sl,
> 						 const struct pingpong_dest *my_dest,
> 						 int sgid_idx)
> @@ -547,7 +547,7 @@ int main(int argc, char *argv[])
> 	int                      port = 18515;
> 	int                      ib_port = 1;
> 	int                      size = 4096;
> -	enum ibv_mtu		 mtu = IBV_MTU_1024;
> +	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
> 	int                      rx_depth = 500;
> 	int                      iters = 1000;
> 	int                      use_event = 0;
> @@ -608,8 +608,8 @@ int main(int argc, char *argv[])
> 			break;
> 
> 		case 'm':
> -			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
> -			if (mtu < 0) {
> +			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
> +			if (mtu.mtu < 0) {
> 				usage(argv[0]);
> 				return 1;
> 			}
> diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
> index 6e00f8c..9173274 100644
> --- a/examples/srq_pingpong.c
> +++ b/examples/srq_pingpong.c
> @@ -81,7 +81,7 @@ struct pingpong_dest {
> 	union ibv_gid gid;
> };
> 
> -static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
> +static int pp_connect_ctx(struct pingpong_context *ctx, int port, struct ibv_mtu_t mtu,
> 			  int sl, const struct pingpong_dest *my_dest,
> 			  const struct pingpong_dest *dest, int sgid_idx)
> {
> @@ -229,7 +229,7 @@ out:
> }
> 
> static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
> -						 int ib_port, enum ibv_mtu mtu,
> +						 int ib_port, struct ibv_mtu_t mtu,
> 						 int port, int sl,
> 						 const struct pingpong_dest *my_dest,
> 						 int sgid_idx)
> @@ -620,7 +620,7 @@ int main(int argc, char *argv[])
> 	int                      port = 18515;
> 	int                      ib_port = 1;
> 	int                      size = 4096;
> -	enum ibv_mtu		 mtu = IBV_MTU_1024;
> +	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
> 	int                      num_qp = 16;
> 	int                      rx_depth = 500;
> 	int                      iters = 1000;
> @@ -685,8 +685,8 @@ int main(int argc, char *argv[])
> 			break;
> 
> 		case 'm':
> -			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
> -			if (mtu < 0) {
> +			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
> +			if (mtu.mtu < 0) {
> 				usage(argv[0]);
> 				return 1;
> 			}
> diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
> index 52c6c28..6b6bc85 100644
> --- a/examples/uc_pingpong.c
> +++ b/examples/uc_pingpong.c
> @@ -78,7 +78,7 @@ struct pingpong_dest {
> };
> 
> static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
> -			  enum ibv_mtu mtu, int sl,
> +			  struct ibv_mtu_t mtu, int sl,
> 			  struct pingpong_dest *dest, int sgid_idx)
> {
> 	struct ibv_qp_attr attr = {
> @@ -197,7 +197,7 @@ out:
> }
> 
> static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
> -						 int ib_port, enum ibv_mtu mtu,
> +						 int ib_port, struct ibv_mtu_t mtu,
> 						 int port, int sl,
> 						 const struct pingpong_dest *my_dest,
> 						 int sgid_idx)
> @@ -535,7 +535,7 @@ int main(int argc, char *argv[])
> 	int                      port = 18515;
> 	int                      ib_port = 1;
> 	int                      size = 4096;
> -	enum ibv_mtu		 mtu = IBV_MTU_1024;
> +	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
> 	int                      rx_depth = 500;
> 	int                      iters = 1000;
> 	int                      use_event = 0;
> @@ -596,8 +596,8 @@ int main(int argc, char *argv[])
> 			break;
> 
> 		case 'm':
> -			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
> -			if (mtu < 0) {
> +			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
> +			if (mtu.mtu < 0) {
> 				usage(argv[0]);
> 				return 1;
> 			}
> diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
> index 21c551d..5a0656f 100644
> --- a/examples/ud_pingpong.c
> +++ b/examples/ud_pingpong.c
> @@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
> 			fprintf(stderr, "Unable to query port info for port %d\n", port);
> 			goto clean_device;
> 		}
> -		mtu = 1 << (port_info.active_mtu + 7);
> +		mtu = ibv_mtu_to_num(port_info.active_mtu);
> 		if (size > mtu) {
> 			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
> 			goto clean_device;
> diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
> index 4b1ab57..0545a8c 100644
> --- a/include/infiniband/verbs.h
> +++ b/include/infiniband/verbs.h
> @@ -144,6 +144,10 @@ struct ibv_device_attr {
> 	uint8_t			phys_port_cnt;
> };
> 
> +/*
> + * Symbolic enum names for MTU values are preserved for backwards
> + * compatibility.
> + */
> enum ibv_mtu {
> 	IBV_MTU_256  = 1,
> 	IBV_MTU_512  = 2,
> @@ -152,6 +156,16 @@ enum ibv_mtu {
> 	IBV_MTU_4096 = 5
> };
> 
> +/*
> + * ibv_mtu_t is an encoded integer type that represents an MTU value.
> + * If the value is <= IBV_MTU_4096, it is treated as one of the
> + * IBV_MTU_* enum values.  Otherwise, it is treated as its integer
> + * value.
> + */
> +struct ibv_mtu_t {
> +	int mtu;
> +};
> +
> enum ibv_port_state {
> 	IBV_PORT_NOP		= 0,
> 	IBV_PORT_DOWN		= 1,
> @@ -169,8 +183,8 @@ enum {
> 
> struct ibv_port_attr {
> 	enum ibv_port_state	state;
> -	enum ibv_mtu		max_mtu;
> -	enum ibv_mtu		active_mtu;
> +	struct ibv_mtu_t	max_mtu;
> +	struct ibv_mtu_t	active_mtu;
> 	int			gid_tbl_len;
> 	uint32_t		port_cap_flags;
> 	uint32_t		max_msg_sz;
> @@ -485,7 +499,7 @@ enum ibv_mig_state {
> struct ibv_qp_attr {
> 	enum ibv_qp_state	qp_state;
> 	enum ibv_qp_state	cur_qp_state;
> -	enum ibv_mtu		path_mtu;
> +	struct ibv_mtu_t	path_mtu;
> 	enum ibv_mig_state	path_mig_state;
> 	uint32_t		qkey;
> 	uint32_t		rq_psn;
> @@ -1138,6 +1152,47 @@ const char *ibv_port_state_str(enum ibv_port_state port_state);
>  */
> const char *ibv_event_type_str(enum ibv_event_type event);
> 
> +/**
> + * num_to_ibv_mtu - Convert an integer to its corresponding encoded
> + * ibv_mtu_t value.  If an integer value corresponding to an IBV_MTU_*
> + * enum value is passed, return the enum value (e.g., 1024 ->
> + * IBV_MTU_1024).  Otherwise, just return the value (e.g., 1500 ->
> + * 1500).
> + */
> +static inline struct ibv_mtu_t num_to_ibv_mtu(int num)
> +{
> +	struct ibv_mtu_t mtu;
> +
> +	switch (num) {
> +	case 256:  mtu.mtu = IBV_MTU_256;  break;
> +	case 512:  mtu.mtu = IBV_MTU_512;  break;
> +	case 1024: mtu.mtu = IBV_MTU_1024; break;
> +	case 2048: mtu.mtu = IBV_MTU_2048; break;
> +	case 4096: mtu.mtu = IBV_MTU_4096; break;
> +	default:   mtu.mtu = num;          break;
> +	}
> +
> +	return mtu;
> +}
> +
> +/**
> + * ibv_mtu_to_num - Convert an encoded ibv_mtu_t value to its
> + * corresponding integer value.  If an enum ibv_mtu value is passed,
> + * return its integer value (e.g., IBV_MTU_1024 -> 1024).  Otherwise,
> + * just return the value (e.g., 1500 -> 1500).
> + */
> +static inline int ibv_mtu_to_num(struct ibv_mtu_t mtu)
> +{
> +	switch (mtu.mtu) {
> +	case IBV_MTU_256:  return 256;
> +	case IBV_MTU_512:  return 512;
> +	case IBV_MTU_1024: return 1024;
> +	case IBV_MTU_2048: return 2048;
> +	case IBV_MTU_4096: return 4096;
> +	default:           return mtu.mtu;
> +	}
> +}
> +
> END_C_DECLS
> 
> #  undef __attribute_const
> diff --git a/man/ibv_modify_qp.3 b/man/ibv_modify_qp.3
> index cb3faaa..dd93674 100644
> --- a/man/ibv_modify_qp.3
> +++ b/man/ibv_modify_qp.3
> @@ -25,7 +25,7 @@ struct ibv_qp_attr {
> .in +8
> enum ibv_qp_state       qp_state;               /* Move the QP to this state */
> enum ibv_qp_state       cur_qp_state;           /* Assume this is the current QP state */
> -enum ibv_mtu            path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
> +struct ibv_mtu_t        path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
> enum ibv_mig_state      path_mig_state;         /* Path migration state (valid if HCA supports APM) */
> uint32_t                qkey;                   /* Q_Key for the QP (valid only for UD QPs) */
> uint32_t                rq_psn;                 /* PSN for receive queue (valid only for RC/UC QPs) */
> diff --git a/man/ibv_mtu_to_num.3 b/man/ibv_mtu_to_num.3
> new file mode 100644
> index 0000000..ddb9bd0
> --- /dev/null
> +++ b/man/ibv_mtu_to_num.3
> @@ -0,0 +1,71 @@
> +.\" -*- nroff -*-
> +.\"
> +.TH IBV_MTU_TO_NUM 3 2013-06-20 libibverbs "Libibverbs Programmer's Manual"
> +.SH "NAME"
> +.nf
> +ibv_mtu_to_num \- convert encoded MTU value to integer
> +.sp
> +num_to_ibv_mtu \- convert integer to encoded MTU value
> +.SH "SYNOPSIS"
> +.nf
> +.B #include <infiniband/verbs.h>
> +.sp
> +.BI "int ibv_mtu_to_num(struct ibv_mtu_t " "mtu" ");
> +.sp
> +.BI "struct ibv_mtu_t num_to_ibv_mtu(int " "num" ");
> +.fi
> +.SH "DESCRIPTION"
> +.PP
> +The
> +.I struct ibv_mtu_t
> +type is an encoded integer used to represent MTU values in order to
> +preserve backwards compatibility.  When the value of an
> +.I struct ibv_mtu_t
> +variable is <=
> +.BR IBV_MTU_4096\fR,
> +it is treated as the corresponding
> +.B IBV_MTU_*
> +enum value.  Otherwise, it is treated as its integer value.
> +.PP
> +MTU values less than the value of the enum
> +.B IBV_MTU_4096
> +(i.e., 5) cannot be represented.
> +.PP
> +.B ibv_mtu_to_num()
> +converts the encoded MTU value
> +.I mtu
> +to a plain integer value.  For example, if
> +.I mtu
> +contains the value
> +.BR IBV_MTU_1024\fR,
> +then the value 1024 will be returned.  Likewise, if
> +.I mtu
> +contains the value 1500, then 1500 will be returned.
> +.PP
> +.B num_to_ibv_mtu()
> +converts the integer
> +.I num
> +to its corresponding encoded
> +.I struct ibv_mtu_t
> +value.  For example, if
> +.I num
> +is 1024, then a
> +.I struct ibv_mtu_t
> +containing the value
> +.B IBV_MTU_1024
> +will be returned.  Likewise, if
> +.I num
> +is 1500, then a
> +.I struct ibv_mtu_t
> +containing the value 1500 will be returned.
> +.SH "RETURN VALUE"
> +.B ibv_mtu_to_num()
> +returns an integer MTU value.
> +.PP
> +.B num_to_ibv_mtu()
> +returns an encoded MTU value.
> +.SH "SEE ALSO"
> +.BR ibv_query_port (3)
> +.SH "AUTHORS"
> +.TP
> +Jeff Squyres <jsquyres@cisco.com>
> diff --git a/man/ibv_query_port.3 b/man/ibv_query_port.3
> index 9bedd90..5620049 100644
> --- a/man/ibv_query_port.3
> +++ b/man/ibv_query_port.3
> @@ -26,8 +26,8 @@ is an ibv_port_attr struct, as defined in <infiniband/verbs.h>.
> struct ibv_port_attr {
> .in +8
> enum ibv_port_state     state;          /* Logical port state */
> -enum ibv_mtu            max_mtu;        /* Max MTU supported by port */
> -enum ibv_mtu            active_mtu;     /* Actual MTU */
> +struct ibv_mtu_t        max_mtu;        /* Max MTU supported by port */
> +struct ibv_mtu_t        active_mtu;     /* Actual MTU */
> int                     gid_tbl_len;    /* Length of source GID table */
> uint32_t                port_cap_flags; /* Port capabilities */
> uint32_t                max_msg_sz;     /* Maximum message size */
> diff --git a/man/ibv_query_qp.3 b/man/ibv_query_qp.3
> index 3893ec8..fbf7ec3 100644
> --- a/man/ibv_query_qp.3
> +++ b/man/ibv_query_qp.3
> @@ -30,7 +30,7 @@ struct ibv_qp_attr {
> .in +8
> enum ibv_qp_state       qp_state;            /* Current QP state */
> enum ibv_qp_state       cur_qp_state;        /* Current QP state - irrelevant for ibv_query_qp */
> -enum ibv_mtu            path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
> +struct ibv_mtu_t        path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
> enum ibv_mig_state      path_mig_state;      /* Path migration state (valid if HCA supports APM) */
> uint32_t                qkey;                /* Q_Key of the QP (valid only for UD QPs) */
> uint32_t                rq_psn;              /* PSN for receive queue (valid only for RC/UC QPs) */
> diff --git a/src/cmd.c b/src/cmd.c
> index 9789092..13fac0d 100644
> --- a/src/cmd.c
> +++ b/src/cmd.c
> @@ -180,8 +180,8 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
> 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
> 
> 	port_attr->state      	   = resp.state;
> -	port_attr->max_mtu         = resp.max_mtu;
> -	port_attr->active_mtu      = resp.active_mtu;
> +	port_attr->max_mtu.mtu     = resp.max_mtu;
> +	port_attr->active_mtu.mtu  = resp.active_mtu;
> 	port_attr->gid_tbl_len     = resp.gid_tbl_len;
> 	port_attr->port_cap_flags  = resp.port_cap_flags;
> 	port_attr->max_msg_sz      = resp.max_msg_sz;
> @@ -678,7 +678,7 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
> 	attr->alt_pkey_index                = resp.alt_pkey_index;
> 	attr->qp_state                      = resp.qp_state;
> 	attr->cur_qp_state                  = resp.cur_qp_state;
> -	attr->path_mtu                      = resp.path_mtu;
> +	attr->path_mtu.mtu                  = resp.path_mtu;
> 	attr->path_mig_state                = resp.path_mig_state;
> 	attr->sq_draining                   = resp.sq_draining;
> 	attr->max_rd_atomic                 = resp.max_rd_atomic;
> @@ -752,7 +752,7 @@ int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
> 	cmd->alt_pkey_index 	 = attr->alt_pkey_index;
> 	cmd->qp_state 		 = attr->qp_state;
> 	cmd->cur_qp_state 	 = attr->cur_qp_state;
> -	cmd->path_mtu 		 = attr->path_mtu;
> +	cmd->path_mtu		 = attr->path_mtu.mtu;
> 	cmd->path_mig_state 	 = attr->path_mig_state;
> 	cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
> 	cmd->max_rd_atomic 	 = attr->max_rd_atomic;
> diff --git a/src/marshall.c b/src/marshall.c
> index 577b4b1..a3595dc 100644
> --- a/src/marshall.c
> +++ b/src/marshall.c
> @@ -59,7 +59,7 @@ void ibv_copy_qp_attr_from_kern(struct ibv_qp_attr *dst,
> 				struct ibv_kern_qp_attr *src)
> {
> 	dst->cur_qp_state = src->cur_qp_state;
> -	dst->path_mtu = src->path_mtu;
> +	dst->path_mtu.mtu = src->path_mtu;
> 	dst->path_mig_state = src->path_mig_state;
> 	dst->qkey = src->qkey;
> 	dst->rq_psn = src->rq_psn;
> -- 
> 1.8.2.1
>
Roland Dreier July 5, 2013, 7:11 p.m. UTC | #2
On Tue, Jul 2, 2013 at 5:31 AM, Jeff Squyres <jsquyres@cisco.com> wrote:
> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
> a struct containing a single int.
>
> Per lengthy discusson on the linux-rdma list, this patch introdces a
> source code incompatibility.  Although legacy applications can
> continue to use the enum values, they will need to be updated to use
> the struct.  Newer applications are encouraged to use arbitrary int
> values, not the MTU enums (e.g., 1024, 1500, 9000).

So what happens if I have an old application binary, and I run against
a new libibverbs without recompiling?

Also it seems that I'm forced to change my source code to be able to
compile against new libibverbs?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 8, 2013, 4:26 p.m. UTC | #3
On Jul 5, 2013, at 3:11 PM, Roland Dreier <roland@purestorage.com> wrote:

> So what happens if I have an old application binary, and I run against
> a new libibverbs without recompiling?
> 
> Also it seems that I'm forced to change my source code to be able to
> compile against new libibverbs?


I previously sent an ABI-preserving version of this patch, but it was hated by Doug Ledford and (eventually) Jason Gunthorpe.  

After long discussion (see thread starting here: http://www.spinics.net/lists/linux-rdma/msg15951.html), they decided that they wanted a clean break that forces both source code and ABI changes, which resulted in this patch.

I personally don't care which way this goes; I just want the ability to have non-enum MTU values.
Roland Dreier July 8, 2013, 5:19 p.m. UTC | #4
[resending to reply-all, sorry Jeff]

On Mon, Jul 8, 2013 at 9:26 AM, Jeff Squyres (jsquyres)
<jsquyres@cisco.com> wrote:
>> So what happens if I have an old application binary, and I run against
>> a new libibverbs without recompiling?

>> Also it seems that I'm forced to change my source code to be able to
>> compile against new libibverbs?

> I previously sent an ABI-preserving version of this patch, but it was hated by Doug Ledford and (eventually) Jason Gunthorpe.

> After long discussion (see thread starting here: http://www.spinics.net/lists/linux-rdma/msg15951.html), they decided that they wanted a clean break that forces both source code and ABI changes, which resulted in this patch.

> I personally don't care which way this goes; I just want the ability to have non-enum MTU values.

So I guess I need to go back and read all of that thread carefully,
but I don't think that silently breaking old binaries and also
breaking sources is the right way to go.  What about preserving the
behavior of the existing API / ABI and then adding a new function to
return the size of the maximum datagram for a device?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 8, 2013, 5:26 p.m. UTC | #5
On Mon, Jul 08, 2013 at 10:19:33AM -0700, Roland Dreier wrote:
> [resending to reply-all, sorry Jeff]
> 
> On Mon, Jul 8, 2013 at 9:26 AM, Jeff Squyres (jsquyres)
> <jsquyres@cisco.com> wrote:
> >> So what happens if I have an old application binary, and I run against
> >> a new libibverbs without recompiling?
> 
> >> Also it seems that I'm forced to change my source code to be able to
> >> compile against new libibverbs?
> 
> > I previously sent an ABI-preserving version of this patch, but it was hated by Doug Ledford and (eventually) Jason Gunthorpe.
> 
> > After long discussion (see thread starting here: http://www.spinics.net/lists/linux-rdma/msg15951.html), they decided that they wanted a clean break that forces both source code and ABI changes, which resulted in this patch.
> 
> > I personally don't care which way this goes; I just want the ability to have non-enum MTU values.
> 
> So I guess I need to go back and read all of that thread carefully,
> but I don't think that silently breaking old binaries and also
> breaking sources is the right way to go.  What about preserving the
> behavior of the existing API / ABI and then adding a new function to
> return the size of the maximum datagram for a device?

It is not just a device, but the QP attrs and so on, so there would be
quite a few new core functions needed to extend the MTU, and that
flows back into the kernel interface too...

Jeff's patch doesn't break old binaries, old binaries, running with
normal IB MTUs work fine. The structure layouts all stay the same,
etc.

Old sources will need an update to support non-IB MTUs no matter what,
forcing an update via the compiler seems saner then letting them
remain silently out of date..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 10, 2013, 12:14 p.m. UTC | #6
T24gSnVsIDgsIDIwMTMsIGF0IDE6MjYgUE0sIEphc29uIEd1bnRob3JwZSA8amd1bnRob3JwZUBv
YnNpZGlhbnJlc2VhcmNoLmNvbT4gd3JvdGU6DQoNCj4gSmVmZidzIHBhdGNoIGRvZXNuJ3QgYnJl
YWsgb2xkIGJpbmFyaWVzLCBvbGQgYmluYXJpZXMsIHJ1bm5pbmcgd2l0aA0KPiBub3JtYWwgSUIg
TVRVcyB3b3JrIGZpbmUuIFRoZSBzdHJ1Y3R1cmUgbGF5b3V0cyBhbGwgc3RheSB0aGUgc2FtZSwN
Cj4gZXRjLg0KDQoNCkZXSVcsIEkgZGlkIGEgc2ltcGxlIHRlc3QgdG8gY29uZmlybSB0aGlzLiAg
SSBpbnN0YWxsZWQgYSBzdG9jayBnaXQgSEVBRCBsaWJpYnZlcmJzIGludG8gJEhPTUUvbGliaWJ2
ZXJicy1IRUFEIGFuZCBhIGxpYmlidmVyYnMgd2l0aCB0aGUgTVRVIHBhdGNoIGluICRIT01FL2xp
YmlidmVyYnMtbXR1LXBhdGNoLiAgVGhlIG1seDQgZHJpdmVyIHdhcyBpbnN0YWxsZWQgaW50byBi
b3RoIHRyZWVzIChJIHVzZWQgc29tZSBmYWlybHkgb2xkIE1lbGxhbm94IEhDQXMrRGVsbCBzZXJ2
ZXJzIGZvciB0aGlzIHRlc3QpLg0KDQpUaGlzIGlzIHRoZSBiYXNlIGNhc2U6DQoNCi0tLS0tDQpb
NTowNl0gZGVsbDAxMjp+IOKdr+Kdr+KdryBjZCBsaWJpYnZlcmJzLUhFQUQNCls1OjA3XSBkZWxs
MDEyOn4vbGliaWJ2ZXJicy1IRUFEIOKdr+Kdr+KdryBsZGQgYmluL2lidl9yY19waW5ncG9uZw0K
CWxpbnV4LXZkc28uc28uMSA9PiAgKDB4MDAwMDJhYWFhYWFjYjAwMCkNCglsaWJpYnZlcmJzLnNv
LjEgPT4gL2hvbWUvanNxdXlyZXMvbGliaWJ2ZXJicy1IRUFEL2xpYi9saWJpYnZlcmJzLnNvLjEg
KDB4MDAwMDJhYWFhYWNjZDAwMCkNCglsaWJwdGhyZWFkLnNvLjAgPT4gL2xpYjY0L2xpYnB0aHJl
YWQuc28uMCAoMHgwMDAwMmFhYWFhZWVjMDAwKQ0KCWxpYmRsLnNvLjIgPT4gL2xpYjY0L2xpYmRs
LnNvLjIgKDB4MDAwMDJhYWFhYjEwOTAwMCkNCglsaWJjLnNvLjYgPT4gL2xpYjY0L2xpYmMuc28u
NiAoMHgwMDAwMmFhYWFiMzBlMDAwKQ0KCS9saWI2NC9sZC1saW51eC14ODYtNjQuc28uMiAoMHgw
MDAwMmFhYWFhYWFiMDAwKQ0KWzU6MDddIGRlbGwwMTI6fi9saWJpYnZlcmJzLUhFQUQg4p2v4p2v
4p2vIC4vYmluL2lidl9yY19waW5ncG9uZyBkZWxsMDExDQogIGxvY2FsIGFkZHJlc3M6ICBMSUQg
MHgwMDA0LCBRUE4gMHgwNDAwNGEsIFBTTiAweGMwODc0MiwgR0lEIDo6DQogIHJlbW90ZSBhZGRy
ZXNzOiBMSUQgMHgwMDE5LCBRUE4gMHgyMDAwNGEsIFBTTiAweDQ0YzQ4ZSwgR0lEIDo6DQo4MTky
MDAwIGJ5dGVzIGluIDAuMDIgc2Vjb25kcyA9IDQxNzAuMjggTWJpdC9zZWMNCjEwMDAgaXRlcnMg
aW4gMC4wMiBzZWNvbmRzID0gMTUuNzIgdXNlYy9pdGVyDQotLS0tLQ0KDQpXb3JrcyBmaW5lLiAg
Tm93IGxldCdzIHVzZSB0aGUgc2FtZSBsaWJpYnZlcmJzLUhFQUQgcmMgcGluZ3BvbmcgYmluYXJ5
LCBidXQgd2l0aCB0aGUgTVRVLXBhdGNoZWQgbGliaWJ2ZXJicy5zbzoNCg0KLS0tLS0NCls1OjA3
XSBkZWxsMDEyOn4vbGliaWJ2ZXJicy1IRUFEIOKdr+Kdr+KdryBtdiBsaWIvbGliaWJ2ZXJicy5z
by4xIGxpYi9saWJpYnZlcmJzLnNvLjEtYm9ndXMNCls1OjA3XSBkZWxsMDEyOn4vbGliaWJ2ZXJi
cy1IRUFEIOKdr+Kdr+KdryBleHBvcnQgTERfTElCUkFSWV9QQVRIPSRIT01FL2xpYmlidmVyYnMt
bXR1LXBhdGNoL2xpYg0KWzU6MDhdIGRlbGwwMTI6fi9saWJpYnZlcmJzLUhFQUQg4p2v4p2v4p2v
IGxkZCBiaW4vaWJ2X3JjX3Bpbmdwb25nDQoJbGludXgtdmRzby5zby4xID0+ICAoMHgwMDAwMmFh
YWFhYWNiMDAwKQ0KCWxpYmlidmVyYnMuc28uMSA9PiAvaG9tZS9qc3F1eXJlcy9saWJpYnZlcmJz
LW10dS1wYXRjaC9saWIvbGliaWJ2ZXJicy5zby4xICgweDAwMDAyYWFhYWFjY2QwMDApDQoJbGli
cHRocmVhZC5zby4wID0+IC9saWI2NC9saWJwdGhyZWFkLnNvLjAgKDB4MDAwMDJhYWFhYWVlZDAw
MCkNCglsaWJkbC5zby4yID0+IC9saWI2NC9saWJkbC5zby4yICgweDAwMDAyYWFhYWIxMGEwMDAp
DQoJbGliYy5zby42ID0+IC9saWI2NC9saWJjLnNvLjYgKDB4MDAwMDJhYWFhYjMwZTAwMCkNCgkv
bGliNjQvbGQtbGludXgteDg2LTY0LnNvLjIgKDB4MDAwMDJhYWFhYWFhYjAwMCkNCls1OjA4XSBk
ZWxsMDEyOn4vbGliaWJ2ZXJicy1IRUFEIOKdr+Kdr+KdryAuL2Jpbi9pYnZfcmNfcGluZ3Bvbmcg
ZGVsbDAxMQ0KICBsb2NhbCBhZGRyZXNzOiAgTElEIDB4MDAwNCwgUVBOIDB4MDgwMDRhLCBQU04g
MHg2NTM5MWMsIEdJRCA6Og0KICByZW1vdGUgYWRkcmVzczogTElEIDB4MDAxOSwgUVBOIDB4MjQw
MDRhLCBQU04gMHg3ZDEzN2UsIEdJRCA6Og0KODE5MjAwMCBieXRlcyBpbiAwLjAyIHNlY29uZHMg
PSA0MTYzLjM5IE1iaXQvc2VjDQoxMDAwIGl0ZXJzIGluIDAuMDIgc2Vjb25kcyA9IDE1Ljc0IHVz
ZWMvaXRlcg0KLS0tLS0NCg0KU3RpbGwgd29ya3MgZmluZS4NCg0KLS0gDQpKZWZmIFNxdXlyZXMN
CmpzcXV5cmVzQGNpc2NvLmNvbQ0KRm9yIGNvcnBvcmF0ZSBsZWdhbCBpbmZvcm1hdGlvbiBnbyB0
bzogaHR0cDovL3d3dy5jaXNjby5jb20vd2ViL2Fib3V0L2RvaW5nX2J1c2luZXNzL2xlZ2FsL2Ny
aS8NCg0K
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 15, 2013, 2:20 p.m. UTC | #7
Bump.

On Jul 10, 2013, at 8:14 AM, Jeff Squyres (jsquyres) <jsquyres@cisco.com> wrote:

> On Jul 8, 2013, at 1:26 PM, Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:

> 

>> Jeff's patch doesn't break old binaries, old binaries, running with

>> normal IB MTUs work fine. The structure layouts all stay the same,

>> etc.

> 

> 

> FWIW, I did a simple test to confirm this.  I installed a stock git HEAD libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I used some fairly old Mellanox HCAs+Dell servers for this test).

> 

> This is the base case:

> 

> -----

> [5:06] dell012:~ ??? cd libibverbs-HEAD

> [5:07] dell012:~/libibverbs-HEAD ??? ldd bin/ibv_rc_pingpong

> 	linux-vdso.so.1 =>  (0x00002aaaaaacb000)

> 	libibverbs.so.1 => /home/jsquyres/libibverbs-HEAD/lib/libibverbs.so.1 (0x00002aaaaaccd000)

> 	libpthread.so.0 => /lib64/libpthread.so.0 (0x00002aaaaaeec000)

> 	libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab109000)

> 	libc.so.6 => /lib64/libc.so.6 (0x00002aaaab30e000)

> 	/lib64/ld-linux-x86-64.so.2 (0x00002aaaaaaab000)

> [5:07] dell012:~/libibverbs-HEAD ??? ./bin/ibv_rc_pingpong dell011

>  local address:  LID 0x0004, QPN 0x04004a, PSN 0xc08742, GID ::

>  remote address: LID 0x0019, QPN 0x20004a, PSN 0x44c48e, GID ::

> 8192000 bytes in 0.02 seconds = 4170.28 Mbit/sec

> 1000 iters in 0.02 seconds = 15.72 usec/iter

> -----

> 

> Works fine.  Now let's use the same libibverbs-HEAD rc pingpong binary, but with the MTU-patched libibverbs.so:

> 

> -----

> [5:07] dell012:~/libibverbs-HEAD ??? mv lib/libibverbs.so.1 lib/libibverbs.so.1-bogus

> [5:07] dell012:~/libibverbs-HEAD ??? export LD_LIBRARY_PATH=$HOME/libibverbs-mtu-patch/lib

> [5:08] dell012:~/libibverbs-HEAD ??? ldd bin/ibv_rc_pingpong

> 	linux-vdso.so.1 =>  (0x00002aaaaaacb000)

> 	libibverbs.so.1 => /home/jsquyres/libibverbs-mtu-patch/lib/libibverbs.so.1 (0x00002aaaaaccd000)

> 	libpthread.so.0 => /lib64/libpthread.so.0 (0x00002aaaaaeed000)

> 	libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab10a000)

> 	libc.so.6 => /lib64/libc.so.6 (0x00002aaaab30e000)

> 	/lib64/ld-linux-x86-64.so.2 (0x00002aaaaaaab000)

> [5:08] dell012:~/libibverbs-HEAD ??? ./bin/ibv_rc_pingpong dell011

>  local address:  LID 0x0004, QPN 0x08004a, PSN 0x65391c, GID ::

>  remote address: LID 0x0019, QPN 0x24004a, PSN 0x7d137e, GID ::

> 8192000 bytes in 0.02 seconds = 4163.39 Mbit/sec

> 1000 iters in 0.02 seconds = 15.74 usec/iter

> -----

> 

> Still works fine.



-- 
Jeff Squyres
jsquyres@cisco.com
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/
Hefty, Sean July 16, 2013, 8:04 a.m. UTC | #8
> > Jeff's patch doesn't break old binaries, old binaries, running with

> > normal IB MTUs work fine. The structure layouts all stay the same,

> > etc.

> 

> 

> FWIW, I did a simple test to confirm this.  I installed a stock git HEAD

> libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in

> $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I

> used some fairly old Mellanox HCAs+Dell servers for this test).


This creates a bit of pain for anyone (like, hypothetically, say, for example, me) who wants to release source that supports both the older and newer version of libibverbs.
Jason Gunthorpe July 16, 2013, 2:47 p.m. UTC | #9
On Tue, Jul 16, 2013 at 08:04:08AM +0000, Hefty, Sean wrote:
> > > Jeff's patch doesn't break old binaries, old binaries, running with
> > > normal IB MTUs work fine. The structure layouts all stay the same,
> > > etc.
> > 
> > 
> > FWIW, I did a simple test to confirm this.  I installed a stock git HEAD
> > libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in
> > $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I
> > used some fairly old Mellanox HCAs+Dell servers for this test).
> 
> This creates a bit of pain for anyone (like, hypothetically, say,
> for example, me) who wants to release source that supports both the
> older and newer version of libibverbs.

But it is so minor. An autoconf test for the ibv_mtu_to_num function
in the headers. If it is not defined then define your own in your
private headers. All the other source remains the same, the only
change is you must call the conversion functions.

A source change is completely unvaoidable. Supporting the new MTU
values requires updated source.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 16, 2013, 5:11 p.m. UTC | #10
On Jul 16, 2013, at 10:47 AM, Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:

> A source change is completely unvaoidable. Supporting the new MTU
> values requires updated source.


I don't really care one way or the other; I'll submit whatever patch people want.  :-)

But FWIW, I tend to believe the Doug/Jason position:

- MTU really needs to be a plain integer (not an enum)
- forcing application source change/adaptation is the safest way to move forward
- doing it this way preserves ABI, so existing binaries are safe
Roland Dreier July 17, 2013, 12:16 a.m. UTC | #11
On Tue, Jul 16, 2013 at 10:11 AM, Jeff Squyres (jsquyres)
<jsquyres@cisco.com> wrote:
> - doing it this way preserves ABI, so existing binaries are safe

I still don't get this.  Wouldn't an existing binary be pretty
surprised to get a value wildly out of range of the enum?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean July 17, 2013, 4:06 a.m. UTC | #12
> > A source change is completely unvaoidable. Supporting the new MTU
> > values requires updated source.
> 
> 
> I don't really care one way or the other; I'll submit whatever patch people
> want.  :-)
> 
> But FWIW, I tend to believe the Doug/Jason position:
> 
> - MTU really needs to be a plain integer (not an enum)
> - forcing application source change/adaptation is the safest way to move
> forward
> - doing it this way preserves ABI, so existing binaries are safe

I don't remember.  Is it known how the mtu is communicated with the kernel?  Looking at kern-abi.h, the mtu fields are:

struct ibv_query_port_resp {
	__u8  max_mtu;
	__u8  active_mtu;

struct ibv_kern_qp_attr {
	__u32	path_mtu;

struct ibv_query_qp_resp {
	__u8  path_mtu;

struct ibv_modify_qp {
	__u8  path_mtu;

In most cases, we only have 8 bits available to/from the kernel.  (There are at least 16 bits of reserved space in these structures.)

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford July 17, 2013, 5:57 p.m. UTC | #13
On 07/16/2013 08:16 PM, Roland Dreier wrote:
> On Tue, Jul 16, 2013 at 10:11 AM, Jeff Squyres (jsquyres)
> <jsquyres@cisco.com> wrote:
>> - doing it this way preserves ABI, so existing binaries are safe
> 
> I still don't get this.  Wouldn't an existing binary be pretty
> surprised to get a value wildly out of range of the enum?

Yes, but there's no way around that without simply lying about the MTU.
 So, the argument was made in the thread that historically, applications
have had to be modified when moved to a new link layer (aka, iWARP meant
IB apps had to be slightly modified for connection reasons, RoCE again
required some slight app modifications, etc) so this was seen as a case
of "the app will work on fabrics it already knows about, and will only
get confused if moved to this new fabric, and in that case, the app
needs to be modified anyway, so that's acceptable breakage for keeping
the apps working the rest of the time".  That was the argument anyway.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 17, 2013, 6:12 p.m. UTC | #14
On Jul 17, 2013, at 12:06 AM, "Hefty, Sean" <sean.hefty@intel.com> wrote:

> I don't remember.  Is it known how the mtu is communicated with the kernel?  

I hadn't looked at the kernel side yet; I was waiting for the userspace side to sort itself out first.

> Looking at kern-abi.h, the mtu fields are:
> 
> struct ibv_query_port_resp {
> 	__u8  max_mtu;
> 	__u8  active_mtu;
> 
> struct ibv_kern_qp_attr {
> 	__u32	path_mtu;
> 
> struct ibv_query_qp_resp {
> 	__u8  path_mtu;
> 
> struct ibv_modify_qp {
> 	__u8  path_mtu;
> 
> In most cases, we only have 8 bits available to/from the kernel.  (There are at least 16 bits of reserved space in these structures.)

Hmm.  16 bits is probably enough for the MTU values, but still, changing kern-abi.h will be problematic from an ABI perspective.  Do people care about the kernel ABI, or is that mainly a userspace issue?
Hefty, Sean July 17, 2013, 9:41 p.m. UTC | #15
> I hadn't looked at the kernel side yet; I was waiting for the userspace side to
> sort itself out first.

I think it makes sense to start with how user space can get the data.  Without eating up reserved fields, we're starting with 8 bit values.

> Hmm.  16 bits is probably enough for the MTU values, but still, changing kern-
> abi.h will be problematic from an ABI perspective.  Do people care about the
> kernel ABI, or is that mainly a userspace issue?

Well, we definitely care about the kernel to user ABI.

I can't imagine that we're dealing with more than a handful of actual MTU values.  Maybe the simplest thing is to extend the mtu enum to include what new values are needed, plus add a function to convert it.  (Can we call mulligan?)

I don't know how iwarp handles this.  Does it just report the wrong mtu, since it doesn't necessarily matter?  Steve - any idea here?

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise July 17, 2013, 9:44 p.m. UTC | #16
On 7/17/2013 4:41 PM, Hefty, Sean wrote:
>> I hadn't looked at the kernel side yet; I was waiting for the userspace side to
>> sort itself out first.
> I think it makes sense to start with how user space can get the data.  Without eating up reserved fields, we're starting with 8 bit values.
>
>> Hmm.  16 bits is probably enough for the MTU values, but still, changing kern-
>> abi.h will be problematic from an ABI perspective.  Do people care about the
>> kernel ABI, or is that mainly a userspace issue?
> Well, we definitely care about the kernel to user ABI.
>
> I can't imagine that we're dealing with more than a handful of actual MTU values.  Maybe the simplest thing is to extend the mtu enum to include what new values are needed, plus add a function to convert it.  (Can we call mulligan?)
>
> I don't know how iwarp handles this.  Does it just report the wrong mtu, since it doesn't necessarily matter?  Steve - any idea here?

The iwarp drivers just report the nearest mtu enum.  Apps don't need it 
for iwarp like they do for ib.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 18, 2013, 2:32 a.m. UTC | #17
On Jul 17, 2013, at 5:44 PM, Steve Wise <swise@opengridcomputing.com> wrote:

> The iwarp drivers just report the nearest mtu enum.  Apps don't need it for iwarp like they do for ib.


For RC, it doesn't matter much.  So the fact that RoCE and iWARP lie about their MTU isn't a huge deal.  It's wrong, but it doesn't matter much.

We need it for UD for our upcoming device, however, because the MTU is the only way to get the max message size.
Jason Gunthorpe July 18, 2013, 4:50 p.m. UTC | #18
On Thu, Jul 18, 2013 at 02:32:05AM +0000, Jeff Squyres (jsquyres) wrote:
> On Jul 17, 2013, at 5:44 PM, Steve Wise <swise@opengridcomputing.com> wrote:
> 
> > The iwarp drivers just report the nearest mtu enum.  Apps don't need it for iwarp like they do for ib.

> For RC, it doesn't matter much.  So the fact that RoCE and iWARP lie
> about their MTU isn't a huge deal.  It's wrong, but it doesn't
> matter much.
> 
> We need it for UD for our upcoming device, however, because the MTU
> is the only way to get the max message size.

.. and UD is the least abstracted transport, so existing apps won't
support Jeff's new NIC anyhow, MTU is the least of their problems.

Existing apps with existing transports see the same old values.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 23, 2013, 1:26 p.m. UTC | #19
On Jul 18, 2013, at 12:50 PM, Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:

>> We need it for UD for our upcoming device, however, because the MTU
>> is the only way to get the max message size.
> 
> .. and UD is the least abstracted transport, so existing apps won't
> support Jeff's new NIC anyhow, MTU is the least of their problems.
> 
> Existing apps with existing transports see the same old values.


...so how do we move forward?
Jeff Squyres July 30, 2013, 12:59 p.m. UTC | #20
On Jul 23, 2013, at 9:26 AM, Jeff Squyres (jsquyres) <jsquyres@cisco.com> wrote:

>> .. and UD is the least abstracted transport, so existing apps won't
>> support Jeff's new NIC anyhow, MTU is the least of their problems.
>> 
>> Existing apps with existing transports see the same old values.


Bump.
Christoph Lameter (Ampere) July 30, 2013, 4:44 p.m. UTC | #21
On Mon, 15 Jul 2013, Jeff Squyres (jsquyres) wrote:

> Bump.

What in the world does that mean? I am an oldtimer I guess. Seems that
this is something that can be done in the newfangled forum? How does this
affect mailing lists?

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Squyres July 30, 2013, 5:21 p.m. UTC | #22
On Jul 30, 2013, at 12:44 PM, Christoph Lameter <cl@gentwo.org> wrote:

> What in the world does that mean? I am an oldtimer I guess. Seems that
> this is something that can be done in the newfangled forum? How does this
> affect mailing lists?


I'm not sure what you're asking me; please see the prior posts on this thread that describes the MTU issue and why we still need a solution.
Christoph Lameter (Ampere) July 30, 2013, 6:31 p.m. UTC | #23
On Tue, 30 Jul 2013, Jeff Squyres (jsquyres) wrote:

> On Jul 30, 2013, at 12:44 PM, Christoph Lameter <cl@gentwo.org> wrote:
>
> > What in the world does that mean? I am an oldtimer I guess. Seems that
> > this is something that can be done in the newfangled forum? How does this
> > affect mailing lists?
>
>
> I'm not sure what you're asking me; please see the prior posts on this
> thread that describes the MTU issue and why we still need a solution.

What does "bump" mean? You keep sending replies that just says "bump".
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Atchley, Scott July 30, 2013, 6:45 p.m. UTC | #24
On Jul 30, 2013, at 2:31 PM, Christoph Lameter <cl@gentwo.org> wrote:

> On Tue, 30 Jul 2013, Jeff Squyres (jsquyres) wrote:
> 
>> On Jul 30, 2013, at 12:44 PM, Christoph Lameter <cl@gentwo.org> wrote:
>> 
>>> What in the world does that mean? I am an oldtimer I guess. Seems that
>>> this is something that can be done in the newfangled forum? How does this
>>> affect mailing lists?
>> 
>> 
>> I'm not sure what you're asking me; please see the prior posts on this
>> thread that describes the MTU issue and why we still need a solution.
> 
> What does "bump" mean? You keep sending replies that just says "bump".

http://en.wikipedia.org/wiki/Internet_forum#Thread

"When a member posts in a thread it will jump to the top since it is the latest updated thread. Similarly, other threads will jump in front of it when they receive posts. When a member posts in a thread for no reason but to have it go to the top, it is referred to as a bump or bumping."

He is trying to bring it back to everyone's attention.

Scott

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford July 30, 2013, 6:47 p.m. UTC | #25
On 07/30/2013 02:45 PM, Atchley, Scott wrote:
> http://en.wikipedia.org/wiki/Internet_forum#Thread
> 
> "When a member posts in a thread it will jump to the top since it is
> the latest updated thread. Similarly, other threads will jump in
> front of it when they receive posts. When a member posts in a thread
> for no reason but to have it go to the top, it is referred to as a
> bump or bumping."
> 
> He is trying to bring it back to everyone's attention.

Exactly.  On a mailing list like this, it's shorthand for "You seem to
have dropped this on the floor, let me resend it to you".

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier Aug. 5, 2013, 6:48 p.m. UTC | #26
On Wed, Jul 17, 2013 at 10:57 AM, Doug Ledford <dledford@redhat.com> wrote:
>>> - doing it this way preserves ABI, so existing binaries are safe

>> I still don't get this.  Wouldn't an existing binary be pretty
>> surprised to get a value wildly out of range of the enum?

> Yes, but there's no way around that without simply lying about the MTU.
>  So, the argument was made in the thread that historically, applications
> have had to be modified when moved to a new link layer (aka, iWARP meant
> IB apps had to be slightly modified for connection reasons, RoCE again
> required some slight app modifications, etc) so this was seen as a case
> of "the app will work on fabrics it already knows about, and will only
> get confused if moved to this new fabric, and in that case, the app
> needs to be modified anyway, so that's acceptable breakage for keeping
> the apps working the rest of the time".  That was the argument anyway.

So I've been thinking about this for a while and this doesn't seem
like the right tradeoff to me.  If we break source compatibility, then
everyone needs to add some annoying #ifdef-ery (and configure tests
etc), even if they don't care about this new fabric.  And by *not*
breaking binary compatibility, we leave the risk of old binaries
misbehaving.

Wouldn't the following be a better path forward:

 - Add a new API (ibv_get_max_datagram_size() or some such) that
returns the real value as an int, based on the true MTU
 - Have old query verbs continue to return only old MTU values, but
deprecate that field with the idea of removing it in a few years

We can implement this however is most convenient.  If we want to
preserve the kernel-user ABI (though it's not clear how to do that)
then we can have code in libibverbs that implements all the necessary
compatibility that makes sure the MTU enum field is one of the old
enums, etc.  Adding a new function to libibverbs is easy and safe, and
the only case where things go wrong (if we add compatibility code in
libibverbs) is a new kernel with a new device, but old libibverbs.

But as Sean pointed out, it's not clear how we expect to return
integer MTUs via the existing kernel-user ABI anyway, and we should
really at least figure that out before we risk breaking userspace for
no good reason.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Aug. 5, 2013, 7:02 p.m. UTC | #27
On Mon, Aug 05, 2013 at 11:48:20AM -0700, Roland Dreier wrote:
> On Wed, Jul 17, 2013 at 10:57 AM, Doug Ledford <dledford@redhat.com> wrote:
> >>> - doing it this way preserves ABI, so existing binaries are safe
> 
> >> I still don't get this.  Wouldn't an existing binary be pretty
> >> surprised to get a value wildly out of range of the enum?
> 
> > Yes, but there's no way around that without simply lying about the MTU.
> >  So, the argument was made in the thread that historically, applications
> > have had to be modified when moved to a new link layer (aka, iWARP meant
> > IB apps had to be slightly modified for connection reasons, RoCE again
> > required some slight app modifications, etc) so this was seen as a case
> > of "the app will work on fabrics it already knows about, and will only
> > get confused if moved to this new fabric, and in that case, the app
> > needs to be modified anyway, so that's acceptable breakage for keeping
> > the apps working the rest of the time".  That was the argument anyway.
> 
> So I've been thinking about this for a while and this doesn't seem
> like the right tradeoff to me.  If we break source compatibility, then
> everyone needs to add some annoying #ifdef-ery (and configure tests
> etc), even if they don't care about this new fabric.  And by *not*
> breaking binary compatibility, we leave the risk of old binaries
> misbehaving.

It isn't that hard. One ifdef and configure test to provide the two
conversion functions for old verbs completely takes care of it.

Most apps never touch that field anyhow.

> Wouldn't the following be a better path forward:
> 
>  - Add a new API (ibv_get_max_datagram_size() or some such) that
> returns the real value as an int, based on the true MTU
>  - Have old query verbs continue to return only old MTU values, but
> deprecate that field with the idea of removing it in a few years

It isn't that easy, one API certainly doesn't cover it. The MTU is in
two structs:

struct ibv_port_attr
struct ibv_qp_attr

Which touch ibv_query_port, ibv_modify_qp and ibv_query_qp. All of
which need to be changed, and you can't change the _qp functions just
by adding a new call.

> But as Sean pointed out, it's not clear how we expect to return
> integer MTUs via the existing kernel-user ABI anyway, and we should
> really at least figure that out before we risk breaking userspace for
> no good reason.

It would be good to understand this, but maybe it is just a new field
in a bigger structure via the extendable call mechanism like Or has
been working on..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier Aug. 5, 2013, 8:06 p.m. UTC | #28
On Mon, Aug 5, 2013 at 12:02 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
>> Wouldn't the following be a better path forward:
>>
>>  - Add a new API (ibv_get_max_datagram_size() or some such) that
>> returns the real value as an int, based on the true MTU
>>  - Have old query verbs continue to return only old MTU values, but
>> deprecate that field with the idea of removing it in a few years

> It isn't that easy, one API certainly doesn't cover it. The MTU is in
> two structs:

> struct ibv_port_attr
> struct ibv_qp_attr

> Which touch ibv_query_port, ibv_modify_qp and ibv_query_qp. All of
> which need to be changed, and you can't change the _qp functions just
> by adding a new call.

Well, for full generality I agree we would have to handle the QP
stuff.  But for now at least the only QPs that need (or allow) MTU to
be set are connected (RC, UC and XRC) QPs.  The current motivation for
allowing extended MTU is for datagrams.  So strictly speaking we only
need to deal with the query port verb.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Aug. 5, 2013, 10:22 p.m. UTC | #29
On Mon, Aug 05, 2013 at 01:06:36PM -0700, Roland Dreier wrote:
> On Mon, Aug 5, 2013 at 12:02 PM, Jason Gunthorpe
> <jgunthorpe@obsidianresearch.com> wrote:
> >> Wouldn't the following be a better path forward:
> >>
> >>  - Add a new API (ibv_get_max_datagram_size() or some such) that
> >> returns the real value as an int, based on the true MTU
> >>  - Have old query verbs continue to return only old MTU values, but
> >> deprecate that field with the idea of removing it in a few years
> 
> > It isn't that easy, one API certainly doesn't cover it. The MTU is in
> > two structs:
> 
> > struct ibv_port_attr
> > struct ibv_qp_attr
> 
> > Which touch ibv_query_port, ibv_modify_qp and ibv_query_qp. All of
> > which need to be changed, and you can't change the _qp functions just
> > by adding a new call.
> 
> Well, for full generality I agree we would have to handle the QP
> stuff.  But for now at least the only QPs that need (or allow) MTU to
> be set are connected (RC, UC and XRC) QPs.  The current motivation for
> allowing extended MTU is for datagrams.  So strictly speaking we only
> need to deal with the query port verb.

Okay, but if you want to narrow things to just be Jeff's application
like that, then do we even need this to be part of verbs?

IP path MTU should be discovered by a netlink routing query to lookup
the target IP(s), the device maximum should not really be used by
apps..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier Aug. 5, 2013, 10:35 p.m. UTC | #30
On Mon, Aug 5, 2013 at 3:22 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> Okay, but if you want to narrow things to just be Jeff's application
> like that, then do we even need this to be part of verbs?
>
> IP path MTU should be discovered by a netlink routing query to lookup
> the target IP(s), the device maximum should not really be used by
> apps..

Jeff's case is not an IP transport.  Also seems a bit unfriendly to
make apps that just want to use verbs to have to figure out which
netdev to ask about.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Aug. 5, 2013, 10:43 p.m. UTC | #31
On Mon, Aug 05, 2013 at 03:35:22PM -0700, Roland Dreier wrote:
> On Mon, Aug 5, 2013 at 3:22 PM, Jason Gunthorpe
> <jgunthorpe@obsidianresearch.com> wrote:
> > Okay, but if you want to narrow things to just be Jeff's application
> > like that, then do we even need this to be part of verbs?
> >
> > IP path MTU should be discovered by a netlink routing query to lookup
> > the target IP(s), the device maximum should not really be used by
> > apps..
> 
> Jeff's case is not an IP transport.  

Oh, I'm confused then..

> Also seems a bit unfriendly to make apps that just want to use verbs
> to have to figure out which netdev to ask about.

Right, but we solved that by hiding that in the RDMA CM for IB cases,
presumably other cases using IP would need a similar shim.

But.. wait, if it isn't IP, then aren't there going to be other
changes required for a new address family? Will the device struct and
QP struct survive that unchanged? eg, can we roll the MTU change with
other required changes?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Makefile.am b/Makefile.am
index 40e83be..1159e55 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -54,7 +54,8 @@  man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1	\
     man/ibv_post_srq_recv.3 man/ibv_query_device.3 man/ibv_query_gid.3	\
     man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3	\
     man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3		\
-    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3
+    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3  \
+    man/ibv_mtu_to_num.3
 
 DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
     debian/ibverbs-utils.install debian/libibverbs1.install \
diff --git a/examples/devinfo.c b/examples/devinfo.c
index ff078e4..e8fb27e 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -111,18 +111,6 @@  static const char *atomic_cap_str(enum ibv_atomic_cap atom_cap)
 	}
 }
 
-static const char *mtu_str(enum ibv_mtu max_mtu)
-{
-	switch (max_mtu) {
-	case IBV_MTU_256:  return "256";
-	case IBV_MTU_512:  return "512";
-	case IBV_MTU_1024: return "1024";
-	case IBV_MTU_2048: return "2048";
-	case IBV_MTU_4096: return "4096";
-	default:           return "invalid MTU";
-	}
-}
-
 static const char *width_str(uint8_t width)
 {
 	switch (width) {
@@ -301,10 +289,10 @@  static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		printf("\t\tport:\t%d\n", port);
 		printf("\t\t\tstate:\t\t\t%s (%d)\n",
 		       port_state_str(port_attr.state), port_attr.state);
-		printf("\t\t\tmax_mtu:\t\t%s (%d)\n",
-		       mtu_str(port_attr.max_mtu), port_attr.max_mtu);
-		printf("\t\t\tactive_mtu:\t\t%s (%d)\n",
-		       mtu_str(port_attr.active_mtu), port_attr.active_mtu);
+		printf("\t\t\tmax_mtu:\t\t%d (%d)\n",
+		       ibv_mtu_to_num(port_attr.max_mtu), port_attr.max_mtu.mtu);
+		printf("\t\t\tactive_mtu:\t\t%d (%d)\n",
+			ibv_mtu_to_num(port_attr.active_mtu), port_attr.active_mtu.mtu);
 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index 90732ef..d1c22c9 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -36,18 +36,6 @@ 
 #include <stdio.h>
 #include <string.h>
 
-enum ibv_mtu pp_mtu_to_enum(int mtu)
-{
-	switch (mtu) {
-	case 256:  return IBV_MTU_256;
-	case 512:  return IBV_MTU_512;
-	case 1024: return IBV_MTU_1024;
-	case 2048: return IBV_MTU_2048;
-	case 4096: return IBV_MTU_4096;
-	default:   return -1;
-	}
-}
-
 uint16_t pp_get_local_lid(struct ibv_context *context, int port)
 {
 	struct ibv_port_attr attr;
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 9cdc03e..91d217b 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -35,7 +35,6 @@ 
 
 #include <infiniband/verbs.h>
 
-enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
 int pp_get_port_info(struct ibv_context *context, int port,
 		     struct ibv_port_attr *attr);
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index 15494a1..a7e1836 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -78,7 +78,7 @@  struct pingpong_dest {
 };
 
 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
-			  enum ibv_mtu mtu, int sl,
+			  struct ibv_mtu_t mtu, int sl,
 			  struct pingpong_dest *dest, int sgid_idx)
 {
 	struct ibv_qp_attr attr = {
@@ -209,7 +209,7 @@  out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, struct ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -547,7 +547,7 @@  int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
 	int                      rx_depth = 500;
 	int                      iters = 1000;
 	int                      use_event = 0;
@@ -608,8 +608,8 @@  int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
-			if (mtu < 0) {
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
+			if (mtu.mtu < 0) {
 				usage(argv[0]);
 				return 1;
 			}
diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
index 6e00f8c..9173274 100644
--- a/examples/srq_pingpong.c
+++ b/examples/srq_pingpong.c
@@ -81,7 +81,7 @@  struct pingpong_dest {
 	union ibv_gid gid;
 };
 
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, struct ibv_mtu_t mtu,
 			  int sl, const struct pingpong_dest *my_dest,
 			  const struct pingpong_dest *dest, int sgid_idx)
 {
@@ -229,7 +229,7 @@  out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, struct ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -620,7 +620,7 @@  int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
 	int                      num_qp = 16;
 	int                      rx_depth = 500;
 	int                      iters = 1000;
@@ -685,8 +685,8 @@  int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
-			if (mtu < 0) {
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
+			if (mtu.mtu < 0) {
 				usage(argv[0]);
 				return 1;
 			}
diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
index 52c6c28..6b6bc85 100644
--- a/examples/uc_pingpong.c
+++ b/examples/uc_pingpong.c
@@ -78,7 +78,7 @@  struct pingpong_dest {
 };
 
 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
-			  enum ibv_mtu mtu, int sl,
+			  struct ibv_mtu_t mtu, int sl,
 			  struct pingpong_dest *dest, int sgid_idx)
 {
 	struct ibv_qp_attr attr = {
@@ -197,7 +197,7 @@  out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, struct ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -535,7 +535,7 @@  int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
 	int                      rx_depth = 500;
 	int                      iters = 1000;
 	int                      use_event = 0;
@@ -596,8 +596,8 @@  int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
-			if (mtu < 0) {
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
+			if (mtu.mtu < 0) {
 				usage(argv[0]);
 				return 1;
 			}
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 21c551d..5a0656f 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -332,7 +332,7 @@  static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 			fprintf(stderr, "Unable to query port info for port %d\n", port);
 			goto clean_device;
 		}
-		mtu = 1 << (port_info.active_mtu + 7);
+		mtu = ibv_mtu_to_num(port_info.active_mtu);
 		if (size > mtu) {
 			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
 			goto clean_device;
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 4b1ab57..0545a8c 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -144,6 +144,10 @@  struct ibv_device_attr {
 	uint8_t			phys_port_cnt;
 };
 
+/*
+ * Symbolic enum names for MTU values are preserved for backwards
+ * compatibility.
+ */
 enum ibv_mtu {
 	IBV_MTU_256  = 1,
 	IBV_MTU_512  = 2,
@@ -152,6 +156,16 @@  enum ibv_mtu {
 	IBV_MTU_4096 = 5
 };
 
+/*
+ * ibv_mtu_t is an encoded integer type that represents an MTU value.
+ * If the value is <= IBV_MTU_4096, it is treated as one of the
+ * IBV_MTU_* enum values.  Otherwise, it is treated as its integer
+ * value.
+ */
+struct ibv_mtu_t {
+	int mtu;
+};
+
 enum ibv_port_state {
 	IBV_PORT_NOP		= 0,
 	IBV_PORT_DOWN		= 1,
@@ -169,8 +183,8 @@  enum {
 
 struct ibv_port_attr {
 	enum ibv_port_state	state;
-	enum ibv_mtu		max_mtu;
-	enum ibv_mtu		active_mtu;
+	struct ibv_mtu_t	max_mtu;
+	struct ibv_mtu_t	active_mtu;
 	int			gid_tbl_len;
 	uint32_t		port_cap_flags;
 	uint32_t		max_msg_sz;
@@ -485,7 +499,7 @@  enum ibv_mig_state {
 struct ibv_qp_attr {
 	enum ibv_qp_state	qp_state;
 	enum ibv_qp_state	cur_qp_state;
-	enum ibv_mtu		path_mtu;
+	struct ibv_mtu_t	path_mtu;
 	enum ibv_mig_state	path_mig_state;
 	uint32_t		qkey;
 	uint32_t		rq_psn;
@@ -1138,6 +1152,47 @@  const char *ibv_port_state_str(enum ibv_port_state port_state);
  */
 const char *ibv_event_type_str(enum ibv_event_type event);
 
+/**
+ * num_to_ibv_mtu - Convert an integer to its corresponding encoded
+ * ibv_mtu_t value.  If an integer value corresponding to an IBV_MTU_*
+ * enum value is passed, return the enum value (e.g., 1024 ->
+ * IBV_MTU_1024).  Otherwise, just return the value (e.g., 1500 ->
+ * 1500).
+ */
+static inline struct ibv_mtu_t num_to_ibv_mtu(int num)
+{
+	struct ibv_mtu_t mtu;
+
+	switch (num) {
+	case 256:  mtu.mtu = IBV_MTU_256;  break;
+	case 512:  mtu.mtu = IBV_MTU_512;  break;
+	case 1024: mtu.mtu = IBV_MTU_1024; break;
+	case 2048: mtu.mtu = IBV_MTU_2048; break;
+	case 4096: mtu.mtu = IBV_MTU_4096; break;
+	default:   mtu.mtu = num;          break;
+	}
+
+	return mtu;
+}
+
+/**
+ * ibv_mtu_to_num - Convert an encoded ibv_mtu_t value to its
+ * corresponding integer value.  If an enum ibv_mtu value is passed,
+ * return its integer value (e.g., IBV_MTU_1024 -> 1024).  Otherwise,
+ * just return the value (e.g., 1500 -> 1500).
+ */
+static inline int ibv_mtu_to_num(struct ibv_mtu_t mtu)
+{
+	switch (mtu.mtu) {
+	case IBV_MTU_256:  return 256;
+	case IBV_MTU_512:  return 512;
+	case IBV_MTU_1024: return 1024;
+	case IBV_MTU_2048: return 2048;
+	case IBV_MTU_4096: return 4096;
+	default:           return mtu.mtu;
+	}
+}
+
 END_C_DECLS
 
 #  undef __attribute_const
diff --git a/man/ibv_modify_qp.3 b/man/ibv_modify_qp.3
index cb3faaa..dd93674 100644
--- a/man/ibv_modify_qp.3
+++ b/man/ibv_modify_qp.3
@@ -25,7 +25,7 @@  struct ibv_qp_attr {
 .in +8
 enum ibv_qp_state       qp_state;               /* Move the QP to this state */
 enum ibv_qp_state       cur_qp_state;           /* Assume this is the current QP state */
-enum ibv_mtu            path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
+struct ibv_mtu_t        path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
 enum ibv_mig_state      path_mig_state;         /* Path migration state (valid if HCA supports APM) */
 uint32_t                qkey;                   /* Q_Key for the QP (valid only for UD QPs) */
 uint32_t                rq_psn;                 /* PSN for receive queue (valid only for RC/UC QPs) */
diff --git a/man/ibv_mtu_to_num.3 b/man/ibv_mtu_to_num.3
new file mode 100644
index 0000000..ddb9bd0
--- /dev/null
+++ b/man/ibv_mtu_to_num.3
@@ -0,0 +1,71 @@ 
+.\" -*- nroff -*-
+.\"
+.TH IBV_MTU_TO_NUM 3 2013-06-20 libibverbs "Libibverbs Programmer's Manual"
+.SH "NAME"
+.nf
+ibv_mtu_to_num \- convert encoded MTU value to integer
+.sp
+num_to_ibv_mtu \- convert integer to encoded MTU value
+.SH "SYNOPSIS"
+.nf
+.B #include <infiniband/verbs.h>
+.sp
+.BI "int ibv_mtu_to_num(struct ibv_mtu_t " "mtu" ");
+.sp
+.BI "struct ibv_mtu_t num_to_ibv_mtu(int " "num" ");
+.fi
+.SH "DESCRIPTION"
+.PP
+The
+.I struct ibv_mtu_t
+type is an encoded integer used to represent MTU values in order to
+preserve backwards compatibility.  When the value of an
+.I struct ibv_mtu_t
+variable is <=
+.BR IBV_MTU_4096\fR,
+it is treated as the corresponding
+.B IBV_MTU_*
+enum value.  Otherwise, it is treated as its integer value.
+.PP
+MTU values less than the value of the enum
+.B IBV_MTU_4096
+(i.e., 5) cannot be represented.
+.PP
+.B ibv_mtu_to_num()
+converts the encoded MTU value
+.I mtu
+to a plain integer value.  For example, if
+.I mtu
+contains the value
+.BR IBV_MTU_1024\fR,
+then the value 1024 will be returned.  Likewise, if
+.I mtu
+contains the value 1500, then 1500 will be returned.
+.PP
+.B num_to_ibv_mtu()
+converts the integer
+.I num
+to its corresponding encoded
+.I struct ibv_mtu_t
+value.  For example, if
+.I num
+is 1024, then a
+.I struct ibv_mtu_t
+containing the value
+.B IBV_MTU_1024
+will be returned.  Likewise, if
+.I num
+is 1500, then a
+.I struct ibv_mtu_t
+containing the value 1500 will be returned.
+.SH "RETURN VALUE"
+.B ibv_mtu_to_num()
+returns an integer MTU value.
+.PP
+.B num_to_ibv_mtu()
+returns an encoded MTU value.
+.SH "SEE ALSO"
+.BR ibv_query_port (3)
+.SH "AUTHORS"
+.TP
+Jeff Squyres <jsquyres@cisco.com>
diff --git a/man/ibv_query_port.3 b/man/ibv_query_port.3
index 9bedd90..5620049 100644
--- a/man/ibv_query_port.3
+++ b/man/ibv_query_port.3
@@ -26,8 +26,8 @@  is an ibv_port_attr struct, as defined in <infiniband/verbs.h>.
 struct ibv_port_attr {
 .in +8
 enum ibv_port_state     state;          /* Logical port state */
-enum ibv_mtu            max_mtu;        /* Max MTU supported by port */
-enum ibv_mtu            active_mtu;     /* Actual MTU */
+struct ibv_mtu_t        max_mtu;        /* Max MTU supported by port */
+struct ibv_mtu_t        active_mtu;     /* Actual MTU */
 int                     gid_tbl_len;    /* Length of source GID table */
 uint32_t                port_cap_flags; /* Port capabilities */
 uint32_t                max_msg_sz;     /* Maximum message size */
diff --git a/man/ibv_query_qp.3 b/man/ibv_query_qp.3
index 3893ec8..fbf7ec3 100644
--- a/man/ibv_query_qp.3
+++ b/man/ibv_query_qp.3
@@ -30,7 +30,7 @@  struct ibv_qp_attr {
 .in +8
 enum ibv_qp_state       qp_state;            /* Current QP state */
 enum ibv_qp_state       cur_qp_state;        /* Current QP state - irrelevant for ibv_query_qp */
-enum ibv_mtu            path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
+struct ibv_mtu_t        path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
 enum ibv_mig_state      path_mig_state;      /* Path migration state (valid if HCA supports APM) */
 uint32_t                qkey;                /* Q_Key of the QP (valid only for UD QPs) */
 uint32_t                rq_psn;              /* PSN for receive queue (valid only for RC/UC QPs) */
diff --git a/src/cmd.c b/src/cmd.c
index 9789092..13fac0d 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -180,8 +180,8 @@  int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
 
 	port_attr->state      	   = resp.state;
-	port_attr->max_mtu         = resp.max_mtu;
-	port_attr->active_mtu      = resp.active_mtu;
+	port_attr->max_mtu.mtu     = resp.max_mtu;
+	port_attr->active_mtu.mtu  = resp.active_mtu;
 	port_attr->gid_tbl_len     = resp.gid_tbl_len;
 	port_attr->port_cap_flags  = resp.port_cap_flags;
 	port_attr->max_msg_sz      = resp.max_msg_sz;
@@ -678,7 +678,7 @@  int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	attr->alt_pkey_index                = resp.alt_pkey_index;
 	attr->qp_state                      = resp.qp_state;
 	attr->cur_qp_state                  = resp.cur_qp_state;
-	attr->path_mtu                      = resp.path_mtu;
+	attr->path_mtu.mtu                  = resp.path_mtu;
 	attr->path_mig_state                = resp.path_mig_state;
 	attr->sq_draining                   = resp.sq_draining;
 	attr->max_rd_atomic                 = resp.max_rd_atomic;
@@ -752,7 +752,7 @@  int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	cmd->alt_pkey_index 	 = attr->alt_pkey_index;
 	cmd->qp_state 		 = attr->qp_state;
 	cmd->cur_qp_state 	 = attr->cur_qp_state;
-	cmd->path_mtu 		 = attr->path_mtu;
+	cmd->path_mtu		 = attr->path_mtu.mtu;
 	cmd->path_mig_state 	 = attr->path_mig_state;
 	cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
 	cmd->max_rd_atomic 	 = attr->max_rd_atomic;
diff --git a/src/marshall.c b/src/marshall.c
index 577b4b1..a3595dc 100644
--- a/src/marshall.c
+++ b/src/marshall.c
@@ -59,7 +59,7 @@  void ibv_copy_qp_attr_from_kern(struct ibv_qp_attr *dst,
 				struct ibv_kern_qp_attr *src)
 {
 	dst->cur_qp_state = src->cur_qp_state;
-	dst->path_mtu = src->path_mtu;
+	dst->path_mtu.mtu = src->path_mtu;
 	dst->path_mig_state = src->path_mig_state;
 	dst->qkey = src->qkey;
 	dst->rq_psn = src->rq_psn;