diff mbox

[libmlx4,V3,1/2] Add RoCE IP based addressing support for UD QPs

Message ID 1400405952-14388-2-git-send-email-ogerlitz@r-vnc04.mtr.labs.mlnx (mailing list archive)
State Rejected
Headers show

Commit Message

Or Gerlitz May 18, 2014, 9:39 a.m. UTC
From: Matan Barak <matanb@mellanox.com>

In order to implement IP based addressing for UD QPs, we need a way to
resolve the addresses internally.
The L2 params are passed to the provider driver using an extension verbs
- drv_ibv_create_ah_ex.
libmlx4 gets the extra mac and vid params from libibverbs and sets
mlx4_ah relevant attributes.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 src/mlx4.c  |    1 +
 src/mlx4.h  |    2 +
 src/verbs.c |   85 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 77 insertions(+), 11 deletions(-)

Comments

Jason Gunthorpe May 21, 2014, 8 p.m. UTC | #1
On Sun, May 18, 2014 at 12:39:11PM +0300, Or Gerlitz wrote:
> From: Matan Barak <matanb@mellanox.com>
> 
> In order to implement IP based addressing for UD QPs, we need a way to
> resolve the addresses internally.
> The L2 params are passed to the provider driver using an extension verbs
> - drv_ibv_create_ah_ex.
   ^^^^^^
The name changed

> +struct ibv_ah *mlx4_create_ah_ex(struct ibv_pd *pd,
> +				 struct ibv_ah_attr_ex *attr_ex)
> +{
> +	struct ibv_port_attr port_attr;
> +	struct ibv_ah *ah;
> +	struct mlx4_ah *mah;
> +
> +	if (ibv_query_port(pd->context, attr_ex->port_num, &port_attr))
> +		return NULL;
> +
> +	ah = mlx4_create_ah_common(pd, (struct ibv_ah_attr *)attr_ex,
> +				   port_attr.link_layer);
> +
> +	if (NULL == ah)
> +		return NULL;

I'm seeing a real lack of error reporting here.

My prior question was never answered: Should we be consistently using
errno for functions that return pointers? If yes, you need to make
sure errno is set on these failure paths, and document the possible
values errno can take.

> +	/* check that ll is provided and valid */
> +	if (attr_ex->comp_mask & IBV_AH_ATTR_EX_LL) {
> +		if (ARPHRD_ETHER != attr_ex->ll.sa.sa_family ||
                  ^^^^^^^^^^^
As I mentioned before, ARPHRD_ETHER is not correct for sa_family.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak May 22, 2014, 9:19 a.m. UTC | #2
On 21/5/2014 11:00 PM, Jason Gunthorpe wrote:
> On Sun, May 18, 2014 at 12:39:11PM +0300, Or Gerlitz wrote:
>> From: Matan Barak <matanb@mellanox.com>
>>
>> In order to implement IP based addressing for UD QPs, we need a way to
>> resolve the addresses internally.
>> The L2 params are passed to the provider driver using an extension verbs
>> - drv_ibv_create_ah_ex.
>     ^^^^^^
> The name changed
>

Thanks!

>> +struct ibv_ah *mlx4_create_ah_ex(struct ibv_pd *pd,
>> +				 struct ibv_ah_attr_ex *attr_ex)
>> +{
>> +	struct ibv_port_attr port_attr;
>> +	struct ibv_ah *ah;
>> +	struct mlx4_ah *mah;
>> +
>> +	if (ibv_query_port(pd->context, attr_ex->port_num, &port_attr))
>> +		return NULL;
>> +
>> +	ah = mlx4_create_ah_common(pd, (struct ibv_ah_attr *)attr_ex,
>> +				   port_attr.link_layer);
>> +
>> +	if (NULL == ah)
>> +		return NULL;
>
> I'm seeing a real lack of error reporting here.
>

Correct, the ibv_query_port error should be captured in errno.
In addition, we need to add an error for an incorrect link layer type.

> My prior question was never answered: Should we be consistently using
> errno for functions that return pointers? If yes, you need to make
> sure errno is set on these failure paths, and document the possible
> values errno can take.
>
>> +	/* check that ll is provided and valid */
>> +	if (attr_ex->comp_mask & IBV_AH_ATTR_EX_LL) {
>> +		if (ARPHRD_ETHER != attr_ex->ll.sa.sa_family ||
>                    ^^^^^^^^^^^
> As I mentioned before, ARPHRD_ETHER is not correct for sa_family.
>

I don't want to tie this mechanism to Ethernet. If you prefer in-lining
the data (rather than using a pointer), we could introduce a new type:
a big sockaddr_storage-like buffer with a link_layer type enum.
What do you think?

> Jason
>

Matan
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/src/mlx4.c b/src/mlx4.c
index 1ee0338..a9f3a98 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -205,6 +205,7 @@  static int mlx4_init_context(struct verbs_device *v_device,
 	verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
 	verbs_set_ctx_op(verbs_ctx, create_flow, ibv_cmd_create_flow);
 	verbs_set_ctx_op(verbs_ctx, destroy_flow, ibv_cmd_destroy_flow);
+	verbs_set_ctx_op(verbs_ctx, create_ah_ex, mlx4_create_ah_ex);
 
 	return 0;
 
diff --git a/src/mlx4.h b/src/mlx4.h
index d71450f..3015357 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -431,6 +431,8 @@  struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn);
 int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp);
 void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn);
 struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+struct ibv_ah *mlx4_create_ah_ex(struct ibv_pd *pd,
+				 struct ibv_ah_attr_ex *attr_ex);
 int mlx4_destroy_ah(struct ibv_ah *ah);
 int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
 		   struct mlx4_ah *ah);
diff --git a/src/verbs.c b/src/verbs.c
index 623d576..ccfd678 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -40,6 +40,8 @@ 
 #include <pthread.h>
 #include <errno.h>
 #include <netinet/in.h>
+#include <net/if.h>
+#include <net/if_arp.h>
 
 #include "mlx4.h"
 #include "mlx4-abi.h"
@@ -783,13 +785,11 @@  static int mlx4_resolve_grh_to_l2(struct ibv_pd *pd, struct mlx4_ah *ah,
 	return 0;
 }
 
-struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+static struct ibv_ah *mlx4_create_ah_common(struct ibv_pd *pd,
+					    struct ibv_ah_attr *attr,
+					    uint8_t link_layer)
 {
 	struct mlx4_ah *ah;
-	struct ibv_port_attr port_attr;
-
-	if (ibv_query_port(pd->context, attr->port_num, &port_attr))
-		return NULL;
 
 	ah = malloc(sizeof *ah);
 	if (!ah)
@@ -799,7 +799,7 @@  struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 
 	ah->av.port_pd   = htonl(to_mpd(pd)->pdn | (attr->port_num << 24));
 
-	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+	if (link_layer != IBV_LINK_LAYER_ETHERNET) {
 		ah->av.g_slid = attr->src_path_bits;
 		ah->av.dlid   = htons(attr->dlid);
 		ah->av.sl_tclass_flowlabel = htonl(attr->sl << 28);
@@ -820,13 +820,76 @@  struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
 	}
 
-	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET)
-		if (mlx4_resolve_grh_to_l2(pd, ah, attr)) {
-			free(ah);
-			return NULL;
+	return &ah->ibv_ah;
+}
+
+struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+	struct ibv_ah *ah;
+	struct ibv_port_attr port_attr;
+
+	if (ibv_query_port(pd->context, attr->port_num, &port_attr))
+		return NULL;
+
+	ah = mlx4_create_ah_common(pd, attr, port_attr.link_layer);
+	if (NULL != ah &&
+	    (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET ||
+	    !mlx4_resolve_grh_to_l2(pd, to_mah(ah), attr)))
+		return ah;
+
+	if (ah)
+		free(ah);
+	return NULL;
+}
+
+struct ibv_ah *mlx4_create_ah_ex(struct ibv_pd *pd,
+				 struct ibv_ah_attr_ex *attr_ex)
+{
+	struct ibv_port_attr port_attr;
+	struct ibv_ah *ah;
+	struct mlx4_ah *mah;
+
+	if (ibv_query_port(pd->context, attr_ex->port_num, &port_attr))
+		return NULL;
+
+	ah = mlx4_create_ah_common(pd, (struct ibv_ah_attr *)attr_ex,
+				   port_attr.link_layer);
+
+	if (NULL == ah)
+		return NULL;
+
+	mah = to_mah(ah);
+
+	/* If vlan was given, check that we could use it */
+	if (attr_ex->comp_mask & IBV_AH_ATTR_EX_VID &&
+	    attr_ex->vid <= 0xfff &&
+	    !(attr_ex->comp_mask & IBV_AH_ATTR_EX_LL))
+		goto err;
+
+	/* check that ll is provided and valid */
+	if (attr_ex->comp_mask & IBV_AH_ATTR_EX_LL) {
+		if (ARPHRD_ETHER != attr_ex->ll.sa.sa_family ||
+		    port_attr.link_layer != IBV_LINK_LAYER_ETHERNET)
+			/* mlx4 provider currently only supports Ethernet
+			 * extensions */
+			goto err;
+
+		memcpy(mah->mac, attr_ex->ll.sa.sa_data,
+		       IFHWADDRLEN);
+
+		if (attr_ex->comp_mask & IBV_AH_ATTR_EX_VID &&
+		    attr_ex->vid <= 0xfff) {
+				mah->av.port_pd |= htonl(1 << 29);
+				mah->vlan = attr_ex->vid |
+					((attr_ex->sl & 7) << 13);
 		}
+	}
 
-	return &ah->ibv_ah;
+	return ah;
+
+err:
+	free(ah);
+	return NULL;
 }
 
 int mlx4_destroy_ah(struct ibv_ah *ah)