diff mbox

[for-next,2/2] IB/core: Support for CMA multicast join flags

Message ID 1467550074-24061-3-git-send-email-leon@kernel.org (mailing list archive)
State Changes Requested
Headers show

Commit Message

Leon Romanovsky July 3, 2016, 12:47 p.m. UTC
From: Alex Vesker <valex@mellanox.com>

Added UCMA and CMA support for multicast join flags. Flags are
passed using UCMA CM join command previously reserved fields.
Currently supporting two join flags indicating two different
multicast JoinStates:

1. Full Member:
   The initiator creates the Multicast group(MCG) if it wasn't
   previously created, can send Multicast messages to the group
   and receive messages from the MCG.

2. Send Only Full Member:
   The initiator creates the Multicast group(MCG) if it wasn't
   previously created, can send Multicast messages to the group
   but doesn't receive any messages from the MCG.

   IB: Send Only Full Member requires a query of ClassPortInfo
       to determine if SM/SA supports this option. If SM/SA
       doesn't support Send-Only there will be no join request
       sent and an error will be returned.

   ETH: When Send Only Full Member is requested no IGMP join
	will be sent.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Reviewed by: Hal Rosenstock <hal@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/cma.c    | 98 +++++++++++++++++++++++++++++++++++++---
 drivers/infiniband/core/ucma.c   | 18 ++++++--
 include/rdma/ib_sa.h             |  5 ++
 include/rdma/rdma_cm.h           |  4 +-
 include/uapi/rdma/rdma_user_cm.h |  9 +++-
 5 files changed, 122 insertions(+), 12 deletions(-)

Comments

Or Gerlitz July 3, 2016, 1:46 p.m. UTC | #1
On Sun, Jul 3, 2016 at 3:47 PM, Leon Romanovsky <leon@kernel.org> wrote:
> From: Alex Vesker <valex@mellanox.com>
>
> Added UCMA and CMA support for multicast join flags. Flags are
> passed using UCMA CM join command previously reserved fields.
> Currently supporting two join flags indicating two different
> multicast JoinStates:

[...]

>    ETH: When Send Only Full Member is requested no IGMP join
>         will be sent.

what's the use case for the ETH side of things?

>  drivers/infiniband/core/cma.c    | 98 +++++++++++++++++++++++++++++++++++++---
>  drivers/infiniband/core/ucma.c   | 18 ++++++--
>  include/rdma/ib_sa.h             |  5 ++
>  include/rdma/rdma_cm.h           |  4 +-
>  include/uapi/rdma/rdma_user_cm.h |  9 +++-
>  5 files changed, 122 insertions(+), 12 deletions(-)


For the ease/robustness of review for UAPI changes, we have a long
time common practice
to break things like this one to IB core kernel only patch, and one
that deals the user-space
facing things (e.g here ucma.c and rdma_user_cm.h), please do that.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky July 4, 2016, 4:51 a.m. UTC | #2
On Sun, Jul 03, 2016 at 04:46:23PM +0300, Or Gerlitz wrote:
> On Sun, Jul 3, 2016 at 3:47 PM, Leon Romanovsky <leon@kernel.org> wrote:
> > From: Alex Vesker <valex@mellanox.com>
> >
> 
> >  drivers/infiniband/core/cma.c    | 98 +++++++++++++++++++++++++++++++++++++---
> >  drivers/infiniband/core/ucma.c   | 18 ++++++--
> >  include/rdma/ib_sa.h             |  5 ++
> >  include/rdma/rdma_cm.h           |  4 +-
> >  include/uapi/rdma/rdma_user_cm.h |  9 +++-
> >  5 files changed, 122 insertions(+), 12 deletions(-)
> 
> 
> For the ease/robustness of review for UAPI changes, we have a long
> time common practice
> to break things like this one to IB core kernel only patch, and one
> that deals the user-space

Or,
You are right, this practice exists and we are following it as much as it makes sense.
This specific case doesn't need to be separated, because it introduces one logical
change and separate patches will be useless as standalone patches.

All properties which we are looking in the patch (correct logic, bisectable,
buildable and easy reviewable) exist in this patch.

> facing things (e.g here ucma.c and rdma_user_cm.h), please do that.

We will be happy to hear other feedback as well.

Thanks.

> 
> Or.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ad1b1ad..a3c9930 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -68,6 +68,7 @@  MODULE_DESCRIPTION("Generic RDMA CM Agent");
 MODULE_LICENSE("Dual BSD/GPL");
 
 #define CMA_CM_RESPONSE_TIMEOUT 20
+#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
 #define CMA_MAX_CM_RETRIES 15
 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
 #define CMA_IBOE_PACKET_LIFETIME 18
@@ -162,6 +163,14 @@  struct rdma_bind_list {
 	unsigned short		port;
 };
 
+struct class_port_info_context {
+	struct ib_class_port_info	*class_port_info;
+	struct ib_device		*device;
+	struct completion		done;
+	struct ib_sa_query		*sa_query;
+	u8				port_num;
+};
+
 static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
 			struct rdma_bind_list *bind_list, int snum)
 {
@@ -306,6 +315,7 @@  struct cma_multicast {
 	struct sockaddr_storage	addr;
 	struct kref		mcref;
 	bool			igmp_joined;
+	u8			join_state;
 };
 
 struct cma_work {
@@ -3752,10 +3762,63 @@  static void cma_set_mgid(struct rdma_id_private *id_priv,
 	}
 }
 
+static void cma_query_sa_classport_info_cb(int status,
+					   struct ib_class_port_info *rec,
+					   void *context)
+{
+	struct class_port_info_context *cb_ctx = context;
+
+	WARN_ON(!context);
+
+	if (status || !rec) {
+		pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
+			 cb_ctx->device->name, cb_ctx->port_num, status);
+		goto out;
+	}
+
+	memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
+
+out:
+	complete(&cb_ctx->done);
+}
+
+static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
+				       struct ib_class_port_info *class_port_info)
+{
+	struct class_port_info_context *cb_ctx;
+	int ret;
+
+	cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
+	if (!cb_ctx)
+		return -ENOMEM;
+
+	cb_ctx->device = device;
+	cb_ctx->class_port_info = class_port_info;
+	cb_ctx->port_num = port_num;
+	init_completion(&cb_ctx->done);
+
+	ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
+					     CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
+					     GFP_KERNEL, cma_query_sa_classport_info_cb,
+					     cb_ctx, &cb_ctx->sa_query);
+	if (ret < 0) {
+		pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
+		       device->name, port_num, ret);
+		goto out;
+	}
+
+	wait_for_completion(&cb_ctx->done);
+
+out:
+	kfree(cb_ctx);
+	return ret;
+}
+
 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 				 struct cma_multicast *mc)
 {
 	struct ib_sa_mcmember_rec rec;
+	struct ib_class_port_info class_port_info;
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	ib_sa_comp_mask comp_mask;
 	int ret;
@@ -3774,7 +3837,24 @@  static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 	rec.qkey = cpu_to_be32(id_priv->qkey);
 	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
 	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
-	rec.join_state = 1;
+	rec.join_state = mc->join_state;
+
+	if (rec.join_state == IB_SA_MCMEMBER_REC_JOIN_STATE_SO_FULL_MEMBER) {
+		ret = cma_query_sa_classport_info(id_priv->id.device,
+						  id_priv->id.port_num,
+						  &class_port_info);
+
+		if (ret)
+			return ret;
+
+		if (!(ib_get_cpi_capmask2(&class_port_info) &
+		      IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
+			pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+				"RDMA CM: SM doesn't support Send Only Full Member option\n",
+				id_priv->id.device->name, id_priv->id.port_num);
+			return -EOPNOTSUPP;
+		}
+	}
 
 	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
 		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
@@ -3843,6 +3923,9 @@  static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
 	struct net_device *ndev = NULL;
 	enum ib_gid_type gid_type;
+	bool send_only;
+
+	send_only = mc->join_state == IB_SA_MCMEMBER_REC_JOIN_STATE_SO_FULL_MEMBER;
 
 	if (cma_zero_addr((struct sockaddr *)&mc->addr))
 		return -EINVAL;
@@ -3878,10 +3961,12 @@  static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	if (addr->sa_family == AF_INET) {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
 			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
-			err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
-					    true);
-			if (!err)
-				mc->igmp_joined = true;
+			if (!send_only) {
+				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+						    true);
+				if (!err)
+					mc->igmp_joined = true;
+			}
 		}
 	} else {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -3911,7 +3996,7 @@  out1:
 }
 
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
-			void *context)
+			u8 join_state, void *context)
 {
 	struct rdma_id_private *id_priv;
 	struct cma_multicast *mc;
@@ -3930,6 +4015,7 @@  int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	mc->context = context;
 	mc->id_priv = id_priv;
 	mc->igmp_joined = false;
+	mc->join_state = join_state;
 	spin_lock(&id_priv->lock);
 	list_add(&mc->list, &id_priv->mc_list);
 	spin_unlock(&id_priv->lock);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index c0f3826..15020a8 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -106,6 +106,7 @@  struct ucma_multicast {
 	int			events_reported;
 
 	u64			uid;
+	u8			join_state;
 	struct list_head	list;
 	struct sockaddr_storage	addr;
 };
@@ -1317,12 +1318,20 @@  static ssize_t ucma_process_join(struct ucma_file *file,
 	struct ucma_multicast *mc;
 	struct sockaddr *addr;
 	int ret;
+	u8 join_state;
 
 	if (out_len < sizeof(resp))
 		return -ENOSPC;
 
 	addr = (struct sockaddr *) &cmd->addr;
-	if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+	if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+		return -EINVAL;
+
+	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
+		join_state = IB_SA_MCMEMBER_REC_JOIN_STATE_FULL_MEMBER;
+	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
+		join_state = IB_SA_MCMEMBER_REC_JOIN_STATE_SO_FULL_MEMBER;
+	else
 		return -EINVAL;
 
 	ctx = ucma_get_ctx(file, cmd->id);
@@ -1335,10 +1344,11 @@  static ssize_t ucma_process_join(struct ucma_file *file,
 		ret = -ENOMEM;
 		goto err1;
 	}
-
+	mc->join_state = join_state;
 	mc->uid = cmd->uid;
 	memcpy(&mc->addr, addr, cmd->addr_size);
-	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
+	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
+				  join_state, mc);
 	if (ret)
 		goto err2;
 
@@ -1382,7 +1392,7 @@  static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
 	join_cmd.uid = cmd.uid;
 	join_cmd.id = cmd.id;
 	join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
-	join_cmd.reserved = 0;
+	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
 	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
 
 	return ucma_process_join(file, &join_cmd, out_len);
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 3840416..edec8ee 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -189,6 +189,11 @@  static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *re
 #define IB_SA_MCMEMBER_REC_JOIN_STATE			IB_SA_COMP_MASK(16)
 #define IB_SA_MCMEMBER_REC_PROXY_JOIN			IB_SA_COMP_MASK(17)
 
+#define IB_SA_MCMEMBER_REC_JOIN_STATE_FULL_MEMBER       BIT(0)
+#define IB_SA_MCMEMBER_REC_JOIN_STATE_NON_MEMBER        BIT(1)
+#define IB_SA_MCMEMBER_REC_JOIN_STATE_SO_NON_MEMBER     BIT(2)
+#define IB_SA_MCMEMBER_REC_JOIN_STATE_SO_FULL_MEMBER    BIT(3)
+
 struct ib_sa_mcmember_rec {
 	union ib_gid mgid;
 	union ib_gid port_gid;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index afe44fd..81fb1d1 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -333,11 +333,13 @@  int rdma_disconnect(struct rdma_cm_id *id);
  *   address.
  * @id: Communication identifier associated with the request.
  * @addr: Multicast address identifying the group to join.
+ * @join_state: Multicast JoinState bitmap requested by port.
+ *		Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits.
  * @context: User-defined context associated with the join request, returned
  * to the user through the private_data pointer in multicast events.
  */
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
-			void *context);
+			u8 join_state, void *context);
 
 /**
  * rdma_leave_multicast - Leave the multicast group specified by the given
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 3066718..01923d4 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -244,12 +244,19 @@  struct rdma_ucm_join_ip_mcast {
 	__u32 id;
 };
 
+/* Multicast join flags */
+enum {
+	RDMA_MC_JOIN_FLAG_FULLMEMBER,
+	RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER,
+	RDMA_MC_JOIN_FLAG_RESERVED,
+};
+
 struct rdma_ucm_join_mcast {
 	__u64 response;		/* rdma_ucma_create_id_resp */
 	__u64 uid;
 	__u32 id;
 	__u16 addr_size;
-	__u16 reserved;
+	__u16 join_flags;
 	struct sockaddr_storage addr;
 };