diff mbox

cma: resolve to first active IB port

Message ID CAMGffEkGcxdYutV0cZYAzeu_kZOgTmuqTbMNfnPuFbHj+oT+2g@mail.gmail.com (mailing list archive)
State Superseded
Headers show

Commit Message

Jinpu Wang Dec. 9, 2016, 9:31 a.m. UTC
Hi Hefty,

On Thu, Dec 8, 2016 at 6:47 PM, Hefty, Sean <sean.hefty@intel.com> wrote:
>> > diff --git a/drivers/infiniband/core/cma.c
>> b/drivers/infiniband/core/cma.c
>> > index 2a6fc47..9e46b42 100644
>> > --- a/drivers/infiniband/core/cma.c
>> > +++ b/drivers/infiniband/core/cma.c
>> > @@ -684,6 +684,8 @@ static int cma_resolve_ib_dev(struct
>> > rdma_id_private *id_priv)
>> >              for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
>> >                                 &gid, NULL);
>> >                   i++) {
>> > +                struct ib_port_attr attr;
>> > +
>> >                  if (!memcmp(&gid, dgid, sizeof(gid))) {
>> >                      cma_dev = cur_dev;
>> >                      sgid = gid;
>> > @@ -692,7 +694,9 @@ static int cma_resolve_ib_dev(struct
>> > rdma_id_private *id_priv)
>> >                  }
>> >
>> >                  if (!cma_dev && (gid.global.subnet_prefix ==
>> > -                         dgid->global.subnet_prefix)) {
>> > +                    dgid->global.subnet_prefix) &&
>> > +                    (!ib_query_port(cur_dev->device, p, &attr) &&
>> > +                    attr.state == IB_PORT_ACTIVE)) {
>
> I do have a concern about accessing non-cached port data as part of this call.  Can we cache the port state and use that instead?

Thanks for review.

Sure, I updated the v2 patch as attached, is this what you want?

Note: cma_resolve_loopback does the same thing; I have left it as is
for now, but I can update that part as well if you like.

Comments

Jason Gunthorpe Dec. 9, 2016, 4:38 p.m. UTC | #1
On Fri, Dec 09, 2016 at 10:31:00AM +0100, Jinpu Wang wrote:

> > I do have a concern about accessing non-cached port data as part
> > of this call.  Can we cache the port state and use that instead?
> 
> Thanks for review.
> 
> Sure, I updated the v2 patch as attached, is this what you want?
> 
> Note: in cma_resolve_loopback, it does the same, I leave it like that
> for now, I can also update this part if you like.

I feel like we already have a cache for subnet prefix, or maybe
another patch series added one? Can't remember, sorry.

In any event, this caching should probably live in the core code not
the cma....

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean Dec. 9, 2016, 5:17 p.m. UTC | #2
> > Sure, I updated the v2 patch as attached, is this what you want?
> >
> > Note: in cma_resolve_loopback, it does the same, I leave it like that
> > for now, I can also update this part if you like.
> 
> I feel like we already have a cache for subnet prefix, or maybe
> another patch series added one? Can't remember, sorry.

Yep - see core/cache.c.

Jinpu, port state caching should go here as well.  If you can update the loopback call as well, that would be great.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jinpu Wang Dec. 12, 2016, 8:43 a.m. UTC | #3
On Fri, Dec 9, 2016 at 6:17 PM, Hefty, Sean <sean.hefty@intel.com> wrote:
>> > Sure, I updated the v2 patch as attached, is this what you want?
>> >
>> > Note: in cma_resolve_loopback, it does the same, I leave it like that
>> > for now, I can also update this part if you like.
>>
>> I feel like we already have a cache for subnet prefix, or maybe
>> another patch series added one? Can't remember, sorry.
>
> Yep - see core/cache.c.
>
> Jinpu, port state caching should go here as well.  If you can update the loopback call as well, that would be great.

Thanks Jason and Hefty, good idea!
I will send a patchset that caches the port state in the cache module
and updates the loopback call as well.
diff mbox

Patch

From f890782127b7febb11f90a1733a8220bcc944c47 Mon Sep 17 00:00:00 2001
From: Jack Wang <jinpu.wang@profitbricks.com>
Date: Tue, 6 Dec 2016 09:01:04 +0100
Subject: [PATCH] cma: resolve to first active ib_port

When resolving an address without a source address, the cma core tries to
resolve to a local IB device. The current logic only checks whether the
port has the same subnet_prefix, which is not enough when the default
well-known GID is used; we should also check that the port is active.

v2: cache port_active state in cma_add_one, also register event_handler
    to track port events.

Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
---
 drivers/infiniband/core/cma.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2a6fc47..c7a7689 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -151,10 +151,12 @@  static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
 struct cma_device {
 	struct list_head	list;
 	struct ib_device	*device;
+	struct ib_event_handler event_handler;
 	struct completion	comp;
 	atomic_t		refcount;
 	struct list_head	id_list;
 	enum ib_gid_type	*default_gid_type;
+	int			*port_active;
 };
 
 struct rdma_bind_list {
@@ -692,7 +694,8 @@  static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 				}
 
 				if (!cma_dev && (gid.global.subnet_prefix ==
-						 dgid->global.subnet_prefix)) {
+				    dgid->global.subnet_prefix) &&
+				    cur_dev->port_active[p]) {
 					cma_dev = cur_dev;
 					sgid = gid;
 					id_priv->id.port_num = p;
@@ -4170,17 +4173,43 @@  static struct notifier_block cma_nb = {
 	.notifier_call = cma_netdev_callback
 };
 
+static void cma_event_handler(struct ib_event_handler *handler,
+			      struct ib_event *event)
+{
+	struct cma_device *cma_dev =
+		container_of(handler, typeof(*cma_dev), event_handler);
+	u8 port = event->element.port_num;
+
+	/* we're only interested in port Up/Down events */
+	if ( event->event != IB_EVENT_PORT_ACTIVE &&
+	     event->event != IB_EVENT_PORT_ERR)
+		return;
+
+	/* cache the state of the port */
+	if (event->event == IB_EVENT_PORT_ACTIVE)
+		cma_dev->port_active[port] = 1;
+	else
+		cma_dev->port_active[port] = 0;
+}
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
 	struct rdma_id_private *id_priv;
 	unsigned int i;
 	unsigned long supported_gids = 0;
+	struct ib_port_attr port_attr;
 
 	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
 	if (!cma_dev)
 		return;
 
+	cma_dev->port_active = kmalloc(sizeof (*cma_dev->port_active) *
+				       (device->phys_port_cnt + 1), GFP_KERNEL);
+	if (!cma_dev->port_active) {
+		kfree(cma_dev);
+		return;
+	}
 	cma_dev->device = device;
 	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
 					    sizeof(*cma_dev->default_gid_type),
@@ -4194,12 +4223,21 @@  static void cma_add_one(struct ib_device *device)
 		WARN_ON(!supported_gids);
 		cma_dev->default_gid_type[i - rdma_start_port(device)] =
 			find_first_bit(&supported_gids, BITS_PER_LONG);
+		if (!ib_query_port(cma_dev->device, i, &port_attr)) {
+			cma_dev->port_active[i] =
+				port_attr.state == IB_PORT_ACTIVE ? 1 : 0;
+		} else
+			cma_dev->port_active[i] = 0;
 	}
 
 	init_completion(&cma_dev->comp);
 	atomic_set(&cma_dev->refcount, 1);
 	INIT_LIST_HEAD(&cma_dev->id_list);
 	ib_set_client_data(device, &cma_client, cma_dev);
+	INIT_IB_EVENT_HANDLER(&cma_dev->event_handler, device,
+			      cma_event_handler);
+	if (ib_register_event_handler(&cma_dev->event_handler))
+		pr_warn("fail to register event handler\n");
 
 	mutex_lock(&lock);
 	list_add_tail(&cma_dev->list, &dev_list);
@@ -4269,12 +4307,14 @@  static void cma_remove_one(struct ib_device *device, void *client_data)
 	if (!cma_dev)
 		return;
 
+	ib_unregister_event_handler(&cma_dev->event_handler);
 	mutex_lock(&lock);
 	list_del(&cma_dev->list);
 	mutex_unlock(&lock);
 
 	cma_process_remove(cma_dev);
 	kfree(cma_dev->default_gid_type);
+	kfree(cma_dev->port_active);
 	kfree(cma_dev);
 }
 
-- 
2.7.4