diff mbox

[3/4] infiniband-diags: properly resolve node guids

Message ID 20110705120815.3cc7d59b.weiny2@llnl.gov (mailing list archive)
State Rejected, archived
Delegated to: Ira Weiny
Headers show

Commit Message

Ira Weiny July 5, 2011, 7:08 p.m. UTC
Signed-off-by: Ira Weiny <weiny2@llnl.gov>
---
 include/ibdiag_common.h |    3 ++
 src/ibdiag_common.c     |   55 +++++++++++++++++++++++++++++++++++++++++++++++
 src/iblinkinfo.c        |    5 +--
 src/ibqueryerrors.c     |    8 ++----
 4 files changed, 63 insertions(+), 8 deletions(-)

Comments

Hal Rosenstock July 8, 2011, 9:42 p.m. UTC | #1
On 7/5/2011 3:08 PM, Ira Weiny wrote:

Can you explain what issue this solves or is it just to move to the SA
way to do this ?

> 
> Signed-off-by: Ira Weiny <weiny2@llnl.gov>
> ---
>  include/ibdiag_common.h |    3 ++
>  src/ibdiag_common.c     |   55 +++++++++++++++++++++++++++++++++++++++++++++++
>  src/iblinkinfo.c        |    5 +--
>  src/ibqueryerrors.c     |    8 ++----
>  4 files changed, 63 insertions(+), 8 deletions(-)
> 
> diff --git a/include/ibdiag_common.h b/include/ibdiag_common.h
> index 69cddfb..e107a43 100644
> --- a/include/ibdiag_common.h
> +++ b/include/ibdiag_common.h
> @@ -126,4 +126,7 @@ void sa_report_err(int status);
>  		comp_mask |= IB_##name##_COMPMASK_##mask; \
>  	}
>  
> +int resolve_node_guid(ib_portid_t *port_id, char *node_guid,
> +			struct ibmad_port *ibmad_port);
> +
>  #endif				/* _IBDIAG_COMMON_H_ */
> diff --git a/src/ibdiag_common.c b/src/ibdiag_common.c
> index 6d03a43..66dc132 100644
> --- a/src/ibdiag_common.c
> +++ b/src/ibdiag_common.c
> @@ -509,3 +509,58 @@ void sa_report_err(int status)
>  	fprintf(stderr, "ERROR: Query result returned 0x%04x, %s%s\n",
>  		status, sm_err_str, sa_err_str);
>  }
> +
> +int resolve_node_guid(ib_portid_t *port_id, char *node_guid,
> +			struct ibmad_port *ibmad_port)
> +{
> +	ib_node_record_t nr;
> +	ib_node_record_t *found_nr;
> +	uint64_t comp_mask = 0;
> +	struct sa_query_result result;
> +	uint64_t guid = strtoull(node_guid, NULL, 0);
> +	bind_handle_t h;
> +	int ret = 0;
> +
> +	if (guid == 0 || guid == ULLONG_MAX)
> +		return -EINVAL;
> +
> +	memset(&nr, 0, sizeof(nr));
> +	CHECK_AND_SET_VAL(guid, 64, 0, nr.node_info.node_guid, NR, NODEGUID);
> +
> +	if (!ibd_timeout)
> +		ibd_timeout = MAD_DEF_TIMEOUT_MS;
> +
> +	h = sa_get_bind_handle();
> +	if (!h)
> +		return -EIO;
> +
> +	ret = sa_query(h, IB_MAD_METHOD_GET_TABLE, IB_SA_ATTR_NODERECORD, 0,
> +			   cl_hton64(comp_mask), 0, &nr, &result);
> +	if (ret) {
> +		fprintf(stderr, "Query SA failed: %s\n", ib_get_err_str(ret));
> +		ret = -EIO;
> +		goto free_bind_handle;
> +	}
> +
> +	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> +		sa_report_err(result.status);
> +		ret = -EIO;
> +		goto free_result;
> +	}
> +
> +	if (result.result_cnt > 1)
> +		fprintf(stderr, "%s resolved to more than one port.\n",
> +			node_guid);

Should the request just be a GET rather than GET_TABLE and avoid this
check ? I don't think multiple nodes can register with same Node GUID,
can they ? Also, I think it makes eliminates this check and the missing
0 check.

> +
> +	found_nr = (ib_node_record_t *)sa_get_query_rec(result.p_result_madw,
> +							0);
> +	guid = cl_ntoh64(found_nr->node_info.port_guid);
> +	ret = ib_resolve_guid_via(port_id, &guid, NULL, ibd_timeout,
> +				  ibmad_port);

If you're avoiding the ib_resolve_portid_str_via routine, maybe also
avoid this here and do the SA PR lookup in sa query style code here ?

-- Hal

> +
> +free_result:
> +	sa_free_result_mad(&result);
> +free_bind_handle:
> +	sa_free_bind_handle(h);
> +	return ret;
> +}
> diff --git a/src/iblinkinfo.c b/src/iblinkinfo.c
> index 8cf755d..2f69acf 100644
> --- a/src/iblinkinfo.c
> +++ b/src/iblinkinfo.c
> @@ -619,9 +619,8 @@ int main(int argc, char **argv)
>  			IBWARN("Failed to resolve %s; attempting full scan\n",
>  			       dr_path);
>  	} else if (guid_str) {
> -		if ((resolved =
> -		     ib_resolve_portid_str_via(&port_id, guid_str, IB_DEST_GUID,
> -					       NULL, ibmad_port)) < 0)
> +		if ((resolved = resolve_node_guid(&port_id, guid_str,
> +						  ibmad_port)) < 0)
>  			IBWARN("Failed to resolve %s; attempting full scan\n",
>  			       guid_str);
>  	}
> diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> index 1bba0e0..4b59c6f 100644
> --- a/src/ibqueryerrors.c
> +++ b/src/ibqueryerrors.c
> @@ -914,11 +914,9 @@ int main(int argc, char **argv)
>  			IBWARN("Failed to resolve %s; attempting full scan",
>  			       dr_path);
>  	} else if (node_guid_str) {
> -		if ((resolved =
> -		     ib_resolve_portid_str_via(&portid, node_guid_str,
> -					       IB_DEST_GUID, ibd_sm_id,
> -					       ibmad_port)) < 0)
> -			IBWARN("Failed to resolve %s; attempting full scan",
> +		if ((resolved = resolve_node_guid(&portid, node_guid_str,
> +						  ibmad_port)) < 0)
> +			IBWARN("Failed to resolve %s; attempting full scan\n",
>  			       node_guid_str);
>  	}
>  

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 8, 2011, 9:50 p.m. UTC | #2
On Fri, Jul 08, 2011 at 05:42:38PM -0400, Hal Rosenstock wrote:

> Should the request just be a GET rather than GET_TABLE and avoid this
> check ? I don't think multiple nodes can register with same Node GUID,
> can they ? Also, I think it makes eliminates this check and the missing
> 0 check.

Multiport HCAs should (and do..) show up with multiple node
records. There is one node record per end port, not per node. This is
why using node GUID as an end port identifier is a bad choice.

However, you could use GET and look at the return code to disambiguate
no records/one record/many records.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hal Rosenstock July 8, 2011, 9:59 p.m. UTC | #3
On 7/8/2011 5:50 PM, Jason Gunthorpe wrote:
> On Fri, Jul 08, 2011 at 05:42:38PM -0400, Hal Rosenstock wrote:
> 
>> Should the request just be a GET rather than GET_TABLE and avoid this
>> check ? I don't think multiple nodes can register with same Node GUID,
>> can they ? Also, I think it makes eliminates this check and the missing
>> 0 check.
> 
> Multiport HCAs should (and do..) show up with multiple node
> records. There is one node record per end port, not per node. This is
> why using node GUID as an end port identifier is a bad choice.

Before this patch, it did used to use the port GUID for this.

> However, you could use GET and look at the return code to disambiguate
> no records/one record/many records.

Yes, that was getting at (and that there was no check for no records
returned with the get table code).

-- Hal

> Jason
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ira Weiny July 8, 2011, 10:29 p.m. UTC | #4
On Fri, 8 Jul 2011 14:59:01 -0700
Hal Rosenstock <hal@dev.mellanox.co.il> wrote:

> On 7/8/2011 5:50 PM, Jason Gunthorpe wrote:
> > On Fri, Jul 08, 2011 at 05:42:38PM -0400, Hal Rosenstock wrote:
> > 
> >> Should the request just be a GET rather than GET_TABLE and avoid this
> >> check ? I don't think multiple nodes can register with same Node GUID,
> >> can they ? Also, I think it makes eliminates this check and the missing
> >> 0 check.
> > 
> > Multiport HCAs should (and do..) show up with multiple node
> > records. There is one node record per end port, not per node. This is
> > why using node GUID as an end port identifier is a bad choice.

It is _not_ a bad choice if you are looking for a "node".

> 
> Before this patch, it did used to use the port GUID for this.

The point of this patch is to do the right thing when the user is requesting a node they want information about.  The next step is to accept NodeDescription and use that from the NodeRecord as a key.

> 
> > However, you could use GET and look at the return code to disambiguate
> > no records/one record/many records.
> 
> Yes, that was getting at (and that there was no check for no records
> returned with the get table code).

Ok, that is a bug.  We should check for no records.

As for multiple records, I left that for a future patch which would query all of those ports.

Ira

> 
> -- Hal
> 
> > Jason
> > 
>
Jason Gunthorpe July 8, 2011, 10:55 p.m. UTC | #5
On Fri, Jul 08, 2011 at 03:29:53PM -0700, Ira Weiny wrote:
> On Fri, 8 Jul 2011 14:59:01 -0700
> Hal Rosenstock <hal@dev.mellanox.co.il> wrote:
> 
> > On 7/8/2011 5:50 PM, Jason Gunthorpe wrote:
> > > On Fri, Jul 08, 2011 at 05:42:38PM -0400, Hal Rosenstock wrote:
> > > 
> > >> Should the request just be a GET rather than GET_TABLE and avoid this
> > >> check ? I don't think multiple nodes can register with same Node GUID,
> > >> can they ? Also, I think it makes eliminates this check and the missing
> > >> 0 check.
> > > 
> > > Multiport HCAs should (and do..) show up with multiple node
> > > records. There is one node record per end port, not per node. This is
> > > why using node GUID as an end port identifier is a bad choice.
> 
> It is _not_ a bad choice if you are looking for a "node".

But very few diags seem to be designed around the idea that they will
operate on a bundle of end ports (eg a node), they tend to be one end
port only, so asking for a "node" is nonsense. What does it mean?
Operate on a random end port of that node? All end ports? Just fail
with error?

I don't like this trend to make node GUID the default GUID input
format for diags. FWIW, ibtool consistently uses port GUID as the
default GUID type for all end port specifications.

> > Before this patch, it did used to use the port GUID for this.
> 
> The point of this patch is to do the right thing when the user is
> requesting a node they want information about.  The next step is to
> accept NodeDescription and use that from the NodeRecord as a key.

Well, it looks like this is a bug fix for both iblinkinfo and
ibqueryerrors, eg they seem to be documented to accept node GUIDs but
were treating them as port GUIDs in one place and node GUIDs in
another. Though please update the -S section of the man page for
iblinkinfo to reflect the GUID is a node GUID..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hal Rosenstock July 11, 2011, 7:02 p.m. UTC | #6
Ira,

On 7/8/2011 6:29 PM, Ira Weiny wrote:
> On Fri, 8 Jul 2011 14:59:01 -0700
> Hal Rosenstock <hal@dev.mellanox.co.il> wrote:
> 
>> On 7/8/2011 5:50 PM, Jason Gunthorpe wrote:
>>> On Fri, Jul 08, 2011 at 05:42:38PM -0400, Hal Rosenstock wrote:
>>>
>>>> Should the request just be a GET rather than GET_TABLE and avoid this
>>>> check ? I don't think multiple nodes can register with same Node GUID,
>>>> can they ? Also, I think it makes eliminates this check and the missing
>>>> 0 check.
>>>
>>> Multiport HCAs should (and do..) show up with multiple node
>>> records. There is one node record per end port, not per node. This is
>>> why using node GUID as an end port identifier is a bad choice.
> 
> It is _not_ a bad choice if you are looking for a "node".

One could also equally well query for the node records to which a port
GUID belongs if this is better to keep the guid meaning consistent.

Looking at the current man pages though, ibqueryerrors does say node
GUID and iblinkinfo says switch GUID which is the node GUID.

-- Hal

> 
>>
>> Before this patch, it did used to use the port GUID for this.
> 
> The point of this patch is to do the right thing when the user is requesting a node they want information about.  The next step is to accept NodeDescription and use that from the NodeRecord as a key.
> 
>>
>>> However, you could use GET and look at the return code to disambiguate
>>> no records/one record/many records.
>>
>> Yes, that was getting at (and that there was no check for no records
>> returned with the get table code).
> 
> Ok, that is a bug.  We should check for no records.
> 
> As for multiple records, I left that for a future patch which would query all of those ports.
> 
> Ira
> 
>>
>> -- Hal
>>
>>> Jason
>>>
>>
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ira Weiny July 11, 2011, 9:06 p.m. UTC | #7
On Fri, 8 Jul 2011 15:55:10 -0700
Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:

> On Fri, Jul 08, 2011 at 03:29:53PM -0700, Ira Weiny wrote:
> > On Fri, 8 Jul 2011 14:59:01 -0700
> > Hal Rosenstock <hal@dev.mellanox.co.il> wrote:
> > 
> > > On 7/8/2011 5:50 PM, Jason Gunthorpe wrote:
> > > > On Fri, Jul 08, 2011 at 05:42:38PM -0400, Hal Rosenstock wrote:
> > > > 
> > > >> Should the request just be a GET rather than GET_TABLE and avoid this
> > > >> check ? I don't think multiple nodes can register with same Node GUID,
> > > >> can they ? Also, I think it makes eliminates this check and the missing
> > > >> 0 check.
> > > > 
> > > > Multiport HCAs should (and do..) show up with multiple node
> > > > records. There is one node record per end port, not per node. This is
> > > > why using node GUID as an end port identifier is a bad choice.
> > 
> > It is _not_ a bad choice if you are looking for a "node".
> 
> But very few diags seem to be designed around the idea that they will
> operate on a bundle of end ports (eg a node), they tend to be one end
> port only, so asking for a "node" is nonsense.

Why do you object to tools which report information for an entire node?  Nodes,
specifically switches, are much more manageable chunks than an entire fabric.

> What does it mean?
> Operate on a random end port of that node?

For some queries, like NodeInfo, yes that is all we need.

> All end ports?

Yes, why not?

While this particular patch only supports the fist port found.  That will
support 90% of the fabrics out there which have a single port of an HCA
connected to a fabric.  That is why I added the warning message indicating
there was a problem, as well as responding to Hal that further work would be
required.

> Just fail with error?

Only if you can't resolve what the user is looking for.

> 
> I don't like this trend to make node GUID the default GUID input
> format for diags. FWIW, ibtool consistently uses port GUID as the
> default GUID type for all end port specifications.

I am not proposing this for all tools.  Why shouldn't a user be able to query
more than a single port at a time in some "higher level" tools?

Also how would you propose to resolve a query via NodeDescription?  Put
yourself in the shoes of the administrator who is trying to manage 1000's of
"nodes" in a system.  Names are much easier to deal with than GUID's and
LID's.

> 
> > > Before this patch, it did used to use the port GUID for this.
> > 
> > The point of this patch is to do the right thing when the user is
> > requesting a node they want information about.  The next step is to
> > accept NodeDescription and use that from the NodeRecord as a key.
> 
> Well, it looks like this is a bug fix for both iblinkinfo and
> ibqueryerrors, eg they seem to be documented to accept node GUIDs but
> were treating them as port GUIDs in one place and node GUIDs in
> another.

Yes this was based on the assumption that PortGUID of [E]SP0 == node GUID,
which AFAIK works on all current switches, but is wrong.  This was "ok" when
iblinkinfo and ibqueryerrors only supported switches.  I wanted to expand
them.

> Though please update the -S section of the man page for
> iblinkinfo to reflect the GUID is a node GUID..

This was included as part of the next patch 3/4 "infiniband-diags:
libibnetdisc/iblinkinfo Allow for a partial fabric query centered around a CA"

Ira

> 
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 11, 2011, 9:18 p.m. UTC | #8
On Mon, Jul 11, 2011 at 02:06:02PM -0700, Ira Weiny wrote:

> > But very few diags seem to be designed around the idea that they will
> > operate on a bundle of end ports (eg a node), they tend to be one end
> > port only, so asking for a "node" is nonsense.
> 
> Why do you object to tools which report information for an entire
> node?  Nodes, specifically switches, are much more manageable chunks
> than an entire fabric.

I don't object to that, I'm just pointing out that most of the tools
aren't like that today, and many don't have a clear way to format
their output in a multi-end-port format.

And, I don't think there is anything wrong with reporting a whole
switch node either - but the portGUID should be identifier, not the
node GUID.

> > I don't like this trend to make node GUID the default GUID input
> > format for diags. FWIW, ibtool consistently uses port GUID as the
> > default GUID type for all end port specifications.
> 
> I am not proposing this for all tools.  Why shouldn't a user be able to query
> more than a single port at a time in some "higher level" tools?

I'd much rather see only portGUID used as an argument and a
--all-ports option that would report all HCA ports - by automatically
doing the necessary SA operations to find them. This is much better
than having to force an admin to use port GUIDs in some tools and node
GUID in (very few) other tools.

Ie, admins should never need to know what the node GUID is, and they
certainly should not be required to keep track of both a port GUID and
a node GUID for every CA just to use one tool or another.

> Also how would you propose to resolve a query via NodeDescription?
> Put yourself in the shoes of the administrator who is trying to
> manage 1000's of "nodes" in a system.  Names are much easier to deal
> with than GUID's and LID's.

I've no objection to searching by node description, as long as the
tool supports a multiple end port output format. Don't see what this
has to do with node GUID support :)

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ira Weiny July 12, 2011, 11:52 p.m. UTC | #9
On Mon, 11 Jul 2011 14:18:43 -0700
Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:

> On Mon, Jul 11, 2011 at 02:06:02PM -0700, Ira Weiny wrote:
> 
> > > But very few diags seem to be designed around the idea that they will
> > > operate on a bundle of end ports (eg a node), they tend to be one end
> > > port only, so asking for a "node" is nonsense.
> > 
> > Why do you object to tools which report information for an entire
> > node?  Nodes, specifically switches, are much more manageable chunks
> > than an entire fabric.
> 
> I don't object to that, I'm just pointing out that most of the tools
> aren't like that today, and many don't have a clear way to format
> their output in a multi-end-port format.
> 
> And, I don't think there is anything wrong with reporting a whole
> switch node either - but the portGUID should be identifier, not the
> node GUID.

Ok, I see where you are coming from but I am not sure I agree with you.

I guess I don't see an issue with using NodeGUID as a unique ID for a node.
The node-name-map functionality in complib provides a translation from
NodeGUID's to names.  (For those NodeDescriptions which are not programmable
in FW.)

> 
> > > I don't like this trend to make node GUID the default GUID input
> > > format for diags. FWIW, ibtool consistently uses port GUID as the
> > > default GUID type for all end port specifications.
> > 
> > I am not proposing this for all tools.  Why shouldn't a user be able to query
> > more than a single port at a time in some "higher level" tools?
> 
> I'd much rather see only portGUID used as an argument and a
> --all-ports option that would report all HCA ports - by automatically
> doing the necessary SA operations to find them. This is much better
> than having to force an admin to use port GUIDs in some tools and node
> GUID in (very few) other tools.
> 
> Ie, admins should never need to know what the node GUID is, and they
> certainly should not be required to keep track of both a port GUID and
> a node GUID for every CA just to use one tool or another.

I see your point.  However, I don't think it is correct to consider a PortGUID
a representation of a node.  Specifically what does "iblinkinfo" print for a
[E]SP0 PortGUID?  (for example when not run with "--all-ports"). iblinkinfo and
by derivation ibqueryerrors reports information for all the links (all the
ports) on a node.

Frankly this is a part of the spec which I consider convoluted and perhaps
there is no good way around it.  Specifically querying NodeInfo on a Node
changes depending on which port the NodeInfo was requested on.  This just
seems wrong.  ;-)

To me when I wrote iblinkinfo it just made sense that you would request
information about a particular node (specifically switch).  Otherwise the
output would be pretty stupid for a switch PortGUID.  Therefore the NodeGUID
seemed reasonable.

> 
> > Also how would you propose to resolve a query via NodeDescription?
> > Put yourself in the shoes of the administrator who is trying to
> > manage 1000's of "nodes" in a system.  Names are much easier to deal
> > with than GUID's and LID's.
> 
> I've no objection to searching by node description, as long as the
> tool supports a multiple end port output format. Don't see what this
> has to do with node GUID support :)

Only that searching for NodeRecords to resolve nodes on the fabric is a
reasonable way to go.  This patch moves in that direction.

Ira

> 
> Jason
Jason Gunthorpe July 13, 2011, 5:09 a.m. UTC | #10
On Tue, Jul 12, 2011 at 04:52:50PM -0700, Ira Weiny wrote:

> > And, I don't think there is anything wrong with reporting a whole
> > switch node either - but the portGUID should be identifier, not the
> > node GUID.
> 
> Ok, I see where you are coming from but I am not sure I agree with you.
> 
> I guess I don't see an issue with using NodeGUID as a unique ID for a node.
> The node-name-map functionality in complib provides a translation from
> NodeGUID's to names.  (For those NodeDescriptions which are not programmable
> in FW.)

Well, I never liked that functionality in complib either ;) I'd have
been much happier if portGUID was the key. That avoids the ambiguity
of refering to mulitple ports using a single name, and avoids needing
to do a NodeInfoRecord query followed by a PathRecord query to setup
a path to a user defined name.

> > Ie, admins should never need to know what the node GUID is, and they
> > certainly should not be required to keep track of both a port GUID and
> > a node GUID for every CA just to use one tool or another.
> 
> I see your point.  However, I don't think it is correct to consider
> a PortGUID a representation of a node.  Specifically what does
> "iblinkinfo" print for a [E]SP0 PortGUID?  (for example when not run
> with "--all-ports"). iblinkinfo and by derivation ibqueryerrors
> reports information for all the links (all the ports) on a node.

A tool should behave exactly the same if it is given a GID, port GUID, LID
or directed route path (and perhaps in future an IP address that would
be ARP'd to get a GID). Not supporting those address options in a tool
goes against the UI design of every other tool in the diags suite.

For a program like iblinkinfo I think it would be appropriate to show
the entire switch, and to show only the single CA port. IIRC this is
what the ibtool version of iblinkinfo does.

If you think displaying an entire CA node is appropriate then use the
SA to translate from a standard end port identifier to a node GUID and
then use the SA to enumerate all the CA node end ports. Don't require
the user to enter a node identifier just to avoid some processing in a
diag tool :(

The point is, nobody should be expected to know what node GUIDs are,
we want people to refer to fabric devices by GID, LID or DR path,
which are all more natural.

> Frankly this is a part of the spec which I consider convoluted and perhaps
> there is no good way around it.  Specifically querying NodeInfo on a Node
> changes depending on which port the NodeInfo was requested on.  This just
> seems wrong.  ;-)

NodeInfo and the NodeGUID exist in the form they do to make discovery
work, and they work well in that capacity.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/ibdiag_common.h b/include/ibdiag_common.h
index 69cddfb..e107a43 100644
--- a/include/ibdiag_common.h
+++ b/include/ibdiag_common.h
@@ -126,4 +126,7 @@  void sa_report_err(int status);
 		comp_mask |= IB_##name##_COMPMASK_##mask; \
 	}
 
+int resolve_node_guid(ib_portid_t *port_id, char *node_guid,
+			struct ibmad_port *ibmad_port);
+
 #endif				/* _IBDIAG_COMMON_H_ */
diff --git a/src/ibdiag_common.c b/src/ibdiag_common.c
index 6d03a43..66dc132 100644
--- a/src/ibdiag_common.c
+++ b/src/ibdiag_common.c
@@ -509,3 +509,58 @@  void sa_report_err(int status)
 	fprintf(stderr, "ERROR: Query result returned 0x%04x, %s%s\n",
 		status, sm_err_str, sa_err_str);
 }
+
+int resolve_node_guid(ib_portid_t *port_id, char *node_guid,
+			struct ibmad_port *ibmad_port)
+{
+	ib_node_record_t nr;
+	ib_node_record_t *found_nr;
+	uint64_t comp_mask = 0;
+	struct sa_query_result result;
+	uint64_t guid = strtoull(node_guid, NULL, 0);
+	bind_handle_t h;
+	int ret = 0;
+
+	if (guid == 0 || guid == ULLONG_MAX)
+		return -EINVAL;
+
+	memset(&nr, 0, sizeof(nr));
+	CHECK_AND_SET_VAL(guid, 64, 0, nr.node_info.node_guid, NR, NODEGUID);
+
+	if (!ibd_timeout)
+		ibd_timeout = MAD_DEF_TIMEOUT_MS;
+
+	h = sa_get_bind_handle();
+	if (!h)
+		return -EIO;
+
+	ret = sa_query(h, IB_MAD_METHOD_GET_TABLE, IB_SA_ATTR_NODERECORD, 0,
+			   cl_hton64(comp_mask), 0, &nr, &result);
+	if (ret) {
+		fprintf(stderr, "Query SA failed: %s\n", ib_get_err_str(ret));
+		ret = -EIO;
+		goto free_bind_handle;
+	}
+
+	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
+		sa_report_err(result.status);
+		ret = -EIO;
+		goto free_result;
+	}
+
+	if (result.result_cnt > 1)
+		fprintf(stderr, "%s resolved to more than one port.\n",
+			node_guid);
+
+	found_nr = (ib_node_record_t *)sa_get_query_rec(result.p_result_madw,
+							0);
+	guid = cl_ntoh64(found_nr->node_info.port_guid);
+	ret = ib_resolve_guid_via(port_id, &guid, NULL, ibd_timeout,
+				  ibmad_port);
+
+free_result:
+	sa_free_result_mad(&result);
+free_bind_handle:
+	sa_free_bind_handle(h);
+	return ret;
+}
diff --git a/src/iblinkinfo.c b/src/iblinkinfo.c
index 8cf755d..2f69acf 100644
--- a/src/iblinkinfo.c
+++ b/src/iblinkinfo.c
@@ -619,9 +619,8 @@  int main(int argc, char **argv)
 			IBWARN("Failed to resolve %s; attempting full scan\n",
 			       dr_path);
 	} else if (guid_str) {
-		if ((resolved =
-		     ib_resolve_portid_str_via(&port_id, guid_str, IB_DEST_GUID,
-					       NULL, ibmad_port)) < 0)
+		if ((resolved = resolve_node_guid(&port_id, guid_str,
+						  ibmad_port)) < 0)
 			IBWARN("Failed to resolve %s; attempting full scan\n",
 			       guid_str);
 	}
diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
index 1bba0e0..4b59c6f 100644
--- a/src/ibqueryerrors.c
+++ b/src/ibqueryerrors.c
@@ -914,11 +914,9 @@  int main(int argc, char **argv)
 			IBWARN("Failed to resolve %s; attempting full scan",
 			       dr_path);
 	} else if (node_guid_str) {
-		if ((resolved =
-		     ib_resolve_portid_str_via(&portid, node_guid_str,
-					       IB_DEST_GUID, ibd_sm_id,
-					       ibmad_port)) < 0)
-			IBWARN("Failed to resolve %s; attempting full scan",
+		if ((resolved = resolve_node_guid(&portid, node_guid_str,
+						  ibmad_port)) < 0)
+			IBWARN("Failed to resolve %s; attempting full scan\n",
 			       node_guid_str);
 	}