diff mbox

[v3,rdma-next,08/10] RDMA/nldev: provide detailed MR information

Message ID 1316f98139ad9f6be44ace244f2a15cdf40c1a27.1519688087.git.swise@opengridcomputing.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Steve Wise Feb. 26, 2018, 11:22 p.m. UTC
Implement the RDMA nldev netlink interface for dumping detailed
MR information.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 drivers/infiniband/core/nldev.c      | 73 ++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/restrack.c   | 36 +++++++++++-------
 drivers/infiniband/core/uverbs_cmd.c |  2 +
 drivers/infiniband/core/verbs.c      |  3 ++
 include/rdma/ib_verbs.h              |  5 +++
 include/rdma/restrack.h              |  4 ++
 include/uapi/rdma/rdma_netlink.h     | 10 +++++
 7 files changed, 120 insertions(+), 13 deletions(-)

Comments

Jason Gunthorpe Feb. 27, 2018, 10:05 p.m. UTC | #1
On Mon, Feb 26, 2018 at 03:22:42PM -0800, Steve Wise wrote:

> +static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
> +			     struct rdma_restrack_entry *res, uint32_t port)
> +{
> +	struct ib_mr *mr = container_of(res, struct ib_mr, res);
> +	struct nlattr *entry_attr;
> +
> +	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
> +	if (!entry_attr)
> +		goto out;
> +
> +	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
> +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
> +			goto err;
> +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
> +			goto err;
> +		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
> +				      mr->iova, 0))

Is '0' right here? I would expect a defined attribute constant for the padding.

> +			goto err;
> +	}
> +
> +	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
> +		goto err;

Ditto, and likewise everywhere else.

> +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr->page_size))
> +		goto err;

I'm still not really sure what pgsize is supposed to be; I think we should
drop it?

> +	/*
> +	 * Existence of task means that it is user MR and netlink
> +	 * user is invited to go and read /proc/PID/comm to get name
> +	 * of the task file and res->task_com should be NULL.
> +	 */
> +	if (rdma_is_kernel_res(res)) {
> +		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> +				   res->kern_name))
> +			goto err;
> +	} else {
> +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
> +				task_pid_vnr(res->task)))
> +			goto err;
> +	}

This block still seems to be duplicated; it needs a helper, I think.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 27, 2018, 10:21 p.m. UTC | #2
> 
> On Mon, Feb 26, 2018 at 03:22:42PM -0800, Steve Wise wrote:
> 
> > +static int fill_res_mr_entry(struct sk_buff *msg, struct
netlink_callback
> *cb,
> > +			     struct rdma_restrack_entry *res, uint32_t port)
> > +{
> > +	struct ib_mr *mr = container_of(res, struct ib_mr, res);
> > +	struct nlattr *entry_attr;
> > +
> > +	entry_attr = nla_nest_start(msg,
> RDMA_NLDEV_ATTR_RES_MR_ENTRY);
> > +	if (!entry_attr)
> > +		goto out;
> > +
> > +	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
> > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr-
> >rkey))
> > +			goto err;
> > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr-
> >lkey))
> > +			goto err;
> > +		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
> > +				      mr->iova, 0))
> 
> ?? is '0' right here? Expecting a defined attribute constant for padding.


What does the pad do exactly?  I replicated other code I saw that uses 0 for
the pad.  But I can add a NLDEV_ATTR_RES_PAD if that is the correct way to
go.

> 
> > +			goto err;
> > +	}
> > +
> > +	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr-
> >length, 0))
> > +		goto err;
> 
> Ditto, like wise everywhere
> 
> > +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr-
> >page_size))
> > +		goto err;
> 
> Still not really sure what pgsize is supposed to be, I think we should
> drop it??
> 

Let us say we register an MR whose backing memory is a scatter-gather list of
10 64KB entries, where each 64KB entry is physically contiguous.  The
page size for that registration can then be 64KB even though the host page
size is 4KB.  So the page list for the registration with the rdma device can
hold 10 entries, each of size 64KB...


> > +	/*
> > +	 * Existence of task means that it is user MR and netlink
> > +	 * user is invited to go and read /proc/PID/comm to get name
> > +	 * of the task file and res->task_com should be NULL.
> > +	 */
> > +	if (rdma_is_kernel_res(res)) {
> > +		if (nla_put_string(msg,
> RDMA_NLDEV_ATTR_RES_KERN_NAME,
> > +				   res->kern_name))
> > +			goto err;
> > +	} else {
> > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
> > +				task_pid_vnr(res->task)))
> > +			goto err;
> > +	}
> 
> This block seems duplicated still, needs a helper I think.
> 

I'll look into it.

Thanks


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 27, 2018, 10:27 p.m. UTC | #3
On Tue, Feb 27, 2018 at 04:21:36PM -0600, Steve Wise wrote:
> > 
> > On Mon, Feb 26, 2018 at 03:22:42PM -0800, Steve Wise wrote:
> > 
> > > +static int fill_res_mr_entry(struct sk_buff *msg, struct
> netlink_callback
> > *cb,
> > > +			     struct rdma_restrack_entry *res, uint32_t port)
> > > +{
> > > +	struct ib_mr *mr = container_of(res, struct ib_mr, res);
> > > +	struct nlattr *entry_attr;
> > > +
> > > +	entry_attr = nla_nest_start(msg,
> > RDMA_NLDEV_ATTR_RES_MR_ENTRY);
> > > +	if (!entry_attr)
> > > +		goto out;
> > > +
> > > +	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
> > > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr-
> > >rkey))
> > > +			goto err;
> > > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr-
> > >lkey))
> > > +			goto err;
> > > +		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
> > > +				      mr->iova, 0))
> > 
> > ?? is '0' right here? Expecting a defined attribute constant for padding.
> 
> 
> What does the pad do exactly?  I replicated other code I saw that use 0 for
> the pad.  But I can add a NLDEV_ATTR_RES_PAD if that is the correct way to
> go.

Sometimes netlink will try to align the u64s to 8 bytes in the skb. If
it needs to make an alignment adjustment then it stuffs a dummy
attribute with the given ID. The ID should never be used for any other
attribute and must be ignored by userspace.

Safest to put it in the general enum to ensure it isn't re-used.

> > > +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr-
> > >page_size))
> > > +		goto err;
> > 
> > Still not really sure what pgsize is supposed to be, I think we should
> > drop it??
> > 
> 
> Let us say we register a MR that has a backing memory scattergather list of
> 10 64KB entries, each 64KB entry, though, is physically contiguous.   The
> page-size for that registration, then can be 64KB even though the host page
> size is 4KB.  So the page list for the registration with the rdma device can
> be 10 64K entries, each of size 64KB...

So what is the value?

'max contiguous page size' of any page in the registration or 'min
contiguous page size' of any page in the registration?

Seems like it needs a better name at least..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 27, 2018, 10:30 p.m. UTC | #4
> 
> On Tue, Feb 27, 2018 at 04:21:36PM -0600, Steve Wise wrote:
> > >
> > > On Mon, Feb 26, 2018 at 03:22:42PM -0800, Steve Wise wrote:
> > >
> > > > +static int fill_res_mr_entry(struct sk_buff *msg, struct
> > netlink_callback
> > > *cb,
> > > > +			     struct rdma_restrack_entry *res,
uint32_t port)
> > > > +{
> > > > +	struct ib_mr *mr = container_of(res, struct ib_mr, res);
> > > > +	struct nlattr *entry_attr;
> > > > +
> > > > +	entry_attr = nla_nest_start(msg,
> > > RDMA_NLDEV_ATTR_RES_MR_ENTRY);
> > > > +	if (!entry_attr)
> > > > +		goto out;
> > > > +
> > > > +	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
> > > > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr-
> > > >rkey))
> > > > +			goto err;
> > > > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr-
> > > >lkey))
> > > > +			goto err;
> > > > +		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
> > > > +				      mr->iova, 0))
> > >
> > > ?? is '0' right here? Expecting a defined attribute constant for
padding.
> >
> >
> > What does the pad do exactly?  I replicated other code I saw that use 0
for
> > the pad.  But I can add a NLDEV_ATTR_RES_PAD if that is the correct way
to
> > go.
> 
> Sometimes netlink will try to align the u64s to 8 bytes in the skb. If
> it needs to make an alignment adjustment then it stuffs a dummy
> attribute with the given ID. The ID should never be used for any other
> attribute and must be ignored by userspace.
> 
> Safest to put it in the general enum to ensure it isn't re-used.
> 
> > > > +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr-
> > > >page_size))
> > > > +		goto err;
> > >
> > > Still not really sure what pgsize is supposed to be, I think we should
> > > drop it??
> > >
> >
> > Let us say we register a MR that has a backing memory scattergather list
of
> > 10 64KB entries, each 64KB entry, though, is physically contiguous.
The
> > page-size for that registration, then can be 64KB even though the host
page
> > size is 4KB.  So the page list for the registration with the rdma device
can
> > be 10 64K entries, each of size 64KB...
> 
> So what is the value?
> 
> 'max contiguous page size' of any page in the registration or 'min
> contiguous page size' of any page in the registration?
> 
> Seems like it needs a better name at least..
> 
> Jason

All "pages" in the REG_MR page list are of the same size.  So pgsize is the
size of each entry...



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 28, 2018, 12:19 a.m. UTC | #5
> > > Let us say we register a MR that has a backing memory scattergather
list
> of
> > > 10 64KB entries, each 64KB entry, though, is physically contiguous.
The
> > > page-size for that registration, then can be 64KB even though the host
> page
> > > size is 4KB.  So the page list for the registration with the rdma
device can
> > > be 10 64K entries, each of size 64KB...
> >
> > So what is the value?
> >
> > 'max contiguous page size' of any page in the registration or 'min
> > contiguous page size' of any page in the registration?
> >
> > Seems like it needs a better name at least..
> >
> > Jason
> 
> All "pages" in the REG_MR page list are of the same size.  So pgsize is
the size
> of each entry...
> 

Looking at all the users that register memory, it appears they always use
PAGE_SIZE or 4K.  So perhaps passing this up is useless...

But the rdma devices do support a range of page sizes, and the intent of
ib_mr.page_size was to express what page_size is being used for that MR.

I'm removing it...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 28, 2018, 3:23 a.m. UTC | #6
On Tue, Feb 27, 2018 at 06:19:34PM -0600, Steve Wise wrote:

> But the rdma devices do support a range of page sizes, and the intent of
> ib_mr.page_size was to express what page_size is being used for that MR.

I think reporting a min & max page size used in the actual physical
hardware could be useful, but as you noticed the internal page size
thing is pretty useless.

The drivers aggregate larger pages on their own based on their own
internal capability, so new reporting would need driver changes.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 28, 2018, 6:51 a.m. UTC | #7
On Tue, Feb 27, 2018 at 03:27:42PM -0700, Jason Gunthorpe wrote:
> On Tue, Feb 27, 2018 at 04:21:36PM -0600, Steve Wise wrote:
> > >
> > > On Mon, Feb 26, 2018 at 03:22:42PM -0800, Steve Wise wrote:
> > >
> > > > +static int fill_res_mr_entry(struct sk_buff *msg, struct
> > netlink_callback
> > > *cb,
> > > > +			     struct rdma_restrack_entry *res, uint32_t port)
> > > > +{
> > > > +	struct ib_mr *mr = container_of(res, struct ib_mr, res);
> > > > +	struct nlattr *entry_attr;
> > > > +
> > > > +	entry_attr = nla_nest_start(msg,
> > > RDMA_NLDEV_ATTR_RES_MR_ENTRY);
> > > > +	if (!entry_attr)
> > > > +		goto out;
> > > > +
> > > > +	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
> > > > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr-
> > > >rkey))
> > > > +			goto err;
> > > > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr-
> > > >lkey))
> > > > +			goto err;
> > > > +		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
> > > > +				      mr->iova, 0))
> > >
> > > ?? is '0' right here? Expecting a defined attribute constant for padding.
> >
> >
> > What does the pad do exactly?  I replicated other code I saw that use 0 for
> > the pad.  But I can add a NLDEV_ATTR_RES_PAD if that is the correct way to
> > go.
>
> Sometimes netlink will try to align the u64s to 8 bytes in the skb. If
> it needs to make an alignment adjustment then it stuffs a dummy
> attribute with the given ID. The ID should never be used for any other
> attribute and must be ignored by userspace.
>
> Safest to put it in the general enum to ensure it isn't re-used.

It is already done, though not as explicitly as you would like:
0 is equal to RDMA_NLDEV_ATTR_UNSPEC, and user space ignores this
attribute.

Thanks
Steve Wise Feb. 28, 2018, 3:37 p.m. UTC | #8
> 
> On Tue, Feb 27, 2018 at 06:19:34PM -0600, Steve Wise wrote:
> 
> > But the rdma devices do support a range of page sizes, and the intent of
> > ib_mr.page_size was to express what page_size is being used for that MR.
> 
> I think reporting a min & max page size used in the actual physical
> hardware could be useful, but as you noticed the internal page size
> thing is pretty useless.
> 
> The drivers aggregate larger pages on their own based on their own
> internal capability, so new reporting would need driver changes.

The device supported page sizes are advertised already via
ib_device_attr.page_size_cap, which is a bit mask of supported page sizes.

Steve.




--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index b68b35c..611b256 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -85,6 +85,13 @@ 
 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
+	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
+	[RDMA_NLDEV_ATTR_RES_PGSIZE]		= { .type = NLA_U32 },
 };
 
 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -197,6 +204,7 @@  static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 		[RDMA_RESTRACK_CQ] = "cq",
 		[RDMA_RESTRACK_QP] = "qp",
 		[RDMA_RESTRACK_CM_ID] = "cm_id",
+		[RDMA_RESTRACK_MR] = "mr",
 	};
 
 	struct rdma_restrack_root *res = &device->res;
@@ -404,6 +412,55 @@  static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
 	return -EMSGSIZE;
 }
 
+static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
+			     struct rdma_restrack_entry *res, uint32_t port)
+{
+	struct ib_mr *mr = container_of(res, struct ib_mr, res);
+	struct nlattr *entry_attr;
+
+	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
+	if (!entry_attr)
+		goto out;
+
+	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+			goto err;
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+			goto err;
+		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
+				      mr->iova, 0))
+			goto err;
+	}
+
+	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+		goto err;
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr->page_size))
+		goto err;
+
+	/*
+	 * A kernel-owned MR is reported by res->kern_name. Otherwise only
+	 * the PID of res->task is put in the message, and the netlink user
+	 * is invited to read /proc/PID/comm to get the name of that task.
+	 */
+	if (rdma_is_kernel_res(res)) {
+		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+				   res->kern_name))
+			goto err;
+	} else {
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+				task_pid_vnr(res->task)))
+			goto err;
+	}
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, entry_attr);
+out:
+	return -EMSGSIZE;
+}
+
 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
@@ -839,6 +896,12 @@  static int res_get_common_dumpit(struct sk_buff *skb,
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
 	},
+	[RDMA_RESTRACK_MR] = {
+		.fill_res_func = fill_res_mr_entry,
+		.res_type = RDMA_RESTRACK_MR,
+		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
+		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
+	},
 };
 
 static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
@@ -861,6 +924,13 @@  static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
 				     &fill_entries[RDMA_RESTRACK_CQ]);
 }
 
+static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	return res_get_common_dumpit(skb, cb,
+				     &fill_entries[RDMA_RESTRACK_MR]);
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -893,6 +963,9 @@  static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
 		.dump = nldev_res_get_cq_dumpit,
 	},
+	[RDMA_NLDEV_CMD_RES_MR_GET] = {
+		.dump = nldev_res_get_mr_dumpit,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index a85d8f5..58b05a9 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -43,22 +43,28 @@  int rdma_restrack_count(struct rdma_restrack_root *res,
 
 static void set_kern_name(struct rdma_restrack_entry *res)
 {
-	enum rdma_restrack_type type = res->type;
-	struct ib_qp *qp;
+	struct ib_pd *pd;
 
-	if (type != RDMA_RESTRACK_QP)
-		/* Other types already have this name embedded in */
-		return;
-
-	qp = container_of(res, struct ib_qp, res);
-	if (!qp->pd) {
-		WARN_ONCE(true, "XRC QPs are not supported\n");
-		/* Survive, despite the programmer's error */
-		res->kern_name = " ";
-		return;
+	switch (res->type) {
+	case RDMA_RESTRACK_QP:
+		pd = container_of(res, struct ib_qp, res)->pd;
+		if (!pd) {
+			WARN_ONCE(true, "XRC QPs are not supported\n");
+			/* Survive, despite the programmer's error */
+			res->kern_name = " ";
+		}
+		break;
+	case RDMA_RESTRACK_MR:
+		pd = container_of(res, struct ib_mr, res)->pd;
+		break;
+	default:
+		/* Other types set kern_name directly */
+		pd = NULL;
+		break;
 	}
 
-	res->kern_name = qp->pd->res.kern_name;
+	if (pd)
+		res->kern_name = pd->res.kern_name;
 }
 
 static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
@@ -73,6 +79,8 @@  static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 	case RDMA_RESTRACK_CM_ID:
 		return container_of(res, struct rdma_id_private,
 				    res)->id.device;
+	case RDMA_RESTRACK_MR:
+		return container_of(res, struct ib_mr, res)->device;
 	default:
 		WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
 		return NULL;
@@ -90,6 +98,8 @@  static bool res_is_user(struct rdma_restrack_entry *res)
 		return container_of(res, struct ib_qp, res)->uobject;
 	case RDMA_RESTRACK_CM_ID:
 		return !container_of(res, struct rdma_id_private, res)->caller;
+	case RDMA_RESTRACK_MR:
+		return container_of(res, struct ib_mr, res)->pd->uobject;
 	default:
 		WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
 		return false;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 25a0e0e..99f01c4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -693,6 +693,8 @@  ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	mr->pd      = pd;
 	mr->uobject = uobj;
 	atomic_inc(&pd->usecnt);
+	mr->res.type = RDMA_RESTRACK_MR;
+	rdma_restrack_add(&mr->res);
 
 	uobj->object = mr;
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 93025d2..d80dbe1 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1622,6 +1622,7 @@  int ib_dereg_mr(struct ib_mr *mr)
 	struct ib_pd *pd = mr->pd;
 	int ret;
 
+	rdma_restrack_del(&mr->res);
 	ret = mr->device->dereg_mr(mr);
 	if (!ret)
 		atomic_dec(&pd->usecnt);
@@ -1658,6 +1659,8 @@  struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
 		mr->uobject = NULL;
 		atomic_inc(&pd->usecnt);
 		mr->need_inval = false;
+		mr->res.type = RDMA_RESTRACK_MR;
+		rdma_restrack_add(&mr->res);
 	}
 
 	return mr;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 73b2387..7df3274 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1772,6 +1772,11 @@  struct ib_mr {
 		struct ib_uobject	*uobject;	/* user */
 		struct list_head	qp_entry;	/* FR */
 	};
+
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct rdma_restrack_entry res;
 };
 
 struct ib_mw {
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index bbb1a8f..2257cf5 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -33,6 +33,10 @@  enum rdma_restrack_type {
 	 */
 	RDMA_RESTRACK_CM_ID,
 	/**
+	 * @RDMA_RESTRACK_MR: Memory Region (MR)
+	 */
+	RDMA_RESTRACK_MR,
+	/**
 	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
 	 */
 	RDMA_RESTRACK_MAX
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 36cf1f0..83fba48 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -242,6 +242,8 @@  enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -372,6 +374,14 @@  enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_RES_USECNT,		/* u64 */
 	RDMA_NLDEV_ATTR_RES_POLL_CTX,		/* u8 */
 
+	RDMA_NLDEV_ATTR_RES_MR,			/* nested table */
+	RDMA_NLDEV_ATTR_RES_MR_ENTRY,		/* nested table */
+	RDMA_NLDEV_ATTR_RES_RKEY,		/* u32 */
+	RDMA_NLDEV_ATTR_RES_LKEY,		/* u32 */
+	RDMA_NLDEV_ATTR_RES_IOVA,		/* u64 */
+	RDMA_NLDEV_ATTR_RES_MRLEN,		/* u64 */
+	RDMA_NLDEV_ATTR_RES_PGSIZE,		/* u32 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */