diff mbox

[PATCHv3,2/3] IB/core: added support to use rdma cgroup controller

Message ID 1454167387-31260-3-git-send-email-pandit.parav@gmail.com (mailing list archive)
State Superseded
Headers show

Commit Message

Parav Pandit Jan. 30, 2016, 3:23 p.m. UTC
- Added support APIs for IB core to register/unregister every RDMA device
with rdma cgroup for tracking verbs and hw resources.
- IB core registers with rdma cgroup controller and also defines resources
that can be accounted.
- Added support APIs for uverbs layer to make use of rdma controller.
- Added uverbs layer to perform resource charge/uncharge functionality.

Signed-off-by: Parav Pandit <pandit.parav@gmail.com>
---
 drivers/infiniband/core/Makefile      |   1 +
 drivers/infiniband/core/cgroup.c      | 108 ++++++++++++++++++
 drivers/infiniband/core/core_priv.h   |  45 ++++++++
 drivers/infiniband/core/device.c      |   8 ++
 drivers/infiniband/core/uverbs_cmd.c  | 209 +++++++++++++++++++++++++++++++---
 drivers/infiniband/core/uverbs_main.c |  28 +++++
 include/rdma/ib_verbs.h               |  27 ++++-
 7 files changed, 410 insertions(+), 16 deletions(-)
 create mode 100644 drivers/infiniband/core/cgroup.c
diff mbox

Patch

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d43a899..df40cee 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -13,6 +13,7 @@  ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
 				roce_gid_mgmt.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
 
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 0000000..be0a2b8
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,108 @@ 
+#include <linux/kernel.h>
+#include <linux/parser.h>
+#include <linux/cgroup_rdma.h>
+
+#include "core_priv.h"
+
+/**
+ * resource table definition as to be seen by the user.
+ * Need to add entries to it when more resources are
+ * added/defined at IB verb/core layer.
+ */
+static match_table_t resource_tokens = {
+	{RDMA_VERB_RESOURCE_UCTX, "uctx=%d"},
+	{RDMA_VERB_RESOURCE_AH, "ah=%d"},
+	{RDMA_VERB_RESOURCE_PD, "pd=%d"},
+	{RDMA_VERB_RESOURCE_CQ, "cq=%d"},
+	{RDMA_VERB_RESOURCE_MR, "mr=%d"},
+	{RDMA_VERB_RESOURCE_MW, "mw=%d"},
+	{RDMA_VERB_RESOURCE_SRQ, "srq=%d"},
+	{RDMA_VERB_RESOURCE_QP, "qp=%d"},
+	{RDMA_VERB_RESOURCE_FLOW, "flow=%d"},
+	{-1, NULL}
+};
+
+/**
+ * setup table pointers for RDMA cgroup to access.
+ */
+static struct rdmacg_pool_info verbs_token_info = {
+	.resource_table = resource_tokens,
+	.resource_count =
+		(sizeof(resource_tokens) / sizeof(struct match_token)) - 1,
+};
+
+static struct rdmacg_pool_info*
+	rdmacg_get_resource_pool_tokens(struct rdmacg_device *device)
+{
+	return &verbs_token_info;
+}
+
+static struct rdmacg_resource_pool_ops verbs_pool_ops = {
+	.get_resource_pool_tokens = &rdmacg_get_resource_pool_tokens,
+};
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ *          accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * HCA drivers should set resource pool ops first if they wish
+ * to support hw specific resource accounting before IB core
+ * registers with rdma cgroup.
+ */
+void ib_device_register_rdmacg(struct ib_device *device)
+{
+	rdmacg_set_rpool_ops(&device->cg_device,
+			     RDMACG_RESOURCE_POOL_VERB,
+			     &verbs_pool_ops);
+	rdmacg_register_device(&device->cg_device, device->name);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation of user application cannot be done
+ * for this device to avoid any leak in accounting.
+ * HCA drivers should clear resource pool ops after ib stack
+ * unregisters with rdma cgroup.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+	rdmacg_unregister_device(&device->cg_device);
+	rdmacg_clear_rpool_ops(&device->cg_device,
+			       RDMACG_RESOURCE_POOL_VERB);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+			 struct ib_device *device,
+			 enum rdmacg_resource_pool_type type,
+			 int resource_index, int num)
+{
+	return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+				 type, resource_index, num);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+			struct ib_device *device,
+			enum rdmacg_resource_pool_type type,
+			int resource_index, int num)
+{
+	rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+			type, resource_index, num);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
+
+int ib_rdmacg_query_limit(struct ib_device *device,
+			  enum rdmacg_resource_pool_type type,
+			  int *limits, int max_count)
+{
+	return rdmacg_query_limit(&device->cg_device, type, limits, max_count);
+}
+EXPORT_SYMBOL(ib_rdmacg_query_limit);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 5cf6eb7..977988a 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -37,6 +37,7 @@ 
 #include <linux/spinlock.h>
 
 #include <rdma/ib_verbs.h>
+#include <linux/cgroup_rdma.h>
 
 int  ib_device_register_sysfs(struct ib_device *device,
 			      int (*port_callback)(struct ib_device *,
@@ -92,4 +93,48 @@  int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
 
+#ifdef CONFIG_CGROUP_RDMA
+
+void ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+			 struct ib_device *device,
+			 enum rdmacg_resource_pool_type type,
+			 int resource_index, int num);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+			struct ib_device *device,
+			enum rdmacg_resource_pool_type type,
+			int resource_index, int num);
+
+int ib_rdmacg_query_limit(struct ib_device *device,
+			  enum rdmacg_resource_pool_type type,
+			  int *limits, int max_count);
+#else
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+				       struct ib_device *device,
+				       enum rdmacg_resource_pool_type type,
+				       int resource_index, int num)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+				      struct ib_device *device,
+				      enum rdmacg_resource_pool_type type,
+				      int resource_index, int num)
+{ }
+
+static inline int ib_rdmacg_query_limit(struct ib_device *device,
+					enum rdmacg_resource_pool_type type,
+					int *limits, int max_count)
+{
+	int i;
+
+	for (i = 0; i < max_count; i++)
+		limits[i] = S32_MAX;
+
+	return 0;
+}
+#endif
+
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 179e813..59cab6b 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -352,6 +352,10 @@  int ib_register_device(struct ib_device *device,
 		goto out;
 	}
 
+#ifdef CONFIG_CGROUP_RDMA
+	ib_device_register_rdmacg(device);
+#endif
+
 	ret = ib_device_register_sysfs(device, port_callback);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -405,6 +409,10 @@  void ib_unregister_device(struct ib_device *device)
 
 	mutex_unlock(&device_mutex);
 
+#ifdef CONFIG_CGROUP_RDMA
+	ib_device_unregister_rdmacg(device);
+#endif
+
 	ib_device_unregister_sysfs(device);
 	ib_cache_cleanup_one(device);
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 94816ae..78006d6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -294,6 +294,7 @@  ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 #endif
 	struct ib_ucontext		 *ucontext;
 	struct file			 *filp;
+	struct ib_rdmacg_object          cg_obj;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -313,13 +314,21 @@  ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
+	ret = ib_rdmacg_try_charge(&cg_obj, ib_dev,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_UCTX, 1);
+	if (ret)
+		goto err;
+
 	ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
 	if (IS_ERR(ucontext)) {
 		ret = PTR_ERR(ucontext);
-		goto err;
+		goto err_alloc;
 	}
 
 	ucontext->device = ib_dev;
+	ucontext->cg_obj = cg_obj;
+
 	INIT_LIST_HEAD(&ucontext->pd_list);
 	INIT_LIST_HEAD(&ucontext->mr_list);
 	INIT_LIST_HEAD(&ucontext->mw_list);
@@ -386,6 +395,10 @@  err_free:
 	put_pid(ucontext->tgid);
 	ib_dev->dealloc_ucontext(ucontext);
 
+err_alloc:
+	ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_UCTX, 1);
+
 err:
 	mutex_unlock(&file->mutex);
 	return ret;
@@ -394,7 +407,8 @@  err:
 static void copy_query_dev_fields(struct ib_uverbs_file *file,
 				  struct ib_device *ib_dev,
 				  struct ib_uverbs_query_device_resp *resp,
-				  struct ib_device_attr *attr)
+				  struct ib_device_attr *attr,
+				  int *limits)
 {
 	resp->fw_ver		= attr->fw_ver;
 	resp->node_guid		= ib_dev->node_guid;
@@ -405,14 +419,19 @@  static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->vendor_part_id	= attr->vendor_part_id;
 	resp->hw_ver		= attr->hw_ver;
 	resp->max_qp		= attr->max_qp;
+	resp->max_qp		= min_t(int, attr->max_qp,
+					limits[RDMA_VERB_RESOURCE_QP]);
 	resp->max_qp_wr		= attr->max_qp_wr;
 	resp->device_cap_flags	= attr->device_cap_flags;
 	resp->max_sge		= attr->max_sge;
 	resp->max_sge_rd	= attr->max_sge_rd;
-	resp->max_cq		= attr->max_cq;
+	resp->max_cq		= min_t(int, attr->max_cq,
+					limits[RDMA_VERB_RESOURCE_CQ]);
 	resp->max_cqe		= attr->max_cqe;
-	resp->max_mr		= attr->max_mr;
-	resp->max_pd		= attr->max_pd;
+	resp->max_mr		= min_t(int, attr->max_mr,
+					limits[RDMA_VERB_RESOURCE_MR]);
+	resp->max_pd		= min_t(int, attr->max_pd,
+					limits[RDMA_VERB_RESOURCE_PD]);
 	resp->max_qp_rd_atom	= attr->max_qp_rd_atom;
 	resp->max_ee_rd_atom	= attr->max_ee_rd_atom;
 	resp->max_res_rd_atom	= attr->max_res_rd_atom;
@@ -421,16 +440,19 @@  static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->atomic_cap		= attr->atomic_cap;
 	resp->max_ee			= attr->max_ee;
 	resp->max_rdd			= attr->max_rdd;
-	resp->max_mw			= attr->max_mw;
+	resp->max_mw			= min_t(int, attr->max_mw,
+						limits[RDMA_VERB_RESOURCE_MW]);
 	resp->max_raw_ipv6_qp		= attr->max_raw_ipv6_qp;
 	resp->max_raw_ethy_qp		= attr->max_raw_ethy_qp;
 	resp->max_mcast_grp		= attr->max_mcast_grp;
 	resp->max_mcast_qp_attach	= attr->max_mcast_qp_attach;
 	resp->max_total_mcast_qp_attach	= attr->max_total_mcast_qp_attach;
-	resp->max_ah			= attr->max_ah;
+	resp->max_ah			= min_t(int, attr->max_ah,
+						limits[RDMA_VERB_RESOURCE_AH]);
 	resp->max_fmr			= attr->max_fmr;
 	resp->max_map_per_fmr		= attr->max_map_per_fmr;
-	resp->max_srq			= attr->max_srq;
+	resp->max_srq			= min_t(int, attr->max_srq,
+						limits[RDMA_VERB_RESOURCE_SRQ]);
 	resp->max_srq_wr		= attr->max_srq_wr;
 	resp->max_srq_sge		= attr->max_srq_sge;
 	resp->max_pkeys			= attr->max_pkeys;
@@ -447,6 +469,7 @@  ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 	struct ib_uverbs_query_device_resp resp;
 	struct ib_device_attr              attr;
 	int                                ret;
+	int                                limits[RDMA_VERB_RESOURCE_MAX];
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -458,14 +481,23 @@  ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ret = ib_rdmacg_query_limit(ib_dev,
+				    RDMACG_RESOURCE_POOL_VERB,
+				    limits, RDMA_VERB_RESOURCE_MAX);
+	if (ret)
+		goto err;
+
 	memset(&resp, 0, sizeof resp);
-	copy_query_dev_fields(file, ib_dev, &resp, &attr);
+	copy_query_dev_fields(file, ib_dev, &resp, &attr, limits);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		return -EFAULT;
 
 	return in_len;
+
+err:
+	return ret;
 }
 
 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
@@ -545,6 +577,14 @@  ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -ENOMEM;
 
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, file->device->ib_dev,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_PD, 1);
+	if (ret) {
+		kfree(uobj);
+		return -EPERM;
+	}
+
 	init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
 	down_write(&uobj->mutex);
 
@@ -590,6 +630,9 @@  err_idr:
 	ib_dealloc_pd(pd);
 
 err:
+	ib_rdmacg_uncharge(&uobj->cg_obj, file->device->ib_dev,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_PD, 1);
 	put_uobj_write(uobj);
 	return ret;
 }
@@ -602,6 +645,7 @@  ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
 	struct ib_uverbs_dealloc_pd cmd;
 	struct ib_uobject          *uobj;
 	struct ib_pd		   *pd;
+	struct ib_device           *device;
 	int                         ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -622,6 +666,12 @@  ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
 	if (ret)
 		goto err_put;
 
+	device = uobj->context->device;
+
+	ib_rdmacg_uncharge(&uobj->cg_obj, device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_PD, 1);
+
 	uobj->live = 0;
 	put_uobj_write(uobj);
 
@@ -995,6 +1045,12 @@  ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 		}
 	}
 
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_MR, 1);
+	if (ret)
+		goto err_charge;
+
 	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
 				     cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
@@ -1043,6 +1099,11 @@  err_unreg:
 	ib_dereg_mr(mr);
 
 err_put:
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_MR, 1);
+
+err_charge:
 	put_pd_read(pd);
 
 err_free:
@@ -1152,6 +1213,7 @@  ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	struct ib_uverbs_dereg_mr cmd;
 	struct ib_mr             *mr;
 	struct ib_uobject	 *uobj;
+	struct ib_pd             *pd;
 	int                       ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1163,6 +1225,8 @@  ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 
 	mr = uobj->object;
 
+	pd = mr->pd;
+
 	ret = ib_dereg_mr(mr);
 	if (!ret)
 		uobj->live = 0;
@@ -1172,6 +1236,10 @@  ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_MR, 1);
+
 	idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -1214,6 +1282,12 @@  ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 		goto err_free;
 	}
 
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_MW, 1);
+	if (ret)
+		goto err_charge;
+
 	mw = pd->device->alloc_mw(pd, cmd.mw_type);
 	if (IS_ERR(mw)) {
 		ret = PTR_ERR(mw);
@@ -1259,6 +1333,11 @@  err_unalloc:
 	ib_dealloc_mw(mw);
 
 err_put:
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_MW, 1);
+
+err_charge:
 	put_pd_read(pd);
 
 err_free:
@@ -1273,6 +1352,7 @@  ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_dealloc_mw cmd;
 	struct ib_mw               *mw;
+	struct ib_pd               *pd;
 	struct ib_uobject	   *uobj;
 	int                         ret = -EINVAL;
 
@@ -1284,6 +1364,7 @@  ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 		return -EINVAL;
 
 	mw = uobj->object;
+	pd = mw->pd;
 
 	ret = ib_dealloc_mw(mw);
 	if (!ret)
@@ -1294,6 +1375,10 @@  ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_MW, 1);
+
 	idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -1393,6 +1478,12 @@  static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
 		attr.flags = cmd->flags;
 
+	ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, file->device->ib_dev,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_CQ, 1);
+	if (ret)
+		goto err_charge;
+
 	cq = ib_dev->create_cq(ib_dev, &attr,
 					     file->ucontext, uhw);
 	if (IS_ERR(cq)) {
@@ -1440,6 +1531,11 @@  err_free:
 	ib_destroy_cq(cq);
 
 err_file:
+	ib_rdmacg_uncharge(&obj->uobject.cg_obj, file->device->ib_dev,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_CQ, 1);
+
+err_charge:
 	if (ev_file)
 		ib_uverbs_release_ucq(file, ev_file, obj);
 
@@ -1720,6 +1816,10 @@  ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_CQ, 1);
+
 	idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -1775,6 +1875,12 @@  static int create_qp(struct ib_uverbs_file *file,
 		  &qp_lock_class);
 	down_write(&obj->uevent.uobject.mutex);
 
+	pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
+	if (!pd) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+
 	if (cmd->qp_type == IB_QPT_XRC_TGT) {
 		xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
 				     &xrcd_uobj);
@@ -1809,8 +1915,7 @@  static int create_qp(struct ib_uverbs_file *file,
 
 		scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
 		rcq = rcq ?: scq;
-		pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
-		if (!pd || !scq) {
+		if (!scq) {
 			ret = -EINVAL;
 			goto err_put;
 		}
@@ -1856,6 +1961,12 @@  static int create_qp(struct ib_uverbs_file *file,
 			goto err_put;
 		}
 
+	ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_QP, 1);
+	if (ret)
+		goto err_put;
+
 	if (cmd->qp_type == IB_QPT_XRC_TGT)
 		qp = ib_create_qp(pd, &attr);
 	else
@@ -1863,7 +1974,7 @@  static int create_qp(struct ib_uverbs_file *file,
 
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
-		goto err_put;
+		goto err_create;
 	}
 
 	if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1938,6 +2049,11 @@  err_cb:
 err_destroy:
 	ib_destroy_qp(qp);
 
+err_create:
+	ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_QP, 1);
+
 err_put:
 	if (xrcd)
 		put_xrcd_read(xrcd_uobj);
@@ -2377,6 +2493,7 @@  ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_qp_resp resp;
 	struct ib_uobject		*uobj;
 	struct ib_qp               	*qp;
+	struct ib_pd                    *pd;
 	struct ib_uqp_object        	*obj;
 	int                        	 ret = -EINVAL;
 
@@ -2389,6 +2506,7 @@  ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -EINVAL;
 	qp  = uobj->object;
+	pd  = qp->pd;
 	obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
 	if (!list_empty(&obj->mcast_list)) {
@@ -2405,6 +2523,10 @@  ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_QP, 1);
+
 	if (obj->uxrcd)
 		atomic_dec(&obj->uxrcd->refcnt);
 
@@ -2846,10 +2968,16 @@  ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	memset(&attr.dmac, 0, sizeof(attr.dmac));
 	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_AH, 1);
+	if (ret)
+		goto err_put;
+
 	ah = ib_create_ah(pd, &attr);
 	if (IS_ERR(ah)) {
 		ret = PTR_ERR(ah);
-		goto err_put;
+		goto err_create;
 	}
 
 	ah->uobject  = uobj;
@@ -2885,6 +3013,11 @@  err_copy:
 err_destroy:
 	ib_destroy_ah(ah);
 
+err_create:
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_AH, 1);
+
 err_put:
 	put_pd_read(pd);
 
@@ -2899,6 +3032,7 @@  ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_destroy_ah cmd;
 	struct ib_ah		   *ah;
+	struct ib_pd		   *pd;
 	struct ib_uobject	   *uobj;
 	int			    ret;
 
@@ -2909,6 +3043,7 @@  ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -EINVAL;
 	ah = uobj->object;
+	pd = ah->pd;
 
 	ret = ib_destroy_ah(ah);
 	if (!ret)
@@ -2919,6 +3054,10 @@  ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_AH, 1);
+
 	idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -3171,10 +3310,17 @@  int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		err = -EINVAL;
 		goto err_free;
 	}
+
+	err = ib_rdmacg_try_charge(&uobj->cg_obj, qp->pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_FLOW, 1);
+	if (err)
+		goto err_free;
+
 	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
 	if (IS_ERR(flow_id)) {
 		err = PTR_ERR(flow_id);
-		goto err_free;
+		goto err_create;
 	}
 	flow_id->qp = qp;
 	flow_id->uobject = uobj;
@@ -3208,6 +3354,10 @@  err_copy:
 	idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 destroy_flow:
 	ib_destroy_flow(flow_id);
+err_create:
+	ib_rdmacg_uncharge(&uobj->cg_obj, qp->pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_FLOW, 1);
 err_free:
 	kfree(flow_attr);
 err_put:
@@ -3228,6 +3378,7 @@  int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_flow	cmd;
 	struct ib_flow			*flow_id;
 	struct ib_uobject		*uobj;
+	struct ib_pd			*pd;
 	int				ret;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3245,11 +3396,16 @@  int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -EINVAL;
 	flow_id = uobj->object;
+	pd = flow_id->qp->pd;
 
 	ret = ib_destroy_flow(flow_id);
 	if (!ret)
 		uobj->live = 0;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_FLOW, 1);
+
 	put_uobj_write(uobj);
 
 	idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
@@ -3316,6 +3472,12 @@  static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 
+	ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_SRQ, 1);
+	if (ret)
+		goto err_put_cq;
+
 	srq = pd->device->create_srq(pd, &attr, udata);
 	if (IS_ERR(srq)) {
 		ret = PTR_ERR(srq);
@@ -3380,6 +3542,9 @@  err_destroy:
 	ib_destroy_srq(srq);
 
 err_put:
+	ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_SRQ, 1);
 	put_pd_read(pd);
 
 err_put_cq:
@@ -3540,6 +3705,7 @@  ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_uobject		 *uobj;
 	struct ib_srq               	 *srq;
+	struct ib_pd                     *pd;
 	struct ib_uevent_object        	 *obj;
 	int                         	  ret = -EINVAL;
 	struct ib_usrq_object		 *us;
@@ -3554,6 +3720,7 @@  ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	srq = uobj->object;
 	obj = container_of(uobj, struct ib_uevent_object, uobject);
 	srq_type = srq->srq_type;
+	pd = srq->pd;
 
 	ret = ib_destroy_srq(srq);
 	if (!ret)
@@ -3564,6 +3731,10 @@  ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_SRQ, 1);
+
 	if (srq_type == IB_SRQT_XRC) {
 		us = container_of(obj, struct ib_usrq_object, uevent);
 		atomic_dec(&us->uxrcd->refcnt);
@@ -3597,6 +3768,7 @@  int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	struct ib_uverbs_ex_query_device_resp resp;
 	struct ib_uverbs_ex_query_device  cmd;
 	struct ib_device_attr attr;
+	int    limits[RDMA_VERB_RESOURCE_MAX];
 	int err;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3623,7 +3795,14 @@  int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	if (err)
 		return err;
 
-	copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
+	err = ib_rdmacg_query_limit(ib_dev,
+				    RDMACG_RESOURCE_POOL_VERB,
+				    limits, RDMA_VERB_RESOURCE_MAX);
+	if (err)
+		goto end;
+
+	copy_query_dev_fields(file, ib_dev, &resp.base, &attr, limits);
+
 	resp.comp_mask = 0;
 
 	if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e3ef288..1d8292c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -49,6 +49,7 @@ 
 #include <asm/uaccess.h>
 
 #include "uverbs.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -214,6 +215,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
 		struct ib_ah *ah = uobj->object;
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, ah->pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_AH, 1);
 		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 		ib_destroy_ah(ah);
 		kfree(uobj);
@@ -223,6 +227,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
 		struct ib_mw *mw = uobj->object;
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, mw->pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_MW, 1);
 		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 		ib_dealloc_mw(mw);
 		kfree(uobj);
@@ -231,6 +238,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
 		struct ib_flow *flow_id = uobj->object;
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, flow_id->qp->pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_FLOW, 1);
 		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 		ib_destroy_flow(flow_id);
 		kfree(uobj);
@@ -245,6 +255,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		if (qp != qp->real_qp) {
 			ib_close_qp(qp);
 		} else {
+			ib_rdmacg_uncharge(&uobj->cg_obj, qp->pd->device,
+					   RDMACG_RESOURCE_POOL_VERB,
+					   RDMA_VERB_RESOURCE_QP, 1);
 			ib_uverbs_detach_umcast(qp, uqp);
 			ib_destroy_qp(qp);
 		}
@@ -257,6 +270,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		struct ib_uevent_object *uevent =
 			container_of(uobj, struct ib_uevent_object, uobject);
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, srq->pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_SRQ, 1);
 		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 		ib_destroy_srq(srq);
 		ib_uverbs_release_uevent(file, uevent);
@@ -269,6 +285,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		struct ib_ucq_object *ucq =
 			container_of(uobj, struct ib_ucq_object, uobject);
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, cq->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_CQ, 1);
 		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 		ib_destroy_cq(cq);
 		ib_uverbs_release_ucq(file, ev_file, ucq);
@@ -278,6 +297,9 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
 		struct ib_mr *mr = uobj->object;
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, mr->pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_MR, 1);
 		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 		ib_dereg_mr(mr);
 		kfree(uobj);
@@ -298,11 +320,17 @@  static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
 		struct ib_pd *pd = uobj->object;
 
+		ib_rdmacg_uncharge(&uobj->cg_obj, pd->device,
+				   RDMACG_RESOURCE_POOL_VERB,
+				   RDMA_VERB_RESOURCE_PD, 1);
 		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
 		ib_dealloc_pd(pd);
 		kfree(uobj);
 	}
 
+	ib_rdmacg_uncharge(&context->cg_obj, context->device,
+			   RDMACG_RESOURCE_POOL_VERB,
+			   RDMA_VERB_RESOURCE_UCTX, 1);
 	put_pid(context->tgid);
 
 	return context->device->dealloc_ucontext(context);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9a68a19..e109752 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,8 @@ 
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
+#include <linux/cgroup_rdma.h>
+
 extern struct workqueue_struct *ib_wq;
 
 union ib_gid {
@@ -95,6 +97,19 @@  enum rdma_protocol_type {
 	RDMA_PROTOCOL_USNIC_UDP
 };
 
+enum rdma_resource_type {
+	RDMA_VERB_RESOURCE_UCTX,
+	RDMA_VERB_RESOURCE_AH,
+	RDMA_VERB_RESOURCE_PD,
+	RDMA_VERB_RESOURCE_CQ,
+	RDMA_VERB_RESOURCE_MR,
+	RDMA_VERB_RESOURCE_MW,
+	RDMA_VERB_RESOURCE_SRQ,
+	RDMA_VERB_RESOURCE_QP,
+	RDMA_VERB_RESOURCE_FLOW,
+	RDMA_VERB_RESOURCE_MAX,
+};
+
 __attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type);
 
@@ -1231,6 +1246,12 @@  struct ib_fmr_attr {
 
 struct ib_umem;
 
+struct ib_rdmacg_object {
+#ifdef CONFIG_CGROUP_RDMA
+	struct rdma_cgroup	*cg;		/* owner rdma cgroup */
+#endif
+};
+
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct list_head	pd_list;
@@ -1261,12 +1282,14 @@  struct ib_ucontext {
 	struct list_head	no_private_counters;
 	int                     odp_mrs_count;
 #endif
+	struct ib_rdmacg_object cg_obj;
 };
 
 struct ib_uobject {
 	u64			user_handle;	/* handle given to us by userspace */
 	struct ib_ucontext     *context;	/* associated user context */
 	void		       *object;		/* containing object */
+	struct ib_rdmacg_object cg_obj;
 	struct list_head	list;		/* link to context's list */
 	int			id;		/* index into kernel idr */
 	struct kref		ref;
@@ -1822,7 +1845,9 @@  struct ib_device {
 	u16                          is_switch:1;
 	u8                           node_type;
 	u8                           phys_port_cnt;
-
+#ifdef CONFIG_CGROUP_RDMA
+	struct rdmacg_device	     cg_device;
+#endif
 	/**
 	 * The following mandatory functions are used only at device
 	 * registration.  Keep functions such as these at the end of this