diff mbox

[for-next,3/7] IB/core: Add support for fd objects

Message ID 1484132033-3346-4-git-send-email-matanb@mellanox.com (mailing list archive)
State Superseded
Headers show

Commit Message

Matan Barak Jan. 11, 2017, 10:53 a.m. UTC
The completion channel we use in verbs infrastructure is FD based.
Previously, we had a separate way to manage this object. Since we
strive for a single way to manage any kind of object in this
infrastructure, we conceptually treat all objects as subclasses
of ib_uobject.

This commit adds the necessary mechanism to support FD based objects
like their IDR counterparts. FD objects release need to be synchronized
with context release. Since FDs could outlives their context, we use
a kref on this lock. We initialize the lock and the kref in downstream
patches. This is acceptable, as we don't use this infrastructure until
a later point in this series.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c   | 134 +++++++++++++++++++++++++++++++++-
 drivers/infiniband/core/rdma_core.h   |  12 +++
 drivers/infiniband/core/uverbs_main.c |   2 +-
 include/rdma/ib_verbs.h               |   4 +-
 include/rdma/uverbs_ioctl.h           |   8 ++
 5 files changed, 157 insertions(+), 3 deletions(-)

Comments

Jason Gunthorpe Jan. 11, 2017, 11:58 p.m. UTC | #1
On Wed, Jan 11, 2017 at 12:53:49PM +0200, Matan Barak wrote:

> This commit adds the necessary mechanism to support FD based objects
> like their IDR counterparts. FD objects release need to be synchronized
> with context release. Since FDs could outlives their context, we use
> a kref on this lock. We initialize the lock and the kref in downstream
> patches. This is acceptable, as we don't use this infrastructure until
> a later point in this series.

This seems unnecessarily complicated. Just hold the kref on the
ucontext in the fd. There isn't a problem with a dummy version of that
struct hanging around..

> +static struct ib_uobject *uverbs_get_uobject_from_fd(const struct uverbs_type_alloc_action *type_alloc,
> +						     struct ib_ucontext *ucontext,
> +						     enum uverbs_idr_access access,
> +						     unsigned int fd)
> +{
> +	if (access == UVERBS_ACCESS_NEW) {
> +		int _fd;

Something has gone terribly wrong if you need _ to disambiguate with a
function argument...
> +	} else if (access == UVERBS_ACCESS_READ) {
> +		struct file *f = fget(fd);
> +		struct ib_uobject *uobject;
> +
> +		if (!f)
> +			return ERR_PTR(-EBADF);
> +
> +		uobject = uverbs_priv_fd_to_uobject(f->private_data);
> +		if (f->f_op != type_alloc->fd.fops ||
> +		    !uobject->context) {

I think the if should be split, if fops isn't correct then do not even
call uverbs_priv_fd_to_uobject.

> +void uverbs_cleanup_fd(void *private_data)
> +{
> +	struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(private_data);
> +
> +	kfree(uobject);
> +}

Woah, this is not OK at this point in the series. There is really too
much stuff bundled into patch 7 to make proper sense of this.

I don't like this design. Do not implicitly prepend ib_uobject to
structures. Require the users to include the struct as is common in
linux.

Do not drop the kref out of ib_uobject, that should be the master
kref, drop the kref out of ib_uverbs_event_file instead.

> +static inline void *uverbs_fd_uobj_to_priv(struct ib_uobject *uobj)
> +{
> +	return uobj + 1;
> +}

Which means stuff like this can go away..

> -	int			id;		/* index into kernel idr */
> +	int			id;		/* index into kernel idr/fd */

Why do we need to store the fd number at all? We can *never* use it in
the kernel except as the argument to fdinstall. For that reason we
should never store it.

Can you move the fdallocate into finalize? At the very least 0 the
value after fdinstall so people don't get the wrong idea down the
road.

>  	int				order;
>  	size_t				obj_size;
>  	free_type			free_fn;
> +	struct {
> +		const struct file_operations	*fops;
> +		const char			*name;

Maybe name should be common

This idea also seems reasonable to me in general, but again, I'd
rather see it more completely implemented in this patch instead of
deferring so much stuff in to the giant #7 patch.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak Jan. 17, 2017, 5:24 p.m. UTC | #2
On Thu, Jan 12, 2017 at 1:58 AM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Wed, Jan 11, 2017 at 12:53:49PM +0200, Matan Barak wrote:
>
>> This commit adds the necessary mechanism to support FD based objects
>> like their IDR counterparts. FD objects release need to be synchronized
>> with context release. Since FDs could outlives their context, we use
>> a kref on this lock. We initialize the lock and the kref in downstream
>> patches. This is acceptable, as we don't use this infrastructure until
>> a later point in this series.
>
> This seems unnecessarily complicated. Just hold the kref on the
> ucontext in the fd. There isn't a problem with a dummy version of that
> struct hanging around..
>

Ok, I'll split the current event file structure to a completion event
file which begins with  and ib_uobject
and an asynchronous event_file.

>> +static struct ib_uobject *uverbs_get_uobject_from_fd(const struct uverbs_type_alloc_action *type_alloc,
>> +                                                  struct ib_ucontext *ucontext,
>> +                                                  enum uverbs_idr_access access,
>> +                                                  unsigned int fd)
>> +{
>> +     if (access == UVERBS_ACCESS_NEW) {
>> +             int _fd;
>
> Something has gone terribly wrong if you need _ to disambiguate with a
> function argument...

I'll rename it to new_fd.

>> +     } else if (access == UVERBS_ACCESS_READ) {
>> +             struct file *f = fget(fd);
>> +             struct ib_uobject *uobject;
>> +
>> +             if (!f)
>> +                     return ERR_PTR(-EBADF);
>> +
>> +             uobject = uverbs_priv_fd_to_uobject(f->private_data);
>> +             if (f->f_op != type_alloc->fd.fops ||
>> +                 !uobject->context) {
>
> I think the if should be split, if fops isn't correct then do not even
> call uverbs_priv_fd_to_uobject.
>

If we start file objects with ib_uobject, we no longer need this
conversion function.

>> +void uverbs_cleanup_fd(void *private_data)
>> +{
>> +     struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(private_data);
>> +
>> +     kfree(uobject);
>> +}
>
> Woah, this is not OK at this point in the series. There is really too
> much stuff bundled into patch 7 to make proper sense of this.
>

This is a standard structure of a series - you first build up the
infrastructure and then use it
to change everything. I'm a bit worried that embedding the actual
changes with the infrastructural
changes will require massive amounts of testing to verify it's bisect-able.

> I don't like this design. Do not implicitly prepend ib_uobject to
> structures. Require the users to include the struct as is common in
> linux.
>

I'll change that. It means we'll have different structures for the
completion event and the
asynchronous event (there will be a shared part of course).

> Do not drop the kref out of ib_uobject, that should be the master
> kref, drop the kref out of ib_uverbs_event_file instead.
>

I didn't really follow, could you please clarify?

>> +static inline void *uverbs_fd_uobj_to_priv(struct ib_uobject *uobj)
>> +{
>> +     return uobj + 1;
>> +}
>
> Which means stuff like this can go away..
>

Right

>> -     int                     id;             /* index into kernel idr */
>> +     int                     id;             /* index into kernel idr/fd */
>
> Why do we need to store the fd number at all? We can *never* use it in
> the kernel except as the argument to fdinstall. For that reason we
> should never store it.
>

It's only used for fdinstall.

> Can you move the fdallocate into finalize? At the very least 0 the
> value after fdinstall so people don't get the wrong idea down the
> road.
>

It can't be moved to finalize, as fdallocate could fail and we assume nothing
in the finalize stage could fail. However, I'll zero this value.

>>       int                             order;
>>       size_t                          obj_size;
>>       free_type                       free_fn;
>> +     struct {
>> +             const struct file_operations    *fops;
>> +             const char                      *name;
>
> Maybe name should be common
>

This could be a nice enhancement when we add an ability to print the
parsing tree.
I prefer to postpone this till we actually have code that uses this name.

> This idea also seems reasonable to me in general, but again, I'd
> rather see it more completely implemented in this patch instead of
> deferring so much stuff in to the giant #7 patch.
>

I agree it could make the review easier, but the effort to make sure the code
is really bisectable will be pretty big.

> Jason

Matan

> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 17, 2017, 7:11 p.m. UTC | #3
On Tue, Jan 17, 2017 at 07:24:59PM +0200, Matan Barak wrote:
> >> +void uverbs_cleanup_fd(void *private_data)
> >> +{
> >> +     struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(private_data);
> >> +
> >> +     kfree(uobject);
> >> +}
> >
> > Woah, this is not OK at this point in the series. There is really too
> > much stuff bundled into patch 7 to make proper sense of this.
> >
> 
> This is a standard structure of a series - you first build up the
> infrastructure and then use it to change everything.

Well, again, no, this is not normal. At this point in the series the
lifetime model for uboject is totally screwed up between the new/old
code. That is not OK

Do not get confused with how you write *new code* vs how you
*transform* old code. This is the latter and I expect each patch in
the series to globally follow or change the overal invarients. Do not
introduce two incompatible schemes.

> worried that embedding the actual changes with the infrastructural
> changes will require massive amounts of testing to verify it's
> bisect-able.

If each patch makes internal self consisent sense and compiles I'm
happy enough... Not asking that every patch be exhaustively tested.

As it stands this series is pretty useless for bisection because
everything hinges on the final patch, so having at least the ideas
properly broken up is a win from that standpoint.

> > Do not drop the kref out of ib_uobject, that should be the master
> > kref, drop the kref out of ib_uverbs_event_file instead.
> 
> I didn't really follow, could you please clarify?

In patch 7 you delete the kref from ib_uobject, but keep a kref in
ib_uverbs_event_file.

Instead you should keep the kref in ib_uobject, make it work properly,
and discard the kref in ib_uverbs_event_file when you add in
ib_uobject as a member.

The uobject kref semantics around the IDR should be pretty simple: The
IDR holds a kref on all the members of the IDR. get on add, put on
remove.

The per-uobject rwlock prevents removing the object from the IDR so
anything operating within the read side of the rwlock does not need to
kref get. Only when the uobject is transfered outside that rwlock is a
get required (eg for fops private_data)

Not mangling the kref model of uobject should make the two APIs more
compatible.

> It can't be moved to finalize, as fdallocate could fail and we assume nothing
> in the finalize stage could fail. However, I'll zero this value.

Okay, sure, -1 or something.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 09b44ec..193591d 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -64,10 +64,20 @@  static struct ib_uobject *get_uobj_rcu(int id, struct ib_ucontext *context)
 }
 
 struct ib_ucontext_lock {
+	struct kref  ref;
 	/* locking the uobjects_list */
 	struct mutex lock;
 };
 
+static void release_uobjects_list_lock(struct kref *ref)
+{
+	struct ib_ucontext_lock *lock = container_of(ref,
+						     struct ib_ucontext_lock,
+						     ref);
+
+	kfree(lock);
+}
+
 static void init_uobj(struct ib_uobject *uobj, struct ib_ucontext *context)
 {
 	init_rwsem(&uobj->usecnt);
@@ -184,6 +194,75 @@  static struct ib_uobject *uverbs_get_uobject_from_idr(const struct uverbs_type_a
 	return uobj;
 }
 
+static struct ib_uobject *uverbs_priv_fd_to_uobject(void *priv)
+{
+	return priv - sizeof(struct ib_uobject);
+}
+
+static struct ib_uobject *uverbs_get_uobject_from_fd(const struct uverbs_type_alloc_action *type_alloc,
+						     struct ib_ucontext *ucontext,
+						     enum uverbs_idr_access access,
+						     unsigned int fd)
+{
+	if (access == UVERBS_ACCESS_NEW) {
+		int _fd;
+		struct ib_uobject *uobj = NULL;
+		struct file *filp;
+
+		_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (_fd < 0)
+			return ERR_PTR(_fd);
+
+		uobj = kmalloc(type_alloc->obj_size, GFP_KERNEL);
+		if (!uobj) {
+			put_unused_fd(_fd);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		init_uobj(uobj, ucontext);
+		filp = anon_inode_getfile(type_alloc->fd.name,
+					  type_alloc->fd.fops,
+					  uverbs_fd_uobj_to_priv(uobj),
+					  type_alloc->fd.flags);
+		if (IS_ERR(filp)) {
+			put_unused_fd(_fd);
+			kfree(uobj);
+			return (void *)filp;
+		}
+
+		/*
+		 * user_handle should be filled by the user,
+		 * the list is filled in the commit operation.
+		 */
+		uobj->type = type_alloc;
+		uobj->id = _fd;
+		uobj->object = filp;
+
+		return uobj;
+	} else if (access == UVERBS_ACCESS_READ) {
+		struct file *f = fget(fd);
+		struct ib_uobject *uobject;
+
+		if (!f)
+			return ERR_PTR(-EBADF);
+
+		uobject = uverbs_priv_fd_to_uobject(f->private_data);
+		if (f->f_op != type_alloc->fd.fops ||
+		    !uobject->context) {
+			fput(f);
+			return ERR_PTR(-EBADF);
+		}
+
+		/*
+		 * No need to protect it with a ref count, as fget increases
+		 * f_count.
+		 */
+		return uobject;
+	} else {
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+}
+
 struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_type_alloc_action *type_alloc,
 						   struct ib_ucontext *ucontext,
 						   enum uverbs_idr_access access,
@@ -193,7 +272,8 @@  struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_type_allo
 		return uverbs_get_uobject_from_idr(type_alloc, ucontext, access,
 						   id);
 	else
-		return ERR_PTR(-ENOENT);
+		return uverbs_get_uobject_from_fd(type_alloc, ucontext, access,
+						  id);
 }
 
 static void uverbs_uobject_add(struct ib_uobject *uobject)
@@ -253,12 +333,64 @@  static void uverbs_finalize_idr(struct ib_uobject *uobj,
 	}
 }
 
+static void uverbs_finalize_fd(struct ib_uobject *uobj,
+			       enum uverbs_idr_access access,
+			       bool commit)
+{
+	struct file *filp = uobj->object;
+
+	if (access == UVERBS_ACCESS_NEW) {
+		if (commit) {
+			uobj->uobjects_lock = uobj->context->uobjects_lock;
+			kref_get(&uobj->uobjects_lock->ref);
+			uverbs_uobject_add(uobj);
+			fd_install(uobj->id, uobj->object);
+		} else {
+			/* Unsuccessful NEW */
+			fput(filp);
+			put_unused_fd(uobj->id);
+			kfree(uobj);
+		}
+	} else {
+		fput(filp);
+	}
+}
+
 void uverbs_finalize_object(struct ib_uobject *uobj,
 			    enum uverbs_idr_access access,
 			    bool commit)
 {
 	if (uobj->type->type == UVERBS_ATTR_TYPE_IDR)
 		uverbs_finalize_idr(uobj, access, commit);
+	else if (uobj->type->type == UVERBS_ATTR_TYPE_FD)
+		uverbs_finalize_fd(uobj, access, commit);
 	else
 		WARN_ON(true);
 }
+
+static void uverbs_remove_fd(struct ib_uobject *uobject)
+{
+	if (uobject->context) {
+		list_del(&uobject->list);
+		uobject->context = NULL;
+	}
+}
+
+/* user should release the uobject in the release file_operation callback. */
+void uverbs_close_fd(struct file *f)
+{
+	struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(f->private_data);
+
+	mutex_lock(&uobject->uobjects_lock->lock);
+	uverbs_remove_fd(uobject);
+	mutex_unlock(&uobject->uobjects_lock->lock);
+	kref_put(&uobject->uobjects_lock->ref, release_uobjects_list_lock);
+}
+
+void uverbs_cleanup_fd(void *private_data)
+{
+	struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(private_data);
+
+	kfree(uobject);
+}
+
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 0142573..c71a51c 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -57,5 +57,17 @@  struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_type_allo
 void uverbs_finalize_object(struct ib_uobject *uobj,
 			    enum uverbs_idr_access access,
 			    bool success);
+/*
+ * Indicate this fd is no longer used by this consumer, but its memory isn't
+ * released yet. The memory is released only when ib_uverbs_cleanup_fd is
+ * called.
+ */
+void uverbs_close_fd(struct file *f);
+void uverbs_cleanup_fd(void *private_data);
+
+static inline void *uverbs_fd_uobj_to_priv(struct ib_uobject *uobj)
+{
+	return uobj + 1;
+}
 
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index daee2ba6..6e38a7c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -339,7 +339,7 @@  static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
 	complete(&dev->comp);
 }
 
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
 {
 	struct ib_uverbs_file *file =
 		container_of(ref, struct ib_uverbs_file, ref);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 47f560d..7992fcd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1335,6 +1335,7 @@  struct ib_fmr_attr {
 
 struct ib_ucontext {
 	struct ib_device       *device;
+	struct ib_uverbs_file  *ufile;
 	struct list_head	pd_list;
 	struct list_head	mr_list;
 	struct list_head	mw_list;
@@ -1376,7 +1377,7 @@  struct ib_uobject {
 	struct ib_ucontext     *context;	/* associated user context */
 	void		       *object;		/* containing object */
 	struct list_head	list;		/* link to context's list */
-	int			id;		/* index into kernel idr */
+	int			id;		/* index into kernel idr/fd */
 	struct kref		ref;
 	struct rw_semaphore	mutex;		/* protects .live */
 	struct rw_semaphore	usecnt;		/* protects exclusive access */
@@ -1384,6 +1385,7 @@  struct ib_uobject {
 	int			live;
 
 	const struct uverbs_type_alloc_action *type;
+	struct ib_ucontext_lock	*uobjects_lock;
 };
 
 struct ib_udata {
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 903f6b3..189e323 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -35,8 +35,11 @@ 
 
 #include <linux/kernel.h>
 
+struct ib_uobject;
+
 enum uverbs_attr_type {
 	UVERBS_ATTR_TYPE_IDR,
+	UVERBS_ATTR_TYPE_FD,
 };
 
 enum uverbs_idr_access {
@@ -55,6 +58,11 @@  struct uverbs_type_alloc_action {
 	int				order;
 	size_t				obj_size;
 	free_type			free_fn;
+	struct {
+		const struct file_operations	*fops;
+		const char			*name;
+		int				flags;
+	} fd;
 };
 
 #endif