diff mbox

[V2,for-next,7/7] IB/core: Change completion channel to use the reworked objects schema

Message ID 1489939145-125246-8-git-send-email-matanb@mellanox.com (mailing list archive)
State Superseded
Headers show

Commit Message

Matan Barak March 19, 2017, 3:59 p.m. UTC
This patch adds the standard fd based type - completion_channel.
The completion_channel is now prefixed with ib_uobject, similarly
to the rest of the uobjects.
This requires a few changes:
(1) We define a new completion channel fd based object type.
(2) completion_event and async_event are now two different types.
    This means they use different fops.
(3) We release the completion_channel exactly as we release other
    idr based objects.
(4) Since ib_uobjects are already kref-ed, we only add the kref to the
    async event.

A fd object requires filling out several parameters. Its op pointer
should point to uverbs_fd_ops and its size should be at least the
size of ib_uobject. We use a macro to make the type declaration
easier.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
---
 drivers/infiniband/core/uverbs.h           |  26 ++-
 drivers/infiniband/core/uverbs_cmd.c       |  58 +++---
 drivers/infiniband/core/uverbs_main.c      | 279 +++++++++++++++++------------
 drivers/infiniband/core/uverbs_std_types.c |  33 +++-
 include/rdma/uverbs_std_types.h            |   1 +
 include/rdma/uverbs_types.h                |   9 +
 6 files changed, 258 insertions(+), 148 deletions(-)

Comments

Jason Gunthorpe March 29, 2017, 2:53 p.m. UTC | #1
On Sun, Mar 19, 2017 at 05:59:05PM +0200, Matan Barak wrote:
> +static struct ib_uverbs_completion_event_file *
> +ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
> +{
> +	struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
> +						fd, context);
> +	struct ib_uobject_file *uobj_file;
> +
> +	if (IS_ERR(uobj))
> +		return (void *)uobj;
> +
> +	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
> +
> +	uverbs_uobject_get(&uobj_file->uobj);
> +	uobj_put_read(uobj);

That looks odd, isn't uobj == uobj_file->uobj ? 

> +	kref_init(&ev_file->ref);
> +	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
>  				  ev_file, O_RDONLY);

It seems weird this name occurs twice:

> +const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
> +	.type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
> +	.context_closed = uverbs_hot_unplug_completion_event_file,
> +	.fops = &uverbs_event_fops,
> +	.name = "[infinibandevent]",
> +	.flags = O_RDONLY,
> +};

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak March 29, 2017, 6:21 p.m. UTC | #2
On Wed, Mar 29, 2017 at 5:53 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Sun, Mar 19, 2017 at 05:59:05PM +0200, Matan Barak wrote:
>> +static struct ib_uverbs_completion_event_file *
>> +ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
>> +{
>> +     struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
>> +                                             fd, context);
>> +     struct ib_uobject_file *uobj_file;
>> +
>> +     if (IS_ERR(uobj))
>> +             return (void *)uobj;
>> +
>> +     uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
>> +
>> +     uverbs_uobject_get(&uobj_file->uobj);
>> +     uobj_put_read(uobj);
>
> That looks odd, isn't uobj == uobj_file->uobj ?
>

Yeah, they're essentially the same. The uverbs_uobject_get is intended
to increase the reference count
on the returned object. The uobj_put_read is paired with uobj_get_read
(first line of this function).
We could change them both to uobj.

>> +     kref_init(&ev_file->ref);
>> +     filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
>>                                 ev_file, O_RDONLY);
>
> It seems weird this name occurs twice:
>

Unfortunately, we have to keep the current names. The old code created
the async fd
and the completion channel fd and both are called the same.

>> +const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
>> +     .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
>> +     .context_closed = uverbs_hot_unplug_completion_event_file,
>> +     .fops = &uverbs_event_fops,
>> +     .name = "[infinibandevent]",
>> +     .flags = O_RDONLY,
>> +};
>
> Jason

Matan

> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe March 29, 2017, 10:29 p.m. UTC | #3
On Wed, Mar 29, 2017 at 09:21:09PM +0300, Matan Barak wrote:
> On Wed, Mar 29, 2017 at 5:53 PM, Jason Gunthorpe
> <jgunthorpe@obsidianresearch.com> wrote:
> > On Sun, Mar 19, 2017 at 05:59:05PM +0200, Matan Barak wrote:
> >> +static struct ib_uverbs_completion_event_file *
> >> +ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
> >> +{
> >> +     struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
> >> +                                             fd, context);
> >> +     struct ib_uobject_file *uobj_file;
> >> +
> >> +     if (IS_ERR(uobj))
> >> +             return (void *)uobj;
> >> +
> >> +     uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
> >> +
> >> +     uverbs_uobject_get(&uobj_file->uobj);
> >> +     uobj_put_read(uobj);
> >
> > That looks odd, isn't uobj == uobj_file->uobj ?
> 
> Yeah, they're essentially the same. The uverbs_uobject_get is intended
> to increase the reference count
> on the returned object. The uobj_put_read is paired with uobj_get_read
> (first line of this function).
> We could change them both to uobj.

I'm confused why you would ever write

uverbs_uobject_get(uobj);
uobj_put_read(uobj);

Maybe drop both lines and just add a comment that the ref from
uobj_get_read moves from the stack to XX

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak March 30, 2017, 3:47 p.m. UTC | #4
On Thu, Mar 30, 2017 at 1:29 AM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Wed, Mar 29, 2017 at 09:21:09PM +0300, Matan Barak wrote:
>> On Wed, Mar 29, 2017 at 5:53 PM, Jason Gunthorpe
>> <jgunthorpe@obsidianresearch.com> wrote:
>> > On Sun, Mar 19, 2017 at 05:59:05PM +0200, Matan Barak wrote:
>> >> +static struct ib_uverbs_completion_event_file *
>> >> +ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
>> >> +{
>> >> +     struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
>> >> +                                             fd, context);
>> >> +     struct ib_uobject_file *uobj_file;
>> >> +
>> >> +     if (IS_ERR(uobj))
>> >> +             return (void *)uobj;
>> >> +
>> >> +     uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
>> >> +
>> >> +     uverbs_uobject_get(&uobj_file->uobj);
>> >> +     uobj_put_read(uobj);
>> >
>> > That looks odd, isn't uobj == uobj_file->uobj ?
>>
>> Yeah, they're essentially the same. The uverbs_uobject_get is intended
>> to increase the reference count
>> on the returned object. The uobj_put_read is paired with uobj_get_read
>> (first line of this function).
>> We could change them both to uobj.
>
> I'm confused why you would ever write
>
> uverbs_uobject_get(uobj);
> uobj_put_read(uobj);
>
> Maybe drop both lines and just add a comment that the ref from
> uobj_get_read moves from the stack to XX
>

The problem with that is that get_read also locks the object for
reading (so if you try to lock it for write or destroy it, you'll get
a locking error).
So, you want to increase the reference count to ensure it exists in the memory,
but unlock it.

> Jason

Matan
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe March 30, 2017, 4:12 p.m. UTC | #5
On Thu, Mar 30, 2017 at 06:47:02PM +0300, Matan Barak wrote:

> The problem with that is that get_read also locks the object for
> reading (so if you try to lock it for write or destroy it, you'll
> get a locking error).  So, you want to increase the reference count
> to ensure it exists in the memory, but unlock it.

Okay, I see. I've looked at the whole thing more closely and there are
more places that should be holding krefs, so I think you should make a
function specifically for this pattern.

/* Convert a locked reference obtained from rdma_lookup_get_uobject to
   a simple kref by dropping the read/write lock. The caller must pair
   this with uverbs_uobject_put */
void rdma_lookup_to_kref(struct ib_uobject *uobj, bool write);

Similarly I think you should add a uobj_remove_commit_kref() wrapper
which does remove_commit but leaves the caller with a kref, as that
pattern seems to come up a lot in this code as well.

Looking at things after this series is applied, there are two more
areas that need some more work in future patches before adding the new
uapi stuff..

usecnt is confused, the uapi part makes sense, we must hold uobj read
when doing atomic_inc and uobj write when doing atomic_dec. However
the kapi does something entirely different. Maybe we should delete
usecnt from the kapi side?

Lots of places that incr usecnt fail to hold the kref:

     pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
     
     mr->device  = pd->device;
     mr->pd      = pd;
     mr->uobject = uobj;
     atomic_inc(&pd->usecnt);

     uobj_put_obj_read(pd);

It really should be doing rdma_lookup_to_kref(pd) and have the proper
put in the destroy of the pd uobj. Many other places like this as
well.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak March 30, 2017, 6:54 p.m. UTC | #6
On Thu, Mar 30, 2017 at 7:12 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Thu, Mar 30, 2017 at 06:47:02PM +0300, Matan Barak wrote:
>
>> The problem with that is that get_read also locks the object for
>> reading (so if you try to lock it for write or destroy it, you'll
>> get a locking error).  So, you want to increase the reference count
>> to ensure it exists in the memory, but unlock it.
>
> Okay, I see. I've looked at the whole thing more closely and there are
> more places that should be holding krefs, so I think you should make a
> function specifically for this pattern.
>
> /* Convert a locked reference obtained from rdma_lookup_get_uobject to
>    a simple kref by dropping the read/write lock. The caller must pair
>    this with uverbs_uobject_put */
> void rdma_lookup_to_kref(struct ib_uobject *uobj, bool write);
>
> Similarly I think you should add a uobj_remove_commit_kref() wrapper
> which does remove_commit but leaves the caller with a kref, as that
> pattern seems to come up a lot in this code as well.
>

Actually, this isn't that frequent pattern. It mainly happens in
ib_uverbs_lookup_comp_file.
There's a different pattern of inc_kref and then remove_commit which
is used in order to
copy the latest events_reported info before releasing the uobject's memory.

> Looking at things after this series is applied, there are two more
> areas that need some more work in future patches before adding the new
> uapi stuff..
>
> usecnt is confused, the uapi part makes sense, we must hold uobj read
> when doing atomic_inc and uobj write when doing atomic_dec. However
> the kapi does something entirely different. Maybe we should delete
> usecnt from the kapi side?
>
> Lots of places that incr usecnt fail to hold the kref:
>
>      pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
>
>      mr->device  = pd->device;
>      mr->pd      = pd;
>      mr->uobject = uobj;
>      atomic_inc(&pd->usecnt);
>
>      uobj_put_obj_read(pd);
>
> It really should be doing rdma_lookup_to_kref(pd) and have the proper
> put in the destroy of the pd uobj. Many other places like this as
> well.
>

There is a usecnt in the uobject, which is used to count the
concurrent readers or
indicate if the uobject is write locked.
Most objects (as opposed to uobjects) have a usecnt variable to count how
many objects
actually use them. For example, in the reg_mr handler, we inc the
object's usecnt right
after assigning it (as you mentioned).

The object's destroy function should test the usecnt and make sure no
one uses it before
destroying it. When this destroy function comes from the user-space,
the pd object is
locked, so this test would be atomic and safe. The uobject won't be
destroyed, until
the object itself is destroyed. So effectively, it's like taking another kref :)
When you come from the kernel's context, the ULP has to make sure you
aren't trying
to destroy an object while using it in creation of another object.
In context teardown, we rely on order, so this should be safe as well.

The verbs layer itself (drivers/infiniband/verbs.c) increase the
object's pd as well when
needed (for example, ib_create_ah). I haven't looked at each of the
handler, but I haven't
changed this code too, so I guess this has nothing to do with this
series. When we introduce
the ioctl handlers, we'll of course need to make sure we increase the
refcounts on objects
correctly.

> Jason

Matan
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe April 5, 2017, 4:10 p.m. UTC | #7
On Thu, Mar 30, 2017 at 09:54:52PM +0300, Matan Barak wrote:
> > /* Convert a locked reference obtained from rdma_lookup_get_uobject to
> >    a simple kref by dropping the read/write lock. The caller must pair
> >    this with uverbs_uobject_put */
> > void rdma_lookup_to_kref(struct ib_uobject *uobj, bool write);
> >
> > Similarly I think you should add a uobj_remove_commit_kref() wrapper
> > which does remove_commit but leaves the caller with a kref, as that
> > pattern seems to come up a lot in this code as well.
> >
> 
> Actually, this isn't that frequent pattern. It mainly happens in
> ib_uverbs_lookup_comp_file.  There's a different pattern of inc_kref
> and then remove_commit which is used in order to copy the latest
> events_reported info before releasing the uobject's memory.

That is the one I was talking about..

> When this destroy function comes from the user-space, the pd object
> is locked, so this test would be atomic and safe. The uobject won't
> be destroyed, until the object itself is destroyed. So effectively,
> it's like taking another kref :)

And this is the sketchyness I don't like - this is something that
would be easy to break and you have to rely on quite a few assumptions
to know that this is 'safe'.

It would be much better to hold the kref - that should be cleaned up
before start messing with this much more.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak April 6, 2017, 2:10 p.m. UTC | #8
On Wed, Apr 5, 2017 at 7:10 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Thu, Mar 30, 2017 at 09:54:52PM +0300, Matan Barak wrote:
>> > /* Convert a locked reference obtained from rdma_lookup_get_uobject to
>> >    a simple kref by dropping the read/write lock. The caller must pair
>> >    this with uverbs_uobject_put */
>> > void rdma_lookup_to_kref(struct ib_uobject *uobj, bool write);
>> >
>> > Similarly I think you should add a uobj_remove_commit_kref() wrapper
>> > which does remove_commit but leaves the caller with a kref, as that
>> > pattern seems to come up a lot in this code as well.
>> >
>>
>> Actually, this isn't that frequent pattern. It mainly happens in
>> ib_uverbs_lookup_comp_file.  There's a different pattern of inc_kref
>> and then remove_commit which is used in order to copy the latest
>> events_reported info before releasing the uobject's memory.
>
> That is the one I was talking about..
>
>> When this destroy function comes from the user-space, the pd object
>> is locked, so this test would be atomic and safe. The uobject won't
>> be destroyed, until the object itself is destroyed. So effectively,
>> it's like taking another kref :)
>
> And this is the sketchyness I don't like - this is something that
> would be easy to break and you have to rely on quite a few assumptions
> to know that this is 'safe'.
>
> It would be much better to hold the kref - that should be cleaned up
> before start messing with this much more.
>

Effectively this kref is used to capture the dependencies between
various objects.
Obviously, It should be part of the verbs layer (and not part of the
uverbs layer).
It relies on the caller's code to provide atomic behavior, meaning - a
ULP (or uverbs)
should guarantee destroy_x isn't called while using x as a dependency
of a new object.
This won't solve the "inc_uobject_kref; destroy_uobject" schema, as in
this case we really
want to destroy the object, but we want to keep the uobject alive in
order to get information
about the object's state at destruction.

If we use kref on the uobject only, we lose the kref count in ULPs.
Moreover, we'll need to somehow
return the information about the destruction state from the destroy
call (instead of writing it to the uobject).
This is because uobject's kref == 0 will be the only case leading for
object destruction and memory deallocation.
This is pretty big change. It could be done later if we want (I'm not
sure the current approach of object and uobject
having different lifetime isn't better) and not as part of the ABI changes :)

> Jason

Matan
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe April 6, 2017, 4:42 p.m. UTC | #9
On Thu, Apr 06, 2017 at 05:10:51PM +0300, Matan Barak wrote:
> Effectively this kref is used to capture the dependencies between
> various objects.

No, that is what the usecnt is for, the kref is just to make sure we
can still *access* the usecnt without derefing freed memory.

> This won't solve the "inc_uobject_kref; destroy_uobject" schema, as
> in this case we really want to destroy the object, but we want to
> keep the uobject alive in order to get information about the
> object's state at destruction.

Right, this is why I said you need a destroy_uobject variant that
retains the kref with the caller.

> If we use kref on the uobject only, we lose the kref count in ULPs.
> Moreover, we'll need to somehow

No, we can't really do that obviously..

I'd rather see the usecnt hoisted entirely into the uverbs layer where
it can work sanely with proper locking and reserve a second
for-debugging-only WARN_ON scheme in the verbs layer that checks
cleanup ordering for the kapi.

The kapi returning error codes on destroy is insane, I cleaned up PD
at one point, but they all need fixing...

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matan Barak April 9, 2017, 3:13 p.m. UTC | #10
On Thu, Apr 6, 2017 at 7:42 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Thu, Apr 06, 2017 at 05:10:51PM +0300, Matan Barak wrote:
>> Effectively this kref is used to capture the dependencies between
>> various objects.
>
> No, that is what the usecnt is for, the kref is just to make sure we
> can still *access* the usecnt without derefing freed memory.
>

There are three usecnt(s) actually. One is intended to determine how
many readers
(or if there's a writer) currently accessing this uobject. Zero here
means no one
accessing this uobject and we could lock it for either read/write.
This is uverbs only.

Another one is to protect the lifetime of the uobject.

The last usecnt is intended to capture dependencies, meaning when use_cnt > 1
we can't free that object as there are dependent objects. Nowadays,
this use_cnt is
in the verbs layer and semi-protects (when things are done serially)
kernel layer as well.

>> This won't solve the "inc_uobject_kref; destroy_uobject" schema, as
>> in this case we really want to destroy the object, but we want to
>> keep the uobject alive in order to get information about the
>> object's state at destruction.
>
> Right, this is why I said you need a destroy_uobject variant that
> retains the kref with the caller.
>

So currently we have three states in uverbs (when an object is destroyed):
(a) uobject is alive, object is alive
(b) uobject is alive, object isn't
(c) both are destroyed

You need to somehow capture the second state. This is used in order to
write the updated last
information when an object is destroyed (so no new events could be
generated on this object).

>> If we use kref on the uobject only, we lose the kref count in ULPs.
>> Moreover, we'll need to somehow
>
> No, we can't really do that obviously..
>
> I'd rather see the usecnt hoisted entirely into the uverbs layer where
> it can work sanely with proper locking and reserve a second
> for-debugging-only WARN_ON scheme in the verbs layer that checks
> cleanup ordering for the kapi.
 >
> The kapi returning error codes on destroy is insane, I cleaned up PD
> at one point, but they all need fixing...
>

The current solution in the verbs layer is really half baked. It works
well as long
as you don't try to use an object while destroying it. If you do such
a non-sense action,
you should do that proper locking and checking yourself.

So, if we move that to a uobject, we need three reference counts:
(a) read_write_count
(b) dependencies_count (or object_refcount)
(c) uobject_refcount

Anyway, this is really unrelated to the ABI work. In any solution we
choose right now, we could
move that reference count from objects to uobjects later on. The
purpose here isn't doing a cleanup.
The locking change was done as the new ABI relies on that new behavior
to be dead-lock free, but the
refcount change could be delayed without risking anything.

> Jason

Matan
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 8102698..4a9c959 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -102,17 +102,25 @@  struct ib_uverbs_device {
 };
 
 struct ib_uverbs_event_file {
-	struct kref				ref;
-	int					is_async;
-	struct ib_uverbs_file		       *uverbs_file;
 	spinlock_t				lock;
 	int					is_closed;
 	wait_queue_head_t			poll_wait;
 	struct fasync_struct		       *async_queue;
 	struct list_head			event_list;
+};
+
+struct ib_uverbs_async_event_file {
+	struct ib_uverbs_event_file		ev_file;
+	struct ib_uverbs_file		       *uverbs_file;
+	struct kref				ref;
 	struct list_head			list;
 };
 
+struct ib_uverbs_completion_event_file {
+	struct ib_uobject_file			uobj_file;
+	struct ib_uverbs_event_file		ev_file;
+};
+
 struct ib_uverbs_file {
 	struct kref				ref;
 	struct mutex				mutex;
@@ -120,7 +128,7 @@  struct ib_uverbs_file {
 	struct ib_uverbs_device		       *device;
 	struct ib_ucontext		       *ucontext;
 	struct ib_event_handler			event_handler;
-	struct ib_uverbs_event_file	       *async_file;
+	struct ib_uverbs_async_event_file       *async_file;
 	struct list_head			list;
 	int					is_closed;
 
@@ -182,14 +190,14 @@  struct ib_ucq_object {
 	u32			async_events_reported;
 };
 
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
-					struct ib_device *ib_dev,
-					int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_file(struct ib_uverbs_event_file *ev_file);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+					      struct ib_device *ib_dev);
 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
 
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
-			   struct ib_uverbs_event_file *ev_file,
+			   struct ib_uverbs_completion_event_file *ev_file,
 			   struct ib_ucq_object *uobj);
 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 			      struct ib_uevent_object *uobj);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 304a3ec..135e3f6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -47,6 +47,25 @@ 
 #include "uverbs.h"
 #include "core_priv.h"
 
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
+{
+	struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+						fd, context);
+	struct ib_uobject_file *uobj_file;
+
+	if (IS_ERR(uobj))
+		return (void *)uobj;
+
+	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+
+	uverbs_uobject_get(&uobj_file->uobj);
+	uobj_put_read(uobj);
+
+	return container_of(uobj_file, struct ib_uverbs_completion_event_file,
+			    uobj_file);
+}
+
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 			      struct ib_device *ib_dev,
 			      const char __user *buf,
@@ -110,7 +129,7 @@  ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 		goto err_free;
 	resp.async_fd = ret;
 
-	filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
+	filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
 	if (IS_ERR(filp)) {
 		ret = PTR_ERR(filp);
 		goto err_fd;
@@ -899,8 +918,8 @@  ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_create_comp_channel	   cmd;
 	struct ib_uverbs_create_comp_channel_resp  resp;
-	struct file				  *filp;
-	int ret;
+	struct ib_uobject			  *uobj;
+	struct ib_uverbs_completion_event_file	  *ev_file;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -908,25 +927,23 @@  ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	ret = get_unused_fd_flags(O_CLOEXEC);
-	if (ret < 0)
-		return ret;
-	resp.fd = ret;
+	uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
 
-	filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
-	if (IS_ERR(filp)) {
-		put_unused_fd(resp.fd);
-		return PTR_ERR(filp);
-	}
+	resp.fd = uobj->id;
+
+	ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+			       uobj_file.uobj);
+	ib_uverbs_init_event_file(&ev_file->ev_file);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
-		put_unused_fd(resp.fd);
-		fput(filp);
+		uobj_alloc_abort(uobj);
 		return -EFAULT;
 	}
 
-	fd_install(resp.fd, filp);
+	uobj_alloc_commit(uobj);
 	return in_len;
 }
 
@@ -944,7 +961,7 @@  static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 				       void *context)
 {
 	struct ib_ucq_object           *obj;
-	struct ib_uverbs_event_file    *ev_file = NULL;
+	struct ib_uverbs_completion_event_file    *ev_file = NULL;
 	struct ib_cq                   *cq;
 	int                             ret;
 	struct ib_uverbs_ex_create_cq_resp resp;
@@ -959,9 +976,10 @@  static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 		return obj;
 
 	if (cmd->comp_channel >= 0) {
-		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
-		if (!ev_file) {
-			ret = -EINVAL;
+		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
+						     file->ucontext);
+		if (IS_ERR(ev_file)) {
+			ret = PTR_ERR(ev_file);
 			goto err;
 		}
 	}
@@ -989,7 +1007,7 @@  static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	cq->uobject       = &obj->uobject;
 	cq->comp_handler  = ib_uverbs_comp_handler;
 	cq->event_handler = ib_uverbs_cq_event_handler;
-	cq->cq_context    = ev_file;
+	cq->cq_context    = &ev_file->ev_file;
 	atomic_set(&cq->usecnt, 0);
 
 	obj->uobject.object = cq;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 784eccc..e34c947 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -155,37 +155,37 @@  static void ib_uverbs_release_dev(struct kobject *kobj)
 	.release = ib_uverbs_release_dev,
 };
 
-static void ib_uverbs_release_event_file(struct kref *ref)
+static void ib_uverbs_release_async_event_file(struct kref *ref)
 {
-	struct ib_uverbs_event_file *file =
-		container_of(ref, struct ib_uverbs_event_file, ref);
+	struct ib_uverbs_async_event_file *file =
+		container_of(ref, struct ib_uverbs_async_event_file, ref);
 
 	kfree(file);
 }
 
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
-			  struct ib_uverbs_event_file *ev_file,
+			  struct ib_uverbs_completion_event_file *ev_file,
 			  struct ib_ucq_object *uobj)
 {
 	struct ib_uverbs_event *evt, *tmp;
 
 	if (ev_file) {
-		spin_lock_irq(&ev_file->lock);
+		spin_lock_irq(&ev_file->ev_file.lock);
 		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
 			list_del(&evt->list);
 			kfree(evt);
 		}
-		spin_unlock_irq(&ev_file->lock);
+		spin_unlock_irq(&ev_file->ev_file.lock);
 
-		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+		uverbs_uobject_put(&ev_file->uobj_file.uobj);
 	}
 
-	spin_lock_irq(&file->async_file->lock);
+	spin_lock_irq(&file->async_file->ev_file.lock);
 	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
 		list_del(&evt->list);
 		kfree(evt);
 	}
-	spin_unlock_irq(&file->async_file->lock);
+	spin_unlock_irq(&file->async_file->ev_file.lock);
 }
 
 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -193,12 +193,12 @@  void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_event *evt, *tmp;
 
-	spin_lock_irq(&file->async_file->lock);
+	spin_lock_irq(&file->async_file->ev_file.lock);
 	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
 		list_del(&evt->list);
 		kfree(evt);
 	}
-	spin_unlock_irq(&file->async_file->lock);
+	spin_unlock_irq(&file->async_file->ev_file.lock);
 }
 
 void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -249,10 +249,12 @@  void ib_uverbs_release_file(struct kref *ref)
 	kfree(file);
 }
 
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
-				    size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file,
+				    struct ib_uverbs_file *uverbs_file,
+				    struct file *filp, char __user *buf,
+				    size_t count, loff_t *pos,
+				    bool is_async)
 {
-	struct ib_uverbs_event_file *file = filp->private_data;
 	struct ib_uverbs_event *event;
 	int eventsz;
 	int ret = 0;
@@ -271,12 +273,12 @@  static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
 			 * and wake_up() guarentee this will see the null set
 			 * without using RCU
 			 */
-					     !file->uverbs_file->device->ib_dev)))
+					     !uverbs_file->device->ib_dev)))
 			return -ERESTARTSYS;
 
 		/* If device was disassociated and no event exists set an error */
 		if (list_empty(&file->event_list) &&
-		    !file->uverbs_file->device->ib_dev)
+		    !uverbs_file->device->ib_dev)
 			return -EIO;
 
 		spin_lock_irq(&file->lock);
@@ -284,7 +286,7 @@  static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
 
 	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
 
-	if (file->is_async)
+	if (is_async)
 		eventsz = sizeof (struct ib_uverbs_async_event_desc);
 	else
 		eventsz = sizeof (struct ib_uverbs_comp_event_desc);
@@ -314,11 +316,31 @@  static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
 	return ret;
 }
 
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+					  size_t count, loff_t *pos)
+{
+	struct ib_uverbs_async_event_file *file = filp->private_data;
+
+	return ib_uverbs_event_read(&file->ev_file, file->uverbs_file, filp,
+				    buf, count, pos, true);
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+					 size_t count, loff_t *pos)
+{
+	struct ib_uverbs_completion_event_file *comp_ev_file =
+		filp->private_data;
+
+	return ib_uverbs_event_read(&comp_ev_file->ev_file,
+				    comp_ev_file->uobj_file.ufile, filp,
+				    buf, count, pos, false);
+}
+
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_file *file,
+					 struct file *filp,
 					 struct poll_table_struct *wait)
 {
 	unsigned int pollflags = 0;
-	struct ib_uverbs_event_file *file = filp->private_data;
 
 	poll_wait(filp, &file->poll_wait, wait);
 
@@ -330,49 +352,98 @@  static unsigned int ib_uverbs_event_poll(struct file *filp,
 	return pollflags;
 }
 
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static unsigned int ib_uverbs_async_event_poll(struct file *filp,
+					       struct poll_table_struct *wait)
+{
+	return ib_uverbs_event_poll(filp->private_data, filp, wait);
+}
+
+static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
+					      struct poll_table_struct *wait)
+{
+	struct ib_uverbs_completion_event_file *comp_ev_file =
+		filp->private_data;
+
+	return ib_uverbs_event_poll(&comp_ev_file->ev_file, filp, wait);
+}
+
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
 {
 	struct ib_uverbs_event_file *file = filp->private_data;
 
 	return fasync_helper(fd, filp, on, &file->async_queue);
 }
 
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
 {
-	struct ib_uverbs_event_file *file = filp->private_data;
+	struct ib_uverbs_completion_event_file *comp_ev_file =
+		filp->private_data;
+
+	return fasync_helper(fd, filp, on, &comp_ev_file->ev_file.async_queue);
+}
+
+static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
+{
+	struct ib_uverbs_async_event_file *file = filp->private_data;
+	struct ib_uverbs_file *uverbs_file = file->uverbs_file;
 	struct ib_uverbs_event *entry, *tmp;
 	int closed_already = 0;
 
-	mutex_lock(&file->uverbs_file->device->lists_mutex);
-	spin_lock_irq(&file->lock);
-	closed_already = file->is_closed;
-	file->is_closed = 1;
-	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+	mutex_lock(&uverbs_file->device->lists_mutex);
+	spin_lock_irq(&file->ev_file.lock);
+	closed_already = file->ev_file.is_closed;
+	file->ev_file.is_closed = 1;
+	list_for_each_entry_safe(entry, tmp, &file->ev_file.event_list, list) {
 		if (entry->counter)
 			list_del(&entry->obj_list);
 		kfree(entry);
 	}
-	spin_unlock_irq(&file->lock);
+	spin_unlock_irq(&file->ev_file.lock);
 	if (!closed_already) {
 		list_del(&file->list);
-		if (file->is_async)
-			ib_unregister_event_handler(&file->uverbs_file->
-				event_handler);
+		ib_unregister_event_handler(&uverbs_file->event_handler);
+	}
+	mutex_unlock(&uverbs_file->device->lists_mutex);
+
+	kref_put(&uverbs_file->ref, ib_uverbs_release_file);
+	kref_put(&file->ref, ib_uverbs_release_async_event_file);
+
+	return 0;
+}
+
+static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
+{
+	struct ib_uverbs_completion_event_file *file = filp->private_data;
+	struct ib_uverbs_event *entry, *tmp;
+
+	spin_lock_irq(&file->ev_file.lock);
+	list_for_each_entry_safe(entry, tmp, &file->ev_file.event_list, list) {
+		if (entry->counter)
+			list_del(&entry->obj_list);
+		kfree(entry);
 	}
-	mutex_unlock(&file->uverbs_file->device->lists_mutex);
+	spin_unlock_irq(&file->ev_file.lock);
 
-	kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
-	kref_put(&file->ref, ib_uverbs_release_event_file);
+	uverbs_close_fd(filp);
 
 	return 0;
 }
 
-static const struct file_operations uverbs_event_fops = {
+const struct file_operations uverbs_event_fops = {
 	.owner	 = THIS_MODULE,
-	.read	 = ib_uverbs_event_read,
-	.poll    = ib_uverbs_event_poll,
-	.release = ib_uverbs_event_close,
-	.fasync  = ib_uverbs_event_fasync,
+	.read	 = ib_uverbs_comp_event_read,
+	.poll    = ib_uverbs_comp_event_poll,
+	.release = ib_uverbs_comp_event_close,
+	.fasync  = ib_uverbs_comp_event_fasync,
+	.llseek	 = no_llseek,
+};
+
+static const struct file_operations uverbs_async_event_fops = {
+	.owner	 = THIS_MODULE,
+	.read	 = ib_uverbs_async_event_read,
+	.poll    = ib_uverbs_async_event_poll,
+	.release = ib_uverbs_async_event_close,
+	.fasync  = ib_uverbs_async_event_fasync,
 	.llseek	 = no_llseek,
 };
 
@@ -419,15 +490,15 @@  static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 	struct ib_uverbs_event *entry;
 	unsigned long flags;
 
-	spin_lock_irqsave(&file->async_file->lock, flags);
-	if (file->async_file->is_closed) {
-		spin_unlock_irqrestore(&file->async_file->lock, flags);
+	spin_lock_irqsave(&file->async_file->ev_file.lock, flags);
+	if (file->async_file->ev_file.is_closed) {
+		spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags);
 		return;
 	}
 
 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 	if (!entry) {
-		spin_unlock_irqrestore(&file->async_file->lock, flags);
+		spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags);
 		return;
 	}
 
@@ -436,13 +507,13 @@  static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 	entry->desc.async.reserved   = 0;
 	entry->counter               = counter;
 
-	list_add_tail(&entry->list, &file->async_file->event_list);
+	list_add_tail(&entry->list, &file->async_file->ev_file.event_list);
 	if (obj_list)
 		list_add_tail(&entry->obj_list, obj_list);
-	spin_unlock_irqrestore(&file->async_file->lock, flags);
+	spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags);
 
-	wake_up_interruptible(&file->async_file->poll_wait);
-	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+	wake_up_interruptible(&file->async_file->ev_file.poll_wait);
+	kill_fasync(&file->async_file->ev_file.async_queue, SIGIO, POLL_IN);
 }
 
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -505,15 +576,23 @@  void ib_uverbs_event_handler(struct ib_event_handler *handler,
 
 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
 {
-	kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+	kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
 	file->async_file = NULL;
 }
 
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
-					struct ib_device	*ib_dev,
-					int is_async)
+void ib_uverbs_init_event_file(struct ib_uverbs_event_file *ev_file)
 {
-	struct ib_uverbs_event_file *ev_file;
+	spin_lock_init(&ev_file->lock);
+	INIT_LIST_HEAD(&ev_file->event_list);
+	init_waitqueue_head(&ev_file->poll_wait);
+	ev_file->is_closed   = 0;
+	ev_file->async_queue = NULL;
+}
+
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+					      struct ib_device	*ib_dev)
+{
+	struct ib_uverbs_async_event_file *ev_file;
 	struct file *filp;
 	int ret;
 
@@ -521,16 +600,11 @@  struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 	if (!ev_file)
 		return ERR_PTR(-ENOMEM);
 
-	kref_init(&ev_file->ref);
-	spin_lock_init(&ev_file->lock);
-	INIT_LIST_HEAD(&ev_file->event_list);
-	init_waitqueue_head(&ev_file->poll_wait);
+	ib_uverbs_init_event_file(&ev_file->ev_file);
 	ev_file->uverbs_file = uverbs_file;
 	kref_get(&ev_file->uverbs_file->ref);
-	ev_file->async_queue = NULL;
-	ev_file->is_closed   = 0;
-
-	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+	kref_init(&ev_file->ref);
+	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
 				  ev_file, O_RDONLY);
 	if (IS_ERR(filp))
 		goto err_put_refs;
@@ -540,64 +614,33 @@  struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 		      &uverbs_file->device->uverbs_events_file_list);
 	mutex_unlock(&uverbs_file->device->lists_mutex);
 
-	if (is_async) {
-		WARN_ON(uverbs_file->async_file);
-		uverbs_file->async_file = ev_file;
-		kref_get(&uverbs_file->async_file->ref);
-		INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
-				      ib_dev,
-				      ib_uverbs_event_handler);
-		ret = ib_register_event_handler(&uverbs_file->event_handler);
-		if (ret)
-			goto err_put_file;
-
-		/* At that point async file stuff was fully set */
-		ev_file->is_async = 1;
-	}
+	WARN_ON(uverbs_file->async_file);
+	uverbs_file->async_file = ev_file;
+	kref_get(&uverbs_file->async_file->ref);
+	INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
+			      ib_dev,
+			      ib_uverbs_event_handler);
+	ret = ib_register_event_handler(&uverbs_file->event_handler);
+	if (ret)
+		goto err_put_file;
+
+	/* At this point the async file setup is fully complete */
 
 	return filp;
 
 err_put_file:
 	fput(filp);
-	kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
+	kref_put(&uverbs_file->async_file->ref,
+		 ib_uverbs_release_async_event_file);
 	uverbs_file->async_file = NULL;
 	return ERR_PTR(ret);
 
 err_put_refs:
 	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
-	kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+	kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
 	return filp;
 }
 
-/*
- * Look up a completion event file by FD.  If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
-{
-	struct ib_uverbs_event_file *ev_file = NULL;
-	struct fd f = fdget(fd);
-
-	if (!f.file)
-		return NULL;
-
-	if (f.file->f_op != &uverbs_event_fops)
-		goto out;
-
-	ev_file = f.file->private_data;
-	if (ev_file->is_async) {
-		ev_file = NULL;
-		goto out;
-	}
-
-	kref_get(&ev_file->ref);
-
-out:
-	fdput(f);
-	return ev_file;
-}
-
 static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
 {
 	u64 mask;
@@ -892,7 +935,8 @@  static int ib_uverbs_close(struct inode *inode, struct file *filp)
 	mutex_unlock(&file->device->lists_mutex);
 
 	if (file->async_file)
-		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+		kref_put(&file->async_file->ref,
+			 ib_uverbs_release_async_event_file);
 
 	kref_put(&file->ref, ib_uverbs_release_file);
 	kobject_put(&dev->kobj);
@@ -1091,7 +1135,7 @@  static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 					struct ib_device *ib_dev)
 {
 	struct ib_uverbs_file *file;
-	struct ib_uverbs_event_file *event_file;
+	struct ib_uverbs_async_event_file *event_file;
 	struct ib_event event;
 
 	/* Pending running commands to terminate */
@@ -1140,21 +1184,20 @@  static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
 		event_file = list_first_entry(&uverbs_dev->
 					      uverbs_events_file_list,
-					      struct ib_uverbs_event_file,
+					      struct ib_uverbs_async_event_file,
 					      list);
-		spin_lock_irq(&event_file->lock);
-		event_file->is_closed = 1;
-		spin_unlock_irq(&event_file->lock);
+		spin_lock_irq(&event_file->ev_file.lock);
+		event_file->ev_file.is_closed = 1;
+		spin_unlock_irq(&event_file->ev_file.lock);
 
 		list_del(&event_file->list);
-		if (event_file->is_async) {
-			ib_unregister_event_handler(&event_file->uverbs_file->
-						    event_handler);
-			event_file->uverbs_file->event_handler.device = NULL;
-		}
+		ib_unregister_event_handler(
+			&event_file->uverbs_file->event_handler);
+		event_file->uverbs_file->event_handler.device =
+			NULL;
 
-		wake_up_interruptible(&event_file->poll_wait);
-		kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+		wake_up_interruptible(&event_file->ev_file.poll_wait);
+		kill_fasync(&event_file->ev_file.async_queue, SIGIO, POLL_IN);
 	}
 	mutex_unlock(&uverbs_dev->lists_mutex);
 }
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index a514556..7f26af5 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -145,7 +145,11 @@  int uverbs_free_cq(struct ib_uobject *uobject,
 
 	ret = ib_destroy_cq(cq);
 	if (!ret || why != RDMA_REMOVE_DESTROY)
-		ib_uverbs_release_ucq(uobject->context->ufile, ev_file, ucq);
+		ib_uverbs_release_ucq(uobject->context->ufile, ev_file ?
+				      container_of(ev_file,
+						   struct ib_uverbs_completion_event_file,
+						   ev_file) : NULL,
+				      ucq);
 	return ret;
 }
 
@@ -186,6 +190,33 @@  int uverbs_free_pd(struct ib_uobject *uobject,
 	return 0;
 }
 
+int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+					    enum rdma_remove_reason why)
+{
+	struct ib_uverbs_completion_event_file *comp_event_file =
+		container_of(uobj_file, struct ib_uverbs_completion_event_file,
+			     uobj_file);
+	struct ib_uverbs_event_file *event_file = &comp_event_file->ev_file;
+
+	spin_lock_irq(&event_file->lock);
+	event_file->is_closed = 1;
+	spin_unlock_irq(&event_file->lock);
+
+	if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+		wake_up_interruptible(&event_file->poll_wait);
+		kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+	}
+	return 0;
+}
+
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+	.type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+	.context_closed = uverbs_hot_unplug_completion_event_file,
+	.fops = &uverbs_event_fops,
+	.name = "[infinibandevent]",
+	.flags = O_RDONLY,
+};
+
 const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
 	.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
 	.destroy_object = uverbs_free_cq,
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 8885664..7771ce9 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -35,6 +35,7 @@ 
 
 #include <rdma/uverbs_types.h>
 
+extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel;
 extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq;
 extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp;
 extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 55b8219..fd930a9 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -147,9 +147,18 @@  struct uverbs_obj_fd_type {
 };
 
 extern const struct uverbs_obj_type_class uverbs_idr_class;
+extern const struct uverbs_obj_type_class uverbs_fd_class;
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
 				   sizeof(char))
+#define UVERBS_TYPE_ALLOC_FD(_size, _order)				 \
+	{								 \
+		.destroy_order = _order,				 \
+		.type_class = &uverbs_fd_class,				 \
+		.obj_size = (_size) +					 \
+			  UVERBS_BUILD_BUG_ON((_size) <			 \
+					      sizeof(struct ib_uobject_file)),\
+	}
 #define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order)				\
 	{								\
 		.destroy_order = _order,				\