@@ -63,6 +63,10 @@
(udata)->outlen = (olen); \
} while (0)
+enum uverbs_flags {
+ UVERBS_FLAG_DISASSOCIATE = 1
+};
+
/*
* Our lifetime rules for these structs are the following:
*
@@ -89,11 +93,16 @@ struct ib_uverbs_device {
int num_comp_vectors;
struct completion comp;
struct device *dev;
- struct ib_device *ib_dev;
+ struct ib_device __rcu *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
+ u32 flags;
+ struct srcu_struct disassociate_srcu;
+ struct mutex lists_mutex; /* protect lists */
+ struct list_head uverbs_file_list;
+ struct list_head uverbs_events_file_list;
};
struct ib_uverbs_event_file {
@@ -105,6 +114,7 @@ struct ib_uverbs_event_file {
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+ struct list_head list;
};
struct ib_uverbs_file {
@@ -114,6 +124,8 @@ struct ib_uverbs_file {
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
+ struct list_head list;
+ int is_closed;
};
struct ib_uverbs_event {
@@ -136,7 +136,12 @@ static void ib_uverbs_release_dev(struct kref *ref)
struct ib_uverbs_device *dev =
container_of(ref, struct ib_uverbs_device, ref);
- complete(&dev->comp);
+ if (!dev->ib_dev) {
+ cleanup_srcu_struct(&dev->disassociate_srcu);
+ kfree(dev);
+ } else {
+ complete(&dev->comp);
+ }
}
static void ib_uverbs_release_event_file(struct kref *ref)
@@ -202,9 +207,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
{
struct ib_uobject *uobj, *tmp;
- if (!context)
- return 0;
-
context->closing = 1;
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
@@ -309,7 +311,9 @@ static void ib_uverbs_release_file(struct kref *ref)
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
- module_put(file->device->ib_dev->owner);
+ if (!(file->device->flags & UVERBS_FLAG_DISASSOCIATE))
+ module_put(file->device->ib_dev->owner);
+
kref_put(&file->device->ref, ib_uverbs_release_dev);
kfree(file);
@@ -332,9 +336,15 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->event_list)))
+ (!list_empty(&file->event_list) ||
+ !file->uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
+ /* If device was disassociated and no event exists set an error */
+ if (list_empty(&file->event_list) &&
+ !file->uverbs_file->device->ib_dev)
+ return -EIO;
+
spin_lock_irq(&file->lock);
}
@@ -397,8 +407,11 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *entry, *tmp;
+ int closed_already = 0;
+ mutex_lock(&file->uverbs_file->device->lists_mutex);
spin_lock_irq(&file->lock);
+ closed_already = file->is_closed;
file->is_closed = 1;
list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
if (entry->counter)
@@ -406,9 +419,14 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
kfree(entry);
}
spin_unlock_irq(&file->lock);
+ if (!closed_already) {
+ list_del(&file->list);
+ if (file->is_async)
+ ib_unregister_event_handler(&file->uverbs_file->
+ event_handler);
+ }
+ mutex_unlock(&file->uverbs_file->device->lists_mutex);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->event_handler);
kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
kref_put(&file->ref, ib_uverbs_release_event_file);
@@ -549,7 +567,7 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct file *filp;
int ret;
- ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+ ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
if (!ev_file)
return ERR_PTR(-ENOMEM);
@@ -577,6 +595,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
goto put_file;
}
+ mutex_lock(&uverbs_file->device->lists_mutex);
+ list_add_tail(&ev_file->list,
+ &uverbs_file->device->uverbs_events_file_list);
+ mutex_unlock(&uverbs_file->device->lists_mutex);
+
kref_get(&uverbs_file->ref);
return filp;
@@ -621,12 +644,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev = file->device->ib_dev;
+ struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
__u32 flags;
-
- if (!ib_dev)
- return -ENODEV;
+ int srcu_key;
+ ssize_t ret;
if (count < sizeof hdr)
return -EINVAL;
@@ -634,6 +656,14 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ ib_dev = srcu_dereference(file->device->ib_dev,
+ &file->device->disassociate_srcu);
+ if (!ib_dev) {
+ ret = -EIO;
+ goto out;
+ }
+
flags = (hdr.command &
IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
@@ -641,26 +671,36 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
__u32 command;
if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
+ IB_USER_VERBS_CMD_COMMAND_MASK)) {
+ ret = -EINVAL;
+ goto out;
+ }
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[command])
- return -EINVAL;
+ !uverbs_cmd_table[command]) {
+ ret = -EINVAL;
+ goto out;
+ }
if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT)
- return -EINVAL;
+ command != IB_USER_VERBS_CMD_GET_CONTEXT) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (!(ib_dev->uverbs_cmd_mask & (1ull << command)))
- return -ENOSYS;
+ if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) {
+ ret = -ENOSYS;
+ goto out;
+ }
- if (hdr.in_words * 4 != count)
- return -EINVAL;
+ if (hdr.in_words * 4 != count) {
+ ret = -EINVAL;
+ goto out;
+ }
- return uverbs_cmd_table[command](file, ib_dev,
+ ret = uverbs_cmd_table[command](file, ib_dev,
buf + sizeof(hdr),
hdr.in_words * 4,
hdr.out_words * 4);
@@ -671,51 +711,72 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_udata ucore;
struct ib_udata uhw;
- int err;
size_t written_count = count;
if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
+ IB_USER_VERBS_CMD_COMMAND_MASK)) {
+ ret = -EINVAL;
+ goto out;
+ }
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
- !uverbs_ex_cmd_table[command])
- return -ENOSYS;
+ !uverbs_ex_cmd_table[command]) {
+ ret = -ENOSYS;
+ goto out;
+ }
- if (!file->ucontext)
- return -EINVAL;
+ if (!file->ucontext) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
- return -ENOSYS;
+ if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) {
+ ret = -ENOSYS;
+ goto out;
+ }
- if (count < (sizeof(hdr) + sizeof(ex_hdr)))
- return -EINVAL;
+ if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
- return -EFAULT;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
+ ret = -EFAULT;
+ goto out;
+ }
count -= sizeof(hdr) + sizeof(ex_hdr);
buf += sizeof(hdr) + sizeof(ex_hdr);
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
- return -EINVAL;
+ if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (ex_hdr.cmd_hdr_reserved)
- return -EINVAL;
+ if (ex_hdr.cmd_hdr_reserved) {
+ ret = -EINVAL;
+ goto out;
+ }
if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words)
- return -EINVAL;
+ if (!hdr.out_words && !ex_hdr.provider_out_words) {
+ ret = -EINVAL;
+ goto out;
+ }
if (!access_ok(VERIFY_WRITE,
(void __user *) (unsigned long) ex_hdr.response,
- (hdr.out_words + ex_hdr.provider_out_words) * 8))
- return -EFAULT;
+ (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
- if (hdr.out_words || ex_hdr.provider_out_words)
- return -EINVAL;
+ if (hdr.out_words || ex_hdr.provider_out_words) {
+ ret = -EINVAL;
+ goto out;
+ }
}
INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
@@ -727,29 +788,43 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
ex_hdr.provider_in_words * 8,
ex_hdr.provider_out_words * 8);
- err = uverbs_ex_cmd_table[command](file,
+ ret = uverbs_ex_cmd_table[command](file,
ib_dev,
&ucore,
&uhw);
-
- if (err)
- return err;
-
- return written_count;
+ if (!ret)
+ ret = written_count;
+ } else {
+ ret = -ENOSYS;
}
- return -ENOSYS;
+out:
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return ret;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev = file->device->ib_dev;
+ struct ib_device *ib_dev;
+ int ret = 0;
+ int srcu_key;
- if (!ib_dev || !file->ucontext)
- return -ENODEV;
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ ib_dev = srcu_dereference(file->device->ib_dev,
+ &file->device->disassociate_srcu);
+ if (!ib_dev) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (!file->ucontext)
+ ret = -ENODEV;
else
- return ib_dev->mmap(file->ucontext, vma);
+ ret = ib_dev->mmap(file->ucontext, vma);
+out:
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return ret;
}
/*
@@ -766,7 +841,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
struct ib_uverbs_device *dev;
struct ib_uverbs_file *file;
+ struct ib_device *ib_dev;
int ret;
+ int module_dependent;
+ int srcu_key;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
if (dev)
@@ -774,15 +852,34 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
else
return -ENXIO;
- if (!try_module_get(dev->ib_dev->owner)) {
- ret = -ENODEV;
+ srcu_key = srcu_read_lock(&dev->disassociate_srcu);
+ mutex_lock(&dev->lists_mutex);
+ ib_dev = srcu_dereference(dev->ib_dev,
+ &dev->disassociate_srcu);
+ if (!ib_dev) {
+ ret = -EIO;
goto err;
}
- file = kmalloc(sizeof *file, GFP_KERNEL);
+ /* In case IB device supports disassociate ucontext, there is no hard
+ * dependency between uverbs device and its low level device.
+ */
+ module_dependent = !(dev->flags & UVERBS_FLAG_DISASSOCIATE);
+
+ if (module_dependent) {
+ if (!try_module_get(ib_dev->owner)) {
+ ret = -ENODEV;
+ goto err;
+ }
+ }
+
+ file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file) {
ret = -ENOMEM;
- goto err_module;
+ if (module_dependent)
+ goto err_module;
+
+ goto err;
}
file->device = dev;
@@ -792,13 +889,18 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_init(&file->mutex);
filp->private_data = file;
+ list_add_tail(&file->list, &dev->uverbs_file_list);
+ mutex_unlock(&dev->lists_mutex);
+ srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return nonseekable_open(inode, filp);
err_module:
- module_put(dev->ib_dev->owner);
+ module_put(ib_dev->owner);
err:
+ mutex_unlock(&dev->lists_mutex);
+ srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
kref_put(&dev->ref, ib_uverbs_release_dev);
return ret;
}
@@ -806,8 +908,18 @@ err:
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
-
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ struct ib_ucontext *ucontext = NULL;
+
+ mutex_lock(&file->device->lists_mutex);
+ ucontext = file->ucontext;
+ file->ucontext = NULL;
+ if (!file->is_closed) {
+ list_del(&file->list);
+ file->is_closed = 1;
+ }
+ mutex_unlock(&file->device->lists_mutex);
+ if (ucontext)
+ ib_uverbs_cleanup_ucontext(file, ucontext);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
@@ -843,12 +955,21 @@ static struct ib_client uverbs_client = {
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
char *buf)
{
+ int ret = -ENODEV;
+ int srcu_key;
struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
- return sprintf(buf, "%s\n", dev->ib_dev->name);
+ srcu_key = srcu_read_lock(&dev->disassociate_srcu);
+ ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
+ if (ib_dev)
+ ret = sprintf(buf, "%s\n", ib_dev->name);
+ srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
+
+ return ret;
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -856,11 +977,19 @@ static ssize_t show_dev_abi_version(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ int ret = -ENODEV;
+ int srcu_key;
+ struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
+ srcu_key = srcu_read_lock(&dev->disassociate_srcu);
+ ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
+ if (ib_dev)
+ ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
+ srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
+ return ret;
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
@@ -900,6 +1029,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
int devnum;
dev_t base;
struct ib_uverbs_device *uverbs_dev;
+ int ret;
if (!device->alloc_ucontext)
return;
@@ -912,6 +1042,13 @@ static void ib_uverbs_add_one(struct ib_device *device)
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
+ mutex_init(&uverbs_dev->lists_mutex);
+ ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
+ if (ret)
+ goto err_init;
+
+ INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
+ INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -953,6 +1090,9 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
+ if (device->disassociate_ucontext)
+ uverbs_dev->flags |= UVERBS_FLAG_DISASSOCIATE;
+
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
@@ -968,15 +1108,82 @@ err_cdev:
clear_bit(devnum, overflow_map);
err:
+ cleanup_srcu_struct(&uverbs_dev->disassociate_srcu);
+
+err_init:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
return;
}
+static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev)
+{
+ struct ib_uverbs_file *file;
+ struct ib_uverbs_event_file *event_file;
+ struct ib_device *ib_dev = uverbs_dev->ib_dev;
+ struct ib_event event;
+
+ uverbs_dev->ib_dev = NULL;
+
+ /* Pending running commands to terminate */
+ synchronize_srcu(&uverbs_dev->disassociate_srcu);
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.element.port_num = 0;
+ event.device = ib_dev;
+
+ mutex_lock(&uverbs_dev->lists_mutex);
+ while (!list_empty(&uverbs_dev->uverbs_file_list)) {
+ struct ib_ucontext *ucontext;
+
+ file = list_first_entry(&uverbs_dev->uverbs_file_list,
+ struct ib_uverbs_file, list);
+ file->is_closed = 1;
+ ucontext = file->ucontext;
+ list_del(&file->list);
+ file->ucontext = NULL;
+ kref_get(&file->ref);
+ mutex_unlock(&uverbs_dev->lists_mutex);
+ /* We must release the mutex before going ahead and calling
+ * disassociate_ucontext. disassociate_ucontext might end up
+ * indirectly calling uverbs_close, for example due to freeing
+ * the resources (e.g mmput).
+ */
+ ib_uverbs_event_handler(&file->event_handler, &event);
+ if (ucontext) {
+ ib_dev->disassociate_ucontext(ucontext);
+ ib_uverbs_cleanup_ucontext(file, ucontext);
+ }
+
+ mutex_lock(&uverbs_dev->lists_mutex);
+ kref_put(&file->ref, ib_uverbs_release_file);
+ }
+
+ mutex_unlock(&uverbs_dev->lists_mutex);
+
+ mutex_lock(&uverbs_dev->lists_mutex);
+ while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
+ event_file = list_first_entry(&uverbs_dev->
+ uverbs_events_file_list,
+ struct ib_uverbs_event_file,
+ list);
+ event_file->is_closed = 1;
+
+ list_del(&event_file->list);
+ if (event_file->is_async)
+ ib_unregister_event_handler(&event_file->uverbs_file->
+ event_handler);
+
+ wake_up_interruptible(&event_file->poll_wait);
+ kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ }
+ mutex_unlock(&uverbs_dev->lists_mutex);
+}
+
static void ib_uverbs_remove_one(struct ib_device *device)
{
struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
+ int wait_clients = 1;
if (!uverbs_dev)
return;
@@ -990,9 +1197,27 @@ static void ib_uverbs_remove_one(struct ib_device *device)
else
clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+ if (uverbs_dev->flags & UVERBS_FLAG_DISASSOCIATE) {
+ /* We disassociate HW resources and immediately returning, not
+ * pending to active userspace clients. Upon returning ib_device
+ * may be freed internally and is not valid any more.
+ * uverbs_device is still available, when all clients close
+ * their files, the uverbs device ref count will be zero and its
+ * resources will be freed.
+ * Note: At that step no more files can be opened on that cdev
+ * as it was deleted, however active clients can still issue
+ * commands and close their open files.
+ */
+ ib_uverbs_free_hw_resources(uverbs_dev);
+ wait_clients = 0;
+ }
+ /* ref count taken as part of add one is put back in both modes. */
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
- wait_for_completion(&uverbs_dev->comp);
- kfree(uverbs_dev);
+ if (wait_clients) {
+ wait_for_completion(&uverbs_dev->comp);
+ cleanup_srcu_struct(&uverbs_dev->disassociate_srcu);
+ kfree(uverbs_dev);
+ }
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)
@@ -1705,6 +1705,7 @@ struct ib_device {
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
struct ib_dma_mapping_ops *dma_ops;