@@ -90,23 +90,25 @@ struct tid_pageset {
#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
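+/*
+ * Number of pages spanned by a user buffer: the page containing
+ * 'vaddr' plus one more for each additional page boundary crossed
+ * by the 'len' bytes that follow.
+ */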
+#define num_user_pages(vaddr, len) \
+ (1 + (((((unsigned long)(vaddr) + \
+ (unsigned long)(len) - 1) & PAGE_MASK) - \
+ ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
+
static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
- struct rb_root *) __maybe_unused;
+ struct rb_root *);
static u32 find_phys_blocks(struct page **, unsigned,
struct tid_pageset *) __maybe_unused;
static int set_rcvarray_entry(struct file *, unsigned long, u32,
- struct tid_group *, struct page **,
- unsigned) __maybe_unused;
+ struct tid_group *, struct page **, unsigned);
static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long,
unsigned long);
static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *,
unsigned long) __maybe_unused;
static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *,
u32);
-static int mmu_rb_insert_by_addr(struct rb_root *,
- struct mmu_rb_node *) __maybe_unused;
-static int mmu_rb_insert_by_entry(struct rb_root *,
- struct mmu_rb_node *) __maybe_unused;
+static int mmu_rb_insert_by_addr(struct rb_root *, struct mmu_rb_node *);
+static int mmu_rb_insert_by_entry(struct rb_root *, struct mmu_rb_node *);
static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
unsigned long, unsigned long,
enum mmu_call_types);
@@ -168,7 +170,7 @@ static inline void tid_group_move(struct tid_group *group,
tid_group_add_tail(group, s2);
}
-static struct mmu_notifier_ops __maybe_unused mn_opts = {
+static struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};
@@ -180,12 +182,144 @@ static struct mmu_notifier_ops __maybe_unused mn_opts = {
*/
int hfi1_user_exp_rcv_init(struct file *fp)
{
- return -EINVAL;
+ struct hfi1_filedata *fd = fp->private_data;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+ unsigned tidbase;
+ int i, ret = 0;
+
+ INIT_HLIST_NODE(&fd->mn.hlist);
+ spin_lock_init(&fd->rb_lock);
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->mn.ops = &mn_opts;
+ fd->tid_rb_root = RB_ROOT;
+
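+	/*
+	 * Carve the expected receive RcvArray space into TID groups.
+	 * This is done only once per context: either the context is
+	 * not shared, or this is the first subcontext (subctxt 0).
+	 */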
+ if (!uctxt->subctxt_cnt || !fd->subctxt) {
+ exp_tid_group_init(&uctxt->tid_group_list);
+ exp_tid_group_init(&uctxt->tid_used_list);
+ exp_tid_group_init(&uctxt->tid_full_list);
+
+ tidbase = uctxt->expected_base;
+ for (i = 0; i < uctxt->expected_count /
+ dd->rcv_entries.group_size; i++) {
+ struct tid_group *grp;
+
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp) {
+ /*
+ * If we fail here, the groups already
+ * allocated will be freed by the close
+ * call.
+ */
+ ret = -ENOMEM;
+ goto done;
+ }
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &uctxt->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+ }
+
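+	/*
+	 * TID caching (TID_UNMAP not set) relies on the MMU notifier;
+	 * allocate room to record TIDs invalidated by the notifier
+	 * callbacks and register the notifier with the current mm.
+	 */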
+ if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+ fd->invalid_tid_idx = 0;
+ fd->invalid_tids = kzalloc(uctxt->expected_count *
+ sizeof(u32), GFP_KERNEL);
+		if (!fd->invalid_tids) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		/*
+		 * Register MMU notifier callbacks. If the registration
+		 * fails, continue but turn off TID caching for all
+		 * user contexts.
+		 */
+		ret = mmu_notifier_register(&fd->mn, current->mm);
+		if (ret) {
+			dd_dev_info(dd,
+				    "Failed MMU notifier registration %d\n",
+				    ret);
+			HFI1_CAP_USET(TID_UNMAP);
+			ret = 0;
+		}
+	}
+
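+	/*
+	 * With TID caching off, RB tree nodes are indexed by RcvArray
+	 * entry; with it on, they are indexed by user virtual address.
+	 */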
+ if (HFI1_CAP_IS_USET(TID_UNMAP))
+ fd->mmu_rb_insert = mmu_rb_insert_by_entry;
+ else
+ fd->mmu_rb_insert = mmu_rb_insert_by_addr;
+
+	/*
+	 * When context sharing and TID caching are both enabled, PSM
+	 * has no good way to separate, count, and enforce a limit on
+	 * the RcvArray entries used by each subctxt. To help with
+	 * that, we calculate a per-process RcvArray entry share and
+	 * enforce it here.
+	 * If TID caching is not in use, PSM manages usage on its own,
+	 * so any subctxt is allowed to take all of the entries.
+	 *
+	 * Make sure the tid limits are set only after successful
+	 * init.
+	 */
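+	/*
+	 * For example (illustrative numbers only): 512 expected
+	 * entries shared by 3 subctxts gives subctxts 0 and 1 a
+	 * tid_limit of 171 each and subctxt 2 a tid_limit of 170.
+	 */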
+ if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+ u16 remainder;
+
+ fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
+ remainder = uctxt->expected_count % uctxt->subctxt_cnt;
+ if (remainder && fd->subctxt < remainder)
+ fd->tid_limit++;
+ } else {
+ fd->tid_limit = uctxt->expected_count;
+ }
+done:
+ return ret;
}
int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
- return -EINVAL;
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct tid_group *grp, *gptr;
+
+ /*
+	 * The notifier would have been removed when the process's mm
+ * was freed.
+ */
+ if (current->mm && !HFI1_CAP_IS_USET(TID_UNMAP))
+ mmu_notifier_unregister(&fd->mn, current->mm);
+
+ kfree(fd->invalid_tids);
+
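+	/*
+	 * The remaining cleanup is context-wide and is done only once
+	 * the context's user count has dropped to zero.
+	 */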
+ if (!uctxt->cnt) {
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list,
+ &fd->tid_rb_root);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list,
+ &fd->tid_rb_root);
+ list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+ list) {
+ list_del_init(&grp->list);
+ kfree(grp);
+ }
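+		/* Free any MMU RB nodes still cached for this file. */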
+ spin_lock(&fd->rb_lock);
+ if (!RB_EMPTY_ROOT(&fd->tid_rb_root)) {
+ struct rb_node *node;
+ struct mmu_rb_node *rbnode;
+
+ while ((node = rb_first(&fd->tid_rb_root))) {
+ rbnode = rb_entry(node, struct mmu_rb_node,
+ rbnode);
+ rb_erase(&rbnode->rbnode, &fd->tid_rb_root);
+ kfree(rbnode);
+ }
+ }
+ spin_unlock(&fd->rb_lock);
+ hfi1_clear_tids(uctxt);
+ }
+ return 0;
}
/*