diff mbox

[04/13] idr, radix-tree: Implement copy_preload

Message ID b8a5a6a9fa574cf7d2aa75d3eba578f0fc06c403.1493315290.git.bankarsandhya512@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sandhya Bankar April 27, 2017, 7:07 p.m. UTC
In the file descriptor table duplication code (called at fork()), we
need to duplicate an IDR.  But we have to do it under a lock (so another
thread doesn't open/close a fd in the middle), and there's no suitable
preload operation for this today.  Adding just idr_copy_preload() isn't
enough as another thread could grow the fd table between starting the
preload and acquiring the lock.  We also need idr_check_preload() to be
called after acquiring the lock.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/idr.h                         | 32 +++++++++++
 include/linux/radix-tree.h                  |  3 ++
 lib/radix-tree.c                            | 83 +++++++++++++++++++++++++++++
 tools/testing/radix-tree/idr-test.c         | 24 +++++++++
 tools/testing/radix-tree/linux/radix-tree.h |  2 +
 tools/testing/radix-tree/main.c             | 38 +++++++++++++
 6 files changed, 182 insertions(+)
diff mbox

Patch

diff --git a/include/linux/idr.h b/include/linux/idr.h
index d43cf01..eed1c1a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -166,6 +166,38 @@  static inline bool idr_is_empty(const struct idr *idr)
 }
 
 /**
+ * idr_copy_preload - preload for idr_copy()
+ * @src: IDR to be copied from
+ * @gfp: Allocation mask to use for preloading
+ *
+ * Preallocates enough memory for a call to idr_copy().  This function
+ * returns with preemption disabled.  Call idr_preload_end() once the
+ * copy has completed.
+ *
+ * Return: -ENOMEM if the memory could not be allocated.
+ */
+static inline int idr_copy_preload(const struct idr *src, gfp_t gfp)
+{
+	return radix_tree_copy_preload(&src->idr_rt, gfp);
+}
+
+/**
+ * idr_check_preload - Check the preload is still sufficient
+ * @src: IDR to be copied from
+ *
+ * Between the successful allocation of memory and acquiring the lock that
+ * protects @src, the IDR may have expanded.  If this function returns
+ * false, more memory needs to be preallocated.
+ *
+ * Return: true if enough memory remains allocated, false to retry the
+ * preallocation.
+ */
+static inline bool idr_check_preload(const struct idr *src)
+{
+	return radix_tree_check_preload(&src->idr_rt);
+}
+
+/**
  * idr_preload_end - end preload section started with idr_preload()
  *
  * Each idr_preload() should be matched with an invocation of this
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index f701e0b..f53d004 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -354,6 +354,9 @@  static inline void radix_tree_preload_end(void)
 	preempt_enable();
 }
 
+int radix_tree_copy_preload(const struct radix_tree_root *, gfp_t);
+bool radix_tree_check_preload(const struct radix_tree_root *);
+
 int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
 int radix_tree_split(struct radix_tree_root *, unsigned long index,
 			unsigned new_order);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 855ac8e..c1d75224 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -277,6 +277,55 @@  static unsigned long next_index(unsigned long index,
 	return (index & ~node_maxindex(node)) + (offset << node->shift);
 }
 
+/**
+ * radix_tree_count_nodes - Returns the number of nodes in this tree
+ * @root: radix tree root
+ *
+ * This routine does not examine every node in the tree; it assumes that
+ * all entries in the tree at level 1 are nodes.  It will overestimate
+ * the number of nodes in the tree in the presence of entries of order
+ * RADIX_TREE_MAP_SHIFT or higher.
+ */
+static unsigned long radix_tree_count_nodes(const struct radix_tree_root *root)
+{
+	struct radix_tree_node *node, *child;
+	unsigned long n = 1;
+	void *entry = rcu_dereference_raw(root->rnode);
+	unsigned int offset = 0;
+
+	if (!radix_tree_is_internal_node(entry))
+		return 0;
+
+	node = entry_to_node(entry);
+	if (!node->shift)
+		return n;
+
+	n += node->count;
+	if (node->shift == RADIX_TREE_MAP_SHIFT)
+		return n;
+
+	while (node) {
+		if (offset == RADIX_TREE_MAP_SIZE) {
+			offset = node->offset + 1;
+			node = node->parent;
+			continue;
+		}
+
+		entry = rcu_dereference_raw(node->slots[offset]);
+		offset++;
+		if (!radix_tree_is_internal_node(entry))
+			continue;
+		child = entry_to_node(entry);
+		n += child->count;
+		if (node->shift <= 2 * RADIX_TREE_MAP_SHIFT)
+			continue;
+		offset = 0;
+		node = child;
+	}
+
+	return n;
+}
+
 #ifndef __KERNEL__
 static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
@@ -530,6 +579,40 @@  int radix_tree_maybe_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
+/**
+ * radix_tree_copy_preload - preload for radix_tree_copy()
+ * @src: radix tree root to be copied from
+ * @gfp: Allocation mask to use for preloading
+ *
+ * Preallocates enough memory for a call to radix_tree_copy().  This function
+ * returns with preemption disabled.  Call radix_tree_preload_end() once the
+ * copy has completed.
+ *
+ * Return: -ENOMEM if the memory could not be allocated.
+ */
+int radix_tree_copy_preload(const struct radix_tree_root *src, gfp_t gfp_mask)
+{
+	return __radix_tree_preload(gfp_mask, radix_tree_count_nodes(src));
+}
+
+/**
+ * radix_tree_check_preload - Check the preload is still sufficient
+ * @src: radix tree to be copied from
+ * @cookie: Cookie returned from radix_tree_copy_preload()
+ *
+ * Between the successful allocation of memory and acquiring the lock that
+ * protects @src, the radix tree may have expanded.  Call this function
+ * to see if the preallocation needs to be expanded too.
+ *
+ * Return: true if enough memory remains allocated, false if it does not
+ * suffice.
+ */
+bool radix_tree_check_preload(const struct radix_tree_root *src)
+{
+	struct radix_tree_preload *rtp = this_cpu_ptr(&radix_tree_preloads);
+	return rtp->nr >= radix_tree_count_nodes(src);
+}
+
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 /*
  * Preload with enough objects to ensure that we can split a single entry
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 3f9f429..e8d5386 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -225,6 +225,29 @@  void idr_get_next_test(void)
 	idr_destroy(&idr);
 }
 
+void idr_copy_test(void)
+{
+	DEFINE_IDR(src);
+	DEFINE_IDR(dst);
+	int i;
+	void *p;
+
+	for (i = 0; i < 10000; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&src, item, 0, 20000, GFP_KERNEL) == i);
+	}
+
+	idr_copy_preload(&src, GFP_KERNEL);
+	idr_for_each_entry(&src, p, i) {
+		assert(idr_alloc(&dst, p, i, i + 1, GFP_NOWAIT) == i);
+	}
+	idr_preload_end();
+
+	idr_for_each(&src, item_idr_free, NULL);
+	idr_destroy(&src);
+	idr_destroy(&dst);
+}
+
 void idr_checks(void)
 {
 	unsigned long i;
@@ -276,6 +299,7 @@  void idr_checks(void)
 	idr_tag_test();
 	idr_nowait_test();
 	idr_get_next_test();
+	idr_copy_test();
 }
 
 /*
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index bf1bb23..0c5885e 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -4,6 +4,8 @@ 
 #include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
 
+unsigned long radix_tree_count_nodes(const struct radix_tree_root *root);
+
 extern int kmalloc_verbose;
 extern int test_verbose;
 
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index bc9a784..a7504e2 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -11,6 +11,42 @@ 
 #include "test.h"
 #include "regression.h"
 
+static void count_node_check(void)
+{
+	unsigned long i, j, k;
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	assert(radix_tree_empty(&tree));
+	assert(radix_tree_count_nodes(&tree) == 0);
+	assert(item_insert(&tree, 0) == 0);
+	assert(radix_tree_count_nodes(&tree) == 0);
+
+	for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) {
+		assert(item_insert(&tree, i) == 0);
+		assert(radix_tree_count_nodes(&tree) == 1);
+	}
+
+	j = 3;
+	k = 3;
+	for (i = RADIX_TREE_MAP_SIZE; i < i * RADIX_TREE_MAP_SIZE;
+			i *= RADIX_TREE_MAP_SIZE) {
+		assert(item_insert(&tree, i) == 0);
+		assert(radix_tree_count_nodes(&tree) == j);
+		j += k;
+		k++;
+	}
+
+	assert(item_insert(&tree, i) == 0);
+	assert(radix_tree_count_nodes(&tree) == j);
+
+	item_kill_tree(&tree);
+}
+
+void basic_checks(void)
+{
+	count_node_check();
+}
+
 void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
 {
 	long idx;
@@ -362,6 +398,8 @@  int main(int argc, char **argv)
 	rcu_register_thread();
 	radix_tree_init();
 
+	basic_checks();
+
 	regression1_test();
 	regression2_test();
 	regression3_test();