diff mbox

libibverbs: Undo changes in memory range tree when madvise() fails

Message ID 4B12A679.3000800@gmail.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Alex Vainman Nov. 29, 2009, 4:51 p.m. UTC
None
diff mbox

Patch

diff --git a/src/memory.c b/src/memory.c
index 53d86b7..550015a 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -446,12 +446,121 @@  static struct ibv_mem_node *__mm_find_start(uintptr_t start, uintptr_t end)
 	return node;
 }
 
+static struct ibv_mem_node *merge_ranges(struct ibv_mem_node *node,
+					 struct ibv_mem_node *prev)
+{
+	struct ibv_mem_node *new_node = NULL;
+	/* Absorb node into prev (callers pass the node located just before node) and return prev. */
+	prev->end = node->end;		/* prev now extends to the end of node */
+	prev->refcnt = node->refcnt;	/* the merged range takes node's refcnt, not prev's */
+	__mm_remove(node);		/* NOTE(review): node is not freed in this function; whether __mm_remove() releases it is not visible here */
+	new_node = prev;
+
+	return new_node;		/* always prev */
+}
+
+static struct ibv_mem_node *split_range(struct ibv_mem_node *node,
+					uintptr_t cut_line)
+{
+	struct ibv_mem_node *new_node = NULL;
+	/* Split node at cut_line: node keeps [node->start, cut_line - 1], a new node takes [cut_line, old node->end]. */
+	new_node = malloc(sizeof *new_node);
+	if (!new_node)
+		return NULL;	/* allocation failed; node has not been modified */
+	new_node->start  = cut_line;
+	new_node->end    = node->end;
+	new_node->refcnt = node->refcnt;	/* both halves carry the same refcnt */
+	node->end  = cut_line - 1;
+	__mm_add(new_node);
+
+	return new_node;	/* the upper half, which begins at cut_line */
+}
+
+static struct ibv_mem_node *get_start_node(uintptr_t start, uintptr_t end,
+					   int inc)
+{
+	struct ibv_mem_node *node, *tmp = NULL;
+	/* Returns the node to begin processing at, or NULL if split_range() could not allocate. */
+	node = __mm_find_start(start, end);
+	if (node->start < start)
+		node = split_range(node, start);	/* the returned node begins exactly at start */
+	else{
+		tmp = __mm_prev(node);
+		if (tmp && tmp->refcnt == node->refcnt + inc)	/* prev already has the refcnt node would have after inc is applied */
+			node = merge_ranges(node, tmp);	/* the merged node keeps node's current refcnt */
+	}
+	return node;
+}
+
+/*
+ * Called when madvise() fails on node: undoes the merge or split that was
+ * performed on it.  Returns the resulting node, or NULL if a split fails.
+ */
+static struct ibv_mem_node *undo_node(struct ibv_mem_node *node,
+				      uintptr_t start, int inc)
+{
+	struct ibv_mem_node *tmp = NULL;
+
+	/*
+	 * This can only be true if the node that began at start was merged
+	 * into its predecessor, which began at node->start and ended at
+	 * start - 1.  Split the merged node again at start.
+	 */
+	if (start > node->start) {
+		tmp = split_range(node, start);
+		if (tmp) {
+			node->refcnt += inc;	/* lower part gets back its pre-merge refcnt (merge required it to be node's refcnt + inc) */
+			node = tmp;	/* continue with the part that begins at start */
+		} else
+			return NULL;	/* split_range() could not allocate */
+	}
+
+	tmp  =  __mm_prev(node);
+	if (tmp && tmp->refcnt == node->refcnt)	/* re-join with the previous node when refcnts are equal */
+		node = merge_ranges(node, tmp);
+
+	tmp  =  __mm_next(node);
+	if (tmp && tmp->refcnt == node->refcnt)	/* re-join with the next node when refcnts are equal */
+		node = merge_ranges(tmp, node);	/* here node plays the role of prev */
+
+	return node;
+}
+
+/*
+ * Called when madvise() fails.  The failing node may cover only a sub-range
+ * of [start, end], so any changes already applied successfully to the range
+ * [start, prev(node)->end] have to be undone.
+ * Inverts the operation (*p_inc and *p_advice), sets *p_end to the end of
+ * the range to rescan (when a previous node exists) and returns the node to
+ * restart the scan from, or NULL when get_start_node() is not called or fails.
+ */
+static struct ibv_mem_node *prepare_to_roll_back(struct ibv_mem_node *node,
+						 uintptr_t start,
+						 uintptr_t *p_end,
+						 int *p_inc,
+						 int *p_advice)
+{
+	struct ibv_mem_node *tmp = NULL;
+
+	*p_inc *= -1;	/* flip the direction of the refcnt change */
+	*p_advice = *p_inc == 1 ? MADV_DONTFORK : MADV_DOFORK;	/* advice that matches the flipped inc */
+	tmp = __mm_prev(node);	/* node located before the one that failed */
+	node = NULL;	/* default result: nothing to rescan */
+	if (tmp) {
+		*p_end = tmp->end;
+		if (start <= *p_end)	/* otherwise prev ends before start: no earlier part of the range to rescan */
+			node = get_start_node(start, *p_end, *p_inc);
+	}
+	return node;
+}
+
 static int ibv_madvise_range(void *base, size_t size, int advice)
 {
 	uintptr_t start, end;
 	struct ibv_mem_node *node, *tmp;
 	int inc;
 	int ret = 0;
+	int rolling_back = 0;
 
 	if (!size)
 		return 0;
@@ -464,52 +573,21 @@  static int ibv_madvise_range(void *base, size_t size, int advice)
 
 	pthread_mutex_lock(&mm_mutex);
 
-	node = __mm_find_start(start, end);
-
-	if (node->start < start) {
-		tmp = malloc(sizeof *tmp);
-		if (!tmp) {
-			ret = -1;
-			goto out;
-		}
-
-		tmp->start  = start;
-		tmp->end    = node->end;
-		tmp->refcnt = node->refcnt;
-		node->end   = start - 1;
-
-		__mm_add(tmp);
-		node = tmp;
-	} else {
-		tmp = __mm_prev(node);
-		if (tmp && tmp->refcnt == node->refcnt + inc) {
-			tmp->end = node->end;
-			tmp->refcnt = node->refcnt;
-			__mm_remove(node);
-			node = tmp;
-		}
+	node = get_start_node(start, end, inc);
+	if (!node) {
+		ret = -1;
+		goto out;
 	}
-
 	while (node && node->start <= end) {
 		if (node->end > end) {
-			tmp = malloc(sizeof *tmp);
-			if (!tmp) {
+			if (!split_range(node, end + 1)) {
 				ret = -1;
 				goto out;
 			}
-
-			tmp->start  = end + 1;
-			tmp->end    = node->end;
-			tmp->refcnt = node->refcnt;
-			node->end   = end;
-
-			__mm_add(tmp);
 		}
 
-		node->refcnt += inc;
-
-		if ((inc == -1 && node->refcnt == 0) ||
-		    (inc ==  1 && node->refcnt == 1)) {
+		if ((inc == -1 && node->refcnt == 1) ||
+		    (inc ==  1 && node->refcnt == 0)) {
 			/*
 			 * If this is the first time through the loop,
 			 * and we merged this node with the previous
@@ -528,22 +606,41 @@  static int ibv_madvise_range(void *base, size_t size, int advice)
 				ret = madvise((void *) node->start,
 					      node->end - node->start + 1,
 					      advice);
-			if (ret)
+			if (ret) {
+				/*
+				 * undo merging/splitting operations performed on the node
+				 */
+				node = undo_node(node, start, inc);
+				if (!rolling_back) {
+					/*
+					 * If sub-ranges of [start, end] were already modified
+					 * successfully (from start up to node->start - 1), rescan
+					 * that range and undo all of those changes.
+					 */
+					if (node)
+						node = prepare_to_roll_back(node, start, &end, &inc, &advice);
+					if (node) {
+						rolling_back = 1;
+						continue;
+					}
+				}
 				goto out;
+			}
 		}
 
+		node->refcnt += inc;
 		node = __mm_next(node);
 	}
 
 	if (node) {
 		tmp = __mm_prev(node);
-		if (tmp && node->refcnt == tmp->refcnt) {
-			tmp->end = node->end;
-			__mm_remove(node);
-		}
+		if (tmp && node->refcnt == tmp->refcnt)
+			node = merge_ranges(node, tmp);
 	}
 
 out:
+	if (rolling_back)
+		ret = -1;
 	pthread_mutex_unlock(&mm_mutex);
 
 	return ret;
@@ -568,3 +665,5 @@  int ibv_dofork_range(void *base, size_t size)
 		return 0;
 	}
 }
+
+