From patchwork Sun Nov 29 16:51:05 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alex Vainman X-Patchwork-Id: 63572 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id nATGpHYd021275 for ; Sun, 29 Nov 2009 16:51:17 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751416AbZK2QvJ (ORCPT ); Sun, 29 Nov 2009 11:51:09 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752323AbZK2QvJ (ORCPT ); Sun, 29 Nov 2009 11:51:09 -0500 Received: from fwil.voltaire.com ([193.47.165.2]:16718 "EHLO exil.voltaire.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1751416AbZK2QvI (ORCPT ); Sun, 29 Nov 2009 11:51:08 -0500 Received: from [127.0.0.1] ([172.25.5.22]) by exil.voltaire.com with Microsoft SMTPSVC(6.0.3790.3959); Sun, 29 Nov 2009 18:51:05 +0200 Message-ID: <4B12A679.3000800@gmail.com> Date: Sun, 29 Nov 2009 18:51:05 +0200 From: Alex Vainman Reply-To: alexv@voltaire.com User-Agent: Thunderbird 2.0.0.23 (Windows/20090812) MIME-Version: 1.0 To: roland CC: linux-rdma@vger.kernel.org Subject: [PATCH] libibverbs: Undo changes in memory range tree when madvise() fails X-OriginalArrivalTime: 29 Nov 2009 16:51:05.0309 (UTC) FILETIME=[2423C8D0:01CA7114] Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org diff --git a/src/memory.c b/src/memory.c index 53d86b7..550015a 100644 --- a/src/memory.c +++ b/src/memory.c @@ -446,12 +446,121 @@ static struct ibv_mem_node *__mm_find_start(uintptr_t start, uintptr_t end) return node; } +static struct ibv_mem_node *merge_ranges(struct ibv_mem_node *node, + struct ibv_mem_node *prev) +{ + struct ibv_mem_node *new_node = NULL; + + prev->end = node->end; + prev->refcnt = node->refcnt; + __mm_remove(node); + new_node = prev; + + return new_node; +} + +static struct ibv_mem_node *split_range(struct 
ibv_mem_node *node, + uintptr_t cut_line) +{ + struct ibv_mem_node *new_node = NULL; + + new_node = malloc(sizeof *new_node); + if (!new_node) + return NULL; + new_node->start = cut_line; + new_node->end = node->end; + new_node->refcnt = node->refcnt; + node->end = cut_line - 1; + __mm_add(new_node); + + return new_node; +} + +static struct ibv_mem_node *get_start_node(uintptr_t start, uintptr_t end, + int inc) +{ + struct ibv_mem_node *node, *tmp = NULL; + + node = __mm_find_start(start, end); + if (node->start < start) + node = split_range(node, start); + else{ + tmp = __mm_prev(node); + if (tmp && tmp->refcnt == node->refcnt + inc) + node = merge_ranges(node, tmp); + } + return node; +} + +/* + * This function is called when madvise() fails in order to + * undo the merging/splitting operations performed on the node. + */ +static struct ibv_mem_node *undo_node(struct ibv_mem_node *node, + uintptr_t start, int inc) +{ + struct ibv_mem_node *tmp = NULL; + + /* + * This condition can be true only if we merged the node that begins at start + * and ends at node->end with the previous node, which begins at node->start + * and ends at start - 1. + */ + if (start > node->start) { + tmp = split_range(node, start); + if (tmp) { + node->refcnt += inc; + node = tmp; + } else + return NULL; + } + + tmp = __mm_prev(node); + if (tmp && tmp->refcnt == node->refcnt) + node = merge_ranges(node, tmp); + + tmp = __mm_next(node); + if (tmp && tmp->refcnt == node->refcnt) + node = merge_ranges(tmp, node); + + return node; +} + +/* + * This function is called when madvise() fails. + * The node which caused madvise() to fail may cover just a sub-range of [start, end], + * so we need to undo all the successful changes (if any) already performed on the range + * [start, (node->prev)->end]. + * The function finds the node to begin rescanning from, finds the end of the + * range to rescan, and inverts the operation type. 
+ */ +static struct ibv_mem_node *prepare_to_roll_back(struct ibv_mem_node *node, + uintptr_t start, + uintptr_t *p_end, + int *p_inc, + int *p_advice) +{ + struct ibv_mem_node *tmp = NULL; + + *p_inc *= -1; + *p_advice = *p_inc == 1 ? MADV_DONTFORK : MADV_DOFORK; + tmp = __mm_prev(node); + node = NULL; + if (tmp) { + *p_end = tmp->end; + if (start <= *p_end) + node = get_start_node(start, *p_end, *p_inc); + } + return node; +} + static int ibv_madvise_range(void *base, size_t size, int advice) { uintptr_t start, end; struct ibv_mem_node *node, *tmp; int inc; int ret = 0; + int rolling_back = 0; if (!size) return 0; @@ -464,52 +573,21 @@ static int ibv_madvise_range(void *base, size_t size, int advice) pthread_mutex_lock(&mm_mutex); - node = __mm_find_start(start, end); - - if (node->start < start) { - tmp = malloc(sizeof *tmp); - if (!tmp) { - ret = -1; - goto out; - } - - tmp->start = start; - tmp->end = node->end; - tmp->refcnt = node->refcnt; - node->end = start - 1; - - __mm_add(tmp); - node = tmp; - } else { - tmp = __mm_prev(node); - if (tmp && tmp->refcnt == node->refcnt + inc) { - tmp->end = node->end; - tmp->refcnt = node->refcnt; - __mm_remove(node); - node = tmp; - } + node = get_start_node(start, end, inc); + if (!node) { + ret = -1; + goto out; } - while (node && node->start <= end) { if (node->end > end) { - tmp = malloc(sizeof *tmp); - if (!tmp) { + if (!split_range(node, end + 1)) { ret = -1; goto out; } - - tmp->start = end + 1; - tmp->end = node->end; - tmp->refcnt = node->refcnt; - node->end = end; - - __mm_add(tmp); } - node->refcnt += inc; - - if ((inc == -1 && node->refcnt == 0) || - (inc == 1 && node->refcnt == 1)) { + if ((inc == -1 && node->refcnt == 1) || + (inc == 1 && node->refcnt == 0)) { /* * If this is the first time through the loop, * and we merged this node with the previous @@ -528,22 +606,41 @@ static int ibv_madvise_range(void *base, size_t size, int advice) ret = madvise((void *) node->start, node->end - node->start + 1, 
advice); - if (ret) + if (ret) { + /* + * Undo the merging/splitting operations performed on the node. + */ + node = undo_node(node, start, inc); + if (!rolling_back) { + /* + * If we already successfully modified sub-ranges of [start, end] + * (from start to node->start - 1), we need to rescan that range + * and undo all the changes. + */ + if (node) + node = prepare_to_roll_back(node, start, &end, &inc, &advice); + if (node) { + rolling_back = 1; + continue; + } + } goto out; + } } + node->refcnt += inc; node = __mm_next(node); } if (node) { tmp = __mm_prev(node); - if (tmp && node->refcnt == tmp->refcnt) { - tmp->end = node->end; - __mm_remove(node); - } + if (tmp && node->refcnt == tmp->refcnt) + node = merge_ranges(node, tmp); } out: + if (rolling_back) + ret = -1; pthread_mutex_unlock(&mm_mutex); return ret; @@ -568,3 +665,5 @@ int ibv_dofork_range(void *base, size_t size) return 0; } } + +