diff mbox

[v2] libibverbs: ibv_fork_init() and libhugetlbfs

Message ID 20100609114750.0798c664@alex-laptop (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Alexander Schmidt June 9, 2010, 9:47 a.m. UTC
None
diff mbox

Patch

--- libibverbs.git.orig/src/memory.c
+++ libibverbs.git/src/memory.c
@@ -40,6 +40,8 @@ 
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <stdio.h>
+#include <string.h>
 
 #include "ibverbs.h"
 
@@ -70,10 +72,64 @@  static pthread_mutex_t mm_mutex = PTHREA
 static int page_size;
 static int too_late;
 
+static unsigned long smaps_page_size(FILE *file)
+{
+	int n;
+	unsigned long size = 0;
+	char buf[1024];
+	/* Scan on from the current file position for a KernelPageSize: line. */
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		if (!strstr(buf, "KernelPageSize:"))
+			continue;
+		/* Skip the first whitespace-delimited token, then read the number. */
+		n = sscanf(buf, "%*s %lu", &size);
+		if (n < 1)
+			continue;
+
+		/* page size is printed in kB */
+		size = size * 1024;
+
+		break;
+	}
+
+	return size;	/* bytes; 0 when no value was parsed */
+}
+
+static unsigned long get_page_size(void *base)
+{
+	unsigned long ret = 0;
+	FILE *file;
+	char buf[1024];
+	/* Find base's mapping in smaps and report that mapping's page size. */
+	file = fopen("/proc/self/smaps", "r");
+	if (!file)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		int n;
+		unsigned long range_start, range_end;
+		/* unsigned long, not uintptr_t: %lx stores through unsigned long *. */
+		n = sscanf(buf, "%lx-%lx", &range_start, &range_end);
+
+		if (n < 2)
+			continue;
+		/* Header line is consumed; read on from here for this range's size. */
+		if ((uintptr_t) base >= range_start && (uintptr_t) base < range_end) {
+			ret = smaps_page_size(file);
+			break;
+		}
+	}
+	fclose(file);
+
+out:
+	return ret;	/* bytes; 0 when the page size could not be determined */
+}
+
 int ibv_fork_init(void)
 {
-	void *tmp;
+	void *tmp, *tmp_aligned;
 	int ret;
+	unsigned long size;
 
 	if (mm_root)
 		return 0;
@@ -88,8 +144,17 @@  int ibv_fork_init(void)
 	if (posix_memalign(&tmp, page_size, page_size))
 		return ENOMEM;
 
-	ret = madvise(tmp, page_size, MADV_DONTFORK) ||
-	      madvise(tmp, page_size, MADV_DOFORK);
+	size = get_page_size(tmp);
+
+	if (size)
+		tmp_aligned = (void *)((uintptr_t)tmp & ~(size - 1));
+	else {
+		size = page_size;
+		tmp_aligned = tmp;
+	}
+
+	ret = madvise(tmp_aligned, size, MADV_DONTFORK) ||
+	      madvise(tmp_aligned, size, MADV_DOFORK);
 
 	free(tmp);
 
@@ -522,7 +587,8 @@  static struct ibv_mem_node *undo_node(st
 	return node;
 }
 
-static int ibv_madvise_range(void *base, size_t size, int advice)
+static int ibv_madvise_range(void *base, size_t size, int advice,
+			     unsigned long page_size)
 {
 	uintptr_t start, end;
 	struct ibv_mem_node *node, *tmp;
@@ -612,10 +678,28 @@  out:
 	return ret;
 }
 
+static int ibv_fork_range(void *base, size_t size, int advice)
+{
+	int ret;
+	unsigned long range_page_size;
+	/* First attempt uses the file-scope page_size. */
+	ret = ibv_madvise_range(base, size, advice, page_size);
+	/* On EINVAL, retry once with the page size smaps reports for base. */
+	if (ret == -1 && errno == EINVAL) {
+		range_page_size = get_page_size(base);
+		/* 0 means the size is unknown; keep the original failure. */
+		if (range_page_size)
+			ret = ibv_madvise_range(base, size, advice,
+						range_page_size);
+	}
+
+	return ret;
+}
+
 int ibv_dontfork_range(void *base, size_t size)
 {
 	if (mm_root)
-		return ibv_madvise_range(base, size, MADV_DONTFORK);
+		return ibv_fork_range(base, size, MADV_DONTFORK);
 	else {
 		too_late = 1;
 		return 0;
@@ -625,7 +709,7 @@  int ibv_dontfork_range(void *base, size_
 int ibv_dofork_range(void *base, size_t size)
 {
 	if (mm_root)
-		return ibv_madvise_range(base, size, MADV_DOFORK);
+		return ibv_fork_range(base, size, MADV_DOFORK);
 	else {
 		too_late = 1;
 		return 0;