diff mbox

[RFC] libibverbs: ibv_fork_init() and libhugetlbfs

Message ID 20100507121936.283a18c6@alex-laptop (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Alexander Schmidt May 7, 2010, 10:19 a.m. UTC
None
diff mbox

Patch

--- libibverbs-1.1.2.orig/src/memory.c
+++ libibverbs-1.1.2/src/memory.c
@@ -40,6 +40,8 @@ 
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <stdio.h>
+#include <string.h>
 
 #include "ibverbs.h"
 
@@ -68,12 +70,45 @@  struct ibv_mem_node {
 static struct ibv_mem_node *mm_root;
 static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int page_size;
+static int huge_page_size;
 static int too_late;
 
+/* Return the huge page size in bytes, taken from the "Hugepagesize:"
+ * line of /proc/meminfo.  Returns -1 if the file cannot be opened, the
+ * line is not found, or the size would not fit in the int result. */
+static int get_huge_page_size(void)
+{
+	char buf[1024], type[128];
+	unsigned long size;
+	int ret = -1;
+	FILE *file;
+
+	file = fopen("/proc/meminfo", "r");
+	if (!file)
+		return ret;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		/* pass type (char *), not &type, to match %127s */
+		if (sscanf(buf, "%127s %lu %*s", type, &size) < 2)
+			continue;
+
+		if (!strcmp(type, "Hugepagesize:")) {
+			/* huge page size is printed in kB; skip values
+			 * whose byte count would overflow a 32-bit int */
+			if (size <= INT32_MAX / 1024)
+				ret = (int) (size * 1024);
+			break;
+		}
+	}
+	fclose(file);
+
+	return ret;
+}
+
 int ibv_fork_init(void)
 {
 	void *tmp;
-	int ret;
+	int ret, size;
 
 	if (mm_root)
 		return 0;
@@ -85,11 +120,18 @@  int ibv_fork_init(void)
 	if (page_size < 0)
 		return errno;
 
-	if (posix_memalign(&tmp, page_size, page_size))
+	huge_page_size = get_huge_page_size();
+
+	if (huge_page_size > page_size)
+		size = huge_page_size;
+	else
+		size = page_size;
+
+	if (posix_memalign(&tmp, size, size))
 		return ENOMEM;
 
-	ret = madvise(tmp, page_size, MADV_DONTFORK) ||
-	      madvise(tmp, page_size, MADV_DOFORK);
+	ret = madvise(tmp, size, MADV_DONTFORK) ||
+	      madvise(tmp, size, MADV_DOFORK);
 
 	free(tmp);
 
@@ -446,11 +488,51 @@  static struct ibv_mem_node *__mm_find_st
 	return node;
 }
 
+/* Return 1 if base lies inside a mapping listed in /proc/<pid>/maps
+ * whose path field contains "libhugetlbfs", and 0 otherwise.  0 is
+ * also returned when the maps file cannot be opened.  Only the address
+ * base itself is tested, not a range starting at base. */
+static int is_huge_page(void *base)
+{
+	uintptr_t addr = (uintptr_t) base;
+	char buf[1024], lib[128];
+	int ret = 0;
+	FILE *file;
+
+	/* pid_t is cast because %d expects exactly an int */
+	snprintf(buf, sizeof(buf), "/proc/%d/maps", (int) getpid());
+
+	file = fopen(buf, "r");
+	if (!file)
+		return ret;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		/* %lx needs unsigned long; uintptr_t may be another type */
+		unsigned long range_start, range_end;
+
+		/* fields: start-end perms offset dev inode path */
+		if (sscanf(buf, "%lx-%lx %*s %*x %*s %*u %127s",
+			   &range_start, &range_end, lib) < 3)
+			continue;
+
+		if (!strstr(lib, "libhugetlbfs"))
+			continue;
+
+		if (addr >= range_start && addr < range_end) {
+			ret = 1;
+			break;
+		}
+	}
+	fclose(file);
+
+	return ret;
+}
+
 static int ibv_madvise_range(void *base, size_t size, int advice)
 {
 	uintptr_t start, end;
 	struct ibv_mem_node *node, *tmp;
-	int inc;
+	int inc, range_page_size;
 	int ret = 0;
 
 	if (!size)
@@ -458,9 +540,14 @@  static int ibv_madvise_range(void *base,
 
 	inc = advice == MADV_DONTFORK ? 1 : -1;
 
-	start = (uintptr_t) base & ~(page_size - 1);
-	end   = ((uintptr_t) (base + size + page_size - 1) &
-		 ~(page_size - 1)) - 1;
+	if (huge_page_size > page_size && is_huge_page(base))
+		range_page_size = huge_page_size;
+	else
+		range_page_size = page_size;
+
+	start = (uintptr_t) base & ~(range_page_size - 1);
+	end   = ((uintptr_t) (base + size + range_page_size - 1) &
+		 ~(range_page_size - 1)) - 1;
 
 	pthread_mutex_lock(&mm_mutex);