@@ -40,6 +40,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
#include "ibverbs.h"
@@ -68,12 +70,45 @@ struct ibv_mem_node {
static struct ibv_mem_node *mm_root;
static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER;
static int page_size;
+static int huge_page_size;
static int too_late;
+/* Return "Hugepagesize:" from /proc/meminfo in bytes, or -1 if unavailable. */
+static int get_huge_page_size(void)
+{
+	int ret = -1;
+	FILE *file;
+	const char *path = "/proc/meminfo";
+	char buf[1024], type[128];
+
+	file = fopen(path, "r");
+	if (!file)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		int n;
+		unsigned long size;
+		/* "%127s" bounds the label to sizeof(type) - 1 characters */
+		n = sscanf(buf, "%127s %lu %*s", type, &size);
+		if (n < 2)
+			continue;
+
+		if (!strcmp(type, "Hugepagesize:")) {
+			/* value is in kB; NOTE(review): bytes may not fit in int */
+			ret = size * 1024;
+			break;
+		}
+	}
+	fclose(file);
+
+out:
+	return ret;
+}
+
int ibv_fork_init(void)
{
void *tmp;
- int ret;
+ int ret, size;
if (mm_root)
return 0;
@@ -85,11 +120,18 @@ int ibv_fork_init(void)
if (page_size < 0)
return errno;
- if (posix_memalign(&tmp, page_size, page_size))
+ huge_page_size = get_huge_page_size();
+
+ if (huge_page_size > page_size)
+ size = huge_page_size;
+ else
+ size = page_size;
+
+ if (posix_memalign(&tmp, size, size))
return ENOMEM;
- ret = madvise(tmp, page_size, MADV_DONTFORK) ||
- madvise(tmp, page_size, MADV_DOFORK);
+ ret = madvise(tmp, size, MADV_DONTFORK) ||
+ madvise(tmp, size, MADV_DOFORK);
free(tmp);
@@ -446,11 +488,51 @@ static struct ibv_mem_node *__mm_find_st
return node;
}
+/*
+ * Return 1 if base falls in a mapping of /proc/<pid>/maps whose path
+ * contains "libhugetlbfs", 0 otherwise (including when maps is unreadable).
+ */
+static int is_huge_page(void *base)
+{
+	int ret = 0;
+	FILE *file;
+	char buf[1024], lib[128];
+
+	/* buf first holds the maps path, then each line read from it */
+	snprintf(buf, sizeof(buf), "/proc/%d/maps", (int) getpid());
+	file = fopen(buf, "r");
+	if (!file)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		int n;
+		/* unsigned long is the type "%lx" stores through */
+		unsigned long range_start, range_end;
+
+		n = sscanf(buf, "%lx-%lx %*s %*x %*s %*u %127s",
+			   &range_start, &range_end, lib);
+		/* lines with no path field match fewer than 3 items */
+		if (n < 3)
+			continue;
+
+		if (strstr(lib, "libhugetlbfs") &&
+		    (uintptr_t) base >= range_start &&
+		    (uintptr_t) base < range_end) {
+			ret = 1;
+			break;
+		}
+	}
+	fclose(file);
+
+out:
+	return ret;
+}
+
static int ibv_madvise_range(void *base, size_t size, int advice)
{
uintptr_t start, end;
struct ibv_mem_node *node, *tmp;
- int inc;
+ int inc, range_page_size;
int ret = 0;
if (!size)
@@ -458,9 +540,14 @@ static int ibv_madvise_range(void *base,
inc = advice == MADV_DONTFORK ? 1 : -1;
- start = (uintptr_t) base & ~(page_size - 1);
- end = ((uintptr_t) (base + size + page_size - 1) &
- ~(page_size - 1)) - 1;
+ if (huge_page_size > page_size && is_huge_page(base))
+ range_page_size = huge_page_size;
+ else
+ range_page_size = page_size;
+
+ start = (uintptr_t) base & ~(range_page_size - 1);
+ end = ((uintptr_t) (base + size + range_page_size - 1) &
+ ~(range_page_size - 1)) - 1;
pthread_mutex_lock(&mm_mutex);