@@ -40,6 +40,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
#include "ibverbs.h"
@@ -70,10 +72,64 @@ static pthread_mutex_t mm_mutex = PTHREA
static int page_size;
static int too_late;
+/*
+ * Scan forward in an already-open smaps stream for the next line that
+ * contains "KernelPageSize:" and return the number on that line converted
+ * from kB to bytes.  Lines that match but carry no parsable number are
+ * skipped.  Returns 0 when the stream ends without a usable line.
+ */
+static unsigned long smaps_page_size(FILE *file)
+{
+	char line[1024];
+	unsigned long kbytes;
+
+	while (fgets(line, sizeof(line), file)) {
+		/* The value is printed in kB; the first token is the key. */
+		if (strstr(line, "KernelPageSize:") &&
+		    sscanf(line, "%*s %lu", &kbytes) >= 1)
+			return kbytes * 1024;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the page size reported for the mapping that contains @base.
+ *
+ * Reads /proc/self/smaps line by line until it finds a "start-end" header
+ * whose range covers @base, then lets smaps_page_size() read the next
+ * KernelPageSize line from the same stream.
+ *
+ * Returns the value produced by smaps_page_size(), or 0 if smaps cannot be
+ * opened or no listed range contains @base.
+ */
+static unsigned long get_page_size(void *base)
+{
+	unsigned long ret = 0;
+	FILE *file;
+	char buf[1024];
+
+	file = fopen("/proc/self/smaps", "r");
+	if (!file)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		int n;
+		/*
+		 * %lx stores through an unsigned long *, so the targets must
+		 * be unsigned long; uintptr_t is a different type on some
+		 * ABIs and would make the conversion undefined.
+		 */
+		unsigned long range_start, range_end;
+
+		n = sscanf(buf, "%lx-%lx", &range_start, &range_end);
+
+		/* Not a "start-end" header line; keep looking. */
+		if (n < 2)
+			continue;
+
+		if ((uintptr_t) base >= range_start &&
+		    (uintptr_t) base < range_end) {
+			ret = smaps_page_size(file);
+			break;
+		}
+	}
+	fclose(file);
+
+out:
+	return ret;
+}
+
int ibv_fork_init(void)
{
- void *tmp;
+ void *tmp, *tmp_aligned;
int ret;
+ unsigned long size;
if (mm_root)
return 0;
@@ -88,8 +144,17 @@ int ibv_fork_init(void)
if (posix_memalign(&tmp, page_size, page_size))
return ENOMEM;
- ret = madvise(tmp, page_size, MADV_DONTFORK) ||
- madvise(tmp, page_size, MADV_DOFORK);
+ size = get_page_size(tmp);
+
+ if (size)
+ tmp_aligned = (void *)((uintptr_t)tmp & ~(size - 1));
+ else {
+ size = page_size;
+ tmp_aligned = tmp;
+ }
+
+ ret = madvise(tmp_aligned, size, MADV_DONTFORK) ||
+ madvise(tmp_aligned, size, MADV_DOFORK);
free(tmp);
@@ -522,7 +587,8 @@ static struct ibv_mem_node *undo_node(st
return node;
}
-static int ibv_madvise_range(void *base, size_t size, int advice)
+static int ibv_madvise_range(void *base, size_t size, int advice,
+ unsigned long page_size)
{
uintptr_t start, end;
struct ibv_mem_node *node, *tmp;
@@ -612,10 +678,28 @@ out:
return ret;
}
+/*
+ * Apply @advice to the range [@base, @base + @size) via ibv_madvise_range(),
+ * first using the file-level page_size.  If that attempt returns -1 with
+ * errno set to EINVAL, query get_page_size() for @base and retry once with
+ * the page size it reports.
+ *
+ * Returns the result of the last ibv_madvise_range() call made.  When no
+ * retry is possible, -1 is returned with errno left as EINVAL.
+ */
+static int ibv_fork_range(void *base, size_t size, int advice)
+{
+	int ret;
+	unsigned long range_page_size;
+
+	ret = ibv_madvise_range(base, size, advice, page_size);
+	if (ret != -1 || errno != EINVAL)
+		return ret;
+
+	/*
+	 * NOTE(review): presumably EINVAL here means the range is backed by
+	 * pages larger than page_size (e.g. huge pages) -- confirm.
+	 */
+	range_page_size = get_page_size(base);
+	if (!range_page_size) {
+		/*
+		 * The lookup goes through stdio calls that may overwrite
+		 * errno; report the failure the caller actually hit.
+		 */
+		errno = EINVAL;
+		return ret;
+	}
+
+	return ibv_madvise_range(base, size, advice, range_page_size);
+}
+
int ibv_dontfork_range(void *base, size_t size)
{
if (mm_root)
- return ibv_madvise_range(base, size, MADV_DONTFORK);
+ return ibv_fork_range(base, size, MADV_DONTFORK);
else {
too_late = 1;
return 0;
@@ -625,7 +709,7 @@ int ibv_dontfork_range(void *base, size_
int ibv_dofork_range(void *base, size_t size)
{
if (mm_root)
- return ibv_madvise_range(base, size, MADV_DOFORK);
+ return ibv_fork_range(base, size, MADV_DOFORK);
else {
too_late = 1;
return 0;