==========================================================================
From d5f9ecb89ac5de7467fe587b6ccdead39ee00049 Mon Sep 17 00:00:00 2001
From: Timofey Titovets <nefelim4ag@gmail.com>
Date: Wed, 18 Apr 2018 22:32:20 +0300
Subject: [PATCH] ksm: replace jhash2 with faster hash
1. Pick up Sioh Lee's crc32 patch, after a long conversation
2. Merge it with my work on xxhash
3. Add autoselect code to choose the fastest hash helper.
The base idea is the same: replace jhash2 with something faster.
Perf numbers:
Intel(R) Xeon(R) CPU E5-2420 v2 @ 2.20GHz
ksm: crc32c hash() 12081 MB/s
ksm: xxh64 hash() 8770 MB/s
ksm: xxh32 hash() 4529 MB/s
ksm: jhash2 hash() 1569 MB/s
As jhash2 will always be slower (for data sizes like PAGE_SIZE),
just drop it from the choice.
Add a function to autoselect the hash algorithm during the first page merging run.
Move the init of zero_checksum from module init to the first call of fasthash():
1. KSM init runs during early kernel init; running perf-testing stuff
on the main kernel boot thread looks bad to me.
2. The crypto subsystem is not available that early in boot,
so crc32c, even if compiled in, is not available.
As both crypto and ksm init run at subsys_initcall() (level 4) of kernel
init, all possible consumers will run later, at levels 5+.
Output after the first attempt of KSM to hash a page:
ksm: using crc32c as hash function
Thanks.
Changes:
v1 -> v2:
- Move xxhash() to xxhash.h/c and separate patches
v2 -> v3:
- Move xxhash() xxhash.c -> xxhash.h
- replace xxhash_t with 'unsigned long'
- update kerneldoc above xxhash()
v3 -> v4:
- Merge xxhash/crc32 patches
- Replace crc32 with crc32c (crc32 have same as jhash2 speed)
- Add auto speed test and auto choice of fastest hash function
v4 -> v5:
- Pickup missed xxhash patch
- Update code with compile-time chosen xxhash
- Add more macros to make code more readable
- As it is now only possible to use xxhash or crc32c,
on crc32c allocation error skip the speed test and fall back to xxhash
- To work around the too-early-init problem (crc32c not available),
move zero_checksum init to the first call of fasthash()
- Don't allocate a page for hash testing; use the arch zero page for that
v5 -> v6:
- Use libcrc32c instead of the CRYPTO API, mainly to simplify
code/Kconfig dependencies
- Add crc32c_available():
libcrc32c will BUG_ON on crc32c problems,
so test crc32c availability with crc32c_available()
- Simplify choice_fastest_hash()
- Simplify fasthash()
- struct rmap_item and struct stable_node have sizeof == 64 on x86_64,
which makes them cache friendly. As we don't suffer from hash
collisions, change the hash type from unsigned long back to u32.
- Fix kbuild robot warning: make all local functions static
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
Signed-off-by: leesioh <solee@os.korea.ac.kr>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
CC: Andrea Arcangeli <aarcange@redhat.com>
CC: linux-mm@kvack.org
CC: kvm@vger.kernel.org
---
mm/Kconfig | 1 +
mm/ksm.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 46 insertions(+), 4 deletions(-)
@@ -298,6 +298,7 @@ config MMU_NOTIFIER
config KSM
bool "Enable KSM for page merging"
depends on MMU
+ select XXHASH
help
Enable Kernel Samepage Merging: KSM periodically scans those areas
of an application's address space that an app has advised may be
@@ -25,7 +25,6 @@
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
-#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
@@ -39,6 +38,9 @@
#include <linux/freezer.h>
#include <linux/oom.h>
#include <linux/numa.h>
+#include <crypto/hash.h>
+#include <linux/crc32c.h>
+#include <linux/xxhash.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -284,6 +286,47 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
sizeof(struct __struct), __alignof__(struct __struct),\
(__flags), NULL)
+static DEFINE_STATIC_KEY_FALSE(ksm_use_crc32c);
+static DEFINE_STATIC_KEY_FALSE(ksm_use_xxhash);
+
+static void fasthash_setup(void)
+{
+ struct crypto_shash *shash = crypto_alloc_shash("crc32c", 0, 0);
+
+ if (!IS_ERR(shash)) {
+ /* Use crc32c if any non-generic version is available.
+ * Generic crypto algorithms have priority 100.
+ */
+ if (crypto_tfm_alg_priority(&shash->base) > 100) {
+ static_branch_enable(&ksm_use_crc32c);
+ pr_info("ksm: using crc32c as hash function");
+ }
+ crypto_free_shash(shash);
+ }
+
+ if (!static_branch_likely(&ksm_use_crc32c)) {
+ static_branch_enable(&ksm_use_xxhash);
+ pr_info("ksm: using xxhash as hash function");
+ }
+}
+
+static u32 fasthash(const void *input, size_t length)
+{
+ if (static_branch_likely(&ksm_use_crc32c))
+ return crc32c(0, input, length);
+
+ if (static_branch_likely(&ksm_use_xxhash))
+ return (u32)xxhash(input, length, 0);
+
+ /* Is done only once on the first call of fasthash() */
+ fasthash_setup();
+
+ /* Now, that we know the hash alg., calculate checksum for zero page */
+ zero_checksum = fasthash(ZERO_PAGE(0), PAGE_SIZE);
+
+ return fasthash(input, length);
+}
+
static int __init ksm_slab_init(void)
{
rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -979,7 +1022,7 @@ static u32 calc_checksum(struct page *page)
{
u32 checksum;
void *addr = kmap_atomic(page);
- checksum = jhash2(addr, PAGE_SIZE / 4, 17);
+ checksum = fasthash(addr, PAGE_SIZE);
kunmap_atomic(addr);
return checksum;
}
@@ -3100,8 +3143,6 @@ static int __init ksm_init(void)
struct task_struct *ksm_thread;
int err;
- /* The correct value depends on page size and endianness */
- zero_checksum = calc_checksum(ZERO_PAGE(0));
/* Default to false for backwards compatibility */
ksm_use_zero_pages = false;