@@ -10,6 +10,7 @@
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/sched.h>
+#include <asm/timex.h>
#include <linux/htree.h>
@@ -44,8 +45,8 @@
*/
-/*
#define HTREE_DEBUG_INFO
+/*
#define HTREE_DEBUG_DETAIL
*/
@@ -277,7 +278,7 @@ static void htree_debug_hdata(struct htree_state *hts, struct hash_tree *hcurr,
"htf_freed",
};
- if (!hcurr)
+ if (unlikely(!hcurr))
return;
dept = hts->dept;
@@ -331,7 +332,7 @@ static void __htree_debug_walks_all(struct htree_state *hts,
for (k = 0; k < anum; k++) {
ncnt = ht_ncnt_get(htree[k].next);
- if (ncnt > 0) {
+ if (likely(ncnt > 0)) {
bits = ht_bits_from_depth(hts->sbit, hts->dept);
pr_ht_debug("d:%u b:%u [%u] %p (%u): ",
hts->dept, bits, k, &htree[k], ncnt);
@@ -407,14 +408,14 @@ static void __htree_erase_all_lock(struct htree_state *hts,
for (k = 0; k < anum; k++) {
ncnt = ht_ncnt_get(htree[k].next);
- if (ncnt > 0) {
+ if (likely(ncnt > 0)) {
bits = ht_bits_from_depth(hts->sbit, hts->dept);
hlist_for_each_entry_safe(pos, tmp,
&htree[k].head, hnode) {
hlist_del(&pos->hnode);
udata = hlist_entry_safe(pos,
struct data_struct, hdata);
- if (udata) {
+ if (likely(udata)) {
kfree(udata);
(*erased)++;
}
@@ -478,17 +479,20 @@ static u64 _htree_insert_range(struct htree_state *hts, struct htree_root *root,
{
u64 index;
u64 loop = 0, ins = 0, era = 0;
+ cycles_t time1, time2;
+ u64 latency;
struct data_struct *udata;
struct htree_data *rdata;
pr_ht_info("[++++) inserting: [s:%llu ... e:%llu] (g:%llu)\n",
start, end, gap);
+ time1 = get_cycles();
for (index = start; index <= end; index += gap) {
udata = _htree_data_alloc(index);
rdata = ht_insert_lock(hts, root, &udata->hdata, req);
if (req == htf_erase && rdata) {
udata = hlist_entry_safe(rdata, struct data_struct, hdata);
- if (udata && rdata->index == index) {
+ if (likely((udata && rdata->index == index))) {
kfree(udata);
era++;
}
@@ -498,8 +502,10 @@ static u64 _htree_insert_range(struct htree_state *hts, struct htree_root *root,
if (!(loop % HTREE_TEST_SCHED_CNT))
schedule();
}
- pr_ht_info("(++++] done: loop:%llu, inserted:%llu, same erased:%llu\n\n",
- loop, ins, era);
+ time2 = get_cycles();
+ latency = div_u64(time2 - time1, loop);
+ pr_ht_info("(++++] done: loop:%llu, inserted:%llu, same erased:%llu, \
+latency:%llu cycles\n\n", loop, ins, era, latency);
return ins - era;
}
@@ -517,16 +523,19 @@ static u64 _htree_find_range(struct htree_state *hts, struct htree_root *root,
{
u64 index;
u64 loop = 0, found = 0;
+ cycles_t time1, time2;
+ u64 latency;
struct data_struct *udata;
struct htree_data *rdata;
pr_ht_info("[****) finding: [s:%llu ... e:%llu] (g:%llu)\n",
start, end, gap);
+ time1 = get_cycles();
for (index = start; index <= end; index += gap) {
rdata = ht_find(hts, htree_first_rcu(root), index);
if (rdata) {
udata = hlist_entry_safe(rdata, struct data_struct, hdata);
- if (udata && rdata->index == index) {
+ if (likely(udata && rdata->index == index)) {
pr_ht_find("*todo: find:<%llu> %c %c %c\n",
index, udata->a, (char)udata->b, (char)udata->c);
found++;
@@ -537,8 +546,10 @@ static u64 _htree_find_range(struct htree_state *hts, struct htree_root *root,
if (!(loop % HTREE_TEST_SCHED_CNT))
schedule();
}
- pr_ht_info("(****] done: loop:%llu, found:%llu, diff:%llu\n\n",
- loop, found, loop - found);
+ time2 = get_cycles();
+ latency = div_u64(time2 - time1, loop);
+ pr_ht_info("(****] done: loop:%llu, found:%llu, diff:%llu, \
+latency:%llu cycles\n\n", loop, found, loop - found, latency);
return found;
}
@@ -555,18 +566,21 @@ static u64 _htree_erase_range(struct htree_state *hts, struct htree_root *root,
{
u64 index;
u64 loop = 0, erased = 0;
+ cycles_t time1, time2;
+ u64 latency;
struct hash_tree *htree;
struct data_struct *udata;
struct htree_data *rdata;
pr_ht_info("[----) erasing: [s:%llu ... e:%llu] (g:%llu)\n",
start, end, gap);
+ time1 = get_cycles();
for (index = start; index <= end; index += gap) {
htree = htree_first_rcu(root);
rdata = ht_erase_lock(hts, root, index);
if (rdata) {
udata = hlist_entry_safe(rdata, struct data_struct, hdata);
- if (udata && rdata->index == index) {
+ if (likely(udata && rdata->index == index)) {
pr_ht_erase("*todo: erase:<%llu> %c %c %c\n",
index, udata->a, (char)udata->b, (char)udata->c);
kfree(udata);
@@ -582,8 +596,10 @@ static u64 _htree_erase_range(struct htree_state *hts, struct htree_root *root,
if (!(loop % HTREE_TEST_SCHED_CNT))
schedule();
}
- pr_ht_info("(----] done: loop:%llu, erased:%llu, diff:%llu\n\n",
- loop, erased, loop - erased);
+ time2 = get_cycles();
+ latency = div_u64(time2 - time1, loop);
+ pr_ht_info("(----] done: loop:%llu, erased:%llu, diff:%llu, \
+latency:%llu cycles\n\n", loop, erased, loop - erased, latency);
return erased;
}
@@ -600,18 +616,21 @@ static u64 _htree_update_range(struct htree_state *hts, struct htree_root *root,
{
u64 index;
u64 loop = 0, updated = 0;
+ cycles_t time1, time2;
+ u64 latency;
struct hash_tree *htree;
struct data_struct *udata;
struct htree_data *rdata;
pr_ht_info("[####) updating: [s:%llu ... e:%llu] (g:%llu)\n",
start, end, gap);
+ time1 = get_cycles();
for (index = start; index <= end; index += gap) {
htree = htree_first_rcu(root);
rdata = ht_find(hts, htree, index);
if (rdata) {
udata = hlist_entry_safe(rdata, struct data_struct, hdata);
- if (udata && rdata->index == index) {
+ if (likely(udata && rdata->index == index)) {
pr_ht_update("*todo: update:<%llu> %c %c %c ",
index, udata->a, (char)udata->b, (char)udata->c);
/* todo: update user defined data */
@@ -633,8 +652,11 @@ static u64 _htree_update_range(struct htree_state *hts, struct htree_root *root,
if (!(loop % HTREE_TEST_SCHED_CNT))
schedule();
}
- pr_ht_info("(####] done: loop:%llu, updated:%llu, diff:%llu\n\n",
- loop, updated, loop - updated);
+ time2 = get_cycles();
+ latency = div_u64(time2 - time1, loop);
+ pr_ht_info("(####] done: loop:%llu, updated:%llu, diff:%llu, \
+latency: %llu cycles\n\n",
+ loop, updated, loop - updated, latency);
return updated;
}
@@ -651,7 +673,7 @@ static void _htree_statis(struct htree_state *hts, struct htree_root *root)
ht_statis(hts, htree_first_rcu(root), &acnt, &dcnt);
- if (hts->dcnt == dcnt && hts->acnt == acnt) {
+ if (likely(hts->dcnt == dcnt && hts->acnt == acnt)) {
pr_ht_info("[ OK ] statist: acnt:%d, dcnt:%llu ", acnt, dcnt);
} else {
pr_ht_info("[FAIL] statist: acnt:%d(%d), dcnt:%llu(%llu)\n",
@@ -676,7 +698,7 @@ static void _htree_statis_info(struct htree_state *hts, struct htree_root *root)
u64 dsum = (sizd * hts->dcnt) >> 10;
u64 smem = hsum + dsum;
- if (hts->asum == 0)
+ if (unlikely(hts->asum == 0))
_htree_statis(hts, root);
pr_ht_stat("------------------------------------------\n");
@@ -711,7 +733,7 @@ static void _htree_get_most_index(struct htree_state *hts, struct htree_root *ro
struct htree_data *hdata;
hdata = ht_most_index(hts, htree_first_rcu(root));
- if (hdata) {
+ if (likely(hdata)) {
if (hts->sort == HTREE_FLAG_ASCD) {
pr_ht_stat("[MOST] smallest index:%llu\n\n", hdata->index);
} else {
@@ -730,13 +752,13 @@ static void _htree_remove_all(struct htree_state *hts, struct htree_root *root)
{
/* remove all udata */
hts->dcnt -= htree_erase_all_lock(hts, root);
- if (hts->dcnt != 0) {
+ if (unlikely(hts->dcnt != 0)) {
pr_ht_warn("[WARN] erase remained acnt:%d, dcnt:%llu\n\n",
hts->acnt, hts->dcnt);
}
/* remove all hash trees */
- if (ht_destroy_lock(hts, root) == htf_ok) {
+ if (likely(ht_destroy_lock(hts, root) == htf_ok)) {
pr_ht_stat("[ OK ] destroy remained acnt:%d, dcnt:%llu\n\n",
hts->acnt, hts->dcnt);
} else {
@@ -761,7 +783,7 @@ static u64 _htree_test_index_loop(struct htree_state *hts, u64 start, u64 end)
u64 inserted, found, erased, updated;
u64 dcnt, slice;
- if (start > end)
+ if (unlikely(start > end))
return 0;
slice = (end - start) / 10 + 2;
@@ -769,7 +791,7 @@ static u64 _htree_test_index_loop(struct htree_state *hts, u64 start, u64 end)
htree_root_alloc(hts, &ht_root);
inserted = _htree_insert_range(hts, &ht_root, start, end, 1, htf_ins);
- if (inserted != hts->dcnt) {
+ if (unlikely(inserted != hts->dcnt)) {
pr_ht_err("[FAIL] inserted:%llu, dcnt:%llu, diff:%lld\n\n",
inserted, hts->dcnt, inserted - hts->dcnt);
}
@@ -778,7 +800,7 @@ static u64 _htree_test_index_loop(struct htree_state *hts, u64 start, u64 end)
erased = _htree_erase_range(hts, &ht_root, start, end, slice);
found = _htree_find_range(hts, &ht_root, start, end, slice);
- if (found) {
+ if (unlikely(found)) {
pr_ht_err("[FAIL] erased:%llu, found:%llu, diff:%lld\n\n",
erased, found, erased - found);
}
@@ -787,7 +809,7 @@ static u64 _htree_test_index_loop(struct htree_state *hts, u64 start, u64 end)
inserted = _htree_insert_range(hts, &ht_root, start, end, slice, htf_ins);
updated = _htree_update_range(hts, &ht_root, start, end, slice);
- if (inserted != updated) {
+ if (unlikely(inserted != updated)) {
pr_ht_err("[FAIL] inserted:%llu, updated:%llu, diff:%lld\n\n",
inserted, updated, inserted - updated);
}
@@ -916,7 +938,7 @@ static void _htree_test_idx_random(u8 idx_type, u8 sort_type, u64 maxnr)
udata = _htree_data_alloc(index);
rdata = ht_insert_lock(hts, &ht_root, &udata->hdata, htf_ins);
- if (!rdata)
+ if (likely(!rdata))
inserted++;
loop++;
if (!(loop % HTREE_TEST_SCHED_CNT))
@@ -926,7 +948,7 @@ static void _htree_test_idx_random(u8 idx_type, u8 sort_type, u64 maxnr)
_htree_statis(hts, &ht_root);
rdata = ht_find(hts, htree_first_rcu(&ht_root), check_idx);
- if (!rdata) {
+ if (unlikely(!rdata)) {
pr_ht_err("[FAIL] NOT found check index:%llu\n\n", check_idx);
}
@@ -939,7 +961,7 @@ static void _htree_test_idx_random(u8 idx_type, u8 sort_type, u64 maxnr)
rdata = ht_erase_lock(hts, &ht_root, index);
if (rdata) {
udata = hlist_entry_safe(rdata, struct data_struct, hdata);
- if (udata && rdata->index == index) {
+ if (likely(udata && rdata->index == index)) {
pr_ht_erase("*todo: erase:<%llu> %c %c %c\n",
index, udata->a, (char)udata->b, (char)udata->c);
kfree(udata);
@@ -954,7 +976,7 @@ static void _htree_test_idx_random(u8 idx_type, u8 sort_type, u64 maxnr)
_htree_statis(hts, &ht_root);
rdata = ht_find(hts, htree_first_rcu(&ht_root), check_idx);
- if (!rdata) {
+ if (unlikely(!rdata)) {
pr_ht_info("[INFO] check index:%llu (erased)\n\n", check_idx);
}
@@ -1006,7 +1028,7 @@ static void _htree_test_index_same(u8 idx_type, u8 sort_type, u64 maxnr)
pr_ht_stat("[loop) %llu: new index inserting(htf_ins)\n\n", maxnr);
inserted = _htree_insert_range(hts, &ht_root, 0, maxnr, gap - 1, htf_ins);
- if (inserted != hts->dcnt) {
+ if (unlikely(inserted != hts->dcnt)) {
pr_ht_err("[FAIL] inserted:%llu, dcnt:%llu, diff:%lld\n\n",
inserted, hts->dcnt, inserted - hts->dcnt);
}
@@ -1015,20 +1037,20 @@ static void _htree_test_index_same(u8 idx_type, u8 sort_type, u64 maxnr)
pr_ht_stat("[loop) %llu: SAME index inserting(htf_erase)\n\n", maxnr);
inserted = _htree_insert_range(hts, &ht_root, 1, maxnr, gap, htf_erase);
- if (inserted != 0) {
+ if (unlikely(inserted != 0)) {
pr_ht_err("[FAIL] inserted:%llu, dcnt:%llu, diff:%lld\n\n",
inserted, hts->dcnt, inserted - hts->dcnt);
}
pr_ht_stat("[loop) %llu: SAME index inserting(htf_ins)\n\n", maxnr);
inserted = _htree_insert_range(hts, &ht_root, 1, maxnr, gap, htf_ins);
- if (inserted != (maxnr / gap)) {
+ if (unlikely(inserted != (maxnr / gap))) {
pr_ht_err("[FAIL] inserted:%llu, dcnt:%llu, diff:%lld\n\n",
inserted, hts->dcnt, inserted - hts->dcnt);
}
found = _htree_find_range(hts, &ht_root, 0, maxnr, gap - 1);
- if (found != (hts->dcnt - inserted)) {
+ if (unlikely(found != (hts->dcnt - inserted))) {
pr_ht_err("[FAIL] dcnt:%llu, inserted:%llu, found:%llu\n\n",
hts->dcnt, inserted, found);
}
@@ -114,7 +114,7 @@ static enum ht_flags __ht_find(struct htree_state *hts, struct hash_tree *htree,
_retry:
*rtree = htree;
ncnt = ht_ncnt_get(htree[hts->hkey].next);
- if (ncnt == 0)
+ if (unlikely(ncnt == 0))
goto _next_step;
hlist_for_each_entry(pos, &htree[hts->hkey].head, hnode) {
@@ -180,7 +180,7 @@ struct htree_data *ht_find(struct htree_state *hts,
struct htree_data *rdata = NULL;
struct hash_tree *rtree;
- if (!htree)
+ if (unlikely(!htree))
return NULL;
if (_ht_find(hts, htree, index, &rdata, &rtree) == htf_find)
@@ -232,7 +232,7 @@ static void _ht_move_to_next(struct htree_state *hts, struct htree_data *sdata,
}
ncnt = ht_ncnt_get(ntree[hkey].next);
- if (ncnt == 0) {
+ if (unlikely(ncnt == 0)) {
htree_add_head(ntree, &edata->hnode, hkey);
goto _next;
}
@@ -292,7 +292,7 @@ static void _ht_insert(struct htree_state *hts, struct hash_tree *htree,
hts->hkey = ht_get_hkey(index, hts->dept, bits, hts->idxt);
ncnt = ht_ncnt_get(htree[hts->hkey].next);
- if (ncnt == 0) {
+ if (unlikely(ncnt == 0)) {
htree_add_head(htree, &hdata->hnode, hts->hkey);
goto _finish;
}
@@ -348,7 +348,7 @@ struct htree_data *ht_insert(struct htree_state *hts, struct hash_tree *htree,
struct hash_tree *rtree = NULL;
enum ht_flags htf;
- if (!htree)
+ if (unlikely(!htree))
return NULL;
htf = _ht_find(hts, htree, hdata->index, &rdata, &rtree);
@@ -379,7 +379,7 @@ static enum ht_flags ___ht_erase(struct htree_state *hts,
if (htree[k].next)
break;
- if (k == anum) {
+ if (unlikely(k == anum)) {
kfree(htree);
hts->acnt--;
hts->dept--;
@@ -408,7 +408,7 @@ static int __ht_erase(struct htree_state *hts, struct hash_tree *htree,
ncnt = ht_ncnt_get(htree[key].next);
bits = ht_bits_from_depth(hts->sbit, hts->dept);
- if (ncnt == 0)
+ if (unlikely(ncnt == 0))
goto _next_step;
hlist_for_each_entry_safe(pos, tmp, &htree[key].head, hnode) {
@@ -429,7 +429,7 @@ static int __ht_erase(struct htree_state *hts, struct hash_tree *htree,
}
}
- if (ncnt == 0)
+ if (unlikely(ncnt == 0))
ret = ___ht_erase(hts, htree, bits);
if (ret > htf_none) /* erased or freed */
@@ -444,7 +444,7 @@ static int __ht_erase(struct htree_state *hts, struct hash_tree *htree,
/* must be recursive call */
ret = __ht_erase(hts, _next, rdata, index);
- if (ret == htf_freed) {
+ if (unlikely(ret == htf_freed)) {
WRITE_ONCE(htree[key].next, ht_ncnt_set(NULL, ncnt));
ret = htf_erase;
}
@@ -484,7 +484,7 @@ struct htree_data *ht_erase(struct htree_state *hts,
{
struct htree_data *rdata = NULL;
- if (!htree)
+ if (unlikely(!htree))
return NULL;
if (_ht_erase(hts, htree, &rdata, index) == htf_erase)
@@ -558,7 +558,7 @@ enum ht_flags ht_destroy_lock(struct htree_state *hts, struct htree_root *root)
return htf_ok;
htree = htree_first_rcu(root);
- if (!htree)
+ if (unlikely(!htree))
return htf_none;
hts->dept = 0;
@@ -598,7 +598,7 @@ static void __ht_statis(struct htree_state *hts,
for (k = 0; k < anum; k++) {
ncnt = ht_ncnt_get(htree[k].next);
- if (ncnt > 0) {
+ if (likely(ncnt > 0)) {
(*dcnt) += ncnt;
}
_next = ht_ncnt_pointer(htree[k].next);
@@ -631,7 +631,7 @@ void ht_statis(struct htree_state *hts,
hts->dept = 0;
hts->dmax = 0;
- if (!htree)
+ if (unlikely(!htree))
return;
__ht_statis(hts, htree, acnt, dcnt);
@@ -10,6 +10,7 @@
#include <linux/maple_tree.h>
#include <linux/module.h>
#include <linux/rwsem.h>
+#include <asm/timex.h>
#define MTREE_ALLOC_MAX 0x2000000000000Ul
#define CONFIG_MAPLE_SEARCH
@@ -3638,6 +3639,34 @@ static noinline void __init alloc_cyclic_testing(struct maple_tree *mt)
MT_BUG_ON(mt, ret != 1);
}
+static noinline void check_latency(struct maple_tree *mt)
+{
+ MA_STATE(mas, mt, 25203307, 25203307);
+ cycles_t time1, time2;
+ void *val;
+ unsigned long i, cnt = 1 << 20;
+
+ for (i = 0; i < cnt; i++)
+ mtree_store(mt, i, xa_mk_value(i), GFP_KERNEL);
+
+ pr_info("\n*check_latency(): index nr:%lu\n", cnt);
+
+ time1 = get_cycles();
+ mtree_store(mt, cnt, xa_mk_value(cnt), GFP_KERNEL);
+ time2 = get_cycles();
+ pr_info("mtree_store(): latency:%lu cycles\n", time2 - time1);
+
+ time1 = get_cycles();
+ val = mas_find(&mas, ULONG_MAX);
+ time2 = get_cycles();
+ pr_info("mas_find(): latency:%lu cycles\n", time2 - time1);
+
+ time1 = get_cycles();
+ mtree_erase(mt, cnt);
+ time2 = get_cycles();
+ pr_info("mtree_erase(): latency:%lu cycles\n", time2 - time1);
+}
+
static DEFINE_MTREE(tree);
static int __init maple_tree_seed(void)
{
@@ -3923,6 +3952,9 @@ static int __init maple_tree_seed(void)
alloc_cyclic_testing(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_latency(&tree);
+ mtree_destroy(&tree);
#if defined(BENCH)
skip:
@@ -8,6 +8,7 @@
#include <linux/xarray.h>
#include <linux/module.h>
+#include <asm/timex.h>
static unsigned int tests_run;
static unsigned int tests_passed;
@@ -2125,6 +2126,34 @@ static noinline void check_destroy(struct xarray *xa)
#endif
}
+static noinline void check_latency(struct xarray *xa)
+{
+ unsigned long i, index;
+ unsigned long cnt = 1 << 20;
+ cycles_t time1, time2;
+
+ for (i = 0; i < cnt; i++)
+ xa_store(xa, i, xa_mk_index(i), GFP_KERNEL);
+
+ pr_info("\n*check_latency: index nr:%lu\n", cnt);
+ index = 25203307;
+
+ time1 = get_cycles();
+ xa_store(xa, index, xa_mk_index(index), GFP_KERNEL);
+ time2 = get_cycles();
+ pr_info("xa_store(): latency: %lu cycles\n", time2 - time1);
+
+ time1 = get_cycles();
+ xa_find(xa, &index, ULONG_MAX, XA_PRESENT);
+ time2 = get_cycles();
+ pr_info("xa_find(): latency: %lu cycles\n", time2 - time1);
+
+ time1 = get_cycles();
+ xa_erase(xa, index);
+ time2 = get_cycles();
+ pr_info("xa_erase(): latency: %lu cycles\n", time2 - time1);
+}
+
static DEFINE_XARRAY(array);
static int xarray_checks(void)
@@ -2162,6 +2191,8 @@ static int xarray_checks(void)
check_workingset(&array, 64);
check_workingset(&array, 4096);
+ check_latency(&array);
+
printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
return (tests_run == tests_passed) ? 0 : -EINVAL;
}
Added get_cycles() to measure execution time during insert, find, and erase operations. Added check_latency() in the lib/test_xarray.c and the lib/test_maple_tree.c Added likely/unlikely to improve if conditional code. check_latency() measures latency using get_cycles() with 1M indices(nr) on XArray and Maple Tree. The output of this can be used to make the performance comparison table like this: The numerical unit is cycle. performance insert find erase ------------------------------------------------ XArray 9 6 16 Maple Tree 11 7 27 Hash Tree 6 2 12 ------------------------------------------------ Full source of Hash Tree is also available in the following github link: https://github.com/kernel-bz/htree.git Signed-off-by: JaeJoon Jung <rgbi3307@gmail.com> --- lib/htree-test.c | 90 +++++++++++++++++++++++++++---------------- lib/htree.c | 26 ++++++------- lib/test_maple_tree.c | 32 +++++++++++++++ lib/test_xarray.c | 31 +++++++++++++++ 4 files changed, 132 insertions(+), 47 deletions(-)