@@ -4,164 +4,176 @@
#include "processor.h"
#include "kvmclock.h"
-#define DEFAULT_TEST_LOOPS 100000000L
-#define DEFAULT_THRESHOLD 5L
-
-long loops = DEFAULT_TEST_LOOPS;
-long sec = 0;
-long threshold = DEFAULT_THRESHOLD;
-
-struct test_info {
- struct spinlock lock;
- u64 warps; /* warp count */
- u64 stalls; /* stall count */
- long long worst; /* worst warp */
- volatile cycle_t last; /* last cycle seen by test */
- atomic_t ncpus; /* number of cpu in the test*/
- int check; /* check cycle ? */
+#define DURATION 2 /* testcase duration (s) */
+#define WC_DELTA_MAX 5 /* max delta of wallclock from expected */
+#define CPI_STABLE_MAX 1000 /* max cycles per iteration with TSC_STABLE */
+#define CPI_UNSTABLE_MAX 4000 /* ditto for no TSC_STABLE */
+
+struct warp_test_info {
+ unsigned long long warps;
+ unsigned long long stalls;
+ long long worst;
};
+struct warp_test_info wti[MAX_CPU];
-struct test_info ti[4];
-
-static void wallclock_test(void *data)
-{
- int *p_err = data;
- long ksec, offset;
- struct timespec ts;
+struct perf_test_info {
+ unsigned long long cycles;
+ unsigned long long loops;
+};
+struct perf_test_info pti[MAX_CPU];
- kvm_get_wallclock(&ts);
- ksec = ts.tv_sec;
+atomic_t cpus_left;
- offset = ksec - sec;
- printf("Seconds get from kvmclock: %ld (cpu %d, offset: %ld)\n", ksec, smp_id(), offset);
+static void get_wallclock_secs(void *data)
+{
+ struct timespec ts;
+ long *ksec = data;
- if (offset > threshold || offset < -threshold) {
- printf("offset too large!\n");
- (*p_err)++;
- }
+ kvm_get_wallclock(&ts);
+ *ksec = ts.tv_sec;
}
-static void kvm_clock_test(void *data)
+static void wallclock_test(int ncpus, long sec)
{
- struct test_info *hv_test_info = (struct test_info *)data;
- long i, check = hv_test_info->check;
-
- for (i = 0; i < loops; i++){
- cycle_t t0, t1;
- long long delta;
-
- if (check == 0) {
- kvm_clock_read();
- continue;
- }
-
- spin_lock(&hv_test_info->lock);
- t1 = kvm_clock_read();
- t0 = hv_test_info->last;
- hv_test_info->last = kvm_clock_read();
- spin_unlock(&hv_test_info->lock);
-
- delta = t1 - t0;
- if (delta < 0) {
- spin_lock(&hv_test_info->lock);
- ++hv_test_info->warps;
- if (delta < hv_test_info->worst){
- hv_test_info->worst = delta;
- printf("Worst warp %lld\n", hv_test_info->worst);
- }
- spin_unlock(&hv_test_info->lock);
- }
- if (delta == 0)
- ++hv_test_info->stalls;
-
- if (!((unsigned long)i & 31))
- asm volatile("rep; nop");
- }
-
- atomic_dec(&hv_test_info->ncpus);
+ int i;
+ for (i = 0; i < ncpus; ++i) {
+ long wc_sec, delta;
+ on_cpu(i, get_wallclock_secs, &wc_sec);
+
+ delta = wc_sec - sec;
+
+ if (delta > WC_DELTA_MAX || delta < -WC_DELTA_MAX) {
+ report("wallclock: %ld s, expected: %ld s, "
+ "delta: %ld s (expected < %ld s)", false, wc_sec,
+ sec, delta, WC_DELTA_MAX);
+ return;
+ }
+ }
+ report("all wallclocks within %ld s from expected", true,
+ WC_DELTA_MAX);
}
-static int cycle_test(int ncpus, int check, struct test_info *ti)
+static void warp_test_cpu(void *data)
{
- int i;
- unsigned long long begin, end;
+ struct warp_test_info *ti = data;
+ unsigned long long t = kvm_clock_read();
+ unsigned long long end = t + DURATION * NSEC_PER_SEC;
+ ti->warps = 0;
+ ti->stalls = 0;
+ ti->worst = 0;
+
+ do {
+ unsigned long long now = kvm_clock_read();
+ long long delta = now - t;
+
+ if (delta < 0) {
+ ti->warps++;
+ if (delta < ti->worst)
+ ti->worst = delta;
+ }
+ if (delta == 0)
+ ti->stalls++;
+
+ t = now;
+ } while (t < end);
+
+ atomic_dec(&cpus_left);
+}
- begin = rdtsc();
+static void perf_test_cpu(void *data)
+{
+ struct perf_test_info *ti = data;
+ unsigned long long end = kvm_clock_read() +
+ DURATION * NSEC_PER_SEC;
+ ti->loops = 0;
+ ti->cycles = rdtsc();
- atomic_set(&ti->ncpus, ncpus);
- ti->check = check;
- for (i = ncpus - 1; i >= 0; i--)
- on_cpu_async(i, kvm_clock_test, (void *)ti);
+ do
+ ti->loops++;
+ while (kvm_clock_read() < end);
- /* Wait for the end of other vcpu */
- while(atomic_read(&ti->ncpus))
- ;
+ ti->cycles = rdtsc() - ti->cycles;
- end = rdtsc();
+ atomic_dec(&cpus_left);
+}
- printf("Total vcpus: %d\n", ncpus);
- printf("Test loops: %ld\n", loops);
- if (check == 1) {
- printf("Total warps: %" PRId64 "\n", ti->warps);
- printf("Total stalls: %" PRId64 "\n", ti->stalls);
- printf("Worst warp: %lld\n", ti->worst);
- } else
- printf("TSC cycles: %lld\n", end - begin);
+static void warp_test(int ncpus, bool stable)
+{
+ int i;
+ unsigned long long warps = 0, stalls = 0;
+ long long worst = 0;
+
+ pvclock_set_flags(stable ? PVCLOCK_TSC_STABLE_BIT : 0);
+
+ atomic_set(&cpus_left, ncpus);
+ for (i = ncpus - 1; i >= 0; i--)
+ on_cpu_async(i, warp_test_cpu, &wti[i]);
+ while (atomic_read(&cpus_left));
+
+ for (i = 0; i < ncpus; i++) {
+ warps += wti[i].warps;
+ stalls += wti[i].stalls;
+ if (wti[i].worst < worst)
+ worst = wti[i].worst;
+ }
+
+ report("with%s TSC_STABLE: warps: %llu (worst %lld), stalls: %llu",
+ warps == 0, stable ? "" : "out", warps, worst, stalls);
+}
- return ti->warps ? 1 : 0;
+static void perf_test(int ncpus, bool stable)
+{
+ int i;
+ unsigned long long loops = 0, cycles = 0;
+ unsigned long long cpi_max;
+
+ pvclock_set_flags(stable ? PVCLOCK_TSC_STABLE_BIT : 0);
+ cpi_max = stable ? CPI_STABLE_MAX : CPI_UNSTABLE_MAX;
+
+ atomic_set(&cpus_left, ncpus);
+ for (i = ncpus - 1; i >= 0; i--)
+ on_cpu_async(i, perf_test_cpu, &pti[i]);
+ while (atomic_read(&cpus_left));
+
+ for (i = 0; i < ncpus; i++) {
+ loops += pti[i].loops;
+ cycles += pti[i].cycles;
+ }
+
+ cycles /= loops;
+ report("with%s TSC_STABLE: iterations/s/cpu: %llu, "
+ "cycles/iteration: %llu (expected < %u)",
+ cycles < cpi_max, stable ? "" : "out",
+ loops / DURATION / ncpus, cycles, cpi_max);
}
int main(int ac, char **av)
{
- int nerr = 0;
- int ncpus;
- int i;
-
- if (ac > 1)
- loops = atol(av[1]);
- if (ac > 2)
- sec = atol(av[2]);
- if (ac > 3)
- threshold = atol(av[3]);
-
- smp_init();
-
- ncpus = cpu_count();
- if (ncpus > MAX_CPU)
- ncpus = MAX_CPU;
- for (i = 0; i < ncpus; ++i)
- on_cpu(i, kvm_clock_init, (void *)0);
-
- if (ac > 2) {
- printf("Wallclock test, threshold %ld\n", threshold);
- printf("Seconds get from host: %ld\n", sec);
- for (i = 0; i < ncpus; ++i)
- on_cpu(i, wallclock_test, &nerr);
- }
-
- printf("Check the stability of raw cycle ...\n");
- pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT
- | PVCLOCK_RAW_CYCLE_BIT);
- if (cycle_test(ncpus, 1, &ti[0]))
- printf("Raw cycle is not stable\n");
- else
- printf("Raw cycle is stable\n");
-
- pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
- printf("Monotonic cycle test:\n");
- nerr += cycle_test(ncpus, 1, &ti[1]);
-
- printf("Measure the performance of raw cycle ...\n");
- pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT
- | PVCLOCK_RAW_CYCLE_BIT);
- cycle_test(ncpus, 0, &ti[2]);
-
- printf("Measure the performance of adjusted cycle ...\n");
- pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
- cycle_test(ncpus, 0, &ti[3]);
-
- for (i = 0; i < ncpus; ++i)
- on_cpu(i, kvm_clock_clear, (void *)0);
-
- return nerr > 0 ? 1 : 0;
+ int ncpus;
+ int i;
+ long sec = -1;
+
+ if (ac > 1)
+ sec = atol(av[1]);
+
+ smp_init();
+
+ ncpus = cpu_count();
+ if (ncpus > MAX_CPU)
+ ncpus = MAX_CPU;
+ for (i = 0; i < ncpus; ++i)
+ on_cpu(i, kvm_clock_init, (void *)0);
+
+ if (sec > 0)
+ wallclock_test(ncpus, sec);
+
+ warp_test(ncpus, true);
+ warp_test(ncpus, false);
+ perf_test(ncpus, true);
+ perf_test(ncpus, false);
+
+ for (i = 0; i < ncpus; ++i)
+ on_cpu(i, kvm_clock_clear, (void *)0);
+
+ return report_summary();
}
@@ -172,7 +172,7 @@ groups = tasks
[kvmclock_test]
file = kvmclock_test.flat
smp = 2
-extra_params = --append "10000000 `date +%s`"
+extra_params = --append "`date +%s`"
[pcid]
file = pcid.flat
The test for kvmclock uses a data structure to hold intermediate data which is shared across cpus, and is protected by a spinlock. As a result, the vCPUs are mostly contending on the spinlock rather than doing kvmclock reads. Rework the test to keep the intermediate data on per-cpu structures, and only merge the results at the end. As this resulted in a fairly big change to the test structure, go ahead and make other enhancements, namely: - use library functions for test results reporting - stop passing command-line parameters which can be sensibly set at compile time - limit the testcases by duration in seconds rather than by iterations - differentiate between kvmclock using TSC_STABLE bit and not using it by forcing either mode explicitly regardless of the host properties - yield test results that are easier to compare Signed-off-by: Roman Kagan <rkagan@virtuozzo.com> --- x86/kvmclock_test.c | 290 +++++++++++++++++++++++++++------------------------- x86/unittests.cfg | 2 +- 2 files changed, 152 insertions(+), 140 deletions(-)