Message ID | 1437744306-7911-1-git-send-email-alex.bennee@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote: > This adds a fairly brain dead torture test for TLB flushes intended for > stressing the MTTCG QEMU build. It takes the usual -smp option for > multiple CPUs. > > By default it will do a TLBIALL flush after each cycle. If you pass > -append "page" to the kernel it will take it in turns to flush each of > the computation functions. At the moment it doesn't do any re-mapping of > pages but maybe that is something that could be done in the future. > > [DEV VERSION FOR COMMENT] > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > --- > arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++ > config/config-arm-common.mak | 4 +- > lib/arm/asm/mmu.h | 11 +++ > 3 files changed, 177 insertions(+), 1 deletion(-) > create mode 100644 arm/tlbflush.c > > diff --git a/arm/tlbflush.c b/arm/tlbflush.c > new file mode 100644 > index 0000000..6eeff18 > --- /dev/null > +++ b/arm/tlbflush.c > @@ -0,0 +1,163 @@ > +#include <libcflat.h> > +#include <asm/smp.h> > +#include <asm/cpumask.h> > +#include <asm/barrier.h> > +#include <asm/mmu.h> > + > +#define SEQ_LENGTH 10 > + > +static cpumask_t smp_test_complete; > +static int flush_count = 100000; > +static int flush_self = 1; > +static int flush_page = 0; > + > +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array) You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing the test to also work for aarch64, as we're using 64k pages on aarch64. 
> +{ > + int i; > + unsigned int sum=0; > + for (i=0; i<length; i++) > + { > + unsigned int val = *array++; > + sum ^= val; > + sum ^= (val >> (val % 16)); > + sum ^= (val << (val % 32)); > + } > + > + return sum; > +} > + > +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array) > +{ > + int i; > + > + /* first two values */ > + array[0] = 0; > + array[1] = 1; > + for (i=2; i<length; i++) > + { > + array[i] = array[i-2] + array[i-1]; > + } > +} > + > +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n) > +{ > + unsigned int i; > + unsigned long long fac = 1; > + for (i=1; i<=n; i++) > + { > + fac = fac * i; > + } > + return fac; > +} > + > +/* do some computationally expensive stuff, return a checksum of the > + * results */ > +__attribute__((aligned(0x1000))) unsigned int do_computation(void) > +{ > + unsigned int fib_array[SEQ_LENGTH]; > + unsigned long long facfib_array[SEQ_LENGTH]; > + unsigned int fib_hash, facfib_hash; > + int cpu = smp_processor_id(); > + int i, j; > + > + create_fib_sequence(SEQ_LENGTH, &fib_array[0]); > + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]); > + for (i=0; i<SEQ_LENGTH; i++) { > + for (j=0; j<fib_array[i]; j++) { > + facfib_array[i] = factorial(fib_array[i]+j); > + } > + } > + facfib_hash = 0; > + for (i=0; i<SEQ_LENGTH; i++) { > + for (j=0; j<fib_array[i]; j++) { > + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]); > + } > + } > + > +#if 0 > + printf("CPU:%d FIBSEQ ", cpu); > + for (i=0; i<SEQ_LENGTH; i++) > + printf("%u,", fib_array[i]); > + printf("\n"); > + > + printf("CPU:%d FACFIB ", cpu); > + for (i=0; i<SEQ_LENGTH; i++) > + printf("%llu,", facfib_array[i]); > + printf("\n"); > +#endif > + > + return (fib_hash ^ facfib_hash); > +} > + > +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation}; I can't comment on whether or not the complexity of do_computation is necessary 
for your test, but it seems like overkill. Comments explaining why it's necessary would be good. > + > +static void test_flush(void) > +{ > + int i, errors = 0; > + int cpu = smp_processor_id(); > + > + unsigned int ref; > + > + printf("CPU%d online\n", cpu); > + > + ref = do_computation(); What makes you sure that the first time you do the computation per cpu is correct? I think computing it externally, and saving the result, i.e. #define EXPECTED_RESULT 0x12345678 would be more reliable. > + > + for (i=0; i < flush_count; i++) { > + unsigned int this_ref = do_computation(); > + > + if (this_ref != ref) { > + errors++; > + printf("CPU%d: seq%d 0x%x!=0x%x\n", > + cpu, i, ref, this_ref); > + } > + > + if ((i % 1000) == 0) { > + printf("CPU%d: seq%d\n", cpu, i); > + } > + > + if (flush_self) { > + if (flush_page) { > + int j = (i % (sizeof(pages)/sizeof(void *))); libcflat.h has the ARRAY_SIZE macro > + flush_tlb_page((unsigned long)pages[j]); > + } else { > + flush_tlb_all(); > + } > + } > + } > + > + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors); > + > + cpumask_set_cpu(cpu, &smp_test_complete); > + if (cpu != 0) > + halt(); > +} > + > +int main(int argc, char **argv) > +{ > + int cpu, i; > + > + report_prefix_push("tlbflush"); > + > + for (i=0; i<argc; i++) { > + char *arg = argv[i]; > +/* printf("arg:%d:%s\n", i, arg); */ > + > + if (strcmp(arg, "page") == 0) { > + report_prefix_push("page"); > + flush_page = 1; > + } > + } > + > + for_each_present_cpu(cpu) { > + if (cpu == 0) > + continue; > + smp_boot_secondary(cpu, test_flush); > + } > + > + test_flush(); > + > + while (!cpumask_full(&smp_test_complete)) > + cpu_relax(); > + > + return report_summary(); As we use the kernel coding style you should run $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c Also, please rename to tlbflush-test.c to differentiate it from an implementation of tlbflush support, and to make the standalone test name (if we commit those patches) more descriptive. 
> +} > diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak > index 0674daa..5b14db4 100644 > --- a/config/config-arm-common.mak > +++ b/config/config-arm-common.mak > @@ -11,7 +11,8 @@ endif > > tests-common = \ > $(TEST_DIR)/selftest.flat \ > - $(TEST_DIR)/spinlock-test.flat > + $(TEST_DIR)/spinlock-test.flat \ > + $(TEST_DIR)/tlbflush.flat As we're adding tests faster now it's becoming clear that the '\' list isn't so great. To add a new test at the bottom we always have to modify the last line too. We should either add the new one at the top (right below the 'test-common =' line), or change this to a '+=' sequence like some other lists are done. > > all: test_cases > > @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests) > > $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o > $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o > +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o > diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h > index c1bd01c..2bb0cde 100644 > --- a/lib/arm/asm/mmu.h > +++ b/lib/arm/asm/mmu.h > @@ -14,8 +14,11 @@ > #define PTE_AF PTE_EXT_AF > #define PTE_WBWA L_PTE_MT_WRITEALLOC > > +/* See B3.18.7 TLB maintenance operations */ > + > static inline void local_flush_tlb_all(void) > { > + /* TLBIALL */ > asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0)); > dsb(); > isb(); > @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void) > local_flush_tlb_all(); > } > > +static inline void flush_tlb_page(unsigned long vaddr) > +{ > + /* TLBIMVAA */ > + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr)); > + dsb(); > + isb(); > +} > + > #include <asm/mmu-api.h> > > #endif /* __ASMARM_MMU_H_ */ This mmu.h change looks good, but please add the arm64 flush_tlb_page at the same time. And anyway, I guess you'll want your test to work for both arm and aarch64? 
Thanks, drew -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Andrew Jones <drjones@redhat.com> writes: > On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote: >> This adds a fairly brain dead torture test for TLB flushes intended for >> stressing the MTTCG QEMU build. It takes the usual -smp option for >> multiple CPUs. >> >> By default it will do a TLBIALL flush after each cycle. If you pass >> -append "page" to the kernel it will take it in turns to flush each of >> the computation functions. At the moment it doesn't do any re-mapping of >> pages but maybe that is something that could be done in the future. >> >> [DEV VERSION FOR COMMENT] >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> --- >> arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++ >> config/config-arm-common.mak | 4 +- >> lib/arm/asm/mmu.h | 11 +++ >> 3 files changed, 177 insertions(+), 1 deletion(-) >> create mode 100644 arm/tlbflush.c >> >> diff --git a/arm/tlbflush.c b/arm/tlbflush.c >> new file mode 100644 >> index 0000000..6eeff18 >> --- /dev/null >> +++ b/arm/tlbflush.c >> @@ -0,0 +1,163 @@ >> +#include <libcflat.h> >> +#include <asm/smp.h> >> +#include <asm/cpumask.h> >> +#include <asm/barrier.h> >> +#include <asm/mmu.h> >> + >> +#define SEQ_LENGTH 10 >> + >> +static cpumask_t smp_test_complete; >> +static int flush_count = 100000; >> +static int flush_self = 1; >> +static int flush_page = 0; >> + >> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array) > > You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing > the test to also work for aarch64, as we're using 64k pages on > aarch64. Good point. 
> >> +{ >> + int i; >> + unsigned int sum=0; >> + for (i=0; i<length; i++) >> + { >> + unsigned int val = *array++; >> + sum ^= val; >> + sum ^= (val >> (val % 16)); >> + sum ^= (val << (val % 32)); >> + } >> + >> + return sum; >> +} >> + >> +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array) >> +{ >> + int i; >> + >> + /* first two values */ >> + array[0] = 0; >> + array[1] = 1; >> + for (i=2; i<length; i++) >> + { >> + array[i] = array[i-2] + array[i-1]; >> + } >> +} >> + >> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n) >> +{ >> + unsigned int i; >> + unsigned long long fac = 1; >> + for (i=1; i<=n; i++) >> + { >> + fac = fac * i; >> + } >> + return fac; >> +} >> + >> +/* do some computationally expensive stuff, return a checksum of the >> + * results */ >> +__attribute__((aligned(0x1000))) unsigned int do_computation(void) >> +{ >> + unsigned int fib_array[SEQ_LENGTH]; >> + unsigned long long facfib_array[SEQ_LENGTH]; >> + unsigned int fib_hash, facfib_hash; >> + int cpu = smp_processor_id(); >> + int i, j; >> + >> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]); >> + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]); >> + for (i=0; i<SEQ_LENGTH; i++) { >> + for (j=0; j<fib_array[i]; j++) { >> + facfib_array[i] = factorial(fib_array[i]+j); >> + } >> + } >> + facfib_hash = 0; >> + for (i=0; i<SEQ_LENGTH; i++) { >> + for (j=0; j<fib_array[i]; j++) { >> + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]); >> + } >> + } >> + >> +#if 0 >> + printf("CPU:%d FIBSEQ ", cpu); >> + for (i=0; i<SEQ_LENGTH; i++) >> + printf("%u,", fib_array[i]); >> + printf("\n"); >> + >> + printf("CPU:%d FACFIB ", cpu); >> + for (i=0; i<SEQ_LENGTH; i++) >> + printf("%llu,", facfib_array[i]); >> + printf("\n"); >> +#endif >> + >> + return (fib_hash ^ facfib_hash); >> +} >> + >> +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation}; 
> > I can't comment on whether or not the complexity of do_computation is > necessary for your test, but it seems like overkill. Comments explaining > why it's necessary would be good. OK. From QEMUs TCG point of view I just want to ensure I have more than two basic blocks per-page region so I can check the block-chaining in-page and jump caching intra-page which are both affected on flushes. A computationally complex routine with a known answer would be nicer though I guess. > >> + >> +static void test_flush(void) >> +{ >> + int i, errors = 0; >> + int cpu = smp_processor_id(); >> + >> + unsigned int ref; >> + >> + printf("CPU%d online\n", cpu); >> + >> + ref = do_computation(); > > What makes you sure that the first time you do the computation > per cpu is correct? I think computing it externally, and saving > the result, i.e. > > #define EXPECTED_RESULT 0x12345678 > > would be more reliable. OK. > >> + >> + for (i=0; i < flush_count; i++) { >> + unsigned int this_ref = do_computation(); >> + >> + if (this_ref != ref) { >> + errors++; >> + printf("CPU%d: seq%d 0x%x!=0x%x\n", >> + cpu, i, ref, this_ref); >> + } >> + >> + if ((i % 1000) == 0) { >> + printf("CPU%d: seq%d\n", cpu, i); >> + } >> + >> + if (flush_self) { >> + if (flush_page) { >> + int j = (i % (sizeof(pages)/sizeof(void *))); > libcflat.h has the ARRAY_SIZE macro OK >> + flush_tlb_page((unsigned long)pages[j]); >> + } else { >> + flush_tlb_all(); >> + } >> + } >> + } >> + >> + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors); >> + >> + cpumask_set_cpu(cpu, &smp_test_complete); >> + if (cpu != 0) >> + halt(); >> +} >> + >> +int main(int argc, char **argv) >> +{ >> + int cpu, i; >> + >> + report_prefix_push("tlbflush"); >> + >> + for (i=0; i<argc; i++) { >> + char *arg = argv[i]; >> +/* printf("arg:%d:%s\n", i, arg); */ >> + >> + if (strcmp(arg, "page") == 0) { >> + report_prefix_push("page"); >> + flush_page = 1; >> + } >> + } >> + >> + for_each_present_cpu(cpu) { >> + if (cpu == 0) >> + 
continue; >> + smp_boot_secondary(cpu, test_flush); >> + } >> + >> + test_flush(); >> + >> + while (!cpumask_full(&smp_test_complete)) >> + cpu_relax(); >> + >> + return report_summary(); > > As we use the kernel coding style you should run > > $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c > > Also, please rename to tlbflush-test.c to differentiate it > from an implementation of tlbflush support, and to make > the standalone test name (if we commit those patches) more > descriptive. I'll have another poke at my editor config. It should have been setting the coding style automatically, although of course explicit local variables are better ;-) > >> +} >> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak >> index 0674daa..5b14db4 100644 >> --- a/config/config-arm-common.mak >> +++ b/config/config-arm-common.mak >> @@ -11,7 +11,8 @@ endif >> >> tests-common = \ >> $(TEST_DIR)/selftest.flat \ >> - $(TEST_DIR)/spinlock-test.flat >> + $(TEST_DIR)/spinlock-test.flat \ >> + $(TEST_DIR)/tlbflush.flat > > As we're adding tests faster now it's becoming clear that the '\' list > isn't so great. To add a new test at the bottom we always have to modify > the last line too. We should either add the new one at the top (right > below the 'test-common =' line), or change this to a '+=' sequence like > some other lists are done. 
> >> >> all: test_cases >> >> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests) >> >> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o >> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o >> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o >> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h >> index c1bd01c..2bb0cde 100644 >> --- a/lib/arm/asm/mmu.h >> +++ b/lib/arm/asm/mmu.h >> @@ -14,8 +14,11 @@ >> #define PTE_AF PTE_EXT_AF >> #define PTE_WBWA L_PTE_MT_WRITEALLOC >> >> +/* See B3.18.7 TLB maintenance operations */ >> + >> static inline void local_flush_tlb_all(void) >> { >> + /* TLBIALL */ >> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0)); >> dsb(); >> isb(); >> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void) >> local_flush_tlb_all(); >> } >> >> +static inline void flush_tlb_page(unsigned long vaddr) >> +{ >> + /* TLBIMVAA */ >> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr)); >> + dsb(); >> + isb(); >> +} >> + >> #include <asm/mmu-api.h> >> >> #endif /* __ASMARM_MMU_H_ */ > > This mmu.h change looks good, but please add the arm64 > flush_tlb_page at the same time. And anyway, I guess you'll > want your test to work for both arm and aarch64? Yes I will. Currently the MTTCG is arm32 only but this will be expanded. > > Thanks, > drew
On Mon, Jul 27, 2015 at 10:07:57AM +0100, Alex Bennée wrote: > > Andrew Jones <drjones@redhat.com> writes: > > > On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote: > >> This adds a fairly brain dead torture test for TLB flushes intended for > >> stressing the MTTCG QEMU build. It takes the usual -smp option for > >> multiple CPUs. > >> > >> By default it will do a TLBIALL flush after each cycle. If you pass > >> -append "page" to the kernel it will take it in turns to flush each of > >> the computation functions. At the moment it doesn't do any re-mapping of > >> pages but maybe that is something that could be done in the future. > >> > >> [DEV VERSION FOR COMMENT] > >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > >> --- > >> arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++ > >> config/config-arm-common.mak | 4 +- > >> lib/arm/asm/mmu.h | 11 +++ > >> 3 files changed, 177 insertions(+), 1 deletion(-) > >> create mode 100644 arm/tlbflush.c > >> > >> diff --git a/arm/tlbflush.c b/arm/tlbflush.c > >> new file mode 100644 > >> index 0000000..6eeff18 > >> --- /dev/null > >> +++ b/arm/tlbflush.c > >> @@ -0,0 +1,163 @@ > >> +#include <libcflat.h> > >> +#include <asm/smp.h> > >> +#include <asm/cpumask.h> > >> +#include <asm/barrier.h> > >> +#include <asm/mmu.h> > >> + > >> +#define SEQ_LENGTH 10 > >> + > >> +static cpumask_t smp_test_complete; > >> +static int flush_count = 100000; > >> +static int flush_self = 1; > >> +static int flush_page = 0; > >> + > >> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array) > > > > You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing > > the test to also work for aarch64, as we're using 64k pages on > > aarch64. > > Good point. 
> > > > >> +{ > >> + int i; > >> + unsigned int sum=0; > >> + for (i=0; i<length; i++) > >> + { > >> + unsigned int val = *array++; > >> + sum ^= val; > >> + sum ^= (val >> (val % 16)); > >> + sum ^= (val << (val % 32)); > >> + } > >> + > >> + return sum; > >> +} > >> + > >> +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array) > >> +{ > >> + int i; > >> + > >> + /* first two values */ > >> + array[0] = 0; > >> + array[1] = 1; > >> + for (i=2; i<length; i++) > >> + { > >> + array[i] = array[i-2] + array[i-1]; > >> + } > >> +} > >> + > >> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n) > >> +{ > >> + unsigned int i; > >> + unsigned long long fac = 1; > >> + for (i=1; i<=n; i++) > >> + { > >> + fac = fac * i; > >> + } > >> + return fac; > >> +} > >> + > >> +/* do some computationally expensive stuff, return a checksum of the > >> + * results */ > >> +__attribute__((aligned(0x1000))) unsigned int do_computation(void) > >> +{ > >> + unsigned int fib_array[SEQ_LENGTH]; > >> + unsigned long long facfib_array[SEQ_LENGTH]; > >> + unsigned int fib_hash, facfib_hash; > >> + int cpu = smp_processor_id(); > >> + int i, j; > >> + > >> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]); > >> + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]); > >> + for (i=0; i<SEQ_LENGTH; i++) { > >> + for (j=0; j<fib_array[i]; j++) { > >> + facfib_array[i] = factorial(fib_array[i]+j); > >> + } > >> + } > >> + facfib_hash = 0; > >> + for (i=0; i<SEQ_LENGTH; i++) { > >> + for (j=0; j<fib_array[i]; j++) { > >> + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]); > >> + } > >> + } > >> + > >> +#if 0 > >> + printf("CPU:%d FIBSEQ ", cpu); > >> + for (i=0; i<SEQ_LENGTH; i++) > >> + printf("%u,", fib_array[i]); > >> + printf("\n"); > >> + > >> + printf("CPU:%d FACFIB ", cpu); > >> + for (i=0; i<SEQ_LENGTH; i++) > >> + printf("%llu,", facfib_array[i]); > >> + printf("\n"); > >> 
+#endif > >> + > >> + return (fib_hash ^ facfib_hash); > >> +} > >> + > >> +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation}; > > > > I can't comment on whether or not the complexity of do_computation is > > necessary for your test, but it seems like overkill. Comments explaining > > why it's necessary would be good. > > OK. From QEMUs TCG point of view I just want to ensure I have more than two > basic blocks per-page region so I can check the block-chaining in-page > and jump caching intra-page which are both affected on flushes. A > computationally complex routine with a known answer would be nicer > though I guess. > > > > >> + > >> +static void test_flush(void) > >> +{ > >> + int i, errors = 0; > >> + int cpu = smp_processor_id(); > >> + > >> + unsigned int ref; > >> + > >> + printf("CPU%d online\n", cpu); > >> + > >> + ref = do_computation(); > > > > What makes you sure that the first time you do the computation > > per cpu is correct? I think computing it externally, and saving > > the result, i.e. > > > > #define EXPECTED_RESULT 0x12345678 > > > > would be more reliable. > > OK. 
> > > > >> + > >> + for (i=0; i < flush_count; i++) { > >> + unsigned int this_ref = do_computation(); > >> + > >> + if (this_ref != ref) { > >> + errors++; > >> + printf("CPU%d: seq%d 0x%x!=0x%x\n", > >> + cpu, i, ref, this_ref); > >> + } > >> + > >> + if ((i % 1000) == 0) { > >> + printf("CPU%d: seq%d\n", cpu, i); > >> + } > >> + > >> + if (flush_self) { > >> + if (flush_page) { > >> + int j = (i % (sizeof(pages)/sizeof(void *))); > > libcflat.h has the ARRAY_SIZE macro > > OK > > >> + flush_tlb_page((unsigned long)pages[j]); > >> + } else { > >> + flush_tlb_all(); > >> + } > >> + } > >> + } > >> + > >> + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors); > >> + > >> + cpumask_set_cpu(cpu, &smp_test_complete); > >> + if (cpu != 0) > >> + halt(); > >> +} > >> + > >> +int main(int argc, char **argv) > >> +{ > >> + int cpu, i; > >> + > >> + report_prefix_push("tlbflush"); > >> + > >> + for (i=0; i<argc; i++) { > >> + char *arg = argv[i]; > >> +/* printf("arg:%d:%s\n", i, arg); */ > >> + > >> + if (strcmp(arg, "page") == 0) { > >> + report_prefix_push("page"); > >> + flush_page = 1; > >> + } > >> + } > >> + > >> + for_each_present_cpu(cpu) { > >> + if (cpu == 0) > >> + continue; > >> + smp_boot_secondary(cpu, test_flush); > >> + } > >> + > >> + test_flush(); > >> + > >> + while (!cpumask_full(&smp_test_complete)) > >> + cpu_relax(); > >> + > >> + return report_summary(); > > > > As we use the kernel coding style you should run > > > > $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c > > > > Also, please rename to tlbflush-test.c to differentiate it > > from an implementation of tlbflush support, and to make > > the standalone test name (if we commit those patches) more > > descriptive. > > I'll have another poke at my editor config. 
It should have been setting > the coding style automatically, although of course explicit local > variables are better ;-) > > > > >> +} > >> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak > >> index 0674daa..5b14db4 100644 > >> --- a/config/config-arm-common.mak > >> +++ b/config/config-arm-common.mak > >> @@ -11,7 +11,8 @@ endif > >> > >> tests-common = \ > >> $(TEST_DIR)/selftest.flat \ > >> - $(TEST_DIR)/spinlock-test.flat > >> + $(TEST_DIR)/spinlock-test.flat \ > >> + $(TEST_DIR)/tlbflush.flat > > > > As we're adding tests faster now it's becoming clear that the '\' list > > isn't so great. To add a new test at the bottom we always have to modify > > the last line too. We should either add the new one at the top (right > > below the 'test-common =' line), or change this to a '+=' sequence like > > some other lists are done. > > > >> > >> all: test_cases > >> > >> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests) > >> > >> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o > >> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o > >> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o > >> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h > >> index c1bd01c..2bb0cde 100644 > >> --- a/lib/arm/asm/mmu.h > >> +++ b/lib/arm/asm/mmu.h > >> @@ -14,8 +14,11 @@ > >> #define PTE_AF PTE_EXT_AF > >> #define PTE_WBWA L_PTE_MT_WRITEALLOC > >> > >> +/* See B3.18.7 TLB maintenance operations */ > >> + > >> static inline void local_flush_tlb_all(void) > >> { > >> + /* TLBIALL */ > >> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0)); > >> dsb(); > >> isb(); > >> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void) > >> local_flush_tlb_all(); > >> } > >> > >> +static inline void flush_tlb_page(unsigned long vaddr) > >> +{ > >> + /* TLBIMVAA */ > >> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr)); > >> + dsb(); > >> + isb(); > >> +} > >> + > >> #include <asm/mmu-api.h> > 
>> > >> #endif /* __ASMARM_MMU_H_ */ > > > > This mmu.h change looks good, but please add the arm64 > > flush_tlb_page at the same time. And anyway, I guess you'll > > want your test to work for both arm and aarch64? > > Yes I will. Currently the MTTCG is arm32 only but this will be expanded. Actually, I'd also like the arm*/asm/mmu.h file changes to be in a separate patch, they can be together in one patch, but that patch should be separate from the unit test. Thanks, drew -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 27/07/2015 09:54, Andrew Jones wrote: > Also, please rename to tlbflush-test.c to differentiate it > from an implementation of tlbflush support, and to make > the standalone test name (if we commit those patches) more > descriptive. I disagree here. Support code would go in lib/arm. > As we're adding tests faster now it's becoming clear that the '\' list > isn't so great. To add a new test at the bottom we always have to modify > the last line too. We should either add the new one at the top (right > below the 'test-common =' line), or change this to a '+=' sequence like > some other lists are done. If you prefer += that's okay, but then please also modify cflatobjs in the Makefile and the x86 fragments in config/. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jul 29, 2015 at 03:58:59PM +0200, Paolo Bonzini wrote: > > > On 27/07/2015 09:54, Andrew Jones wrote: > > Also, please rename to tlbflush-test.c to differentiate it > > from an implementation of tlbflush support, and to make > > the standalone test name (if we commit those patches) more > > descriptive. > > I disagree here. Support code would go in lib/arm. Generally yes, and for arm, so far yes, but not always. In x86 we have kvmclock.c (support) vs. kvmclock_test.c (the test). And powerpc will have a support C file in powerpc vs. lib/powerpc as well, as I feel it fits better there, since it's really part of cstart. But that said, I'm not overly opposed to dropping -test from the common case, only using it when necessary. We can also append -test for standalone test names later, if we want to. Thanks, drew -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arm/tlbflush.c b/arm/tlbflush.c new file mode 100644 index 0000000..6eeff18 --- /dev/null +++ b/arm/tlbflush.c @@ -0,0 +1,163 @@ +#include <libcflat.h> +#include <asm/smp.h> +#include <asm/cpumask.h> +#include <asm/barrier.h> +#include <asm/mmu.h> + +#define SEQ_LENGTH 10 + +static cpumask_t smp_test_complete; +static int flush_count = 100000; +static int flush_self = 1; +static int flush_page = 0; + +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array) +{ + int i; + unsigned int sum=0; + for (i=0; i<length; i++) + { + unsigned int val = *array++; + sum ^= val; + sum ^= (val >> (val % 16)); + sum ^= (val << (val % 32)); + } + + return sum; +} + +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array) +{ + int i; + + /* first two values */ + array[0] = 0; + array[1] = 1; + for (i=2; i<length; i++) + { + array[i] = array[i-2] + array[i-1]; + } +} + +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n) +{ + unsigned int i; + unsigned long long fac = 1; + for (i=1; i<=n; i++) + { + fac = fac * i; + } + return fac; +} + +/* do some computationally expensive stuff, return a checksum of the + * results */ +__attribute__((aligned(0x1000))) unsigned int do_computation(void) +{ + unsigned int fib_array[SEQ_LENGTH]; + unsigned long long facfib_array[SEQ_LENGTH]; + unsigned int fib_hash, facfib_hash; + int cpu = smp_processor_id(); + int i, j; + + create_fib_sequence(SEQ_LENGTH, &fib_array[0]); + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]); + for (i=0; i<SEQ_LENGTH; i++) { + for (j=0; j<fib_array[i]; j++) { + facfib_array[i] = factorial(fib_array[i]+j); + } + } + facfib_hash = 0; + for (i=0; i<SEQ_LENGTH; i++) { + for (j=0; j<fib_array[i]; j++) { + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]); + } + } + +#if 0 + printf("CPU:%d FIBSEQ ", cpu); + for (i=0; i<SEQ_LENGTH; i++) + printf("%u,", fib_array[i]); + 
printf("\n"); + + printf("CPU:%d FACFIB ", cpu); + for (i=0; i<SEQ_LENGTH; i++) + printf("%llu,", facfib_array[i]); + printf("\n"); +#endif + + return (fib_hash ^ facfib_hash); +} + +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation}; + +static void test_flush(void) +{ + int i, errors = 0; + int cpu = smp_processor_id(); + + unsigned int ref; + + printf("CPU%d online\n", cpu); + + ref = do_computation(); + + for (i=0; i < flush_count; i++) { + unsigned int this_ref = do_computation(); + + if (this_ref != ref) { + errors++; + printf("CPU%d: seq%d 0x%x!=0x%x\n", + cpu, i, ref, this_ref); + } + + if ((i % 1000) == 0) { + printf("CPU%d: seq%d\n", cpu, i); + } + + if (flush_self) { + if (flush_page) { + int j = (i % (sizeof(pages)/sizeof(void *))); + flush_tlb_page((unsigned long)pages[j]); + } else { + flush_tlb_all(); + } + } + } + + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors); + + cpumask_set_cpu(cpu, &smp_test_complete); + if (cpu != 0) + halt(); +} + +int main(int argc, char **argv) +{ + int cpu, i; + + report_prefix_push("tlbflush"); + + for (i=0; i<argc; i++) { + char *arg = argv[i]; +/* printf("arg:%d:%s\n", i, arg); */ + + if (strcmp(arg, "page") == 0) { + report_prefix_push("page"); + flush_page = 1; + } + } + + for_each_present_cpu(cpu) { + if (cpu == 0) + continue; + smp_boot_secondary(cpu, test_flush); + } + + test_flush(); + + while (!cpumask_full(&smp_test_complete)) + cpu_relax(); + + return report_summary(); +} diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak index 0674daa..5b14db4 100644 --- a/config/config-arm-common.mak +++ b/config/config-arm-common.mak @@ -11,7 +11,8 @@ endif tests-common = \ $(TEST_DIR)/selftest.flat \ - $(TEST_DIR)/spinlock-test.flat + $(TEST_DIR)/spinlock-test.flat \ + $(TEST_DIR)/tlbflush.flat all: test_cases @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests) $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o 
$(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h index c1bd01c..2bb0cde 100644 --- a/lib/arm/asm/mmu.h +++ b/lib/arm/asm/mmu.h @@ -14,8 +14,11 @@ #define PTE_AF PTE_EXT_AF #define PTE_WBWA L_PTE_MT_WRITEALLOC +/* See B3.18.7 TLB maintenance operations */ + static inline void local_flush_tlb_all(void) { + /* TLBIALL */ asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0)); dsb(); isb(); @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void) local_flush_tlb_all(); } +static inline void flush_tlb_page(unsigned long vaddr) +{ + /* TLBIMVAA */ + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr)); + dsb(); + isb(); +} + #include <asm/mmu-api.h> #endif /* __ASMARM_MMU_H_ */
This adds a fairly brain-dead torture test for TLB flushes intended for stressing the MTTCG QEMU build. It takes the usual -smp option for multiple CPUs. By default it will do a TLBIALL flush after each cycle. If you pass -append "page" to the kernel it will instead take turns flushing the TLB entry for the page containing each of the computation functions. At the moment it doesn't do any re-mapping of pages but maybe that is something that could be done in the future. [DEV VERSION FOR COMMENT] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> --- arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++ config/config-arm-common.mak | 4 +- lib/arm/asm/mmu.h | 11 +++ 3 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 arm/tlbflush.c