@@ -48,6 +48,7 @@ obj-$(CONFIG_INDIRECT_THUNK) += indirect
obj-$(CONFIG_PV) += ioport_emulate.o
obj-y += irq.o
obj-$(CONFIG_KEXEC) += machine_kexec.o
+obj-y += memset.o
obj-y += mm.o x86_64/mm.o
obj-$(CONFIG_HVM) += monitor.o
obj-y += mpparse.o
@@ -0,0 +1,30 @@
+#include <asm/asm_defns.h>
+
+.macro memset /* Basic variant: fill in 8-byte steps, then the 0-7 byte tail. Expects %rcx == %rdx == byte count (set up in FUNC(memset)), %rdi = destination, %sil = fill byte. */
+ and $7, %edx /* %edx = count % 8: bytes left over after the qword stores */
+ shr $3, %rcx /* %rcx = count / 8: number of qwords to store */
+ movzbl %sil, %esi /* keep only the low byte of the fill value, zero-extended */
+ mov $0x0101010101010101, %rax
+ imul %rsi, %rax /* replicate the fill byte into all 8 bytes of %rax */
+ mov %rdi, %r8 /* save the destination; STOS advances %rdi */
+ rep stosq /* store %rcx qwords of %rax; %rcx is 0 afterwards */
+ or %edx, %ecx /* %rcx is 0 here, so this loads the tail count and sets ZF if it is 0 */
+ jz 0f /* no tail bytes: skip the byte store */
+ rep stosb /* store the remaining tail bytes from %al */
+0:
+ mov %r8, %rax /* return value: the original destination pointer */
+ ret
+.endm
+
+.macro memset_erms /* ERMS variant: one byte-granular REP STOSB for the whole range. Expects %rcx = byte count (set up in FUNC(memset)), %rdi = destination, %esi = fill value. */
+ mov %esi, %eax /* STOSB stores %al, i.e. the low byte of the fill value */
+ mov %rdi, %r8 /* save the destination; STOS advances %rdi */
+ rep stosb /* store %rcx bytes of %al */
+ mov %r8, %rax /* return value: the original destination pointer */
+ ret
+.endm
+
+FUNC(memset) /* void *memset(void *s, int c, size_t n); assumes the x86-64 SysV convention: %rdi = s, %esi = c, %rdx = n */
+ mov %rdx, %rcx /* REP STOS takes its count from %rcx; done here so both variants start with %rcx = n */
+ ALTERNATIVE memset, memset_erms, X86_FEATURE_ERMS /* default body is the qword variant; the ERMS variant is patched in when X86_FEATURE_ERMS is set. Both bodies end in RET. */
+END(memset)
@@ -22,19 +22,6 @@ void *(memcpy)(void *dest, const void *s
return dest;
}
-void *(memset)(void *s, int c, size_t n)
-{
- long d0, d1;
-
- asm volatile (
- "rep stosb"
- : "=&c" (d0), "=&D" (d1)
- : "a" (c), "1" (s), "0" (n)
- : "memory");
-
- return s;
-}
-
void *(memmove)(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
Move the function to its own assembly file. Keeping it in C when the entire body is a single asm() statement isn't really helpful. Then provide two flavors: a "basic" version using qword steps for the bulk of the operation, and an ERMS version for modern hardware, to be substituted in via alternatives patching.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
We may want to consider branching over the REP STOSQ as well, if the number of qwords turns out to be zero. We may also want to consider using non-REP STOS{L,W,B} for the tail.
---
v4: Use %r8 instead of %rsi in a few places.
v3: Re-base.