@@ -761,6 +761,13 @@ static void __init noinline detect_bugs(
*/
if (!cpu_has_nscb)
setup_force_cpu_cap(X86_BUG_NULL_SEG);
+
+ /*
+ * AMD CPUs not supporting CLFLUSHOPT require MFENCE to serialize
+ * CLFLUSH against other memory accesses.
+ */
+ if (!cpu_has_clflushopt)
+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE);
}
static void init_amd(struct cpuinfo_x86 *c)
@@ -245,12 +245,15 @@ unsigned int flush_area_local(const void
c->x86_clflush_size && c->x86_cache_size && sz &&
((sz >> 10) < c->x86_cache_size) )
{
- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT);
+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE);
for ( i = 0; i < sz; i += c->x86_clflush_size )
alternative_input("ds; clflush %0",
"data16 clflush %0", /* clflushopt */
X86_FEATURE_CLFLUSHOPT,
"m" (((const char *)va)[i]));
+ alternative_2("",
+ "sfence", X86_FEATURE_CLFLUSHOPT,
+ "mfence", X86_BUG_CLFLUSH_MFENCE);
flags &= ~FLUSH_CACHE;
}
else
@@ -274,6 +277,8 @@ void cache_writeback(const void *addr, u
unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
const void *end = addr + size;
+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE);
+
addr -= (unsigned long)addr & (clflush_size - 1);
for ( ; addr < end; addr += clflush_size )
{
@@ -47,6 +47,7 @@ XEN_CPUFEATURE(RET_SIMPLE, X86_SY
#define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */
#define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */
+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialize CLFLUSH */
/* Total number of capability words, inc synth and bug words. */
#define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
As noted in the context of 3330013e6739 ("VT-d / x86: re-arrange cache syncing"): While cache_writeback() has the SFENCE on the correct side of CLFLUSHOPT, flush_area_local() doesn't. While I can't prove it due to lacking a copy of the old SDM version, I can only assume this placement was a result of what had been described there originally. In any event recent versions of the SDM have been telling us otherwise. For AMD the situation is more complex: MFENCE is needed ahead and/or after CLFLUSH when the CPU doesn't also support CLFLUSHOPT. (It's "and" in the flush_area_local() case, as we cannot know what the caller's needs are. For cache_writeback() fencing ahead of the flush is sufficient.) Fixes: 623c720fc8da3 ("x86: use CLFLUSHOPT when available") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- TBD: I'd be okay with not touching cache_writeback(), for only being used by VT-d right now. --- v2: Mark AMD behavior as a bug rather than a feature. Retain quotes.