--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -994,7 +994,7 @@ int arch_set_info_guest(
init_int80_direct_trap(v);
/* IOPL privileges are virtualised. */
- v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+ v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL;
v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
/* Ensure real hardware interrupts are enabled. */
@@ -1735,8 +1735,10 @@ static void load_segments(struct vcpu *n)
cs_and_mask = (unsigned short)regs->cs |
((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
/* Fold upcall mask into RFLAGS.IF. */
- eflags = regs->_eflags & ~X86_EFLAGS_IF;
+ eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+ if ( VM_ASSIST(n->domain, architectural_iopl) )
+ eflags |= n->arch.pv_vcpu.iopl;
if ( !ring_1(regs) )
{
@@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
vcpu_info(n, evtchn_upcall_mask) = 1;
regs->entry_vector |= TRAP_syscall;
- regs->_eflags &= 0xFFFCBEFFUL;
+ regs->_eflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
+ X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
regs->ss = FLAT_COMPAT_KERNEL_SS;
regs->_esp = (unsigned long)(esp-7);
regs->cs = FLAT_COMPAT_KERNEL_CS;
@@ -1781,8 +1784,10 @@ static void load_segments(struct vcpu *n)
((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
/* Fold upcall mask into RFLAGS.IF. */
- rflags = regs->rflags & ~X86_EFLAGS_IF;
+ rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+ if ( VM_ASSIST(n->domain, architectural_iopl) )
+ rflags |= n->arch.pv_vcpu.iopl;
if ( put_user(regs->ss, rsp- 1) |
put_user(regs->rsp, rsp- 2) |
@@ -1806,7 +1811,7 @@ static void load_segments(struct vcpu *n)
regs->entry_vector |= TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
- X86_EFLAGS_NT|X86_EFLAGS_TF);
+ X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
regs->ss = FLAT_KERNEL_SS;
regs->rsp = (unsigned long)(rsp-11);
regs->cs = FLAT_KERNEL_CS;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -529,7 +529,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
if ( set_iopl.iopl > 3 )
break;
ret = 0;
- curr->arch.pv_vcpu.iopl = set_iopl.iopl;
+ curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);
break;
}
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1779,6 +1779,17 @@ static int read_gate_descriptor(unsigned int gate_sel,
return 1;
}
+/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
+static bool_t iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
+{
+ unsigned int cpl = guest_kernel_mode(v, regs) ?
+ (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;
+
+ ASSERT((v->arch.pv_vcpu.iopl & ~X86_EFLAGS_IOPL) == 0);
+
+ return IOPL(cpl) <= v->arch.pv_vcpu.iopl;
+}
+
/* Has the guest requested sufficient permission for this I/O access? */
static int guest_io_okay(
unsigned int port, unsigned int bytes,
@@ -1788,7 +1799,7 @@ static int guest_io_okay(
int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
- if ( v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
+ if ( iopl_ok(v, regs) )
return 1;
if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
@@ -2346,7 +2357,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case 0xfa: /* CLI */
case 0xfb: /* STI */
- if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+ if ( !iopl_ok(v, regs) )
goto fail;
/*
* This is just too dangerous to allow, in my opinion. Consider if the
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -86,6 +86,7 @@ void __dummy__(void)
OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
+ OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
@@ -166,4 +167,6 @@ void __dummy__(void)
OFFSET(MB_flags, multiboot_info_t, flags);
OFFSET(MB_cmdline, multiboot_info_t, cmdline);
OFFSET(MB_mem_lower, multiboot_info_t, mem_lower);
+
+ OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
}
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -263,6 +263,7 @@ compat_create_bounce_frame:
movl UREGS_rsp+8(%rsp),%esi
.Lft4: mov UREGS_ss+8(%rsp),%fs
2:
+ movq VCPU_domain(%rbx),%r8
subl $3*4,%esi
movq VCPU_vcpu_info(%rbx),%rax
pushq COMPAT_VCPUINFO_upcall_mask(%rax)
@@ -277,9 +278,13 @@ compat_create_bounce_frame:
testb %al,%al # Bits 0-7: saved_upcall_mask
setz %ch # %ch == !saved_upcall_mask
movl UREGS_eflags+8(%rsp),%eax
- andl $~X86_EFLAGS_IF,%eax
+ andl $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
addb %ch,%ch # Bit 9 (EFLAGS.IF)
orb %ch,%ah # Fold EFLAGS.IF into %eax
+ xorl %ecx,%ecx # if ( VM_ASSIST(v->domain, architectural_iopl) )
+ testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%r8)
+ cmovnzl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL)
+ orl %ecx,%eax # Fold EFLAGS.IOPL into %eax
.Lft6: movl %eax,%fs:2*4(%rsi) # EFLAGS
movl UREGS_rip+8(%rsp),%eax
.Lft7: movl %eax,%fs:(%rsi) # EIP
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -99,6 +99,10 @@ unsigned int compat_iret(void)
domain_crash(v->domain);
return 0;
}
+
+ if ( VM_ASSIST(v->domain, architectural_iopl) )
+ v->arch.pv_vcpu.iopl = eflags & X86_EFLAGS_IOPL;
+
regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
if ( unlikely(eflags & X86_EFLAGS_VM) )
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -346,6 +346,7 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
subq $40,%rsi
movq UREGS_ss+8(%rsp),%rax
ASM_STAC
+ movq VCPU_domain(%rbx),%rdi
.Lft2: movq %rax,32(%rsi) # SS
movq UREGS_rsp+8(%rsp),%rax
.Lft3: movq %rax,24(%rsi) # RSP
@@ -362,9 +363,13 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
testb $0xFF,%al # Bits 0-7: saved_upcall_mask
setz %ch # %ch == !saved_upcall_mask
movl UREGS_eflags+8(%rsp),%eax
- andl $~X86_EFLAGS_IF,%eax
+ andl $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
addb %ch,%ch # Bit 9 (EFLAGS.IF)
orb %ch,%ah # Fold EFLAGS.IF into %eax
+ xorl %ecx,%ecx # if ( VM_ASSIST(v->domain, architectural_iopl) )
+ testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rdi)
+ cmovnzl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL)
+ orl %ecx,%eax # Fold EFLAGS.IOPL into %eax
.Lft5: movq %rax,16(%rsi) # RFLAGS
movq UREGS_rip+8(%rsp),%rax
.Lft6: movq %rax,(%rsi) # RIP
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -310,6 +310,9 @@ unsigned long do_iret(void)
toggle_guest_mode(v);
}
+ if ( VM_ASSIST(v->domain, architectural_iopl) )
+ v->arch.pv_vcpu.iopl = iret_saved.rflags & X86_EFLAGS_IOPL;
+
regs->rip = iret_saved.rip;
regs->cs = iret_saved.cs | 3; /* force guest privilege */
regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -331,6 +331,7 @@ extern unsigned long xen_phys_start;
(1UL << VMASST_TYPE_4gb_segments_notify) | \
(1UL << VMASST_TYPE_writable_pagetables) | \
(1UL << VMASST_TYPE_pae_extended_cr3) | \
+ (1UL << VMASST_TYPE_architectural_iopl) | \
(1UL << VMASST_TYPE_m2p_strict))
#define VM_ASSIST_VALID NATIVE_VM_ASSIST_VALID
#define COMPAT_VM_ASSIST_VALID (NATIVE_VM_ASSIST_VALID & \
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -470,7 +470,9 @@ struct pv_vcpu
/* I/O-port access bitmap. */
XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
unsigned int iobmp_limit; /* Number of ports represented in the bitmap. */
- unsigned int iopl; /* Current IOPL for this VCPU. */
+#define IOPL(val) MASK_INSR(val, X86_EFLAGS_IOPL)
+ unsigned int iopl; /* Current IOPL for this VCPU, shifted left by
+ * 12 to match the eflags register. */
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -503,6 +503,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
#define VMASST_TYPE_pae_extended_cr3 3
/*
+ * x86 guests: Sane behaviour for virtual iopl
+ * - virtual iopl updated from do_iret() hypercalls.
+ * - virtual iopl reported in bounce frames.
+ * - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl 4
+
+/*
* x86/64 guests: strictly hide M2P from user mode.
* This allows the guest to control respective hypervisor behavior:
* - when not set, L4 tables get created with the respective slot blank,
The existing vIOPL interface is hard to use, and need not be.  Introduce a
VMASSIST with which a guest can opt in to having vIOPL behaviour consistent
with native hardware.  Specifically:

 - virtual iopl updated from do_iret() hypercalls.
 - virtual iopl reported in bounce frames.
 - guest kernels assumed to be level 0 for the purpose of iopl checks.

v->arch.pv_vcpu.iopl is altered to store IOPL shifted as it would exist in
eflags, for the benefit of the assembly code.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2:
 * Shift vcpu.iopl to match eflags.
 * Factor out iopl_ok().
 * Use CMOVcc in assembly code.

Along with this, I have functional tests for both vIOPL interfaces in PV
guests.
---
 xen/arch/x86/domain.c              | 15 ++++++++++-----
 xen/arch/x86/physdev.c             |  2 +-
 xen/arch/x86/traps.c               | 15 +++++++++++++--
 xen/arch/x86/x86_64/asm-offsets.c  |  3 +++
 xen/arch/x86/x86_64/compat/entry.S |  7 ++++++-
 xen/arch/x86/x86_64/compat/traps.c |  4 ++++
 xen/arch/x86/x86_64/entry.S        |  7 ++++++-
 xen/arch/x86/x86_64/traps.c        |  3 +++
 xen/include/asm-x86/config.h       |  1 +
 xen/include/asm-x86/domain.h       |  4 +++-
 xen/include/public/xen.h           |  8 ++++++++
 11 files changed, 58 insertions(+), 11 deletions(-)
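For illustration only (not part of this patch), a PV guest kernel wanting the
new behaviour would enable the assist once at boot via the vm_assist
hypercall.  The sketch below assumes the Linux-style HYPERVISOR_vm_assist()
wrapper; the function name xen_setup_architectural_iopl is hypothetical, while
VMASST_CMD_enable is from the existing public/xen.h interface and
VMASST_TYPE_architectural_iopl is the constant added above.

    /*
     * Hypothetical guest-side sketch: opt in to architectural vIOPL.
     * On an older hypervisor the hypercall fails, and the guest keeps the
     * legacy PHYSDEVOP_set_iopl semantics (kernel treated as CPL 1).
     */
    static int __init xen_setup_architectural_iopl(void)
    {
        int rc = HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                      VMASST_TYPE_architectural_iopl);

        if (rc)
            pr_info("xen: architectural iopl assist unavailable (%d)\n", rc);

        return rc;
    }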