[v3,26/41] KVM: arm64: Introduce framework for accessing deferred sysregs
diff mbox

Message ID 20180112120747.27999-27-christoffer.dall@linaro.org
State New
Headers show

Commit Message

Christoffer Dall Jan. 12, 2018, 12:07 p.m. UTC
We are about to defer saving and restoring some groups of system
registers to vcpu_put and vcpu_load on supported systems.  This means
that we need some infrastructure to access system registes which
supports either accessing the memory backing of the register or directly
accessing the system registers, depending on the state of the system
when we access the register.

We do this by defining a set of read/write accessors for each system
register, and letting each system register be defined as "immediate" or
"deferrable".  Immediate registers are always saved/restored in the
world-switch path, but deferrable registers are only saved/restored in
vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
in that case.

Not that we don't use the deferred mechanism yet in this patch, but only
introduce infrastructure.  This is to improve convenience of review in
the subsequent patches where it is clear which registers become
deferred.

 [ Most of this logic was contributed by Marc Zyngier ]

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |   8 +-
 arch/arm64/kvm/sys_regs.c         | 160 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+), 2 deletions(-)

Comments

Julien Thierry Jan. 17, 2018, 5:52 p.m. UTC | #1
On 12/01/18 12:07, Christoffer Dall wrote:
> We are about to defer saving and restoring some groups of system
> registers to vcpu_put and vcpu_load on supported systems.  This means
> that we need some infrastructure to access system registes which
> supports either accessing the memory backing of the register or directly
> accessing the system registers, depending on the state of the system
> when we access the register.
> 
> We do this by defining a set of read/write accessors for each system
> register, and letting each system register be defined as "immediate" or
> "deferrable".  Immediate registers are always saved/restored in the
> world-switch path, but deferrable registers are only saved/restored in
> vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
> in that case.
> 

The patch is fine, however I'd suggest adding a comment in the pointing 
out that the IMMEDIATE/DEFERRABLE apply to save/restore to the vcpu 
struct. Instinctively I would expect the deferrable/immediate to apply 
to the actual hardware register access, so a comment would prevent 
people like me to get on the wrong track.

> Not that we don't use the deferred mechanism yet in this patch, but only
> introduce infrastructure.  This is to improve convenience of review in
> the subsequent patches where it is clear which registers become
> deferred.
> 
>   [ Most of this logic was contributed by Marc Zyngier ]
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>

Reviewed-by: Julien Thierry <julien.thierry@arm.com>

> ---
>   arch/arm64/include/asm/kvm_host.h |   8 +-
>   arch/arm64/kvm/sys_regs.c         | 160 ++++++++++++++++++++++++++++++++++++++
>   2 files changed, 166 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 91272c35cc36..4b5ef82f6bdb 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -281,6 +281,10 @@ struct kvm_vcpu_arch {
>   
>   	/* Detect first run of a vcpu */
>   	bool has_run_once;
> +
> +	/* True when deferrable sysregs are loaded on the physical CPU,
> +	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
> +	bool sysregs_loaded_on_cpu;
>   };
>   
>   #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
> @@ -293,8 +297,8 @@ struct kvm_vcpu_arch {
>    */
>   #define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
>   
> -#define vcpu_read_sys_reg(v,r)	__vcpu_sys_reg(v,r)
> -#define vcpu_write_sys_reg(v,r,n)	do { __vcpu_sys_reg(v,r) = n; } while (0)
> +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
> +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
>   
>   /*
>    * CP14 and CP15 live in the same array, as they are backed by the
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 96398d53b462..9d353a6a55c9 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -35,6 +35,7 @@
>   #include <asm/kvm_coproc.h>
>   #include <asm/kvm_emulate.h>
>   #include <asm/kvm_host.h>
> +#include <asm/kvm_hyp.h>
>   #include <asm/kvm_mmu.h>
>   #include <asm/perf_event.h>
>   #include <asm/sysreg.h>
> @@ -76,6 +77,165 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
>   	return false;
>   }
>   
> +struct sys_reg_accessor {
> +	u64	(*rdsr)(struct kvm_vcpu *, int);
> +	void	(*wrsr)(struct kvm_vcpu *, int, u64);

Nit:

Why use a signed integer for the register index argument?

> +};
> +
> +#define DECLARE_IMMEDIATE_SR(i)						\
> +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> +	{								\
> +		return __vcpu_sys_reg(vcpu, r);				\
> +	}								\
> +									\
> +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> +	{								\
> +		__vcpu_sys_reg(vcpu, r) = v;				\
> +	}								\
> +
> +#define DECLARE_DEFERRABLE_SR(i, s)					\
> +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> +	{								\
> +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> +			return read_sysreg_s((s));			\
> +		}							\
> +		return __vcpu_sys_reg(vcpu, r);				\
> +	}								\
> +									\
> +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> +	{								\
> +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> +			write_sysreg_s(v, (s));				\
> +		} else {						\
> +			__vcpu_sys_reg(vcpu, r) = v;			\
> +		}							\
> +	}								\
> +
> +
> +#define SR_HANDLER_RANGE(i,e)						\
> +	[i ... e] =  (struct sys_reg_accessor) {			\
> +		.rdsr = __##i##_read,					\
> +		.wrsr = __##i##_write,					\

Nit:
Could we have __vcpu_##i##_read and __vcpu_##i##_write?

> +	}
> +
> +#define SR_HANDLER(i)	SR_HANDLER_RANGE(i, i)
> +
> +static void bad_sys_reg(int reg)
> +{
> +	WARN_ONCE(1, "Bad system register access %d\n", reg);
> +}
> +
> +static u64 __default_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> +{
> +	bad_sys_reg(reg);
> +	return 0;
> +}
> +
> +static void __default_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> +{
> +	bad_sys_reg(reg);
> +}
> +
> +/* Ordered as in enum vcpu_sysreg */
> +DECLARE_IMMEDIATE_SR(MPIDR_EL1);
> +DECLARE_IMMEDIATE_SR(CSSELR_EL1);
> +DECLARE_IMMEDIATE_SR(SCTLR_EL1);
> +DECLARE_IMMEDIATE_SR(ACTLR_EL1);
> +DECLARE_IMMEDIATE_SR(CPACR_EL1);
> +DECLARE_IMMEDIATE_SR(TTBR0_EL1);
> +DECLARE_IMMEDIATE_SR(TTBR1_EL1);
> +DECLARE_IMMEDIATE_SR(TCR_EL1);
> +DECLARE_IMMEDIATE_SR(ESR_EL1);
> +DECLARE_IMMEDIATE_SR(AFSR0_EL1);
> +DECLARE_IMMEDIATE_SR(AFSR1_EL1);
> +DECLARE_IMMEDIATE_SR(FAR_EL1);
> +DECLARE_IMMEDIATE_SR(MAIR_EL1);
> +DECLARE_IMMEDIATE_SR(VBAR_EL1);
> +DECLARE_IMMEDIATE_SR(CONTEXTIDR_EL1);
> +DECLARE_IMMEDIATE_SR(TPIDR_EL0);
> +DECLARE_IMMEDIATE_SR(TPIDRRO_EL0);
> +DECLARE_IMMEDIATE_SR(TPIDR_EL1);
> +DECLARE_IMMEDIATE_SR(AMAIR_EL1);
> +DECLARE_IMMEDIATE_SR(CNTKCTL_EL1);
> +DECLARE_IMMEDIATE_SR(PAR_EL1);
> +DECLARE_IMMEDIATE_SR(MDSCR_EL1);
> +DECLARE_IMMEDIATE_SR(MDCCINT_EL1);
> +DECLARE_IMMEDIATE_SR(PMCR_EL0);
> +DECLARE_IMMEDIATE_SR(PMSELR_EL0);
> +DECLARE_IMMEDIATE_SR(PMEVCNTR0_EL0);
> +/* PMEVCNTR30_EL0 */
> +DECLARE_IMMEDIATE_SR(PMCCNTR_EL0);
> +DECLARE_IMMEDIATE_SR(PMEVTYPER0_EL0);
> +/* PMEVTYPER30_EL0 */
> +DECLARE_IMMEDIATE_SR(PMCCFILTR_EL0);
> +DECLARE_IMMEDIATE_SR(PMCNTENSET_EL0);
> +DECLARE_IMMEDIATE_SR(PMINTENSET_EL1);
> +DECLARE_IMMEDIATE_SR(PMOVSSET_EL0);
> +DECLARE_IMMEDIATE_SR(PMSWINC_EL0);
> +DECLARE_IMMEDIATE_SR(PMUSERENR_EL0);
> +DECLARE_IMMEDIATE_SR(DACR32_EL2);
> +DECLARE_IMMEDIATE_SR(IFSR32_EL2);
> +DECLARE_IMMEDIATE_SR(FPEXC32_EL2);
> +DECLARE_IMMEDIATE_SR(DBGVCR32_EL2);
> +
> +static const struct sys_reg_accessor sys_reg_accessors[NR_SYS_REGS] = {
> +	[0 ... NR_SYS_REGS - 1] = {
> +		.rdsr = __default_read_sys_reg,
> +		.wrsr = __default_write_sys_reg,
> +	},
> +
> +	SR_HANDLER(MPIDR_EL1),
> +	SR_HANDLER(CSSELR_EL1),
> +	SR_HANDLER(SCTLR_EL1),
> +	SR_HANDLER(ACTLR_EL1),
> +	SR_HANDLER(CPACR_EL1),
> +	SR_HANDLER(TTBR0_EL1),
> +	SR_HANDLER(TTBR1_EL1),
> +	SR_HANDLER(TCR_EL1),
> +	SR_HANDLER(ESR_EL1),
> +	SR_HANDLER(AFSR0_EL1),
> +	SR_HANDLER(AFSR1_EL1),
> +	SR_HANDLER(FAR_EL1),
> +	SR_HANDLER(MAIR_EL1),
> +	SR_HANDLER(VBAR_EL1),
> +	SR_HANDLER(CONTEXTIDR_EL1),
> +	SR_HANDLER(TPIDR_EL0),
> +	SR_HANDLER(TPIDRRO_EL0),
> +	SR_HANDLER(TPIDR_EL1),
> +	SR_HANDLER(AMAIR_EL1),
> +	SR_HANDLER(CNTKCTL_EL1),
> +	SR_HANDLER(PAR_EL1),
> +	SR_HANDLER(MDSCR_EL1),
> +	SR_HANDLER(MDCCINT_EL1),
> +	SR_HANDLER(PMCR_EL0),
> +	SR_HANDLER(PMSELR_EL0),
> +	SR_HANDLER_RANGE(PMEVCNTR0_EL0, PMEVCNTR30_EL0),
> +	SR_HANDLER(PMCCNTR_EL0),
> +	SR_HANDLER_RANGE(PMEVTYPER0_EL0, PMEVTYPER30_EL0),
> +	SR_HANDLER(PMCCFILTR_EL0),
> +	SR_HANDLER(PMCNTENSET_EL0),
> +	SR_HANDLER(PMINTENSET_EL1),
> +	SR_HANDLER(PMOVSSET_EL0),
> +	SR_HANDLER(PMSWINC_EL0),
> +	SR_HANDLER(PMUSERENR_EL0),
> +	SR_HANDLER(DACR32_EL2),
> +	SR_HANDLER(IFSR32_EL2),
> +	SR_HANDLER(FPEXC32_EL2),
> +	SR_HANDLER(DBGVCR32_EL2),
> +};
> +
> +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> +{
> +	return sys_reg_accessors[reg].rdsr(vcpu, reg);
> +}
> +
> +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> +{
> +	sys_reg_accessors[reg].wrsr(vcpu, reg, val);
> +}
> +
>   /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
>   static u32 cache_levels;
>   
>
Christoffer Dall Jan. 18, 2018, 1:08 p.m. UTC | #2
On Wed, Jan 17, 2018 at 05:52:21PM +0000, Julien Thierry wrote:
> 
> 
> On 12/01/18 12:07, Christoffer Dall wrote:
> >We are about to defer saving and restoring some groups of system
> >registers to vcpu_put and vcpu_load on supported systems.  This means
> >that we need some infrastructure to access system registes which
> >supports either accessing the memory backing of the register or directly
> >accessing the system registers, depending on the state of the system
> >when we access the register.
> >
> >We do this by defining a set of read/write accessors for each system
> >register, and letting each system register be defined as "immediate" or
> >"deferrable".  Immediate registers are always saved/restored in the
> >world-switch path, but deferrable registers are only saved/restored in
> >vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
> >in that case.
> >
> 
> The patch is fine, however I'd suggest adding a comment in the pointing out
> that the IMMEDIATE/DEFERRABLE apply to save/restore to the vcpu struct.
> Instinctively I would expect the deferrable/immediate to apply to the actual
> hardware register access, so a comment would prevent people like me to get
> on the wrong track.
> 

I tried to explain that a bit in the first sentence of the commit
message, but I can try to make it more clear that we introduce
terminology.

> >Not that we don't use the deferred mechanism yet in this patch, but only
> >introduce infrastructure.  This is to improve convenience of review in
> >the subsequent patches where it is clear which registers become
> >deferred.
> >
> >  [ Most of this logic was contributed by Marc Zyngier ]
> >
> >Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> 
> Reviewed-by: Julien Thierry <julien.thierry@arm.com>
> 
> >---
> >  arch/arm64/include/asm/kvm_host.h |   8 +-
> >  arch/arm64/kvm/sys_regs.c         | 160 ++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 166 insertions(+), 2 deletions(-)
> >
> >diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> >index 91272c35cc36..4b5ef82f6bdb 100644
> >--- a/arch/arm64/include/asm/kvm_host.h
> >+++ b/arch/arm64/include/asm/kvm_host.h
> >@@ -281,6 +281,10 @@ struct kvm_vcpu_arch {
> >  	/* Detect first run of a vcpu */
> >  	bool has_run_once;
> >+
> >+	/* True when deferrable sysregs are loaded on the physical CPU,
> >+	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
> >+	bool sysregs_loaded_on_cpu;
> >  };
> >  #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
> >@@ -293,8 +297,8 @@ struct kvm_vcpu_arch {
> >   */
> >  #define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
> >-#define vcpu_read_sys_reg(v,r)	__vcpu_sys_reg(v,r)
> >-#define vcpu_write_sys_reg(v,r,n)	do { __vcpu_sys_reg(v,r) = n; } while (0)
> >+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
> >+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
> >  /*
> >   * CP14 and CP15 live in the same array, as they are backed by the
> >diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >index 96398d53b462..9d353a6a55c9 100644
> >--- a/arch/arm64/kvm/sys_regs.c
> >+++ b/arch/arm64/kvm/sys_regs.c
> >@@ -35,6 +35,7 @@
> >  #include <asm/kvm_coproc.h>
> >  #include <asm/kvm_emulate.h>
> >  #include <asm/kvm_host.h>
> >+#include <asm/kvm_hyp.h>
> >  #include <asm/kvm_mmu.h>
> >  #include <asm/perf_event.h>
> >  #include <asm/sysreg.h>
> >@@ -76,6 +77,165 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
> >  	return false;
> >  }
> >+struct sys_reg_accessor {
> >+	u64	(*rdsr)(struct kvm_vcpu *, int);
> >+	void	(*wrsr)(struct kvm_vcpu *, int, u64);
> 
> Nit:
> 
> Why use a signed integer for the register index argument?
> 

The type name is short? ;)  No particular reason, could be an unsigned
int, but I don't think it matters here does it?

> >+};
> >+
> >+#define DECLARE_IMMEDIATE_SR(i)						\
> >+	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> >+	{								\
> >+		return __vcpu_sys_reg(vcpu, r);				\
> >+	}								\
> >+									\
> >+	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> >+	{								\
> >+		__vcpu_sys_reg(vcpu, r) = v;				\
> >+	}								\
> >+
> >+#define DECLARE_DEFERRABLE_SR(i, s)					\
> >+	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> >+	{								\
> >+		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> >+			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> >+			return read_sysreg_s((s));			\
> >+		}							\
> >+		return __vcpu_sys_reg(vcpu, r);				\
> >+	}								\
> >+									\
> >+	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> >+	{								\
> >+		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> >+			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> >+			write_sysreg_s(v, (s));				\
> >+		} else {						\
> >+			__vcpu_sys_reg(vcpu, r) = v;			\
> >+		}							\
> >+	}								\
> >+
> >+
> >+#define SR_HANDLER_RANGE(i,e)						\
> >+	[i ... e] =  (struct sys_reg_accessor) {			\
> >+		.rdsr = __##i##_read,					\
> >+		.wrsr = __##i##_write,					\
> 
> Nit:
> Could we have __vcpu_##i##_read and __vcpu_##i##_write?
> 

They don't necessarily read from the vcpu do they?

Unrelated: I also thought about just having a single function a switch
statement instead, which may make it easier to follow the code as there
would be no macros generating functions, but it would be slightly less
declarative.

For example:

u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
{
	if (!vcpu->arch.sysregs_loaded_on_cpu)
		goto immediate_read;
	
	/*
	 * All system registers listed in the switch are deferred
	 * save/restored on VHE systems.
	 */
	switch (reg) {
	case CSSELR_EL1:	return read_sysreg_s(SYS_CSSELR_EL1));
	case SCTLR_EL1:		return read_sysreg_s(sctlr_EL12));
	case ACTLR_EL1:		return read_sysreg_s(SYS_ACTLR_EL1));
	case CPACR_EL1:		return read_sysreg_s(cpacr_EL12));
	case TTBR0_EL1:		return read_sysreg_s(ttbr0_EL12));
	case TTBR1_EL1:		return read_sysreg_s(ttbr1_EL12));
	case TCR_EL1:		return read_sysreg_s(tcr_EL12));
	case ESR_EL1:		return read_sysreg_s(esr_EL12));
	case AFSR0_EL1:		return read_sysreg_s(afsr0_EL12));
	case AFSR1_EL1:		return read_sysreg_s(afsr1_EL12));
	case FAR_EL1:		return read_sysreg_s(far_EL12));
	case MAIR_EL1:		return read_sysreg_s(mair_EL12));
	case VBAR_EL1:		return read_sysreg_s(vbar_EL12));
	case CONTEXTIDR_EL1:	return read_sysreg_s(contextidr_EL12));
	case TPIDR_EL0:		return read_sysreg_s(SYS_TPIDR_EL0));
	case TPIDRRO_EL0:	return read_sysreg_s(SYS_TPIDRRO_EL0));
	case TPIDR_EL1:		return read_sysreg_s(SYS_TPIDR_EL1));
	case AMAIR_EL1:		return read_sysreg_s(amair_EL12));
	case CNTKCTL_EL1:	return read_sysreg_s(cntkctl_EL12));
	case PAR_EL1:		return read_sysreg_s(SYS_PAR_EL1));
	case DACR32_EL2:	return read_sysreg_s(SYS_DACR32_EL2));
	case IFSR32_EL2:	return read_sysreg_s(SYS_IFSR32_EL2));
	case DBGVCR32_EL2:	return read_sysreg_s(SYS_DBGVCR32_EL2));
	}

immediate_read:
	return __vcpu_sys_reg(vcpu, reg);
}

Since you're having a look at this, what are your thoughts?

Marc, what's your preference?

Thanks,
-Christoffer

> >+	}
> >+
> >+#define SR_HANDLER(i)	SR_HANDLER_RANGE(i, i)
> >+
> >+static void bad_sys_reg(int reg)
> >+{
> >+	WARN_ONCE(1, "Bad system register access %d\n", reg);
> >+}
> >+
> >+static u64 __default_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> >+{
> >+	bad_sys_reg(reg);
> >+	return 0;
> >+}
> >+
> >+static void __default_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> >+{
> >+	bad_sys_reg(reg);
> >+}
> >+
> >+/* Ordered as in enum vcpu_sysreg */
> >+DECLARE_IMMEDIATE_SR(MPIDR_EL1);
> >+DECLARE_IMMEDIATE_SR(CSSELR_EL1);
> >+DECLARE_IMMEDIATE_SR(SCTLR_EL1);
> >+DECLARE_IMMEDIATE_SR(ACTLR_EL1);
> >+DECLARE_IMMEDIATE_SR(CPACR_EL1);
> >+DECLARE_IMMEDIATE_SR(TTBR0_EL1);
> >+DECLARE_IMMEDIATE_SR(TTBR1_EL1);
> >+DECLARE_IMMEDIATE_SR(TCR_EL1);
> >+DECLARE_IMMEDIATE_SR(ESR_EL1);
> >+DECLARE_IMMEDIATE_SR(AFSR0_EL1);
> >+DECLARE_IMMEDIATE_SR(AFSR1_EL1);
> >+DECLARE_IMMEDIATE_SR(FAR_EL1);
> >+DECLARE_IMMEDIATE_SR(MAIR_EL1);
> >+DECLARE_IMMEDIATE_SR(VBAR_EL1);
> >+DECLARE_IMMEDIATE_SR(CONTEXTIDR_EL1);
> >+DECLARE_IMMEDIATE_SR(TPIDR_EL0);
> >+DECLARE_IMMEDIATE_SR(TPIDRRO_EL0);
> >+DECLARE_IMMEDIATE_SR(TPIDR_EL1);
> >+DECLARE_IMMEDIATE_SR(AMAIR_EL1);
> >+DECLARE_IMMEDIATE_SR(CNTKCTL_EL1);
> >+DECLARE_IMMEDIATE_SR(PAR_EL1);
> >+DECLARE_IMMEDIATE_SR(MDSCR_EL1);
> >+DECLARE_IMMEDIATE_SR(MDCCINT_EL1);
> >+DECLARE_IMMEDIATE_SR(PMCR_EL0);
> >+DECLARE_IMMEDIATE_SR(PMSELR_EL0);
> >+DECLARE_IMMEDIATE_SR(PMEVCNTR0_EL0);
> >+/* PMEVCNTR30_EL0 */
> >+DECLARE_IMMEDIATE_SR(PMCCNTR_EL0);
> >+DECLARE_IMMEDIATE_SR(PMEVTYPER0_EL0);
> >+/* PMEVTYPER30_EL0 */
> >+DECLARE_IMMEDIATE_SR(PMCCFILTR_EL0);
> >+DECLARE_IMMEDIATE_SR(PMCNTENSET_EL0);
> >+DECLARE_IMMEDIATE_SR(PMINTENSET_EL1);
> >+DECLARE_IMMEDIATE_SR(PMOVSSET_EL0);
> >+DECLARE_IMMEDIATE_SR(PMSWINC_EL0);
> >+DECLARE_IMMEDIATE_SR(PMUSERENR_EL0);
> >+DECLARE_IMMEDIATE_SR(DACR32_EL2);
> >+DECLARE_IMMEDIATE_SR(IFSR32_EL2);
> >+DECLARE_IMMEDIATE_SR(FPEXC32_EL2);
> >+DECLARE_IMMEDIATE_SR(DBGVCR32_EL2);
> >+
> >+static const struct sys_reg_accessor sys_reg_accessors[NR_SYS_REGS] = {
> >+	[0 ... NR_SYS_REGS - 1] = {
> >+		.rdsr = __default_read_sys_reg,
> >+		.wrsr = __default_write_sys_reg,
> >+	},
> >+
> >+	SR_HANDLER(MPIDR_EL1),
> >+	SR_HANDLER(CSSELR_EL1),
> >+	SR_HANDLER(SCTLR_EL1),
> >+	SR_HANDLER(ACTLR_EL1),
> >+	SR_HANDLER(CPACR_EL1),
> >+	SR_HANDLER(TTBR0_EL1),
> >+	SR_HANDLER(TTBR1_EL1),
> >+	SR_HANDLER(TCR_EL1),
> >+	SR_HANDLER(ESR_EL1),
> >+	SR_HANDLER(AFSR0_EL1),
> >+	SR_HANDLER(AFSR1_EL1),
> >+	SR_HANDLER(FAR_EL1),
> >+	SR_HANDLER(MAIR_EL1),
> >+	SR_HANDLER(VBAR_EL1),
> >+	SR_HANDLER(CONTEXTIDR_EL1),
> >+	SR_HANDLER(TPIDR_EL0),
> >+	SR_HANDLER(TPIDRRO_EL0),
> >+	SR_HANDLER(TPIDR_EL1),
> >+	SR_HANDLER(AMAIR_EL1),
> >+	SR_HANDLER(CNTKCTL_EL1),
> >+	SR_HANDLER(PAR_EL1),
> >+	SR_HANDLER(MDSCR_EL1),
> >+	SR_HANDLER(MDCCINT_EL1),
> >+	SR_HANDLER(PMCR_EL0),
> >+	SR_HANDLER(PMSELR_EL0),
> >+	SR_HANDLER_RANGE(PMEVCNTR0_EL0, PMEVCNTR30_EL0),
> >+	SR_HANDLER(PMCCNTR_EL0),
> >+	SR_HANDLER_RANGE(PMEVTYPER0_EL0, PMEVTYPER30_EL0),
> >+	SR_HANDLER(PMCCFILTR_EL0),
> >+	SR_HANDLER(PMCNTENSET_EL0),
> >+	SR_HANDLER(PMINTENSET_EL1),
> >+	SR_HANDLER(PMOVSSET_EL0),
> >+	SR_HANDLER(PMSWINC_EL0),
> >+	SR_HANDLER(PMUSERENR_EL0),
> >+	SR_HANDLER(DACR32_EL2),
> >+	SR_HANDLER(IFSR32_EL2),
> >+	SR_HANDLER(FPEXC32_EL2),
> >+	SR_HANDLER(DBGVCR32_EL2),
> >+};
> >+
> >+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> >+{
> >+	return sys_reg_accessors[reg].rdsr(vcpu, reg);
> >+}
> >+
> >+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> >+{
> >+	sys_reg_accessors[reg].wrsr(vcpu, reg, val);
> >+}
> >+
> >  /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
> >  static u32 cache_levels;
> >
> 
> -- 
> Julien Thierry
Julien Thierry Jan. 18, 2018, 1:39 p.m. UTC | #3
On 18/01/18 13:08, Christoffer Dall wrote:
> On Wed, Jan 17, 2018 at 05:52:21PM +0000, Julien Thierry wrote:
>>
>>
>> On 12/01/18 12:07, Christoffer Dall wrote:
>>> We are about to defer saving and restoring some groups of system
>>> registers to vcpu_put and vcpu_load on supported systems.  This means
>>> that we need some infrastructure to access system registes which
>>> supports either accessing the memory backing of the register or directly
>>> accessing the system registers, depending on the state of the system
>>> when we access the register.
>>>
>>> We do this by defining a set of read/write accessors for each system
>>> register, and letting each system register be defined as "immediate" or
>>> "deferrable".  Immediate registers are always saved/restored in the
>>> world-switch path, but deferrable registers are only saved/restored in
>>> vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
>>> in that case.
>>>
>>
>> The patch is fine, however I'd suggest adding a comment in the pointing out
>> that the IMMEDIATE/DEFERRABLE apply to save/restore to the vcpu struct.
>> Instinctively I would expect the deferrable/immediate to apply to the actual
>> hardware register access, so a comment would prevent people like me to get
>> on the wrong track.
>>
> 
> I tried to explain that a bit in the first sentence of the commit
> message, but I can try to make it more clear that we introduce
> terminology.
> 

The commit message is fine, I just think it would be nice to have it in 
the code so you don't have to look for the commit to understand.

>>> Not that we don't use the deferred mechanism yet in this patch, but only
>>> introduce infrastructure.  This is to improve convenience of review in
>>> the subsequent patches where it is clear which registers become
>>> deferred.
>>>
>>>   [ Most of this logic was contributed by Marc Zyngier ]
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
>>
>> Reviewed-by: Julien Thierry <julien.thierry@arm.com>
>>
>>> ---
>>>   arch/arm64/include/asm/kvm_host.h |   8 +-
>>>   arch/arm64/kvm/sys_regs.c         | 160 ++++++++++++++++++++++++++++++++++++++
>>>   2 files changed, 166 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>> index 91272c35cc36..4b5ef82f6bdb 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -281,6 +281,10 @@ struct kvm_vcpu_arch {
>>>   	/* Detect first run of a vcpu */
>>>   	bool has_run_once;
>>> +
>>> +	/* True when deferrable sysregs are loaded on the physical CPU,
>>> +	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>>> +	bool sysregs_loaded_on_cpu;
>>>   };
>>>   #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
>>> @@ -293,8 +297,8 @@ struct kvm_vcpu_arch {
>>>    */
>>>   #define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
>>> -#define vcpu_read_sys_reg(v,r)	__vcpu_sys_reg(v,r)
>>> -#define vcpu_write_sys_reg(v,r,n)	do { __vcpu_sys_reg(v,r) = n; } while (0)
>>> +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
>>> +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
>>>   /*
>>>    * CP14 and CP15 live in the same array, as they are backed by the
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 96398d53b462..9d353a6a55c9 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -35,6 +35,7 @@
>>>   #include <asm/kvm_coproc.h>
>>>   #include <asm/kvm_emulate.h>
>>>   #include <asm/kvm_host.h>
>>> +#include <asm/kvm_hyp.h>
>>>   #include <asm/kvm_mmu.h>
>>>   #include <asm/perf_event.h>
>>>   #include <asm/sysreg.h>
>>> @@ -76,6 +77,165 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
>>>   	return false;
>>>   }
>>> +struct sys_reg_accessor {
>>> +	u64	(*rdsr)(struct kvm_vcpu *, int);
>>> +	void	(*wrsr)(struct kvm_vcpu *, int, u64);
>>
>> Nit:
>>
>> Why use a signed integer for the register index argument?
>>
> 
> The type name is short? ;)  No particular reason, could be an unsigned
> int, but I don't think it matters here does it?
> 

Probably not, just personal preference I guess.

>>> +};
>>> +
>>> +#define DECLARE_IMMEDIATE_SR(i)						\
>>> +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
>>> +	{								\
>>> +		return __vcpu_sys_reg(vcpu, r);				\
>>> +	}								\
>>> +									\
>>> +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
>>> +	{								\
>>> +		__vcpu_sys_reg(vcpu, r) = v;				\
>>> +	}								\
>>> +
>>> +#define DECLARE_DEFERRABLE_SR(i, s)					\
>>> +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
>>> +	{								\
>>> +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
>>> +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
>>> +			return read_sysreg_s((s));			\
>>> +		}							\
>>> +		return __vcpu_sys_reg(vcpu, r);				\
>>> +	}								\
>>> +									\
>>> +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
>>> +	{								\
>>> +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
>>> +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
>>> +			write_sysreg_s(v, (s));				\
>>> +		} else {						\
>>> +			__vcpu_sys_reg(vcpu, r) = v;			\
>>> +		}							\
>>> +	}								\
>>> +
>>> +
>>> +#define SR_HANDLER_RANGE(i,e)						\
>>> +	[i ... e] =  (struct sys_reg_accessor) {			\
>>> +		.rdsr = __##i##_read,					\
>>> +		.wrsr = __##i##_write,					\
>>
>> Nit:
>> Could we have __vcpu_##i##_read and __vcpu_##i##_write?
>>
> 
> They don't necessarily read from the vcpu do they?
> 

Hmmm, from my understanding they do, but the action the vcpu can be 
immediate or deferred. But from the semantics you give to 
IMMEDIATE/DEFERRABLE, I'd say the actions are related to vcpu.

I don't know if that makes sense.

> Unrelated: I also thought about just having a single function a switch
> statement instead, which may make it easier to follow the code as there
> would be no macros generating functions, but it would be slightly less
> declarative.
> 
> For example:
> 
> u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> {
> 	if (!vcpu->arch.sysregs_loaded_on_cpu)
> 		goto immediate_read;
> 	
> 	/*
> 	 * All system registers listed in the switch are deferred
> 	 * save/restored on VHE systems.
> 	 */
> 	switch (reg) {
> 	case CSSELR_EL1:	return read_sysreg_s(SYS_CSSELR_EL1));
> 	case SCTLR_EL1:		return read_sysreg_s(sctlr_EL12));
> 	case ACTLR_EL1:		return read_sysreg_s(SYS_ACTLR_EL1));
> 	case CPACR_EL1:		return read_sysreg_s(cpacr_EL12));
> 	case TTBR0_EL1:		return read_sysreg_s(ttbr0_EL12));
> 	case TTBR1_EL1:		return read_sysreg_s(ttbr1_EL12));
> 	case TCR_EL1:		return read_sysreg_s(tcr_EL12));
> 	case ESR_EL1:		return read_sysreg_s(esr_EL12));
> 	case AFSR0_EL1:		return read_sysreg_s(afsr0_EL12));
> 	case AFSR1_EL1:		return read_sysreg_s(afsr1_EL12));
> 	case FAR_EL1:		return read_sysreg_s(far_EL12));
> 	case MAIR_EL1:		return read_sysreg_s(mair_EL12));
> 	case VBAR_EL1:		return read_sysreg_s(vbar_EL12));
> 	case CONTEXTIDR_EL1:	return read_sysreg_s(contextidr_EL12));
> 	case TPIDR_EL0:		return read_sysreg_s(SYS_TPIDR_EL0));
> 	case TPIDRRO_EL0:	return read_sysreg_s(SYS_TPIDRRO_EL0));
> 	case TPIDR_EL1:		return read_sysreg_s(SYS_TPIDR_EL1));
> 	case AMAIR_EL1:		return read_sysreg_s(amair_EL12));
> 	case CNTKCTL_EL1:	return read_sysreg_s(cntkctl_EL12));
> 	case PAR_EL1:		return read_sysreg_s(SYS_PAR_EL1));
> 	case DACR32_EL2:	return read_sysreg_s(SYS_DACR32_EL2));
> 	case IFSR32_EL2:	return read_sysreg_s(SYS_IFSR32_EL2));
> 	case DBGVCR32_EL2:	return read_sysreg_s(SYS_DBGVCR32_EL2));
> 	}
> 
> immediate_read:
> 	return __vcpu_sys_reg(vcpu, reg);
> }
> 
> Since you're having a look at this, what are your thoughts?

I like that suggestion, very easy to follow. Of course negative side is 
that you'll need two of those switch... But yes I still prefer this new 
suggestion.

So if you go with this you can ignore my other comments ;) .

Thanks,
Dave Martin Jan. 23, 2018, 4:04 p.m. UTC | #4
On Fri, Jan 12, 2018 at 01:07:32PM +0100, Christoffer Dall wrote:
> We are about to defer saving and restoring some groups of system
> registers to vcpu_put and vcpu_load on supported systems.  This means
> that we need some infrastructure to access system registes which
> supports either accessing the memory backing of the register or directly
> accessing the system registers, depending on the state of the system
> when we access the register.
> 
> We do this by defining a set of read/write accessors for each system
> register, and letting each system register be defined as "immediate" or
> "deferrable".  Immediate registers are always saved/restored in the
> world-switch path, but deferrable registers are only saved/restored in
> vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
> in that case.
> 
> Not that we don't use the deferred mechanism yet in this patch, but only
> introduce infrastructure.  This is to improve convenience of review in
> the subsequent patches where it is clear which registers become
> deferred.

Might this table-driven approach result in a lot of branch mispredicts,
particularly across load/put boundaries?

If we were to move the whole construct to a header, then it could get
constant-folded at the call site down to the individual reg accessed,
say:

	if (sys_regs_loaded)
		read_sysreg_s(TPIDR_EL0);
	else
		__vcpu_sys_reg(v, TPIDR_EL0);

Where multiple regs are accessed close to each other, the compiler
may be able to specialise the whole sequence for the loaded and !loaded
cases so that there is only one conditional branch.


The individual accessor functions also become unnecessary in this case,
because we wouldn't need to derive function pointers from them any
more.

I don't know how performance would compare in practice though.

I'm also assuming that all calls to these accessors are const-foldable.
If not, relying on inlining would bloat the generated code a lot.

Cheers
---Dave

> 
>  [ Most of this logic was contributed by Marc Zyngier ]
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
>  arch/arm64/include/asm/kvm_host.h |   8 +-
>  arch/arm64/kvm/sys_regs.c         | 160 ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 166 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 91272c35cc36..4b5ef82f6bdb 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -281,6 +281,10 @@ struct kvm_vcpu_arch {
>  
>  	/* Detect first run of a vcpu */
>  	bool has_run_once;
> +
> +	/* True when deferrable sysregs are loaded on the physical CPU,
> +	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
> +	bool sysregs_loaded_on_cpu;
>  };
>  
>  #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
> @@ -293,8 +297,8 @@ struct kvm_vcpu_arch {
>   */
>  #define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
>  
> -#define vcpu_read_sys_reg(v,r)	__vcpu_sys_reg(v,r)
> -#define vcpu_write_sys_reg(v,r,n)	do { __vcpu_sys_reg(v,r) = n; } while (0)
> +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
> +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
>  
>  /*
>   * CP14 and CP15 live in the same array, as they are backed by the
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 96398d53b462..9d353a6a55c9 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -35,6 +35,7 @@
>  #include <asm/kvm_coproc.h>
>  #include <asm/kvm_emulate.h>
>  #include <asm/kvm_host.h>
> +#include <asm/kvm_hyp.h>
>  #include <asm/kvm_mmu.h>
>  #include <asm/perf_event.h>
>  #include <asm/sysreg.h>
> @@ -76,6 +77,165 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
>  	return false;
>  }
>  
> +struct sys_reg_accessor {
> +	u64	(*rdsr)(struct kvm_vcpu *, int);
> +	void	(*wrsr)(struct kvm_vcpu *, int, u64);
> +};
> +
> +#define DECLARE_IMMEDIATE_SR(i)						\
> +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> +	{								\
> +		return __vcpu_sys_reg(vcpu, r);				\
> +	}								\
> +									\
> +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> +	{								\
> +		__vcpu_sys_reg(vcpu, r) = v;				\
> +	}								\
> +
> +#define DECLARE_DEFERRABLE_SR(i, s)					\
> +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> +	{								\
> +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> +			return read_sysreg_s((s));			\
> +		}							\
> +		return __vcpu_sys_reg(vcpu, r);				\
> +	}								\
> +									\
> +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> +	{								\
> +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> +			write_sysreg_s(v, (s));				\
> +		} else {						\
> +			__vcpu_sys_reg(vcpu, r) = v;			\
> +		}							\
> +	}								\
> +
> +
> +#define SR_HANDLER_RANGE(i,e)						\
> +	[i ... e] =  (struct sys_reg_accessor) {			\
> +		.rdsr = __##i##_read,					\
> +		.wrsr = __##i##_write,					\
> +	}
> +
> +#define SR_HANDLER(i)	SR_HANDLER_RANGE(i, i)
> +
> +static void bad_sys_reg(int reg)
> +{
> +	WARN_ONCE(1, "Bad system register access %d\n", reg);
> +}
> +
> +static u64 __default_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> +{
> +	bad_sys_reg(reg);
> +	return 0;
> +}
> +
> +static void __default_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> +{
> +	bad_sys_reg(reg);
> +}
> +
> +/* Ordered as in enum vcpu_sysreg */
> +DECLARE_IMMEDIATE_SR(MPIDR_EL1);
> +DECLARE_IMMEDIATE_SR(CSSELR_EL1);
> +DECLARE_IMMEDIATE_SR(SCTLR_EL1);
> +DECLARE_IMMEDIATE_SR(ACTLR_EL1);
> +DECLARE_IMMEDIATE_SR(CPACR_EL1);
> +DECLARE_IMMEDIATE_SR(TTBR0_EL1);
> +DECLARE_IMMEDIATE_SR(TTBR1_EL1);
> +DECLARE_IMMEDIATE_SR(TCR_EL1);
> +DECLARE_IMMEDIATE_SR(ESR_EL1);
> +DECLARE_IMMEDIATE_SR(AFSR0_EL1);
> +DECLARE_IMMEDIATE_SR(AFSR1_EL1);
> +DECLARE_IMMEDIATE_SR(FAR_EL1);
> +DECLARE_IMMEDIATE_SR(MAIR_EL1);
> +DECLARE_IMMEDIATE_SR(VBAR_EL1);
> +DECLARE_IMMEDIATE_SR(CONTEXTIDR_EL1);
> +DECLARE_IMMEDIATE_SR(TPIDR_EL0);
> +DECLARE_IMMEDIATE_SR(TPIDRRO_EL0);
> +DECLARE_IMMEDIATE_SR(TPIDR_EL1);
> +DECLARE_IMMEDIATE_SR(AMAIR_EL1);
> +DECLARE_IMMEDIATE_SR(CNTKCTL_EL1);
> +DECLARE_IMMEDIATE_SR(PAR_EL1);
> +DECLARE_IMMEDIATE_SR(MDSCR_EL1);
> +DECLARE_IMMEDIATE_SR(MDCCINT_EL1);
> +DECLARE_IMMEDIATE_SR(PMCR_EL0);
> +DECLARE_IMMEDIATE_SR(PMSELR_EL0);
> +DECLARE_IMMEDIATE_SR(PMEVCNTR0_EL0);
> +/* PMEVCNTR30_EL0 */
> +DECLARE_IMMEDIATE_SR(PMCCNTR_EL0);
> +DECLARE_IMMEDIATE_SR(PMEVTYPER0_EL0);
> +/* PMEVTYPER30_EL0 */
> +DECLARE_IMMEDIATE_SR(PMCCFILTR_EL0);
> +DECLARE_IMMEDIATE_SR(PMCNTENSET_EL0);
> +DECLARE_IMMEDIATE_SR(PMINTENSET_EL1);
> +DECLARE_IMMEDIATE_SR(PMOVSSET_EL0);
> +DECLARE_IMMEDIATE_SR(PMSWINC_EL0);
> +DECLARE_IMMEDIATE_SR(PMUSERENR_EL0);
> +DECLARE_IMMEDIATE_SR(DACR32_EL2);
> +DECLARE_IMMEDIATE_SR(IFSR32_EL2);
> +DECLARE_IMMEDIATE_SR(FPEXC32_EL2);
> +DECLARE_IMMEDIATE_SR(DBGVCR32_EL2);
> +
> +static const struct sys_reg_accessor sys_reg_accessors[NR_SYS_REGS] = {
> +	[0 ... NR_SYS_REGS - 1] = {
> +		.rdsr = __default_read_sys_reg,
> +		.wrsr = __default_write_sys_reg,
> +	},
> +
> +	SR_HANDLER(MPIDR_EL1),
> +	SR_HANDLER(CSSELR_EL1),
> +	SR_HANDLER(SCTLR_EL1),
> +	SR_HANDLER(ACTLR_EL1),
> +	SR_HANDLER(CPACR_EL1),
> +	SR_HANDLER(TTBR0_EL1),
> +	SR_HANDLER(TTBR1_EL1),
> +	SR_HANDLER(TCR_EL1),
> +	SR_HANDLER(ESR_EL1),
> +	SR_HANDLER(AFSR0_EL1),
> +	SR_HANDLER(AFSR1_EL1),
> +	SR_HANDLER(FAR_EL1),
> +	SR_HANDLER(MAIR_EL1),
> +	SR_HANDLER(VBAR_EL1),
> +	SR_HANDLER(CONTEXTIDR_EL1),
> +	SR_HANDLER(TPIDR_EL0),
> +	SR_HANDLER(TPIDRRO_EL0),
> +	SR_HANDLER(TPIDR_EL1),
> +	SR_HANDLER(AMAIR_EL1),
> +	SR_HANDLER(CNTKCTL_EL1),
> +	SR_HANDLER(PAR_EL1),
> +	SR_HANDLER(MDSCR_EL1),
> +	SR_HANDLER(MDCCINT_EL1),
> +	SR_HANDLER(PMCR_EL0),
> +	SR_HANDLER(PMSELR_EL0),
> +	SR_HANDLER_RANGE(PMEVCNTR0_EL0, PMEVCNTR30_EL0),
> +	SR_HANDLER(PMCCNTR_EL0),
> +	SR_HANDLER_RANGE(PMEVTYPER0_EL0, PMEVTYPER30_EL0),
> +	SR_HANDLER(PMCCFILTR_EL0),
> +	SR_HANDLER(PMCNTENSET_EL0),
> +	SR_HANDLER(PMINTENSET_EL1),
> +	SR_HANDLER(PMOVSSET_EL0),
> +	SR_HANDLER(PMSWINC_EL0),
> +	SR_HANDLER(PMUSERENR_EL0),
> +	SR_HANDLER(DACR32_EL2),
> +	SR_HANDLER(IFSR32_EL2),
> +	SR_HANDLER(FPEXC32_EL2),
> +	SR_HANDLER(DBGVCR32_EL2),
> +};
> +
> +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> +{
> +	return sys_reg_accessors[reg].rdsr(vcpu, reg);
> +}
> +
> +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> +{
> +	sys_reg_accessors[reg].wrsr(vcpu, reg, val);
> +}
> +
>  /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
>  static u32 cache_levels;
>  
> -- 
> 2.14.2
> 
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Christoffer Dall Jan. 25, 2018, 7:54 p.m. UTC | #5
On Tue, Jan 23, 2018 at 04:04:40PM +0000, Dave Martin wrote:
> On Fri, Jan 12, 2018 at 01:07:32PM +0100, Christoffer Dall wrote:
> > We are about to defer saving and restoring some groups of system
> > registers to vcpu_put and vcpu_load on supported systems.  This means
> > that we need some infrastructure to access system registes which
> > supports either accessing the memory backing of the register or directly
> > accessing the system registers, depending on the state of the system
> > when we access the register.
> > 
> > We do this by defining a set of read/write accessors for each system
> > register, and letting each system register be defined as "immediate" or
> > "deferrable".  Immediate registers are always saved/restored in the
> > world-switch path, but deferrable registers are only saved/restored in
> > vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
> > in that case.
> > 
> > Not that we don't use the deferred mechanism yet in this patch, but only
> > introduce infrastructure.  This is to improve convenience of review in
> > the subsequent patches where it is clear which registers become
> > deferred.
> 
> Might this table-driven approach result in a lot of branch mispredicts,
> particularly across load/put boundaries?
> 
> If we were to move the whole construct to a header, then it could get
> constant-folded at the call site down to the individual reg accessed,
> say:
> 
> 	if (sys_regs_loaded)
> 		read_sysreg_s(TPIDR_EL0);
> 	else
> 		__vcpu_sys_reg(v, TPIDR_EL0);
> 
> Where multiple regs are accessed close to each other, the compiler
> may be able to specialise the whole sequence for the loaded and !loaded
> cases so that there is only one conditional branch.
> 

That's an interesting thing to consider indeed.  I wasn't really sure
how to put this in a header file which wouldn't look overly bloated for
inclusion elsewhere, so we ended up with this.

I don't think the alternative suggestion that I discused with Julien on
this patch changes this much, but since you've had a look at this, I'm
curious which one of the two (lookup table vs. giant switch) you prefer?

> 
> The individual accessor functions also become unnecessary in this case,
> because we wouldn't need to derive function pointers from them any
> more.
> 
> I don't know how performance would compare in practice though.

I don't know either.  But I will say that the whole idea behind put/load
is that you do this rarely, and going to userspace from KVM is
notriously expensive, also on x86.

> 
> I'm also assuming that all calls to these accessors are const-foldable.
> If not, relying on inlining would bloat the generated code a lot.

We have places where this is not the cae, access_vm_reg() for example.
But if we really, really, wanted to, we could rewrite that to have a
function for each register, but that's pretty horrid on its own.

Thanks,
-Christoffer

> 
> > 
> >  [ Most of this logic was contributed by Marc Zyngier ]
> > 
> > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> >  arch/arm64/include/asm/kvm_host.h |   8 +-
> >  arch/arm64/kvm/sys_regs.c         | 160 ++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 166 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 91272c35cc36..4b5ef82f6bdb 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -281,6 +281,10 @@ struct kvm_vcpu_arch {
> >  
> >  	/* Detect first run of a vcpu */
> >  	bool has_run_once;
> > +
> > +	/* True when deferrable sysregs are loaded on the physical CPU,
> > +	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
> > +	bool sysregs_loaded_on_cpu;
> >  };
> >  
> >  #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
> > @@ -293,8 +297,8 @@ struct kvm_vcpu_arch {
> >   */
> >  #define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
> >  
> > -#define vcpu_read_sys_reg(v,r)	__vcpu_sys_reg(v,r)
> > -#define vcpu_write_sys_reg(v,r,n)	do { __vcpu_sys_reg(v,r) = n; } while (0)
> > +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
> > +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
> >  
> >  /*
> >   * CP14 and CP15 live in the same array, as they are backed by the
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 96398d53b462..9d353a6a55c9 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -35,6 +35,7 @@
> >  #include <asm/kvm_coproc.h>
> >  #include <asm/kvm_emulate.h>
> >  #include <asm/kvm_host.h>
> > +#include <asm/kvm_hyp.h>
> >  #include <asm/kvm_mmu.h>
> >  #include <asm/perf_event.h>
> >  #include <asm/sysreg.h>
> > @@ -76,6 +77,165 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
> >  	return false;
> >  }
> >  
> > +struct sys_reg_accessor {
> > +	u64	(*rdsr)(struct kvm_vcpu *, int);
> > +	void	(*wrsr)(struct kvm_vcpu *, int, u64);
> > +};
> > +
> > +#define DECLARE_IMMEDIATE_SR(i)						\
> > +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> > +	{								\
> > +		return __vcpu_sys_reg(vcpu, r);				\
> > +	}								\
> > +									\
> > +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> > +	{								\
> > +		__vcpu_sys_reg(vcpu, r) = v;				\
> > +	}								\
> > +
> > +#define DECLARE_DEFERRABLE_SR(i, s)					\
> > +	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
> > +	{								\
> > +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> > +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> > +			return read_sysreg_s((s));			\
> > +		}							\
> > +		return __vcpu_sys_reg(vcpu, r);				\
> > +	}								\
> > +									\
> > +	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
> > +	{								\
> > +		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
> > +			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
> > +			write_sysreg_s(v, (s));				\
> > +		} else {						\
> > +			__vcpu_sys_reg(vcpu, r) = v;			\
> > +		}							\
> > +	}								\
> > +
> > +
> > +#define SR_HANDLER_RANGE(i,e)						\
> > +	[i ... e] =  (struct sys_reg_accessor) {			\
> > +		.rdsr = __##i##_read,					\
> > +		.wrsr = __##i##_write,					\
> > +	}
> > +
> > +#define SR_HANDLER(i)	SR_HANDLER_RANGE(i, i)
> > +
> > +static void bad_sys_reg(int reg)
> > +{
> > +	WARN_ONCE(1, "Bad system register access %d\n", reg);
> > +}
> > +
> > +static u64 __default_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> > +{
> > +	bad_sys_reg(reg);
> > +	return 0;
> > +}
> > +
> > +static void __default_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> > +{
> > +	bad_sys_reg(reg);
> > +}
> > +
> > +/* Ordered as in enum vcpu_sysreg */
> > +DECLARE_IMMEDIATE_SR(MPIDR_EL1);
> > +DECLARE_IMMEDIATE_SR(CSSELR_EL1);
> > +DECLARE_IMMEDIATE_SR(SCTLR_EL1);
> > +DECLARE_IMMEDIATE_SR(ACTLR_EL1);
> > +DECLARE_IMMEDIATE_SR(CPACR_EL1);
> > +DECLARE_IMMEDIATE_SR(TTBR0_EL1);
> > +DECLARE_IMMEDIATE_SR(TTBR1_EL1);
> > +DECLARE_IMMEDIATE_SR(TCR_EL1);
> > +DECLARE_IMMEDIATE_SR(ESR_EL1);
> > +DECLARE_IMMEDIATE_SR(AFSR0_EL1);
> > +DECLARE_IMMEDIATE_SR(AFSR1_EL1);
> > +DECLARE_IMMEDIATE_SR(FAR_EL1);
> > +DECLARE_IMMEDIATE_SR(MAIR_EL1);
> > +DECLARE_IMMEDIATE_SR(VBAR_EL1);
> > +DECLARE_IMMEDIATE_SR(CONTEXTIDR_EL1);
> > +DECLARE_IMMEDIATE_SR(TPIDR_EL0);
> > +DECLARE_IMMEDIATE_SR(TPIDRRO_EL0);
> > +DECLARE_IMMEDIATE_SR(TPIDR_EL1);
> > +DECLARE_IMMEDIATE_SR(AMAIR_EL1);
> > +DECLARE_IMMEDIATE_SR(CNTKCTL_EL1);
> > +DECLARE_IMMEDIATE_SR(PAR_EL1);
> > +DECLARE_IMMEDIATE_SR(MDSCR_EL1);
> > +DECLARE_IMMEDIATE_SR(MDCCINT_EL1);
> > +DECLARE_IMMEDIATE_SR(PMCR_EL0);
> > +DECLARE_IMMEDIATE_SR(PMSELR_EL0);
> > +DECLARE_IMMEDIATE_SR(PMEVCNTR0_EL0);
> > +/* PMEVCNTR30_EL0 */
> > +DECLARE_IMMEDIATE_SR(PMCCNTR_EL0);
> > +DECLARE_IMMEDIATE_SR(PMEVTYPER0_EL0);
> > +/* PMEVTYPER30_EL0 */
> > +DECLARE_IMMEDIATE_SR(PMCCFILTR_EL0);
> > +DECLARE_IMMEDIATE_SR(PMCNTENSET_EL0);
> > +DECLARE_IMMEDIATE_SR(PMINTENSET_EL1);
> > +DECLARE_IMMEDIATE_SR(PMOVSSET_EL0);
> > +DECLARE_IMMEDIATE_SR(PMSWINC_EL0);
> > +DECLARE_IMMEDIATE_SR(PMUSERENR_EL0);
> > +DECLARE_IMMEDIATE_SR(DACR32_EL2);
> > +DECLARE_IMMEDIATE_SR(IFSR32_EL2);
> > +DECLARE_IMMEDIATE_SR(FPEXC32_EL2);
> > +DECLARE_IMMEDIATE_SR(DBGVCR32_EL2);
> > +
> > +static const struct sys_reg_accessor sys_reg_accessors[NR_SYS_REGS] = {
> > +	[0 ... NR_SYS_REGS - 1] = {
> > +		.rdsr = __default_read_sys_reg,
> > +		.wrsr = __default_write_sys_reg,
> > +	},
> > +
> > +	SR_HANDLER(MPIDR_EL1),
> > +	SR_HANDLER(CSSELR_EL1),
> > +	SR_HANDLER(SCTLR_EL1),
> > +	SR_HANDLER(ACTLR_EL1),
> > +	SR_HANDLER(CPACR_EL1),
> > +	SR_HANDLER(TTBR0_EL1),
> > +	SR_HANDLER(TTBR1_EL1),
> > +	SR_HANDLER(TCR_EL1),
> > +	SR_HANDLER(ESR_EL1),
> > +	SR_HANDLER(AFSR0_EL1),
> > +	SR_HANDLER(AFSR1_EL1),
> > +	SR_HANDLER(FAR_EL1),
> > +	SR_HANDLER(MAIR_EL1),
> > +	SR_HANDLER(VBAR_EL1),
> > +	SR_HANDLER(CONTEXTIDR_EL1),
> > +	SR_HANDLER(TPIDR_EL0),
> > +	SR_HANDLER(TPIDRRO_EL0),
> > +	SR_HANDLER(TPIDR_EL1),
> > +	SR_HANDLER(AMAIR_EL1),
> > +	SR_HANDLER(CNTKCTL_EL1),
> > +	SR_HANDLER(PAR_EL1),
> > +	SR_HANDLER(MDSCR_EL1),
> > +	SR_HANDLER(MDCCINT_EL1),
> > +	SR_HANDLER(PMCR_EL0),
> > +	SR_HANDLER(PMSELR_EL0),
> > +	SR_HANDLER_RANGE(PMEVCNTR0_EL0, PMEVCNTR30_EL0),
> > +	SR_HANDLER(PMCCNTR_EL0),
> > +	SR_HANDLER_RANGE(PMEVTYPER0_EL0, PMEVTYPER30_EL0),
> > +	SR_HANDLER(PMCCFILTR_EL0),
> > +	SR_HANDLER(PMCNTENSET_EL0),
> > +	SR_HANDLER(PMINTENSET_EL1),
> > +	SR_HANDLER(PMOVSSET_EL0),
> > +	SR_HANDLER(PMSWINC_EL0),
> > +	SR_HANDLER(PMUSERENR_EL0),
> > +	SR_HANDLER(DACR32_EL2),
> > +	SR_HANDLER(IFSR32_EL2),
> > +	SR_HANDLER(FPEXC32_EL2),
> > +	SR_HANDLER(DBGVCR32_EL2),
> > +};
> > +
> > +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> > +{
> > +	return sys_reg_accessors[reg].rdsr(vcpu, reg);
> > +}
> > +
> > +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> > +{
> > +	sys_reg_accessors[reg].wrsr(vcpu, reg, val);
> > +}
> > +
> >  /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
> >  static u32 cache_levels;
> >  
> > -- 
> > 2.14.2
> > 
> > _______________________________________________
> > kvmarm mailing list
> > kvmarm@lists.cs.columbia.edu
> > https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Dave Martin Feb. 9, 2018, 4:17 p.m. UTC | #6
On Thu, Jan 25, 2018 at 08:54:13PM +0100, Christoffer Dall wrote:
> On Tue, Jan 23, 2018 at 04:04:40PM +0000, Dave Martin wrote:
> > On Fri, Jan 12, 2018 at 01:07:32PM +0100, Christoffer Dall wrote:
> > > We are about to defer saving and restoring some groups of system
> > > registers to vcpu_put and vcpu_load on supported systems.  This means
> > > that we need some infrastructure to access system registes which
> > > supports either accessing the memory backing of the register or directly
> > > accessing the system registers, depending on the state of the system
> > > when we access the register.
> > > 
> > > We do this by defining a set of read/write accessors for each system
> > > register, and letting each system register be defined as "immediate" or
> > > "deferrable".  Immediate registers are always saved/restored in the
> > > world-switch path, but deferrable registers are only saved/restored in
> > > vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
> > > in that case.
> > > 
> > > Not that we don't use the deferred mechanism yet in this patch, but only
> > > introduce infrastructure.  This is to improve convenience of review in
> > > the subsequent patches where it is clear which registers become
> > > deferred.
> > 
> > Might this table-driven approach result in a lot of branch mispredicts,
> > particularly across load/put boundaries?
> > 
> > If we were to move the whole construct to a header, then it could get
> > constant-folded at the call site down to the individual reg accessed,
> > say:
> > 
> > 	if (sys_regs_loaded)
> > 		read_sysreg_s(TPIDR_EL0);
> > 	else
> > 		__vcpu_sys_reg(v, TPIDR_EL0);
> > 
> > Where multiple regs are accessed close to each other, the compiler
> > may be able to specialise the whole sequence for the loaded and !loaded
> > cases so that there is only one conditional branch.
> > 
> 
> That's an interesting thing to consider indeed.  I wasn't really sure
> how to put this in a header file which wouldn't look overly bloated for
> inclusion elsewhere, so we ended up with this.
> 
> I don't think the alternative suggestion that I discused with Julien on
> this patch changes this much, but since you've had a look at this, I'm
> curious which one of the two (lookup table vs. giant switch) you prefer?

The giant switch approach has the advantage that it is likely to be
folded down to a single case when the switch control expression is
const-foldable; the flipside is that when that fails the entire
switch would be inlined.

> > The individual accessor functions also become unnecessary in this case,
> > because we wouldn't need to derive function pointers from them any
> > more.
> > 
> > I don't know how performance would compare in practice though.
> 
> I don't know either.  But I will say that the whole idea behind put/load
> is that you do this rarely, and going to userspace from KVM is
> notriously expensive, also on x86.

I guess that makes sense.  I'm still a bit hazy on the big picture
for KVM.

> > I'm also assuming that all calls to these accessors are const-foldable.
> > If not, relying on inlining would bloat the generated code a lot.
> 
> We have places where this is not the cae, access_vm_reg() for example.
> But if we really, really, wanted to, we could rewrite that to have a
> function for each register, but that's pretty horrid on its own.

That might not be too bad if there is only one giant inline expansion
and the rest are folded down.


I guess this is something to revisit _if_ we suspect a performance
bottleneck later on.

For now, I was lacking some understanding regarding how this code gets
run, so I was guessing about potential issues rather then proven
issues.

As you might guess, I'm still at the "stupid questions" stage for
this series :)

[...]

Cheers
---Dave
Christoffer Dall Feb. 13, 2018, 8:55 a.m. UTC | #7
On Fri, Feb 09, 2018 at 04:17:39PM +0000, Dave Martin wrote:
> On Thu, Jan 25, 2018 at 08:54:13PM +0100, Christoffer Dall wrote:
> > On Tue, Jan 23, 2018 at 04:04:40PM +0000, Dave Martin wrote:
> > > On Fri, Jan 12, 2018 at 01:07:32PM +0100, Christoffer Dall wrote:
> > > > We are about to defer saving and restoring some groups of system
> > > > registers to vcpu_put and vcpu_load on supported systems.  This means
> > > > that we need some infrastructure to access system registes which
> > > > supports either accessing the memory backing of the register or directly
> > > > accessing the system registers, depending on the state of the system
> > > > when we access the register.
> > > > 
> > > > We do this by defining a set of read/write accessors for each system
> > > > register, and letting each system register be defined as "immediate" or
> > > > "deferrable".  Immediate registers are always saved/restored in the
> > > > world-switch path, but deferrable registers are only saved/restored in
> > > > vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set
> > > > in that case.
> > > > 
> > > > Not that we don't use the deferred mechanism yet in this patch, but only
> > > > introduce infrastructure.  This is to improve convenience of review in
> > > > the subsequent patches where it is clear which registers become
> > > > deferred.
> > > 
> > > Might this table-driven approach result in a lot of branch mispredicts,
> > > particularly across load/put boundaries?
> > > 
> > > If we were to move the whole construct to a header, then it could get
> > > constant-folded at the call site down to the individual reg accessed,
> > > say:
> > > 
> > > 	if (sys_regs_loaded)
> > > 		read_sysreg_s(TPIDR_EL0);
> > > 	else
> > > 		__vcpu_sys_reg(v, TPIDR_EL0);
> > > 
> > > Where multiple regs are accessed close to each other, the compiler
> > > may be able to specialise the whole sequence for the loaded and !loaded
> > > cases so that there is only one conditional branch.
> > > 
> > 
> > That's an interesting thing to consider indeed.  I wasn't really sure
> > how to put this in a header file which wouldn't look overly bloated for
> > inclusion elsewhere, so we ended up with this.
> > 
> > I don't think the alternative suggestion that I discused with Julien on
> > this patch changes this much, but since you've had a look at this, I'm
> > curious which one of the two (lookup table vs. giant switch) you prefer?
> 
> The giant switch approach has the advantage that it is likely to be
> folded down to a single case when the switch control expression is
> const-foldable; the flipside is that when that fails the entire
> switch would be inlined.
> 
> > > The individual accessor functions also become unnecessary in this case,
> > > because we wouldn't need to derive function pointers from them any
> > > more.
> > > 
> > > I don't know how performance would compare in practice though.
> > 
> > I don't know either.  But I will say that the whole idea behind put/load
> > is that you do this rarely, and going to userspace from KVM is
> > notriously expensive, also on x86.
> 
> I guess that makes sense.  I'm still a bit hazy on the big picture
> for KVM.
> 
> > > I'm also assuming that all calls to these accessors are const-foldable.
> > > If not, relying on inlining would bloat the generated code a lot.
> > 
> > We have places where this is not the cae, access_vm_reg() for example.
> > But if we really, really, wanted to, we could rewrite that to have a
> > function for each register, but that's pretty horrid on its own.
> 
> That might not be too bad if there is only one giant inline expansion
> and the rest are folded down.
> 
> 
> I guess this is something to revisit _if_ we suspect a performance
> bottleneck later on.
> 
> For now, I was lacking some understanding regarding how this code gets
> run, so I was guessing about potential issues rather then proven
> issues.
> 

This was a very useful discussion.  I think I'll change this to a big
switch statement in the header file using a static inline, because it
makes the code more readable, and if we notice a huge code size
explosion, we can take measures to make sure things are const-foldable.


> As you might guess, I'm still at the "stupid questions" stage for
> this series :)
> 
Not at all.

Thanks,
-Christoffer
Dave Martin Feb. 13, 2018, 2:27 p.m. UTC | #8
On Tue, Feb 13, 2018 at 09:55:02AM +0100, Christoffer Dall wrote:
> On Fri, Feb 09, 2018 at 04:17:39PM +0000, Dave Martin wrote:
> > On Thu, Jan 25, 2018 at 08:54:13PM +0100, Christoffer Dall wrote:
> > > On Tue, Jan 23, 2018 at 04:04:40PM +0000, Dave Martin wrote:

[...]

> > > > The individual accessor functions also become unnecessary in this case,
> > > > because we wouldn't need to derive function pointers from them any
> > > > more.
> > > > 
> > > > I don't know how performance would compare in practice though.
> > > 
> > > I don't know either.  But I will say that the whole idea behind put/load
> > > is that you do this rarely, and going to userspace from KVM is
> > > notriously expensive, also on x86.
> > 
> > I guess that makes sense.  I'm still a bit hazy on the big picture
> > for KVM.
> > 
> > > > I'm also assuming that all calls to these accessors are const-foldable.
> > > > If not, relying on inlining would bloat the generated code a lot.
> > > 
> > > We have places where this is not the cae, access_vm_reg() for example.
> > > But if we really, really, wanted to, we could rewrite that to have a
> > > function for each register, but that's pretty horrid on its own.
> > 
> > That might not be too bad if there is only one giant inline expansion
> > and the rest are folded down.
> > 
> > 
> > I guess this is something to revisit _if_ we suspect a performance
> > bottleneck later on.
> > 
> > For now, I was lacking some understanding regarding how this code gets
> > run, so I was guessing about potential issues rather then proven
> > issues.
> > 
> 
> This was a very useful discussion.  I think I'll change this to a big
> switch statement in the header file using a static inline, because it
> makes the code more readable, and if we notice a huge code size
> explosion, we can take measures to make sure things are const-foldable.

Sure, that sounds reasonable.

C99 inline semantics allow a single out-of-line body to be linked in
somewhere for when the function isn't inlined, so we might be able to
mitigate the bloat that way if it's a problem... unless the compiler
flags sabotage it (I remember GCC traditionally does something a bit
different where there's a particular difference between "inline" and
"extern inline".)

> > As you might guess, I'm still at the "stupid questions" stage for
> > this series :)
>
> Not at all.

Hmmm, I must try to be more stupid when I look at the other
patches...

Cheers
---Dave

Patch
diff mbox

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 91272c35cc36..4b5ef82f6bdb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -281,6 +281,10 @@  struct kvm_vcpu_arch {
 
 	/* Detect first run of a vcpu */
 	bool has_run_once;
+
+	/* True when deferrable sysregs are loaded on the physical CPU,
+	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+	bool sysregs_loaded_on_cpu;
 };
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
@@ -293,8 +297,8 @@  struct kvm_vcpu_arch {
  */
 #define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
 
-#define vcpu_read_sys_reg(v,r)	__vcpu_sys_reg(v,r)
-#define vcpu_write_sys_reg(v,r,n)	do { __vcpu_sys_reg(v,r) = n; } while (0)
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
 
 /*
  * CP14 and CP15 live in the same array, as they are backed by the
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 96398d53b462..9d353a6a55c9 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -35,6 +35,7 @@ 
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 #include <asm/perf_event.h>
 #include <asm/sysreg.h>
@@ -76,6 +77,165 @@  static bool write_to_read_only(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+struct sys_reg_accessor {
+	u64	(*rdsr)(struct kvm_vcpu *, int);
+	void	(*wrsr)(struct kvm_vcpu *, int, u64);
+};
+
+#define DECLARE_IMMEDIATE_SR(i)						\
+	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
+	{								\
+		return __vcpu_sys_reg(vcpu, r);				\
+	}								\
+									\
+	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
+	{								\
+		__vcpu_sys_reg(vcpu, r) = v;				\
+	}								\
+
+#define DECLARE_DEFERRABLE_SR(i, s)					\
+	static u64 __##i##_read(struct kvm_vcpu *vcpu, int r)		\
+	{								\
+		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
+			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
+			return read_sysreg_s((s));			\
+		}							\
+		return __vcpu_sys_reg(vcpu, r);				\
+	}								\
+									\
+	static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v)	\
+	{								\
+		if (vcpu->arch.sysregs_loaded_on_cpu) {			\
+			WARN_ON(kvm_arm_get_running_vcpu() != vcpu);	\
+			write_sysreg_s(v, (s));				\
+		} else {						\
+			__vcpu_sys_reg(vcpu, r) = v;			\
+		}							\
+	}								\
+
+
+#define SR_HANDLER_RANGE(i,e)						\
+	[i ... e] =  (struct sys_reg_accessor) {			\
+		.rdsr = __##i##_read,					\
+		.wrsr = __##i##_write,					\
+	}
+
+#define SR_HANDLER(i)	SR_HANDLER_RANGE(i, i)
+
+static void bad_sys_reg(int reg)
+{
+	WARN_ONCE(1, "Bad system register access %d\n", reg);
+}
+
+static u64 __default_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+	bad_sys_reg(reg);
+	return 0;
+}
+
+static void __default_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
+{
+	bad_sys_reg(reg);
+}
+
+/* Ordered as in enum vcpu_sysreg */
+DECLARE_IMMEDIATE_SR(MPIDR_EL1);
+DECLARE_IMMEDIATE_SR(CSSELR_EL1);
+DECLARE_IMMEDIATE_SR(SCTLR_EL1);
+DECLARE_IMMEDIATE_SR(ACTLR_EL1);
+DECLARE_IMMEDIATE_SR(CPACR_EL1);
+DECLARE_IMMEDIATE_SR(TTBR0_EL1);
+DECLARE_IMMEDIATE_SR(TTBR1_EL1);
+DECLARE_IMMEDIATE_SR(TCR_EL1);
+DECLARE_IMMEDIATE_SR(ESR_EL1);
+DECLARE_IMMEDIATE_SR(AFSR0_EL1);
+DECLARE_IMMEDIATE_SR(AFSR1_EL1);
+DECLARE_IMMEDIATE_SR(FAR_EL1);
+DECLARE_IMMEDIATE_SR(MAIR_EL1);
+DECLARE_IMMEDIATE_SR(VBAR_EL1);
+DECLARE_IMMEDIATE_SR(CONTEXTIDR_EL1);
+DECLARE_IMMEDIATE_SR(TPIDR_EL0);
+DECLARE_IMMEDIATE_SR(TPIDRRO_EL0);
+DECLARE_IMMEDIATE_SR(TPIDR_EL1);
+DECLARE_IMMEDIATE_SR(AMAIR_EL1);
+DECLARE_IMMEDIATE_SR(CNTKCTL_EL1);
+DECLARE_IMMEDIATE_SR(PAR_EL1);
+DECLARE_IMMEDIATE_SR(MDSCR_EL1);
+DECLARE_IMMEDIATE_SR(MDCCINT_EL1);
+DECLARE_IMMEDIATE_SR(PMCR_EL0);
+DECLARE_IMMEDIATE_SR(PMSELR_EL0);
+DECLARE_IMMEDIATE_SR(PMEVCNTR0_EL0);
+/* PMEVCNTR30_EL0 */
+DECLARE_IMMEDIATE_SR(PMCCNTR_EL0);
+DECLARE_IMMEDIATE_SR(PMEVTYPER0_EL0);
+/* PMEVTYPER30_EL0 */
+DECLARE_IMMEDIATE_SR(PMCCFILTR_EL0);
+DECLARE_IMMEDIATE_SR(PMCNTENSET_EL0);
+DECLARE_IMMEDIATE_SR(PMINTENSET_EL1);
+DECLARE_IMMEDIATE_SR(PMOVSSET_EL0);
+DECLARE_IMMEDIATE_SR(PMSWINC_EL0);
+DECLARE_IMMEDIATE_SR(PMUSERENR_EL0);
+DECLARE_IMMEDIATE_SR(DACR32_EL2);
+DECLARE_IMMEDIATE_SR(IFSR32_EL2);
+DECLARE_IMMEDIATE_SR(FPEXC32_EL2);
+DECLARE_IMMEDIATE_SR(DBGVCR32_EL2);
+
+static const struct sys_reg_accessor sys_reg_accessors[NR_SYS_REGS] = {
+	[0 ... NR_SYS_REGS - 1] = {
+		.rdsr = __default_read_sys_reg,
+		.wrsr = __default_write_sys_reg,
+	},
+
+	SR_HANDLER(MPIDR_EL1),
+	SR_HANDLER(CSSELR_EL1),
+	SR_HANDLER(SCTLR_EL1),
+	SR_HANDLER(ACTLR_EL1),
+	SR_HANDLER(CPACR_EL1),
+	SR_HANDLER(TTBR0_EL1),
+	SR_HANDLER(TTBR1_EL1),
+	SR_HANDLER(TCR_EL1),
+	SR_HANDLER(ESR_EL1),
+	SR_HANDLER(AFSR0_EL1),
+	SR_HANDLER(AFSR1_EL1),
+	SR_HANDLER(FAR_EL1),
+	SR_HANDLER(MAIR_EL1),
+	SR_HANDLER(VBAR_EL1),
+	SR_HANDLER(CONTEXTIDR_EL1),
+	SR_HANDLER(TPIDR_EL0),
+	SR_HANDLER(TPIDRRO_EL0),
+	SR_HANDLER(TPIDR_EL1),
+	SR_HANDLER(AMAIR_EL1),
+	SR_HANDLER(CNTKCTL_EL1),
+	SR_HANDLER(PAR_EL1),
+	SR_HANDLER(MDSCR_EL1),
+	SR_HANDLER(MDCCINT_EL1),
+	SR_HANDLER(PMCR_EL0),
+	SR_HANDLER(PMSELR_EL0),
+	SR_HANDLER_RANGE(PMEVCNTR0_EL0, PMEVCNTR30_EL0),
+	SR_HANDLER(PMCCNTR_EL0),
+	SR_HANDLER_RANGE(PMEVTYPER0_EL0, PMEVTYPER30_EL0),
+	SR_HANDLER(PMCCFILTR_EL0),
+	SR_HANDLER(PMCNTENSET_EL0),
+	SR_HANDLER(PMINTENSET_EL1),
+	SR_HANDLER(PMOVSSET_EL0),
+	SR_HANDLER(PMSWINC_EL0),
+	SR_HANDLER(PMUSERENR_EL0),
+	SR_HANDLER(DACR32_EL2),
+	SR_HANDLER(IFSR32_EL2),
+	SR_HANDLER(FPEXC32_EL2),
+	SR_HANDLER(DBGVCR32_EL2),
+};
+
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+	return sys_reg_accessors[reg].rdsr(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
+{
+	sys_reg_accessors[reg].wrsr(vcpu, reg, val);
+}
+
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
 static u32 cache_levels;