
[v8,2/2] x86/kvm: use __bss_decrypted attribute in shared variables

Message ID 1536875471-17391-3-git-send-email-brijesh.singh@amd.com (mailing list archive)
State New, archived
Series x86: Fix SEV guest regression

Commit Message

Brijesh Singh Sept. 13, 2018, 9:51 p.m. UTC
The recent removal of the memblock dependency from kvmclock caused a SEV
guest regression because the wall_clock and hv_clock_boot variables are
no longer mapped decrypted when SEV is active.

Use the __bss_decrypted attribute to put the static wall_clock and
hv_clock_boot in the .bss..decrypted section so that they are mapped
decrypted during boot.
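
For reference, the attribute itself comes from patch 1/2 of this series; a
minimal sketch of the idea (the exact definition and the matching linker
script and early-boot changes live in that patch) is:

	/*
	 * Assumption: roughly how patch 1/2 defines the attribute. Objects in
	 * .bss..decrypted land in a page-aligned region that early boot code
	 * maps with the encryption bit cleared when SEV is active.
	 */
	#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))

	static struct pvclock_wall_clock wall_clock __bss_decrypted;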

In the preparatory stage of CPU hotplug, the per-cpu pvclock data pointer
is assigned either an element of the static array or dynamically allocated
memory. The static array is now mapped decrypted, but the dynamically
allocated memory is not, even though this memory range must be mapped
decrypted when SEV is active.

Add a function, called after the page allocator is up, which allocates
memory for the pvclock data pointers of all possible CPUs and maps that
memory range decrypted when SEV is active.
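
The decrypted mapping for the dynamically allocated part relies on
set_memory_decrypted(), declared in <linux/set_memory.h> (which the patch
below includes); a sketch of the call contract, for context:

	/*
	 * Clears the SME/SEV encryption bit in the kernel page tables for
	 * @numpages pages starting at @addr, so the hypervisor can access
	 * the range. Returns 0 on success, non-zero on failure.
	 */
	int set_memory_decrypted(unsigned long addr, int numpages);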

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: kvm@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-kernel@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: "Radim Krčmář" <rkrcmar@redhat.com>
---
 arch/x86/kernel/kvmclock.c | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

Comments

Thomas Gleixner Sept. 13, 2018, 11:31 p.m. UTC | #1
On Thu, 13 Sep 2018, Brijesh Singh wrote:
>  
> +static void __init kvmclock_init_mem(void)
> +{
> +	unsigned int ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
> +	unsigned int order = get_order(ncpus * sizeof(*hvclock_mem));
> +	struct page *p;
> +	int r;
> +
> +	p = alloc_pages(GFP_KERNEL, order);
> +	if (p) {

If you make this:

	if (!p) {
		pr_warn();
		return;
	}

then you spare one indentation level and give useful information.

> +		hvclock_mem = page_address(p);
> +
> +		/*
> +		 * hvclock is shared between the guest and the hypervisor, must
> +		 * be mapped decrypted.
> +		 */
> +		if (sev_active()) {
> +			r = set_memory_decrypted((unsigned long) hvclock_mem,
> +						 1UL << order);
> +			if (r) {
> +				__free_pages(p, order);
> +				hvclock_mem = NULL;

This wants a pr_warn() as well, please.
  
> +				return;
> +			}
> +		}
> +
> +		memset(hvclock_mem, 0, PAGE_SIZE << order);
> +	}
> +}

Other than that, this looks really reasonable and way more palatable in the
rc stage.

Thanks,

	tglx
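
Folding both review comments into the posted function gives a sketch along
these lines (not the code as submitted; the pr_warn() wording here is
illustrative):

	static void __init kvmclock_init_mem(void)
	{
		unsigned int ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
		unsigned int order = get_order(ncpus * sizeof(*hvclock_mem));
		struct page *p;
		int r;

		p = alloc_pages(GFP_KERNEL, order);
		if (!p) {
			pr_warn("%s: failed to allocate %u pages\n", __func__,
				1U << order);
			return;
		}

		hvclock_mem = page_address(p);

		/*
		 * hvclock is shared between the guest and the hypervisor, must
		 * be mapped decrypted.
		 */
		if (sev_active()) {
			r = set_memory_decrypted((unsigned long) hvclock_mem,
						 1UL << order);
			if (r) {
				pr_warn("%s: set_memory_decrypted() failed\n",
					__func__);
				__free_pages(p, order);
				hvclock_mem = NULL;
				return;
			}
		}

		memset(hvclock_mem, 0, PAGE_SIZE << order);
	}
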
kernel test robot Sept. 14, 2018, 12:25 a.m. UTC | #2
Hi Brijesh,

I love your patch! Perhaps something to improve:

[auto build test WARNING on tip/x86/core]
[also build test WARNING on v4.19-rc3 next-20180913]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Brijesh-Singh/x86-mm-add-bss-decrypted-section-to-hold-shared-variables/20180914-080110
config: x86_64-randconfig-x019-201836 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/cpumask.h:5:0,
                    from arch/x86/include/asm/msr.h:11,
                    from arch/x86/include/asm/processor.h:21,
                    from arch/x86/include/asm/cpufeature.h:5,
                    from arch/x86/include/asm/thread_info.h:53,
                    from include/linux/thread_info.h:38,
                    from arch/x86/include/asm/preempt.h:7,
                    from include/linux/preempt.h:81,
                    from include/linux/spinlock.h:51,
                    from include/linux/seqlock.h:36,
                    from include/linux/time.h:6,
                    from include/uapi/linux/timex.h:56,
                    from include/linux/timex.h:56,
                    from include/linux/clocksource.h:13,
                    from arch/x86/kernel/kvmclock.c:19:
   arch/x86/kernel/kvmclock.c: In function 'kvmclock_init_mem':
>> include/linux/cpumask.h:109:29: warning: large integer implicitly truncated to unsigned type [-Woverflow]
    #define num_possible_cpus() 1U
                                ^
>> arch/x86/kernel/kvmclock.c:243:23: note: in expansion of macro 'num_possible_cpus'
     unsigned int ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
                          ^~~~~~~~~~~~~~~~~
--
   In file included from arch/x86/include/asm/cpumask.h:5:0,
                    from arch/x86/include/asm/msr.h:11,
                    from arch/x86/include/asm/processor.h:21,
                    from arch/x86/include/asm/cpufeature.h:5,
                    from arch/x86/include/asm/thread_info.h:53,
                    from include/linux/thread_info.h:38,
                    from arch/x86/include/asm/preempt.h:7,
                    from include/linux/preempt.h:81,
                    from include/linux/spinlock.h:51,
                    from include/linux/seqlock.h:36,
                    from include/linux/time.h:6,
                    from include/uapi/linux/timex.h:56,
                    from include/linux/timex.h:56,
                    from include/linux/clocksource.h:13,
                    from arch/x86//kernel/kvmclock.c:19:
   arch/x86//kernel/kvmclock.c: In function 'kvmclock_init_mem':
>> include/linux/cpumask.h:109:29: warning: large integer implicitly truncated to unsigned type [-Woverflow]
    #define num_possible_cpus() 1U
                                ^
   arch/x86//kernel/kvmclock.c:243:23: note: in expansion of macro 'num_possible_cpus'
     unsigned int ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
                          ^~~~~~~~~~~~~~~~~

vim +/num_possible_cpus +243 arch/x86/kernel/kvmclock.c

   240	
   241	static void __init kvmclock_init_mem(void)
   242	{
 > 243		unsigned int ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
   244		unsigned int order = get_order(ncpus * sizeof(*hvclock_mem));
   245		struct page *p;
   246		int r;
   247	
   248		p = alloc_pages(GFP_KERNEL, order);
   249		if (p) {
   250			hvclock_mem = page_address(p);
   251	
   252			/*
   253			 * hvclock is shared between the guest and the hypervisor, must
   254			 * be mapped decrypted.
   255			 */
   256			if (sev_active()) {
   257				r = set_memory_decrypted((unsigned long) hvclock_mem,
   258							 1UL << order);
   259				if (r) {
   260					__free_pages(p, order);
   261					hvclock_mem = NULL;
   262					return;
   263				}
   264			}
   265	
   266			memset(hvclock_mem, 0, PAGE_SIZE << order);
   267		}
   268	}
   269	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
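
The -Woverflow warning comes from !CONFIG_SMP configurations: there,
num_possible_cpus() is the compile-time constant 1U, while
HVC_BOOT_ARRAY_SIZE (PAGE_SIZE divided by the size of
struct pvclock_vsyscall_time_info) is larger, so the subtraction wraps
around to a huge unsigned value. A guard along these lines (a sketch only,
not part of the patch as posted) avoids both the warning and a bogus
allocation size:

	/*
	 * Sketch: nothing to allocate when the static boot array already
	 * covers every possible CPU (always true on !CONFIG_SMP).
	 */
	if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
		return;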

Patch

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index a36b93a..84f29f1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@ 
 #include <linux/sched/clock.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/set_memory.h>
 
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@  early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 	(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
 
 static struct pvclock_vsyscall_time_info
-			hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+			hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __bss_decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+static struct pvclock_vsyscall_time_info *hvclock_mem;
 
 static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
@@ -236,6 +238,35 @@  static void kvm_shutdown(void)
 	native_machine_shutdown();
 }
 
+static void __init kvmclock_init_mem(void)
+{
+	unsigned int ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
+	unsigned int order = get_order(ncpus * sizeof(*hvclock_mem));
+	struct page *p;
+	int r;
+
+	p = alloc_pages(GFP_KERNEL, order);
+	if (p) {
+		hvclock_mem = page_address(p);
+
+		/*
+		 * hvclock is shared between the guest and the hypervisor, must
+		 * be mapped decrypted.
+		 */
+		if (sev_active()) {
+			r = set_memory_decrypted((unsigned long) hvclock_mem,
+						 1UL << order);
+			if (r) {
+				__free_pages(p, order);
+				hvclock_mem = NULL;
+				return;
+			}
+		}
+
+		memset(hvclock_mem, 0, PAGE_SIZE << order);
+	}
+}
+
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
 #ifdef CONFIG_X86_64
@@ -250,6 +281,9 @@  static int __init kvm_setup_vsyscall_timeinfo(void)
 
 	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif
+
+	kvmclock_init_mem();
+
 	return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +303,10 @@  static int kvmclock_setup_percpu(unsigned int cpu)
 	/* Use the static page for the first CPUs, allocate otherwise */
 	if (cpu < HVC_BOOT_ARRAY_SIZE)
 		p = &hv_clock_boot[cpu];
+	else if (hvclock_mem)
+		p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
 	else
-		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		return -ENOMEM;
 
 	per_cpu(hv_clock_per_cpu, cpu) = p;
 	return p ? 0 : -ENOMEM;