diff mbox

[v1,02/12] xen/hvmlite: Factor out common kernel init code

Message ID 1453498558-6028-3-git-send-email-boris.ostrovsky@oracle.com
State New, archived
Headers show

Commit Message

Boris Ostrovsky Jan. 22, 2016, 9:35 p.m. UTC
HVMlite guests (to be introduced in subsequent patches) share most
of the kernel initialization code with PV(H).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/enlighten.c |  225 ++++++++++++++++++++++++----------------------
 1 files changed, 119 insertions(+), 106 deletions(-)

Comments

Luis Chamberlain Jan. 22, 2016, 11:01 p.m. UTC | #1
On Fri, Jan 22, 2016 at 04:35:48PM -0500, Boris Ostrovsky wrote:
> HVMlite guests (to be introduced in subsequent patches) share most
> of the kernel initialization code with PV(H).
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
>  arch/x86/xen/enlighten.c |  225 ++++++++++++++++++++++++----------------------
>  1 files changed, 119 insertions(+), 106 deletions(-)
> 
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index d09e4c9..2cf446a 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1505,19 +1505,14 @@ static void __init xen_pvh_early_guest_init(void)
>  }
>  #endif    /* CONFIG_XEN_PVH */
>  
> -/* First C function to be called on Xen boot */
> -asmlinkage __visible void __init xen_start_kernel(void)
> +
> +static void __init xen_init_kernel(void)
>  {
>  	struct physdev_set_iopl set_iopl;
>  	unsigned long initrd_start = 0;
>  	u64 pat;
>  	int rc;
>  
> -	if (!xen_start_info)
> -		return;
> -
> -	xen_domain_type = XEN_PV_DOMAIN;
> -
>  	xen_setup_features();
>  #ifdef CONFIG_XEN_PVH
>  	xen_pvh_early_guest_init();
> @@ -1529,48 +1524,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
>  	if (xen_initial_domain())
>  		pv_info.features |= PV_SUPPORTED_RTC;
>  	pv_init_ops = xen_init_ops;
> -	if (!xen_pvh_domain()) {
> -		pv_cpu_ops = xen_cpu_ops;
> -
> -		x86_platform.get_nmi_reason = xen_get_nmi_reason;
> -	}
> -
> -	if (xen_feature(XENFEAT_auto_translated_physmap))
> -		x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
> -	else
> -		x86_init.resources.memory_setup = xen_memory_setup;
> -	x86_init.oem.arch_setup = xen_arch_setup;
> -	x86_init.oem.banner = xen_banner;
>  
>  	xen_init_time_ops();
>  
> -	/*
> -	 * Set up some pagetable state before starting to set any ptes.
> -	 */
> -
> -	xen_init_mmu_ops();
> -
> -	/* Prevent unwanted bits from being set in PTEs. */
> -	__supported_pte_mask &= ~_PAGE_GLOBAL;
> -
> -	/*
> -	 * Prevent page tables from being allocated in highmem, even
> -	 * if CONFIG_HIGHPTE is enabled.
> -	 */
> -	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
> -
> -	/* Work out if we support NX */
> -	x86_configure_nx();
> -
> -	/* Get mfn list */
> -	xen_build_dynamic_phys_to_machine();
> -
> -	/*
> -	 * Set up kernel GDT and segment registers, mainly so that
> -	 * -fstack-protector code can be executed.
> -	 */
> -	xen_setup_gdt(0);
> -
>  	xen_init_irq_ops();
>  	xen_init_cpuid_mask();
>  
> @@ -1580,21 +1536,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>  	 */
>  	xen_init_apic();
>  #endif
> -
> -	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
> -		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
> -		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
> -	}
> -
>  	machine_ops = xen_machine_ops;
>  
> -	/*
> -	 * The only reliable way to retain the initial address of the
> -	 * percpu gdt_page is to remember it here, so we can go and
> -	 * mark it RW later, when the initial percpu area is freed.
> -	 */
> -	xen_initial_gdt = &per_cpu(gdt_page, 0);
> -
>  	xen_smp_init();
>  
>  #ifdef CONFIG_ACPI_NUMA
> @@ -1609,66 +1552,126 @@ asmlinkage __visible void __init xen_start_kernel(void)
>  	   possible map and a non-dummy shared_info. */
>  	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
>  
> -	local_irq_disable();
> -	early_boot_irqs_disabled = true;
> +	if (!xen_hvm_domain()) {
> +		if (!xen_pvh_domain()) {
> +			pv_cpu_ops = xen_cpu_ops;
>  
> -	xen_raw_console_write("mapping kernel into physical memory\n");
> -	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
> -				   xen_start_info->nr_pages);
> -	xen_reserve_special_pages();
> +			x86_platform.get_nmi_reason = xen_get_nmi_reason;
> +		}
>  
> -	/*
> -	 * Modify the cache mode translation tables to match Xen's PAT
> -	 * configuration.
> -	 */
> -	rdmsrl(MSR_IA32_CR_PAT, pat);
> -	pat_init_cache_modes(pat);
> +		if (xen_feature(XENFEAT_auto_translated_physmap))
> +			x86_init.resources.memory_setup =
> +				xen_auto_xlated_memory_setup;
> +		else
> +			x86_init.resources.memory_setup = xen_memory_setup;
> +		x86_init.oem.arch_setup = xen_arch_setup;
> +		x86_init.oem.banner = xen_banner;
>  
> -	/* keep using Xen gdt for now; no urgent need to change it */
> +		/*
> +		 * Set up some pagetable state before starting to set any ptes.
> +		 */
>  
> -#ifdef CONFIG_X86_32
> -	pv_info.kernel_rpl = 1;
> -	if (xen_feature(XENFEAT_supervisor_mode_kernel))
> -		pv_info.kernel_rpl = 0;
> -#else
> -	pv_info.kernel_rpl = 0;
> -#endif
> -	/* set the limit of our address space */
> -	xen_reserve_top();
> +		xen_init_mmu_ops();
> +
> +		/* Prevent unwanted bits from being set in PTEs. */
> +		__supported_pte_mask &= ~_PAGE_GLOBAL;
>  
> -	/* PVH: runs at default kernel iopl of 0 */
> -	if (!xen_pvh_domain()) {
>  		/*
> -		 * We used to do this in xen_arch_setup, but that is too late
> -		 * on AMD were early_cpu_init (run before ->arch_setup()) calls
> -		 * early_amd_init which pokes 0xcf8 port.
> +		 * Prevent page tables from being allocated in highmem, even
> +		 * if CONFIG_HIGHPTE is enabled.
>  		 */
> -		set_iopl.iopl = 1;
> -		rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
> -		if (rc != 0)
> -			xen_raw_printk("physdev_op failed %d\n", rc);
> -	}
> +		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
> +
> +		/* Work out if we support NX */
> +		x86_configure_nx();
> +
> +		/* Get mfn list */
> +		xen_build_dynamic_phys_to_machine();
> +
> +		/*
> +		 * Set up kernel GDT and segment registers, mainly so that
> +		 * -fstack-protector code can be executed.
> +		 */
> +		xen_setup_gdt(0);
> +
> +		if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
> +			pv_mmu_ops.ptep_modify_prot_start =
> +				xen_ptep_modify_prot_start;
> +			pv_mmu_ops.ptep_modify_prot_commit =
> +				xen_ptep_modify_prot_commit;
> +		}
> +
> +		/*
> +		 * The only reliable way to retain the initial address of the
> +		 * percpu gdt_page is to remember it here, so we can go and
> +		 * mark it RW later, when the initial percpu area is freed.
> +		 */
> +		xen_initial_gdt = &per_cpu(gdt_page, 0);
> +
> +		xen_raw_console_write("mapping kernel into physical memory\n");
> +		xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
> +					   xen_start_info->nr_pages);
> +		xen_reserve_special_pages();
> +
> +		/*
> +		 * Modify the cache mode translation tables to match Xen's PAT
> +		 * configuration.
> +		 */
> +		rdmsrl(MSR_IA32_CR_PAT, pat);
> +		pat_init_cache_modes(pat);
> +
> +		/* keep using Xen gdt for now; no urgent need to change it */
>  
>  #ifdef CONFIG_X86_32
> -	/* set up basic CPUID stuff */
> -	cpu_detect(&new_cpu_data);
> -	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
> -	new_cpu_data.wp_works_ok = 1;
> -	new_cpu_data.x86_capability[0] = cpuid_edx(1);
> +		pv_info.kernel_rpl = 1;
> +		if (xen_feature(XENFEAT_supervisor_mode_kernel))
> +			pv_info.kernel_rpl = 0;
> +#else
> +		pv_info.kernel_rpl = 0;
> +#endif
> +		/* set the limit of our address space */
> +		xen_reserve_top();
> +
> +		/* PVH: runs at default kernel iopl of 0 */
> +		if (!xen_pvh_domain()) {
> +			/*
> +			 * We used to do this in xen_arch_setup, but that is
> +			 * too late on AMD were early_cpu_init (run before
> +			 * ->arch_setup()) calls early_amd_init which pokes
> +			 * 0xcf8 port.
> +			 */
> +			set_iopl.iopl = 1;
> +			rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl,
> +						   &set_iopl);
> +			if (rc != 0)
> +				xen_raw_printk("physdev_op failed %d\n", rc);
> +		}
> +
> +#ifdef CONFIG_X86_32
> +		/* set up basic CPUID stuff */
> +		cpu_detect(&new_cpu_data);
> +		set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
> +		new_cpu_data.wp_works_ok = 1;
> +		new_cpu_data.x86_capability[0] = cpuid_edx(1);
>  #endif

Whoa, I'm lost, its hard for me to tell what exactly stayed and what
got pulled into a helper, etc. Is there a possibility to split this
patch in 2 somehow to make the actual functional changes easier to
read? There are too many changes here and I just can't tell easily
what's going on.

  Luis
Boris Ostrovsky Jan. 22, 2016, 11:12 p.m. UTC | #2
On 01/22/2016 06:01 PM, Luis R. Rodriguez wrote:
> On Fri, Jan 22, 2016 at 04:35:48PM -0500, Boris Ostrovsky wrote:
>> HVMlite guests (to be introduced in subsequent patches) share most
>> of the kernel initialization code with PV(H).
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> ---
>>   arch/x86/xen/enlighten.c |  225 ++++++++++++++++++++++++----------------------
>>   1 files changed, 119 insertions(+), 106 deletions(-)
>>
>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> index d09e4c9..2cf446a 100644
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
> Whoa, I'm lost, its hard for me to tell what exactly stayed and what
> got pulled into a helper, etc. Is there a possibility to split this
> patch in 2 somehow to make the actual functional changes easier to
> read? There are too many changes here and I just can't tell easily
> what's going on.


The only real changes that this patch introduces is it reorders some of 
the operations that used to be in xen_start_kernel(). This is done so 
that in the next patch when we add hvmlite we can easily put those 
specific to PV(H) inside 'if (!xen_hvm_domain())'. I probably should 
have said so in the commit message.

It is indeed difficult to review but I don't see how I can split this. 
Even if I just moved it (without reordering) it would still be hard to read.

-boris
Boris Ostrovsky Jan. 22, 2016, 11:27 p.m. UTC | #3
On 01/22/2016 06:12 PM, Boris Ostrovsky wrote:
> On 01/22/2016 06:01 PM, Luis R. Rodriguez wrote:
>> On Fri, Jan 22, 2016 at 04:35:48PM -0500, Boris Ostrovsky wrote:
>>> HVMlite guests (to be introduced in subsequent patches) share most
>>> of the kernel initialization code with PV(H).
>>>
>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>>> ---
>>>   arch/x86/xen/enlighten.c |  225 
>>> ++++++++++++++++++++++++----------------------
>>>   1 files changed, 119 insertions(+), 106 deletions(-)
>>>
>>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>>> index d09e4c9..2cf446a 100644
>>> --- a/arch/x86/xen/enlighten.c
>>> +++ b/arch/x86/xen/enlighten.c
>> Whoa, I'm lost, its hard for me to tell what exactly stayed and what
>> got pulled into a helper, etc. Is there a possibility to split this
>> patch in 2 somehow to make the actual functional changes easier to
>> read? There are too many changes here and I just can't tell easily
>> what's going on.
>
>
> The only real changes that this patch introduces is it reorders some 
> of the operations that used to be in xen_start_kernel(). This is done 
> so that in the next patch when we add hvmlite we can easily put those 
> specific to PV(H) inside 'if (!xen_hvm_domain())'. I probably should 
> have said so in the commit message.


Actually, I forgot that I merged the 'if' clause into this patch already 
so I'll see if I can separate them again to make it easier on the eye.

-boris


>
> It is indeed difficult to review but I don't see how I can split this. 
> Even if I just moved it (without reordering) it would still be hard to 
> read.
>
> -boris
Luis Chamberlain Jan. 22, 2016, 11:41 p.m. UTC | #4
On Fri, Jan 22, 2016 at 06:12:47PM -0500, Boris Ostrovsky wrote:
> On 01/22/2016 06:01 PM, Luis R. Rodriguez wrote:
> >On Fri, Jan 22, 2016 at 04:35:48PM -0500, Boris Ostrovsky wrote:
> >>HVMlite guests (to be introduced in subsequent patches) share most
> >>of the kernel initialization code with PV(H).
> >>
> >>Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> >>---
> >>  arch/x86/xen/enlighten.c |  225 ++++++++++++++++++++++++----------------------
> >>  1 files changed, 119 insertions(+), 106 deletions(-)
> >>
> >>diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> >>index d09e4c9..2cf446a 100644
> >>--- a/arch/x86/xen/enlighten.c
> >>+++ b/arch/x86/xen/enlighten.c
> >Whoa, I'm lost, its hard for me to tell what exactly stayed and what
> >got pulled into a helper, etc. Is there a possibility to split this
> >patch in 2 somehow to make the actual functional changes easier to
> >read? There are too many changes here and I just can't tell easily
> >what's going on.
> 
> 
> The only real changes that this patch introduces is it reorders some
> of the operations that used to be in xen_start_kernel(). This is
> done so that in the next patch when we add hvmlite we can easily put
> those specific to PV(H) inside 'if (!xen_hvm_domain())'. I probably
> should have said so in the commit message.

Ah, I see thanks.

> It is indeed difficult to review but I don't see how I can split
> this. Even if I just moved it (without reordering) it would still be
> hard to read.

A code shuffle but yet introducing non-functional changes as you did
in some other patches might help if possible, but sure if you can say
this is non-functional here or if you can split this up.

  Luis
David Vrabel Jan. 25, 2016, 11:04 a.m. UTC | #5
On 22/01/16 21:35, Boris Ostrovsky wrote:
> HVMlite guests (to be introduced in subsequent patches) share most
> of the kernel initialization code with PV(H).

Where possible, HVMlite should share initialization with bare metal/HVM
and not PV(H).

Sharing any code with the existing PVH code isn't useful, since PVH
support will be removed.

David
Boris Ostrovsky Jan. 25, 2016, 3:42 p.m. UTC | #6
On 01/25/2016 06:04 AM, David Vrabel wrote:
> On 22/01/16 21:35, Boris Ostrovsky wrote:
>> HVMlite guests (to be introduced in subsequent patches) share most
>> of the kernel initialization code with PV(H).
> Where possible, HVMlite should share initialization with bare metal/HVM
> and not PV(H).

This is really platform initialization, for HVMlite it's invoked as 
x86_init.oem.arch_setup(). Things like xen_setup_features(), 
xen_init_apic() etc.

I suppose I can move it to xen_hvm_guest_init() but then we will have 
some amount of code duplication. There is also a chunk of code there (in 
xen_init_kernel()) that will probably be used for HVMlite dom0.

>
> Sharing any code with the existing PVH code isn't useful, since PVH
> support will be removed.

There is nothing PVH-specific, I said "(H)" mostly because that code is 
the same is PV.

-boris
diff mbox

Patch

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d09e4c9..2cf446a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1505,19 +1505,14 @@  static void __init xen_pvh_early_guest_init(void)
 }
 #endif    /* CONFIG_XEN_PVH */
 
-/* First C function to be called on Xen boot */
-asmlinkage __visible void __init xen_start_kernel(void)
+
+static void __init xen_init_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	unsigned long initrd_start = 0;
 	u64 pat;
 	int rc;
 
-	if (!xen_start_info)
-		return;
-
-	xen_domain_type = XEN_PV_DOMAIN;
-
 	xen_setup_features();
 #ifdef CONFIG_XEN_PVH
 	xen_pvh_early_guest_init();
@@ -1529,48 +1524,9 @@  asmlinkage __visible void __init xen_start_kernel(void)
 	if (xen_initial_domain())
 		pv_info.features |= PV_SUPPORTED_RTC;
 	pv_init_ops = xen_init_ops;
-	if (!xen_pvh_domain()) {
-		pv_cpu_ops = xen_cpu_ops;
-
-		x86_platform.get_nmi_reason = xen_get_nmi_reason;
-	}
-
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
-	else
-		x86_init.resources.memory_setup = xen_memory_setup;
-	x86_init.oem.arch_setup = xen_arch_setup;
-	x86_init.oem.banner = xen_banner;
 
 	xen_init_time_ops();
 
-	/*
-	 * Set up some pagetable state before starting to set any ptes.
-	 */
-
-	xen_init_mmu_ops();
-
-	/* Prevent unwanted bits from being set in PTEs. */
-	__supported_pte_mask &= ~_PAGE_GLOBAL;
-
-	/*
-	 * Prevent page tables from being allocated in highmem, even
-	 * if CONFIG_HIGHPTE is enabled.
-	 */
-	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
-
-	/* Work out if we support NX */
-	x86_configure_nx();
-
-	/* Get mfn list */
-	xen_build_dynamic_phys_to_machine();
-
-	/*
-	 * Set up kernel GDT and segment registers, mainly so that
-	 * -fstack-protector code can be executed.
-	 */
-	xen_setup_gdt(0);
-
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
 
@@ -1580,21 +1536,8 @@  asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	xen_init_apic();
 #endif
-
-	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
-		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
-		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
-	}
-
 	machine_ops = xen_machine_ops;
 
-	/*
-	 * The only reliable way to retain the initial address of the
-	 * percpu gdt_page is to remember it here, so we can go and
-	 * mark it RW later, when the initial percpu area is freed.
-	 */
-	xen_initial_gdt = &per_cpu(gdt_page, 0);
-
 	xen_smp_init();
 
 #ifdef CONFIG_ACPI_NUMA
@@ -1609,66 +1552,126 @@  asmlinkage __visible void __init xen_start_kernel(void)
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 
-	local_irq_disable();
-	early_boot_irqs_disabled = true;
+	if (!xen_hvm_domain()) {
+		if (!xen_pvh_domain()) {
+			pv_cpu_ops = xen_cpu_ops;
 
-	xen_raw_console_write("mapping kernel into physical memory\n");
-	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
-				   xen_start_info->nr_pages);
-	xen_reserve_special_pages();
+			x86_platform.get_nmi_reason = xen_get_nmi_reason;
+		}
 
-	/*
-	 * Modify the cache mode translation tables to match Xen's PAT
-	 * configuration.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	pat_init_cache_modes(pat);
+		if (xen_feature(XENFEAT_auto_translated_physmap))
+			x86_init.resources.memory_setup =
+				xen_auto_xlated_memory_setup;
+		else
+			x86_init.resources.memory_setup = xen_memory_setup;
+		x86_init.oem.arch_setup = xen_arch_setup;
+		x86_init.oem.banner = xen_banner;
 
-	/* keep using Xen gdt for now; no urgent need to change it */
+		/*
+		 * Set up some pagetable state before starting to set any ptes.
+		 */
 
-#ifdef CONFIG_X86_32
-	pv_info.kernel_rpl = 1;
-	if (xen_feature(XENFEAT_supervisor_mode_kernel))
-		pv_info.kernel_rpl = 0;
-#else
-	pv_info.kernel_rpl = 0;
-#endif
-	/* set the limit of our address space */
-	xen_reserve_top();
+		xen_init_mmu_ops();
+
+		/* Prevent unwanted bits from being set in PTEs. */
+		__supported_pte_mask &= ~_PAGE_GLOBAL;
 
-	/* PVH: runs at default kernel iopl of 0 */
-	if (!xen_pvh_domain()) {
 		/*
-		 * We used to do this in xen_arch_setup, but that is too late
-		 * on AMD were early_cpu_init (run before ->arch_setup()) calls
-		 * early_amd_init which pokes 0xcf8 port.
+		 * Prevent page tables from being allocated in highmem, even
+		 * if CONFIG_HIGHPTE is enabled.
 		 */
-		set_iopl.iopl = 1;
-		rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-		if (rc != 0)
-			xen_raw_printk("physdev_op failed %d\n", rc);
-	}
+		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+
+		/* Work out if we support NX */
+		x86_configure_nx();
+
+		/* Get mfn list */
+		xen_build_dynamic_phys_to_machine();
+
+		/*
+		 * Set up kernel GDT and segment registers, mainly so that
+		 * -fstack-protector code can be executed.
+		 */
+		xen_setup_gdt(0);
+
+		if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
+			pv_mmu_ops.ptep_modify_prot_start =
+				xen_ptep_modify_prot_start;
+			pv_mmu_ops.ptep_modify_prot_commit =
+				xen_ptep_modify_prot_commit;
+		}
+
+		/*
+		 * The only reliable way to retain the initial address of the
+		 * percpu gdt_page is to remember it here, so we can go and
+		 * mark it RW later, when the initial percpu area is freed.
+		 */
+		xen_initial_gdt = &per_cpu(gdt_page, 0);
+
+		xen_raw_console_write("mapping kernel into physical memory\n");
+		xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
+					   xen_start_info->nr_pages);
+		xen_reserve_special_pages();
+
+		/*
+		 * Modify the cache mode translation tables to match Xen's PAT
+		 * configuration.
+		 */
+		rdmsrl(MSR_IA32_CR_PAT, pat);
+		pat_init_cache_modes(pat);
+
+		/* keep using Xen gdt for now; no urgent need to change it */
 
 #ifdef CONFIG_X86_32
-	/* set up basic CPUID stuff */
-	cpu_detect(&new_cpu_data);
-	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
-	new_cpu_data.wp_works_ok = 1;
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+		pv_info.kernel_rpl = 1;
+		if (xen_feature(XENFEAT_supervisor_mode_kernel))
+			pv_info.kernel_rpl = 0;
+#else
+		pv_info.kernel_rpl = 0;
+#endif
+		/* set the limit of our address space */
+		xen_reserve_top();
+
+		/* PVH: runs at default kernel iopl of 0 */
+		if (!xen_pvh_domain()) {
+			/*
+			 * We used to do this in xen_arch_setup, but that is
+			 * too late on AMD were early_cpu_init (run before
+			 * ->arch_setup()) calls early_amd_init which pokes
+			 * 0xcf8 port.
+			 */
+			set_iopl.iopl = 1;
+			rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl,
+						   &set_iopl);
+			if (rc != 0)
+				xen_raw_printk("physdev_op failed %d\n", rc);
+		}
+
+#ifdef CONFIG_X86_32
+		/* set up basic CPUID stuff */
+		cpu_detect(&new_cpu_data);
+		set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
+		new_cpu_data.wp_works_ok = 1;
+		new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
 
-	if (xen_start_info->mod_start) {
-	    if (xen_start_info->flags & SIF_MOD_START_PFN)
-		initrd_start = PFN_PHYS(xen_start_info->mod_start);
-	    else
-		initrd_start = __pa(xen_start_info->mod_start);
+		if (xen_start_info->mod_start) {
+			if (xen_start_info->flags & SIF_MOD_START_PFN)
+				initrd_start =
+					PFN_PHYS(xen_start_info->mod_start);
+			else
+				initrd_start = __pa(xen_start_info->mod_start);
+		}
+
+		/* Poke various useful things into boot_params */
+		boot_params.hdr.type_of_loader = (9 << 4) | 0;
+		boot_params.hdr.ramdisk_image = initrd_start;
+		boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+		boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 	}
 
-	/* Poke various useful things into boot_params */
-	boot_params.hdr.type_of_loader = (9 << 4) | 0;
-	boot_params.hdr.ramdisk_image = initrd_start;
-	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
-	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
+	local_irq_disable();
+	early_boot_irqs_disabled = true;
 
 	if (!xen_initial_domain()) {
 		add_preferred_console("xenboot", 0, NULL);
@@ -1713,6 +1716,16 @@  asmlinkage __visible void __init xen_start_kernel(void)
 	xen_setup_runstate_info(0);
 
 	xen_efi_init();
+}
+
+/* First C function to be called on Xen boot */
+asmlinkage __visible void __init xen_start_kernel(void)
+{
+	if (!xen_start_info)
+		return;
+	xen_domain_type = XEN_PV_DOMAIN;
+
+	xen_init_kernel();
 
 	/* Start the world */
 #ifdef CONFIG_X86_32