diff mbox series

[22/35] kvm, x86: Distinguish dmemfs page from mmio page

Message ID b2b6837785f6786575823c919788464373d3ee05.1602093760.git.yuleixzhang@tencent.com (mailing list archive)
State New, archived
Headers show
Series Enhance memory utilization with DMEMFS | expand

Commit Message

yulei zhang Oct. 8, 2020, 7:54 a.m. UTC
From: Yulei Zhang <yuleixzhang@tencent.com>

Dmem page is pfn invalid but not mmio. Support cacheable
dmem page for kvm.

Signed-off-by: Chen Zhuo <sagazchen@tencent.com>
Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com>
---
 arch/x86/kvm/mmu/mmu.c | 5 +++--
 include/linux/dmem.h   | 7 +++++++
 mm/dmem.c              | 7 +++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

Comments

Sean Christopherson Oct. 9, 2020, 12:58 a.m. UTC | #1
On Thu, Oct 08, 2020 at 03:54:12PM +0800, yulei.kernel@gmail.com wrote:
> From: Yulei Zhang <yuleixzhang@tencent.com>
> 
> Dmem page is pfn invalid but not mmio. Support cacheable
> dmem page for kvm.
> 
> Signed-off-by: Chen Zhuo <sagazchen@tencent.com>
> Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com>
> ---
>  arch/x86/kvm/mmu/mmu.c | 5 +++--
>  include/linux/dmem.h   | 7 +++++++
>  mm/dmem.c              | 7 +++++++
>  3 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 71aa3da2a0b7..0115c1767063 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -41,6 +41,7 @@
>  #include <linux/hash.h>
>  #include <linux/kern_levels.h>
>  #include <linux/kthread.h>
> +#include <linux/dmem.h>
>  
>  #include <asm/page.h>
>  #include <asm/memtype.h>
> @@ -2962,9 +2963,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
>  			 */
>  			(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
>  
> -	return !e820__mapped_raw_any(pfn_to_hpa(pfn),
> +	return (!e820__mapped_raw_any(pfn_to_hpa(pfn),
>  				     pfn_to_hpa(pfn + 1) - 1,
> -				     E820_TYPE_RAM);
> +				     E820_TYPE_RAM)) || (!is_dmem_pfn(pfn));

This is wrong.  As is, the logic reads "A PFN is MMIO if it is INVALID &&
(!RAM || !DMEM)".  The obvious fix would be to change it to "INVALID &&
!RAM && !DMEM", but that begs the question of whether or DMEM is reported
as RAM.  I don't see any e820 related changes in the series, i.e. no evidence
that dmem yanks its memory out of the e820 tables, which makes me think this
change is unnecessary.

>  }
>  
>  /* Bits which may be returned by set_spte() */
Joao Martins Oct. 9, 2020, 10:28 a.m. UTC | #2
On 10/9/20 1:58 AM, Sean Christopherson wrote:
> On Thu, Oct 08, 2020 at 03:54:12PM +0800, yulei.kernel@gmail.com wrote:
>> From: Yulei Zhang <yuleixzhang@tencent.com>
>>
>> Dmem page is pfn invalid but not mmio. Support cacheable
>> dmem page for kvm.
>>
>> Signed-off-by: Chen Zhuo <sagazchen@tencent.com>
>> Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com>
>> ---
>>  arch/x86/kvm/mmu/mmu.c | 5 +++--
>>  include/linux/dmem.h   | 7 +++++++
>>  mm/dmem.c              | 7 +++++++
>>  3 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>> index 71aa3da2a0b7..0115c1767063 100644
>> --- a/arch/x86/kvm/mmu/mmu.c
>> +++ b/arch/x86/kvm/mmu/mmu.c
>> @@ -41,6 +41,7 @@
>>  #include <linux/hash.h>
>>  #include <linux/kern_levels.h>
>>  #include <linux/kthread.h>
>> +#include <linux/dmem.h>
>>  
>>  #include <asm/page.h>
>>  #include <asm/memtype.h>
>> @@ -2962,9 +2963,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
>>  			 */
>>  			(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
>>  
>> -	return !e820__mapped_raw_any(pfn_to_hpa(pfn),
>> +	return (!e820__mapped_raw_any(pfn_to_hpa(pfn),
>>  				     pfn_to_hpa(pfn + 1) - 1,
>> -				     E820_TYPE_RAM);
>> +				     E820_TYPE_RAM)) || (!is_dmem_pfn(pfn));
> 
> This is wrong.  As is, the logic reads "A PFN is MMIO if it is INVALID &&
> (!RAM || !DMEM)".  The obvious fix would be to change it to "INVALID &&
> !RAM && !DMEM", but that begs the question of whether or DMEM is reported
> as RAM.  I don't see any e820 related changes in the series, i.e. no evidence
> that dmem yanks its memory out of the e820 tables, which makes me think this
> change is unnecessary.
> 
Even if there would exist e820 changes, e820__mapped_raw_any() checks against
hardware-provided e820 that we are given before any changes happen i.e. not the one kernel
has changed (e820_table_firmware). So unless you're having that memory carved from an MMIO
range (which would be wrong), or the BIOS is misrepresenting its memory map... the
e820__mapped_raw_any(E820_TYPE_RAM) ought to be enough to cover RAM.

Or at least that has been my experience with similar work.

	Joao
yulei zhang Oct. 9, 2020, 11:42 a.m. UTC | #3
Sean and Joao, thanks for the feedback. Probably we can drop this change.

On Fri, Oct 9, 2020 at 6:28 PM Joao Martins <joao.m.martins@oracle.com> wrote:
>
> On 10/9/20 1:58 AM, Sean Christopherson wrote:
> > On Thu, Oct 08, 2020 at 03:54:12PM +0800, yulei.kernel@gmail.com wrote:
> >> From: Yulei Zhang <yuleixzhang@tencent.com>
> >>
> >> Dmem page is pfn invalid but not mmio. Support cacheable
> >> dmem page for kvm.
> >>
> >> Signed-off-by: Chen Zhuo <sagazchen@tencent.com>
> >> Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com>
> >> ---
> >>  arch/x86/kvm/mmu/mmu.c | 5 +++--
> >>  include/linux/dmem.h   | 7 +++++++
> >>  mm/dmem.c              | 7 +++++++
> >>  3 files changed, 17 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> >> index 71aa3da2a0b7..0115c1767063 100644
> >> --- a/arch/x86/kvm/mmu/mmu.c
> >> +++ b/arch/x86/kvm/mmu/mmu.c
> >> @@ -41,6 +41,7 @@
> >>  #include <linux/hash.h>
> >>  #include <linux/kern_levels.h>
> >>  #include <linux/kthread.h>
> >> +#include <linux/dmem.h>
> >>
> >>  #include <asm/page.h>
> >>  #include <asm/memtype.h>
> >> @@ -2962,9 +2963,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
> >>                       */
> >>                      (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
> >>
> >> -    return !e820__mapped_raw_any(pfn_to_hpa(pfn),
> >> +    return (!e820__mapped_raw_any(pfn_to_hpa(pfn),
> >>                                   pfn_to_hpa(pfn + 1) - 1,
> >> -                                 E820_TYPE_RAM);
> >> +                                 E820_TYPE_RAM)) || (!is_dmem_pfn(pfn));
> >
> > This is wrong.  As is, the logic reads "A PFN is MMIO if it is INVALID &&
> > (!RAM || !DMEM)".  The obvious fix would be to change it to "INVALID &&
> > !RAM && !DMEM", but that begs the question of whether or DMEM is reported
> > as RAM.  I don't see any e820 related changes in the series, i.e. no evidence
> > that dmem yanks its memory out of the e820 tables, which makes me think this
> > change is unnecessary.
> >
> Even if there would exist e820 changes, e820__mapped_raw_any() checks against
> hardware-provided e820 that we are given before any changes happen i.e. not the one kernel
> has changed (e820_table_firmware). So unless you're having that memory carved from an MMIO
> range (which would be wrong), or the BIOS is misrepresenting its memory map... the
> e820__mapped_raw_any(E820_TYPE_RAM) ought to be enough to cover RAM.
>
> Or at least that has been my experience with similar work.
>
>         Joao
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 71aa3da2a0b7..0115c1767063 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -41,6 +41,7 @@ 
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
 #include <linux/kthread.h>
+#include <linux/dmem.h>
 
 #include <asm/page.h>
 #include <asm/memtype.h>
@@ -2962,9 +2963,9 @@  static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
 			 */
 			(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
 
-	return !e820__mapped_raw_any(pfn_to_hpa(pfn),
+	return (!e820__mapped_raw_any(pfn_to_hpa(pfn),
 				     pfn_to_hpa(pfn + 1) - 1,
-				     E820_TYPE_RAM);
+				     E820_TYPE_RAM)) || (!is_dmem_pfn(pfn));
 }
 
 /* Bits which may be returned by set_spte() */
diff --git a/include/linux/dmem.h b/include/linux/dmem.h
index 8682d63ed43a..59d3ef14fe42 100644
--- a/include/linux/dmem.h
+++ b/include/linux/dmem.h
@@ -19,11 +19,18 @@  dmem_alloc_pages_vma(struct vm_area_struct *vma, unsigned long addr,
 		     unsigned int try_max, unsigned int *result_nr);
 
 void dmem_free_pages(phys_addr_t addr, unsigned int dpages_nr);
+bool is_dmem_pfn(unsigned long pfn);
 #define dmem_free_page(addr)	dmem_free_pages(addr, 1)
 #else
 static inline int dmem_reserve_init(void)
 {
 	return 0;
 }
+
+static inline bool is_dmem_pfn(unsigned long pfn)
+{
+	return 0;
+}
+
 #endif
 #endif	/* _LINUX_DMEM_H */
diff --git a/mm/dmem.c b/mm/dmem.c
index 2e61dbddbc62..eb6df7059cf0 100644
--- a/mm/dmem.c
+++ b/mm/dmem.c
@@ -972,3 +972,10 @@  void dmem_free_pages(phys_addr_t addr, unsigned int dpages_nr)
 }
 EXPORT_SYMBOL(dmem_free_pages);
 
+bool is_dmem_pfn(unsigned long pfn)
+{
+	struct dmem_node *dnode;
+
+	return !!find_dmem_region(__pfn_to_phys(pfn), &dnode);
+}
+EXPORT_SYMBOL(is_dmem_pfn);