Message ID | 20231230161954.569267-8-michael.roth@amd.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Herbert Xu |
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) Initialization Support | expand |
On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote: > +void snp_dump_hva_rmpentry(unsigned long hva) > +{ > + unsigned int level; > + pgd_t *pgd; > + pte_t *pte; > + > + pgd = __va(read_cr3_pa()); > + pgd += pgd_index(hva); > + pte = lookup_address_in_pgd(pgd, hva, &level); > + > + if (!pte) { > + pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva); > + return; > + } > + > + dump_rmpentry(pte_pfn(*pte)); > +} > +EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry); show_fault_oops() - the only caller of this - is builtin code and thus doesn't need symbol exports. Symbol exports are only for module code. --- diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index a8cf33b7da71..31154f087fb0 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -339,4 +339,3 @@ void snp_dump_hva_rmpentry(unsigned long hva) dump_rmpentry(pte_pfn(*pte)); } -EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);
On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote: > + while (pfn_current < pfn_end) { > + e = __snp_lookup_rmpentry(pfn_current, &level); > + if (IS_ERR(e)) { > + pfn_current++; > + continue; > + } > + > + e_data = (u64 *)e; > + if (e_data[0] || e_data[1]) { > + pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", > + pfn, pfn_current, e_data[1], e_data[0]); > + return; > + } > + pfn_current++; > + } > + > + pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n", > + pfn); > +} Ok, I went and reworked this, see below. Yes, I think it is important - at least in the beginning - to dump the whole 2M PFN region for debugging purposes. If that output starts becoming too unwieldy and overflowing terminals or log files, we'd shorten it or put it behind a debug option or so. Thx. --- diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index a8cf33b7da71..259a1dd655a7 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -35,16 +35,21 @@ * Family 19h Model 01h, Rev B1 processor. 
*/ struct rmpentry { - u64 assigned : 1, - pagesize : 1, - immutable : 1, - rsvd1 : 9, - gpa : 39, - asid : 10, - vmsa : 1, - validated : 1, - rsvd2 : 1; - u64 rsvd3; + union { + struct { + u64 assigned : 1, + pagesize : 1, + immutable : 1, + rsvd1 : 9, + gpa : 39, + asid : 10, + vmsa : 1, + validated : 1, + rsvd2 : 1; + }; + u64 lo; + }; + u64 hi; } __packed; /* @@ -272,22 +277,20 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); */ static void dump_rmpentry(u64 pfn) { - u64 pfn_current, pfn_end; + u64 pfn_i, pfn_end; struct rmpentry *e; - u64 *e_data; int level; e = __snp_lookup_rmpentry(pfn, &level); if (IS_ERR(e)) { - pr_info("Failed to read RMP entry for PFN 0x%llx, error %ld\n", - pfn, PTR_ERR(e)); + pr_err("Error %ld reading RMP entry for PFN 0x%llx\n", + PTR_ERR(e), pfn); return; } - e_data = (u64 *)e; if (e->assigned) { - pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", - pfn, e_data[1], e_data[0]); + pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n", + pfn, e->lo, e->hi); return; } @@ -299,27 +302,28 @@ static void dump_rmpentry(u64 pfn) * certain situations, such as when the PFN is being accessed via a 2MB * mapping in the host page table. 
*/ - pfn_current = ALIGN(pfn, PTRS_PER_PMD); - pfn_end = pfn_current + PTRS_PER_PMD; + pfn_i = ALIGN(pfn, PTRS_PER_PMD); + pfn_end = pfn_i + PTRS_PER_PMD; - while (pfn_current < pfn_end) { - e = __snp_lookup_rmpentry(pfn_current, &level); + pr_info("PFN 0x%llx unassigned, dumping the whole 2M PFN region: [0x%llx - 0x%llx]\n", + pfn, pfn_i, pfn_end); + + while (pfn_i < pfn_end) { + e = __snp_lookup_rmpentry(pfn_i, &level); if (IS_ERR(e)) { - pfn_current++; + pr_err("Error %ld reading RMP entry for PFN 0x%llx\n", + PTR_ERR(e), pfn_i); + pfn_i++; continue; } - e_data = (u64 *)e; - if (e_data[0] || e_data[1]) { - pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", - pfn, pfn_current, e_data[1], e_data[0]); - return; - } - pfn_current++; - } + if (e->lo || e->hi) + pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi); + else + pr_info("PFN: 0x%llx ...\n", pfn_i); - pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n", - pfn); + pfn_i++; + } } void snp_dump_hva_rmpentry(unsigned long hva) @@ -339,4 +343,3 @@ void snp_dump_hva_rmpentry(unsigned long hva) dump_rmpentry(pte_pfn(*pte)); } -EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);
On 12/30/23 10:19, Michael Roth wrote: > From: Brijesh Singh <brijesh.singh@amd.com> > > This information will be useful for debugging things like page faults > due to RMP access violations and RMPUPDATE failures. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > [mdr: move helper to standalone patch, rework dump logic to reduce > verbosity] > Signed-off-by: Michael Roth <michael.roth@amd.com> > --- > arch/x86/include/asm/sev.h | 2 + > arch/x86/virt/svm/sev.c | 77 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 79 insertions(+) > > diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h > index 01ce61b283a3..2c53e3de0b71 100644 > --- a/arch/x86/include/asm/sev.h > +++ b/arch/x86/include/asm/sev.h > @@ -247,9 +247,11 @@ static inline u64 sev_get_status(void) { return 0; } > #ifdef CONFIG_KVM_AMD_SEV > bool snp_probe_rmptable_info(void); > int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); > +void snp_dump_hva_rmpentry(unsigned long address); > #else > static inline bool snp_probe_rmptable_info(void) { return false; } > static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } > +static inline void snp_dump_hva_rmpentry(unsigned long address) {} > #endif > > #endif > diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c > index 49fdfbf4e518..7c9ced8911e9 100644 > --- a/arch/x86/virt/svm/sev.c > +++ b/arch/x86/virt/svm/sev.c > @@ -266,3 +266,80 @@ int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) > return 0; > } > EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); > + > +/* > + * Dump the raw RMP entry for a particular PFN. These bits are documented in the > + * PPR for a particular CPU model and provide useful information about how a > + * particular PFN is being utilized by the kernel/firmware at the time certain > + * unexpected events occur, such as RMP faults. 
> + */ > +static void dump_rmpentry(u64 pfn) > +{ > + u64 pfn_current, pfn_end; > + struct rmpentry *e; > + u64 *e_data; > + int level; > + > + e = __snp_lookup_rmpentry(pfn, &level); > + if (IS_ERR(e)) { > + pr_info("Failed to read RMP entry for PFN 0x%llx, error %ld\n", > + pfn, PTR_ERR(e)); > + return; > + } > + > + e_data = (u64 *)e; > + if (e->assigned) { > + pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", > + pfn, e_data[1], e_data[0]); > + return; > + } > + > + /* > + * If the RMP entry for a particular PFN is not in an assigned state, > + * then it is sometimes useful to get an idea of whether or not any RMP > + * entries for other PFNs within the same 2MB region are assigned, since > + * those too can affect the ability to access a particular PFN in > + * certain situations, such as when the PFN is being accessed via a 2MB > + * mapping in the host page table. > + */ > + pfn_current = ALIGN(pfn, PTRS_PER_PMD); > + pfn_end = pfn_current + PTRS_PER_PMD; > + > + while (pfn_current < pfn_end) { > + e = __snp_lookup_rmpentry(pfn_current, &level); > + if (IS_ERR(e)) { > + pfn_current++; > + continue; > + } > + > + e_data = (u64 *)e; > + if (e_data[0] || e_data[1]) { > + pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", > + pfn, pfn_current, e_data[1], e_data[0]); > + return; > + } > + pfn_current++; > + } > + > + pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n", > + pfn); > +} > + > +void snp_dump_hva_rmpentry(unsigned long hva) > +{ > + unsigned int level; > + pgd_t *pgd; > + pte_t *pte; > + > + pgd = __va(read_cr3_pa()); > + pgd += pgd_index(hva); > + pte = lookup_address_in_pgd(pgd, hva, &level); > + > + if (!pte) { > + pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva); > + return; > + } > + > + dump_rmpentry(pte_pfn(*pte)); Already worked with Mike offline when I was running into issues 
using this function. The net of that conversation is that the PFN needs to be adjusted using the address offset if the PTE level indicates a huge page. Additionally, the loop in dump_rmpentry() needs to use ALIGN_DOWN() in order to get the PFN of the starting 2MB area. Thanks, Tom > +} > +EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);
On 1/10/24 05:13, Borislav Petkov wrote: > On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote: >> + while (pfn_current < pfn_end) { >> + e = __snp_lookup_rmpentry(pfn_current, &level); >> + if (IS_ERR(e)) { >> + pfn_current++; >> + continue; >> + } >> + >> + e_data = (u64 *)e; >> + if (e_data[0] || e_data[1]) { >> + pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", >> + pfn, pfn_current, e_data[1], e_data[0]); >> + return; >> + } >> + pfn_current++; >> + } >> + >> + pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n", >> + pfn); >> +} > > Ok, I went and reworked this, see below. > > Yes, I think it is important - at least in the beginning - to dump the > whole 2M PFN region for debugging purposes. If that output starts > becoming too unwieldy and overflowing terminals or log files, we'd > shorten it or put it behind a debug option or so. > > Thx. > > --- > diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c > index a8cf33b7da71..259a1dd655a7 100644 > --- a/arch/x86/virt/svm/sev.c > +++ b/arch/x86/virt/svm/sev.c > + pr_info("PFN 0x%llx unassigned, dumping the whole 2M PFN region: [0x%llx - 0x%llx]\n", > + pfn, pfn_i, pfn_end); How about saying "... dumping all non-zero entries in the whole ..." and then removing the print below that prints the PFN and "..." 
> + > + while (pfn_i < pfn_end) { > + e = __snp_lookup_rmpentry(pfn_i, &level); > if (IS_ERR(e)) { > - pfn_current++; > + pr_err("Error %ld reading RMP entry for PFN 0x%llx\n", > + PTR_ERR(e), pfn_i); > + pfn_i++; > continue; > } > > - e_data = (u64 *)e; > - if (e_data[0] || e_data[1]) { > - pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", > - pfn, pfn_current, e_data[1], e_data[0]); > - return; > - } > - pfn_current++; > - } > + if (e->lo || e->hi) > + pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi); > + else > + pr_info("PFN: 0x%llx ...\n", pfn_i); Remove this one. That should cut down on excess output since you are really only concerned with non-zero RMP entries when the input PFN RMP entry is not assigned. Thanks, Tom > > - pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n", > - pfn); > + pfn_i++; > + } > } > > void snp_dump_hva_rmpentry(unsigned long hva) > @@ -339,4 +343,3 @@ void snp_dump_hva_rmpentry(unsigned long hva) > > dump_rmpentry(pte_pfn(*pte)); > } > -EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry); >
On Wed, Jan 10, 2024 at 09:20:44AM -0600, Tom Lendacky wrote: > How about saying "... dumping all non-zero entries in the whole ..." I'm trying not to have long stories in printk statements :) > and then removing the print below that prints the PFN and "..." Why remove the print? You want to print every non-null RMP entry in the 2M range, no? And the "..." says that it is a null entry.
On 1/10/24 09:27, Borislav Petkov wrote: > On Wed, Jan 10, 2024 at 09:20:44AM -0600, Tom Lendacky wrote: >> How about saying "... dumping all non-zero entries in the whole ..." > > I'm trying not to have long stories in printk statements :) Well it only adds "non-zero" > >> and then removing the print below that prints the PFN and "..." > > Why remove the print? You want to print every non-null RMP entry in the > 2M range, no? I'm only suggesting getting rid of the else that prints "..." when the entry is all zeroes. Printing the non-zero entries would still occur. Thanks, Tom > > And the "..." says that it is a null entry. >
On Wed, Jan 10, 2024 at 09:51:04AM -0600, Tom Lendacky wrote: > I'm only suggesting getting rid of the else that prints "..." when the entry > is all zeroes. Printing the non-zero entries would still occur. Sure, one should be able to infer that the missing entries are null. :-)
On Wed Jan 10, 2024 at 11:59 AM EET, Borislav Petkov wrote: > On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote: > > +void snp_dump_hva_rmpentry(unsigned long hva) > > +{ > > + unsigned int level; > > + pgd_t *pgd; > > + pte_t *pte; > > + > > + pgd = __va(read_cr3_pa()); > > + pgd += pgd_index(hva); > > + pte = lookup_address_in_pgd(pgd, hva, &level); > > + > > + if (!pte) { > > + pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva); ~~~~~~~ is this the correct log level? BR, Jarkko
On Wed, Jan 10, 2024 at 10:18:37PM +0200, Jarkko Sakkinen wrote: > > > + if (!pte) { > > > + pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva); > ~~~~~~~ > is this correct log level? No, and I caught a couple of those already but missed this one, thanks. Mike, please make sure all your error prints are pr_err. Thx.
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 01ce61b283a3..2c53e3de0b71 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -247,9 +247,11 @@ static inline u64 sev_get_status(void) { return 0; } #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); +void snp_dump_hva_rmpentry(unsigned long address); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } +static inline void snp_dump_hva_rmpentry(unsigned long address) {} #endif #endif diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 49fdfbf4e518..7c9ced8911e9 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -266,3 +266,80 @@ int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) return 0; } EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); + +/* + * Dump the raw RMP entry for a particular PFN. These bits are documented in the + * PPR for a particular CPU model and provide useful information about how a + * particular PFN is being utilized by the kernel/firmware at the time certain + * unexpected events occur, such as RMP faults. 
+ */ +static void dump_rmpentry(u64 pfn) +{ + u64 pfn_current, pfn_end; + struct rmpentry *e; + u64 *e_data; + int level; + + e = __snp_lookup_rmpentry(pfn, &level); + if (IS_ERR(e)) { + pr_info("Failed to read RMP entry for PFN 0x%llx, error %ld\n", + pfn, PTR_ERR(e)); + return; + } + + e_data = (u64 *)e; + if (e->assigned) { + pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", + pfn, e_data[1], e_data[0]); + return; + } + + /* + * If the RMP entry for a particular PFN is not in an assigned state, + * then it is sometimes useful to get an idea of whether or not any RMP + * entries for other PFNs within the same 2MB region are assigned, since + * those too can affect the ability to access a particular PFN in + * certain situations, such as when the PFN is being accessed via a 2MB + * mapping in the host page table. + */ + pfn_current = ALIGN(pfn, PTRS_PER_PMD); + pfn_end = pfn_current + PTRS_PER_PMD; + + while (pfn_current < pfn_end) { + e = __snp_lookup_rmpentry(pfn_current, &level); + if (IS_ERR(e)) { + pfn_current++; + continue; + } + + e_data = (u64 *)e; + if (e_data[0] || e_data[1]) { + pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n", + pfn, pfn_current, e_data[1], e_data[0]); + return; + } + pfn_current++; + } + + pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n", + pfn); +} + +void snp_dump_hva_rmpentry(unsigned long hva) +{ + unsigned int level; + pgd_t *pgd; + pte_t *pte; + + pgd = __va(read_cr3_pa()); + pgd += pgd_index(hva); + pte = lookup_address_in_pgd(pgd, hva, &level); + + if (!pte) { + pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva); + return; + } + + dump_rmpentry(pte_pfn(*pte)); +} +EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);