
[v4,2/7] xen/riscv: set up fixmap mappings

Message ID 1c1c0f912a9abbb542baa1ce92e75d64ec8043e9.1723214540.git.oleksii.kurochko@gmail.com (mailing list archive)
State Superseded
Series RISCV device tree mapping

Commit Message

Oleksii Kurochko Aug. 9, 2024, 4:19 p.m. UTC
Set up fixmap mappings and the L0 page table for fixmap support.

Define new macros in riscv/config.h for calculating
the FIXMAP_BASE address, including XEN_VIRT_SIZE and
BOOT_FDT_VIRT_{START,SIZE}.

Update the check for Xen size in riscv/lds.S to use
XEN_VIRT_SIZE instead of a hardcoded constant.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V4:
 - move definitions of XEN_VIRT_SIZE, BOOT_FDT_VIRT_{START,SIZE}, FIXMAP_{BASE,ADDR}
   below XEN_VIRT_START to have definitions appear in order.
 - define FIX_LAST as (FIX_MISC + 1) to have a guard slot at the end.
 - s/enumerated/numbered in the comment
 - update the loop which looks for the L1 page table in setup_fixmap_mappings() and
   the comment above it.
 - drop fences inside write_pte() and put 'fence r,r' in setup_fixmap() before sfence_vma().
 - update the commit message
 - drop printk message inside setup_fixmap().
---
Changes in V3:
 - s/XEN_SIZE/XEN_VIRT_SIZE
 - drop usage of XEN_VIRT_END.
 - sort newly introduced defines in config.h by address
 - code style fixes
 - drop the runtime check that the pte is valid, as it is already checked by BUG_ON() in the loop that finds the L1 page table.
 - update implementation of write_pte() with FENCE rw, rw.
 - add BUILD_BUG_ON() to check that the number of fixmap entries doesn't exceed the number of entries in a page table.
 - drop the set_fixmap() and clear_fixmap() declarations as they are neither used nor defined yet.
 - update the commit message.
 - s/__ASM_FIXMAP_H/ASM_FIXMAP_H
 - add SPDX-License-Identifier: GPL-2.0 
---
 xen/arch/riscv/include/asm/config.h |  8 ++++++
 xen/arch/riscv/include/asm/fixmap.h | 44 +++++++++++++++++++++++++++++
 xen/arch/riscv/include/asm/mm.h     |  2 ++
 xen/arch/riscv/include/asm/page.h   |  6 ++++
 xen/arch/riscv/mm.c                 | 43 ++++++++++++++++++++++++++++
 xen/arch/riscv/setup.c              |  2 ++
 xen/arch/riscv/xen.lds.S            |  2 +-
 7 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 xen/arch/riscv/include/asm/fixmap.h

Comments

Jan Beulich Aug. 13, 2024, 8:22 a.m. UTC | #1
On 09.08.2024 18:19, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/config.h
> +++ b/xen/arch/riscv/include/asm/config.h
> @@ -74,6 +74,14 @@
>  #error "unsupported RV_STAGE1_MODE"
>  #endif
>  
> +#define XEN_VIRT_SIZE           MB(2)
> +
> +#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE)
> +#define BOOT_FDT_VIRT_SIZE      MB(4)
> +
> +#define FIXMAP_BASE             (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
> +#define FIXMAP_ADDR(n)          (FIXMAP_BASE + (n) * PAGE_SIZE)

Just to confirm: Did you consider leaving gaps between the regions, but
then discarded that idea for whatever reason? It's not like you're tight
on address space.

As to FIXMAP_ADDR() - wouldn't that be a better fit in fixmap.h?

> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/fixmap.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * fixmap.h: compile-time virtual memory allocation
> + */
> +#ifndef ASM_FIXMAP_H
> +#define ASM_FIXMAP_H
> +
> +#include <xen/bug.h>
> +#include <xen/page-size.h>
> +#include <xen/pmap.h>
> +
> +#include <asm/page.h>
> +
> +/* Fixmap slots */
> +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
> +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
> +#define FIX_MISC (FIX_PMAP_END + 1)  /* Ephemeral mappings of hardware */
> +
> +#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */

As per my comment on the earlier version: No, I don't think this arranges
for a guard slot. It merely arranges for FIX_MISC to not trigger the
BUG_ON() in virt_to_fix().
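
To make the arithmetic behind this point concrete, here is a small standalone sketch (plain C, compilable on its own) that mimics the patch's fixmap macros. FIXMAP_BASE and NUM_FIX_PMAP are made-up values for the demo, not taken from the patch:

/*
 * Standalone illustration (not Xen code): what FIX_LAST = FIX_MISC + 1
 * actually buys.  FIXMAP_BASE and NUM_FIX_PMAP are made up for the demo.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE       4096UL
#define NUM_FIX_PMAP    8                      /* assumed; really from pmap.h */
#define FIXMAP_BASE     0xffffffc000600000UL   /* made-up base address */
#define FIXMAP_ADDR(n)  (FIXMAP_BASE + (n) * PAGE_SIZE)

#define FIX_PMAP_BEGIN  0
#define FIX_PMAP_END    (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1)
#define FIX_MISC        (FIX_PMAP_END + 1)
#define FIX_LAST        (FIX_MISC + 1)

#define FIXADDR_START   FIXMAP_ADDR(0)
#define FIXADDR_TOP     FIXMAP_ADDR(FIX_LAST)

int main(void)
{
    unsigned long misc = FIXMAP_ADDR(FIX_MISC);

    /*
     * With the +1, the FIX_MISC address falls inside
     * [FIXADDR_START, FIXADDR_TOP) -- exactly the range virt_to_fix()
     * checks.  Without it, FIXADDR_TOP would equal FIXMAP_ADDR(FIX_MISC)
     * and virt_to_fix() on that address would hit the BUG_ON().  Whether
     * that amounts to a "guard slot" is what is being questioned above.
     */
    assert(misc >= FIXADDR_START && misc < FIXADDR_TOP);
    printf("FIX_MISC slot: %#lx, FIXADDR_TOP: %#lx\n", misc, FIXADDR_TOP);
    return 0;
}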

> --- a/xen/arch/riscv/include/asm/page.h
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
>      BUG_ON("unimplemented");
>  }
>  
> +/* Write a pagetable entry. */
> +static inline void write_pte(pte_t *p, pte_t pte)
> +{
> +    *p = pte;
> +}

No use of write_atomic() here? And no read_pte() counterpart right away (then
also properly using read_atomic())?

> @@ -191,6 +195,45 @@ static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
>      return is_mode_supported;
>  }
>  
> +void __init setup_fixmap_mappings(void)
> +{
> +    pte_t *pte, tmp;
> +    unsigned int i;
> +
> +    BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
> +
> +    pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
> +
> +    /*
> +     * In RISC-V page table levels are numbered from Lx to L0 where
> +     * x is the highest page table level for currect  MMU mode ( for example,
> +     * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ).
> +     *
> +     * In this cycle we want to find L1 page table because as L0 page table
> +     * xen_fixmap[] will be used.
> +     */
> +    for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
> +    {
> +        BUG_ON(!pte_is_valid(*pte));
> +
> +        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> +        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
> +    }
> +
> +    BUG_ON(pte_is_valid(*pte));
> +
> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);
> +    write_pte(pte, tmp);
> +
> +    RISCV_FENCE(rw, rw);

In the changes section you say "r,r", and I was wondering about that. I
take it that "rw,rw" is really what's needed here.

Jan
Oleksii Kurochko Aug. 14, 2024, 2:21 p.m. UTC | #2
On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote:
> On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > --- a/xen/arch/riscv/include/asm/config.h
> > +++ b/xen/arch/riscv/include/asm/config.h
> > @@ -74,6 +74,14 @@
> >  #error "unsupported RV_STAGE1_MODE"
> >  #endif
> >  
> > +#define XEN_VIRT_SIZE           MB(2)
> > +
> > +#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE)
> > +#define BOOT_FDT_VIRT_SIZE      MB(4)
> > +
> > +#define FIXMAP_BASE             (BOOT_FDT_VIRT_START +
> > BOOT_FDT_VIRT_SIZE)
> > +#define FIXMAP_ADDR(n)          (FIXMAP_BASE + (n) * PAGE_SIZE)
> 
> Just to confirm: Did you consider leaving gaps between the regions,
> but
> then discarded that idea for whatever reason? It's not like you're
> tight
> on address space.
No, I would like to leave gaps between the regions. There is a gap at the
slot level where that is possible; inside a slot I decided just not to add
a gap, without any specific reason TBH. I can add some gap (for example,
0x100000) if that would be better and more consistent.
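
For reference, one possible way such gaps could be expressed, reusing the MB() helper the patch already uses; the 1 MB gap (0x100000, as mentioned above) is only illustrative and was not settled in the thread:

#define XEN_VIRT_SIZE           MB(2)

/* Illustrative 1 MB guard gaps between the regions (size not settled). */
#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE + MB(1))
#define BOOT_FDT_VIRT_SIZE      MB(4)

#define FIXMAP_BASE             (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE + MB(1))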

> 
> As to FIXMAP_ADDR() - wouldn't that be a better fit in fixmap.h?
Sure, that would be a more correct place for this macro.

> 
> > --- /dev/null
> > +++ b/xen/arch/riscv/include/asm/fixmap.h
> > @@ -0,0 +1,44 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * fixmap.h: compile-time virtual memory allocation
> > + */
> > +#ifndef ASM_FIXMAP_H
> > +#define ASM_FIXMAP_H
> > +
> > +#include <xen/bug.h>
> > +#include <xen/page-size.h>
> > +#include <xen/pmap.h>
> > +
> > +#include <asm/page.h>
> > +
> > +/* Fixmap slots */
> > +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
> > +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of
> > PMAP */
> > +#define FIX_MISC (FIX_PMAP_END + 1)  /* Ephemeral mappings of
> > hardware */
> > +
> > +#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */
> 
> As per my comment on the earlier version: No, I don't think this
> arranges
> for a guard slot. It merely arranges for FIX_MISC to not trigger the
> BUG_ON() in virt_to_fix().
> 
> > --- a/xen/arch/riscv/include/asm/page.h
> > +++ b/xen/arch/riscv/include/asm/page.h
> > @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned
> > long mfn, bool sync_icache)
> >      BUG_ON("unimplemented");
> >  }
> >  
> > +/* Write a pagetable entry. */
> > +static inline void write_pte(pte_t *p, pte_t pte)
> > +{
> > +    *p = pte;
> > +}
> 
> No use of write_atomic() here? And no read_pte() counterpart right
> away (then
> also properly using read_atomic())?
I wanted to avoid the fence before "*p = pte", which would be present in
the case of write_atomic(). Wouldn't it be enough to use WRITE_ONCE()
here?
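
For reference, a minimal sketch of the kind of helpers under discussion: a fence-free single store for write_pte() plus a read_pte() counterpart, written with plain volatile accesses (roughly what a WRITE_ONCE()/READ_ONCE()-style wrapper boils down to). It assumes pte_t wraps a single unsigned long named pte, and it is not the code that was eventually committed:

/*
 * Sketch only -- not the committed implementation.  Assumes
 * typedef struct { unsigned long pte; } pte_t; so a single naturally
 * aligned word access suffices.
 */
static inline void write_pte(pte_t *p, pte_t pte)
{
    /* Single store, no implied fence (unlike write_atomic() on this port). */
    ((volatile pte_t *)p)->pte = pte.pte;
}

static inline pte_t read_pte(const pte_t *p)
{
    /* Single load; callers add whatever ordering they need. */
    return (pte_t){ .pte = ((const volatile pte_t *)p)->pte };
}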


> 
> > @@ -191,6 +195,45 @@ static bool __init
> > check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
> >      return is_mode_supported;
> >  }
> >  
> > +void __init setup_fixmap_mappings(void)
> > +{
> > +    pte_t *pte, tmp;
> > +    unsigned int i;
> > +
> > +    BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
> > +
> > +    pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL,
> > FIXMAP_ADDR(0))];
> > +
> > +    /*
> > +     * In RISC-V page table levels are numbered from Lx to L0
> > where
> > +     * x is the highest page table level for currect  MMU mode (
> > for example,
> > +     * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ).
> > +     *
> > +     * In this cycle we want to find L1 page table because as L0
> > page table
> > +     * xen_fixmap[] will be used.
> > +     */
> > +    for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
> > +    {
> > +        BUG_ON(!pte_is_valid(*pte));
> > +
> > +        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> > +        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
> > +    }
> > +
> > +    BUG_ON(pte_is_valid(*pte));
> > +
> > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> > PTE_TABLE);
> > +    write_pte(pte, tmp);
> > +
> > +    RISCV_FENCE(rw, rw);
> 
> In the changes section you say "r,r", and I was wondering about that.
> I
> take it that "rw,rw" is really what's needed here.
It was a typo. I will update the change log.

Thanks.

~ Oleksii
Jan Beulich Aug. 14, 2024, 3:08 p.m. UTC | #3
On 14.08.2024 16:21, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote:
>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>> --- a/xen/arch/riscv/include/asm/page.h
>>> +++ b/xen/arch/riscv/include/asm/page.h
>>> @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned
>>> long mfn, bool sync_icache)
>>>      BUG_ON("unimplemented");
>>>  }
>>>  
>>> +/* Write a pagetable entry. */
>>> +static inline void write_pte(pte_t *p, pte_t pte)
>>> +{
>>> +    *p = pte;
>>> +}
>>
>> No use of write_atomic() here? And no read_pte() counterpart right
>> away (then
>> also properly using read_atomic())?
> I wanted to avoid the fence before "*p=pte" which in case of
> write_atomic() will be present.

Well, this goes back to write_atomic() resolving to write{b,w,l,q}() for
unclear reasons; even the comment in our atomic.h says "For some reason".
The fence there is of course getting in the way here. I keep forgetting
about that aspect, because neither x86 nor Arm has anything similar
afaics.

> Won't it be enough to use here WRITE_ONCE()?

Maybe. I'm not entirely sure.

Jan
Oleksii Kurochko Aug. 15, 2024, 8:16 a.m. UTC | #4
On Wed, 2024-08-14 at 17:08 +0200, Jan Beulich wrote:
> On 14.08.2024 16:21, oleksii.kurochko@gmail.com wrote:
> > On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote:
> > > On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > > > --- a/xen/arch/riscv/include/asm/page.h
> > > > +++ b/xen/arch/riscv/include/asm/page.h
> > > > @@ -81,6 +81,12 @@ static inline void
> > > > flush_page_to_ram(unsigned
> > > > long mfn, bool sync_icache)
> > > >      BUG_ON("unimplemented");
> > > >  }
> > > >  
> > > > +/* Write a pagetable entry. */
> > > > +static inline void write_pte(pte_t *p, pte_t pte)
> > > > +{
> > > > +    *p = pte;
> > > > +}
> > > 
> > > No use of write_atomic() here? And no read_pte() counterpart
> > > right
> > > away (then
> > > also properly using read_atomic())?
> > I wanted to avoid the fence before "*p=pte" which in case of
> > write_atomic() will be present.
> 
> Well, this goes back to write_atomic() resolving to write{b,w,l,q}()
> for
> unclear reasons; even the comment in our atomic.h says "For some
> reason".
> The fence there is of course getting in the way here. I keep
> forgetting
> about that aspect, because neither x86 nor Arm has anything similar
> afaics.
Good point. I overlooked something here (and misinterpreted your comments
regarding that write_atomic() implementation), but I think it would be
better to use write{b,w,l,q}_cpu() for write_atomic().

~ Oleksii

> 
> > Won't it be enough to use here WRITE_ONCE()?
> 
> Maybe. I'm not entirely sure.
> 
> Jan

Patch

diff --git a/xen/arch/riscv/include/asm/config.h b/xen/arch/riscv/include/asm/config.h
index 50583aafdc..4b4cc529a9 100644
--- a/xen/arch/riscv/include/asm/config.h
+++ b/xen/arch/riscv/include/asm/config.h
@@ -74,6 +74,14 @@ 
 #error "unsupported RV_STAGE1_MODE"
 #endif
 
+#define XEN_VIRT_SIZE           MB(2)
+
+#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE)
+#define BOOT_FDT_VIRT_SIZE      MB(4)
+
+#define FIXMAP_BASE             (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
+#define FIXMAP_ADDR(n)          (FIXMAP_BASE + (n) * PAGE_SIZE)
+
 #define DIRECTMAP_SLOT_END      509
 #define DIRECTMAP_SLOT_START    200
 #define DIRECTMAP_VIRT_START    SLOTN(DIRECTMAP_SLOT_START)
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
new file mode 100644
index 0000000000..2ecd05dd9f
--- /dev/null
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -0,0 +1,44 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ */
+#ifndef ASM_FIXMAP_H
+#define ASM_FIXMAP_H
+
+#include <xen/bug.h>
+#include <xen/page-size.h>
+#include <xen/pmap.h>
+
+#include <asm/page.h>
+
+/* Fixmap slots */
+#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
+#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
+#define FIX_MISC (FIX_PMAP_END + 1)  /* Ephemeral mappings of hardware */
+
+#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */
+
+#define FIXADDR_START FIXMAP_ADDR(0)
+#define FIXADDR_TOP FIXMAP_ADDR(FIX_LAST)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Direct access to xen_fixmap[] should only happen when {set,
+ * clear}_fixmap() is unusable (e.g. where we would end up to
+ * recursively call the helpers).
+ */
+extern pte_t xen_fixmap[];
+
+#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
+
+static inline unsigned int virt_to_fix(vaddr_t vaddr)
+{
+    BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+
+    return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ASM_FIXMAP_H */
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 25af9e1aaa..a0bdc2bc3a 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -255,4 +255,6 @@  static inline unsigned int arch_get_dma_bitsize(void)
     return 32; /* TODO */
 }
 
+void setup_fixmap_mappings(void);
+
 #endif /* _ASM_RISCV_MM_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index c831e16417..5db3edb100 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -81,6 +81,12 @@  static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
     BUG_ON("unimplemented");
 }
 
+/* Write a pagetable entry. */
+static inline void write_pte(pte_t *p, pte_t pte)
+{
+    *p = pte;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PAGE_H */
diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
index 7d09e781bf..b8ff91cf4e 100644
--- a/xen/arch/riscv/mm.c
+++ b/xen/arch/riscv/mm.c
@@ -12,6 +12,7 @@ 
 #include <asm/early_printk.h>
 #include <asm/csr.h>
 #include <asm/current.h>
+#include <asm/fixmap.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 
@@ -49,6 +50,9 @@  stage1_pgtbl_root[PAGETABLE_ENTRIES];
 pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
 stage1_pgtbl_nonroot[PGTBL_INITIAL_COUNT * PAGETABLE_ENTRIES];
 
+pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
+xen_fixmap[PAGETABLE_ENTRIES];
+
 #define HANDLE_PGTBL(curr_lvl_num)                                          \
     index = pt_index(curr_lvl_num, page_addr);                              \
     if ( pte_is_valid(pgtbl[index]) )                                       \
@@ -191,6 +195,45 @@  static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
     return is_mode_supported;
 }
 
+void __init setup_fixmap_mappings(void)
+{
+    pte_t *pte, tmp;
+    unsigned int i;
+
+    BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
+
+    pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
+
+    /*
+     * In RISC-V, page table levels are numbered from Lx down to L0, where
+     * x is the highest page table level for the current MMU mode (for
+     * example, Sv39 has 3 page table levels, so x = 2: L2 -> L1 -> L0).
+     *
+     * In this loop we want to find the L1 page table, because xen_fixmap[]
+     * will be used as the L0 page table.
+     */
+    for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
+    {
+        BUG_ON(!pte_is_valid(*pte));
+
+        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
+        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
+    }
+
+    BUG_ON(pte_is_valid(*pte));
+
+    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);
+    write_pte(pte, tmp);
+
+    RISCV_FENCE(rw, rw);
+    sfence_vma();
+
+    /*
+     * We only need the zeroeth table allocated, but not the PTEs set, because
+     * set_fixmap() will set them on the fly.
+     */
+}
+
 /*
  * setup_initial_pagetables:
  *
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 4defad68f4..13f0e8c77d 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -46,6 +46,8 @@  void __init noreturn start_xen(unsigned long bootcpu_id,
     test_macros_from_bug_h();
 #endif
 
+    setup_fixmap_mappings();
+
     printk("All set up\n");
 
     for ( ;; )
diff --git a/xen/arch/riscv/xen.lds.S b/xen/arch/riscv/xen.lds.S
index 070b19d915..7a683f6065 100644
--- a/xen/arch/riscv/xen.lds.S
+++ b/xen/arch/riscv/xen.lds.S
@@ -181,6 +181,6 @@  ASSERT(!SIZEOF(.got.plt),  ".got.plt non-empty")
  * Changing the size of Xen binary can require an update of
  * PGTBL_INITIAL_COUNT.
  */
-ASSERT(_end - _start <= MB(2), "Xen too large for early-boot assumptions")
+ASSERT(_end - _start <= XEN_VIRT_SIZE, "Xen too large for early-boot assumptions")
 
 ASSERT(_ident_end - _ident_start <= IDENT_AREA_SIZE, "identity region is too big");