
[v3,4/9] xen/riscv: setup fixmap mapping

Message ID 04576976b82b97442f645b83b3d62475d144af8e.1721834549.git.oleksii.kurochko@gmail.com (mailing list archive)
State Superseded
Series RISCV device tree mapping

Commit Message

Oleksii Kurochko July 24, 2024, 3:31 p.m. UTC
Introduce a function to set up fixmap mappings and an L0 page
table for the fixmap.

Additionally, defines were introduced in riscv/config.h to
calculate the FIXMAP_BASE address. This involved introducing
BOOT_FDT_VIRT_{START, SIZE} and XEN_VIRT_SIZE.

Also, the Xen size check in the riscv/xen.lds.S script was
updated to use XEN_VIRT_SIZE instead of a hardcoded constant.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V3:
 - s/XEN_SIZE/XEN_VIRT_SIZE
 - drop usage of XEN_VIRT_END.
 - sort newly introduced defines in config.h by address
 - code style fixes
 - drop the runtime check that the pte is valid, as it is already checked by BUG_ON() in the loop that finds the L1 page table.
 - update implementation of write_pte() with FENCE rw, rw.
 - add BUILD_BUG_ON() to check that the number of fixmap entries doesn't exceed the number of entries in a page table.
 - drop the set_fixmap()/clear_fixmap() declarations as they are neither used nor defined yet
 - update the commit message.
 - s/__ASM_FIXMAP_H/ASM_FIXMAP_H
 - add SPDX-License-Identifier: GPL-2.0 
---
 xen/arch/riscv/include/asm/config.h |  8 +++++
 xen/arch/riscv/include/asm/fixmap.h | 44 +++++++++++++++++++++++++++
 xen/arch/riscv/include/asm/mm.h     |  2 ++
 xen/arch/riscv/include/asm/page.h   |  9 ++++++
 xen/arch/riscv/mm.c                 | 47 +++++++++++++++++++++++++++++
 xen/arch/riscv/setup.c              |  2 ++
 xen/arch/riscv/xen.lds.S            |  2 +-
 7 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 xen/arch/riscv/include/asm/fixmap.h

Comments

Jan Beulich July 29, 2024, 1:35 p.m. UTC | #1
On 24.07.2024 17:31, Oleksii Kurochko wrote:
> Introduce a function to set up fixmap mappings and L0 page
> table for fixmap.
> 
> Additionally, defines were introduced in riscv/config.h to
> calculate the FIXMAP_BASE address.
> This involved introducing BOOT_FDT_VIRT_{START, SIZE} and
> XEN_VIRT_SIZE, XEN_VIRT_END.
> 
> Also, the check of Xen size was updated in the riscv/lds.S
> script to use XEN_VIRT_SIZE instead of a hardcoded constant.
> 
> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>

Yet set_fixmap() isn't introduced here, so effectively it's all dead
code. This could do with mentioning, as I at least would expect
set_fixmap() to be usable once fixmaps are properly set up.
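
For reference, a sketch of the shape that interface takes on other
ports (signatures as on Arm; treat them as an assumption here, and
they are deliberately not part of this patch):

    /* Map a page at a fixmap slot (sketch only). */
    extern void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags);
    /* Remove the mapping from a fixmap slot. */
    extern void clear_fixmap(unsigned int map);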

> --- a/xen/arch/riscv/include/asm/config.h
> +++ b/xen/arch/riscv/include/asm/config.h
> @@ -66,6 +66,14 @@
>  #define SLOTN_ENTRY_BITS        (HYP_PT_ROOT_LEVEL * VPN_BITS + PAGE_SHIFT)
>  #define SLOTN(slot)             (_AT(vaddr_t, slot) << SLOTN_ENTRY_BITS)
>  
> +#define XEN_VIRT_SIZE           MB(2)
> +
> +#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE)
> +#define BOOT_FDT_VIRT_SIZE      MB(4)
> +
> +#define FIXMAP_BASE             (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
> +#define FIXMAP_ADDR(n)          (FIXMAP_BASE + (n) * PAGE_SIZE)
> +
>  #if RV_STAGE1_MODE == SATP_MODE_SV39
>  #define XEN_VIRT_START 0xFFFFFFFFC0000000
>  #elif RV_STAGE1_MODE == SATP_MODE_SV48

Related to my earlier comment: Is there a particular reason that what
you add cannot live _below_ the XEN_VIRT_START definitions, so things
actually appear in order?

> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/fixmap.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * fixmap.h: compile-time virtual memory allocation
> + */
> +#ifndef ASM_FIXMAP_H
> +#define ASM_FIXMAP_H
> +
> +#include <xen/bug.h>
> +#include <xen/page-size.h>
> +#include <xen/pmap.h>
> +
> +#include <asm/page.h>
> +
> +/* Fixmap slots */
> +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
> +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
> +#define FIX_MISC (FIX_PMAP_END + 1)  /* Ephemeral mappings of hardware */
> +
> +#define FIX_LAST FIX_MISC
> +
> +#define FIXADDR_START FIXMAP_ADDR(0)
> +#define FIXADDR_TOP FIXMAP_ADDR(FIX_LAST)

This doesn't look right, even if it matches what Arm has. Following
what we have for x86, the page at FIXADDR_TOP should be a guard page,
i.e. not have any mapping ever put there. Whereas you put FIX_MISC's
mapping there. This then results in ...

> +#ifndef __ASSEMBLY__
> +
> +/*
> + * Direct access to xen_fixmap[] should only happen when {set,
> + * clear}_fixmap() is unusable (e.g. where we would end up
> + * recursively calling the helpers).
> + */
> +extern pte_t xen_fixmap[];
> +
> +#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
> +
> +static inline unsigned int virt_to_fix(vaddr_t vaddr)
> +{
> +    BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);

... this being wrong, i.e. triggering for FIX_MISC. Again, same issue
on Arm afaics, just that it would trigger for FIX_PMAP_END there. (Cc-ing
the other two Arm maintainers for that.)
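
Following the x86 scheme, a guard-slot arrangement might look like
this (a sketch, assuming the page at FIXADDR_TOP is simply left
unmapped; not what the patch currently does):

    #define FIXADDR_START FIXMAP_ADDR(0)
    /* First byte past the last usable slot; this page stays unmapped. */
    #define FIXADDR_TOP   FIXMAP_ADDR(FIX_LAST + 1)

With that, the BUG_ON() range check in virt_to_fix() quoted above
would accept FIX_MISC addresses while still rejecting the guard page.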

> @@ -81,6 +82,14 @@ static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
>      BUG_ON("unimplemented");
>  }
>  
> +/* Write a pagetable entry. */
> +static inline void write_pte(pte_t *p, pte_t pte)
> +{
> +    RISCV_FENCE(rw, rw);
> +    *p = pte;
> +    RISCV_FENCE(rw, rw);
> +}

Why the first of the two fences? And isn't having the 2nd here going
to badly affect batch updates of page tables?

> @@ -191,6 +195,49 @@ static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
>      return is_mode_supported;
>  }
>  
> +void __init setup_fixmap_mappings(void)
> +{
> +    pte_t *pte, tmp;
> +    unsigned int i;
> +
> +    BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
> +
> +    pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
> +
> +    /*
> +     * In RISC-V page table levels are enumerated from Lx to L0 where

Nit: s/enumerated/numbered/ ?

> +     * x is the highest page table level for the current MMU mode (for
> +     * example, Sv39 has 3 page table levels, so x = 2 (L2 -> L1 -> L0)).
> +     *
> +     * In this cycle we want to find the L1 page table, because
> +     * xen_fixmap[] will be used as the L0 page table.
> +     *
> +     * i is initialized to ( HYP_PT_ROOT_LEVEL - 1 ) because the pte for
> +     * L2 ( in case of Sv39 ) has been retrieved above.
> +     */
> +    for ( i = HYP_PT_ROOT_LEVEL - 1; i != 0; i-- )

I think the subtracting of 1 is unhelpful here. Think of another case
where you want to walk down to L0. How would you express that with a
similar for() construct? It would imo be more natural to use

    for ( i = HYP_PT_ROOT_LEVEL; i > 1; i-- )

here (and then in that hypothetical other case

    for ( i = HYP_PT_ROOT_LEVEL; i > 0; i-- )

), and then the last part of the comment likely wouldn't be needed either.
However, considering ...

> +    {
> +        BUG_ON(!pte_is_valid(*pte));
> +
> +        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> +        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];

... the use of i here, it may instead want to be

    for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )

> +    }
> +
> +    BUG_ON(pte_is_valid(*pte));
> +
> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);

I'm a little puzzled by the use of LINK_TO_LOAD() (and LOAD_TO_LINK() a
little further up) here. Don't you have functioning __pa() and __va()?

> +    write_pte(pte, tmp);
> +
> +    sfence_vma();
> +
> +    printk("(XEN) fixmap is mapped\n");

Why the (XEN) prefix? And perhaps why the printk() in the first place?

Jan
Oleksii Kurochko July 29, 2024, 4:11 p.m. UTC | #2
On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> On 24.07.2024 17:31, Oleksii Kurochko wrote:
> > Introduce a function to set up fixmap mappings and L0 page
> > table for fixmap.
> > 
> > Additionally, defines were introduced in riscv/config.h to
> > calculate the FIXMAP_BASE address.
> > This involved introducing BOOT_FDT_VIRT_{START, SIZE} and
> > XEN_VIRT_SIZE, XEN_VIRT_END.
> > 
> > Also, the check of Xen size was updated in the riscv/lds.S
> > script to use XEN_VIRT_SIZE instead of a hardcoded constant.
> > 
> > Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
> 
> Yet set_fixmap() isn't introduced here, so effectively it's all dead
> code. This could do with mentioning, as I at least would expect
> set_fixmap() to be usable once fixmaps are properly set up.
> 
> > --- a/xen/arch/riscv/include/asm/config.h
> > +++ b/xen/arch/riscv/include/asm/config.h
> > @@ -66,6 +66,14 @@
> >  #define SLOTN_ENTRY_BITS        (HYP_PT_ROOT_LEVEL * VPN_BITS +
> > PAGE_SHIFT)
> >  #define SLOTN(slot)             (_AT(vaddr_t, slot) <<
> > SLOTN_ENTRY_BITS)
> >  
> > +#define XEN_VIRT_SIZE           MB(2)
> > +
> > +#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE)
> > +#define BOOT_FDT_VIRT_SIZE      MB(4)
> > +
> > +#define FIXMAP_BASE             (BOOT_FDT_VIRT_START +
> > BOOT_FDT_VIRT_SIZE)
> > +#define FIXMAP_ADDR(n)          (FIXMAP_BASE + (n) * PAGE_SIZE)
> > +
> >  #if RV_STAGE1_MODE == SATP_MODE_SV39
> >  #define XEN_VIRT_START 0xFFFFFFFFC0000000
> >  #elif RV_STAGE1_MODE == SATP_MODE_SV48
> 
> Related to my earlier comment: Is there a particular reason that what
> you add cannot live _below_ the XEN_VIRT_START definitions, so things
> actually appear in order?
It can live _below_ the XEN_VIRT_START definitions; I just wanted to
stay in sync with the table above.
But I'll move everything below XEN_VIRT_START, as it is used by the
newly introduced definitions.
> 
> 
> 
> > @@ -81,6 +82,14 @@ static inline void flush_page_to_ram(unsigned
> > long mfn, bool sync_icache)
> >      BUG_ON("unimplemented");
> >  }
> >  
> > +/* Write a pagetable entry. */
> > +static inline void write_pte(pte_t *p, pte_t pte)
> > +{
> > +    RISCV_FENCE(rw, rw);
> > +    *p = pte;
> > +    RISCV_FENCE(rw, rw);
> > +}
> 
> Why the first of the two fences? 
To ensure that writes have completed with the old mapping.

> And isn't having the 2nd here going
> to badly affect batch updates of page tables?
By batch do you mean updating more than one pte?
If yes, then it will definitely have an effect. The fence could be
dropped here, but could we do something to make sure that no one
forgets to add a fence/barrier?

> 
> > +     * x is the highest page table level for the current MMU mode
> > +     * (for example, Sv39 has 3 page table levels, so x = 2
> > +     * (L2 -> L1 -> L0)).
> > +     *
> > +     * In this cycle we want to find the L1 page table, because
> > +     * xen_fixmap[] will be used as the L0 page table.
> > +     *
> > +     * i is initialized to ( HYP_PT_ROOT_LEVEL - 1 ) because the pte
> > +     * for L2 ( in case of Sv39 ) has been retrieved above.
> > +     */
> > +    for ( i = HYP_PT_ROOT_LEVEL - 1; i != 0; i-- )
> 
> I think the subtracting of 1 is unhelpful here. Think of another case
> where you want to walk down to L0. How would you express that with a
> similar for() construct? It would imo be more natural to use
> 
>     for ( i = HYP_PT_ROOT_LEVEL; i > 1; i-- )
Then on the first iteration _i_ will be initialized as L2, not L1. As
an option we then have to use ...
> 
> here (and then in that hypothetical other case
> 
>     for ( i = HYP_PT_ROOT_LEVEL; i > 0; i-- )
> 
> ), and then the last part of the comment likely wouldn't be needed
> either.
> However, considering ...
> 
> > +    {
> > +        BUG_ON(!pte_is_valid(*pte));
> > +
> > +        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> > +        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
> 
> ... the use of i here, it may instead want to be
... should be ( i - 1 ).
I am okay with such refactoring.

> 
>     for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
> 
> > +    }
> > +
> > +    BUG_ON(pte_is_valid(*pte));
> > +
> > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> > PTE_TABLE);
> 
> I'm a little puzzled by the use of LINK_TO_LOAD() (and LOAD_TO_LINK()
> a
> little further up) here. Don't you have functioning __pa() and
> __va()?
No, they haven't been introduced yet.

~ Oleksii
Jan Beulich July 30, 2024, 7:49 a.m. UTC | #3
On 29.07.2024 18:11, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
>> On 24.07.2024 17:31, Oleksii Kurochko wrote:
>>> @@ -81,6 +82,14 @@ static inline void flush_page_to_ram(unsigned
>>> long mfn, bool sync_icache)
>>>      BUG_ON("unimplemented");
>>>  }
>>>  
>>> +/* Write a pagetable entry. */
>>> +static inline void write_pte(pte_t *p, pte_t pte)
>>> +{
>>> +    RISCV_FENCE(rw, rw);
>>> +    *p = pte;
>>> +    RISCV_FENCE(rw, rw);
>>> +}
>>
>> Why the first of the two fences? 
> To ensure that writes have completed with the old mapping.

Wait: There can certainly be uncompleted writes, but those must have
walked the page tables already, or else a (synchronous) fault could
not be delivered on the originating store instruction. Or am I
misunderstanding how paging (and associated faults) work on RISC-V?

>> And isn't having the 2nd here going
>> to badly affect batch updates of page tables?
> By batch do you mean updating more than one pte?
> If yes, then it will definitely have an effect. The fence could be
> dropped here, but could we do something to make sure that no one
> forgets to add a fence/barrier?

Well, imo you first want to spell out where the responsibility for
fencing lies. Then follow the spelled-out rules in all new code you
add.

>>> +     * x is the highest page table level for the current MMU mode
>>> +     * (for example, Sv39 has 3 page table levels, so x = 2
>>> +     * (L2 -> L1 -> L0)).
>>> +     *
>>> +     * In this cycle we want to find the L1 page table, because
>>> +     * xen_fixmap[] will be used as the L0 page table.
>>> +     *
>>> +     * i is initialized to ( HYP_PT_ROOT_LEVEL - 1 ) because the pte
>>> +     * for L2 ( in case of Sv39 ) has been retrieved above.
>>> +     */
>>> +    for ( i = HYP_PT_ROOT_LEVEL - 1; i != 0; i-- )
>>
>> I think the subtracting of 1 is unhelpful here. Think of another case
>> where you want to walk down to L0. How would you express that with a
>> similar for() construct? It would imo be more natural to use
>>
>>     for ( i = HYP_PT_ROOT_LEVEL; i > 1; i-- )
> Then on the first iteration _i_ will be initialized as L2, not L1. As
> an option we then have to use ...
>>
>> here (and then in that hypothetical other case
>>
>>     for ( i = HYP_PT_ROOT_LEVEL; i > 0; i-- )
>>
>> ), and then the last part of the comment likely wouldn't be needed
>> either.
>> However, considering ...
>>
>>> +    {
>>> +        BUG_ON(!pte_is_valid(*pte));
>>> +
>>> +        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
>>> +        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
>>
>> ... the use of i here, it may instead want to be
> ... should be ( i - 1 ).
> I am okay with such refactoring.

Well, because of this use of i I specifically suggested ...

>>     for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )

... this.

>>> +    }
>>> +
>>> +    BUG_ON(pte_is_valid(*pte));
>>> +
>>> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
>>> PTE_TABLE);
>>
>> I'm a little puzzled by the use of LINK_TO_LOAD() (and LOAD_TO_LINK()
>> a
>> little further up) here. Don't you have functioning __pa() and
>> __va()?
> No, they haven't been introduced yet.

So you're building up more technical debt, as the use of said two
constructs really should be limited to very early setup. Aiui once
you have functioning __va() / __pa() the code here would want
changing?

Jan
Oleksii Kurochko July 30, 2024, 11:25 a.m. UTC | #4
On Tue, 2024-07-30 at 09:49 +0200, Jan Beulich wrote:
> On 29.07.2024 18:11, oleksii.kurochko@gmail.com wrote:
> > On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > > On 24.07.2024 17:31, Oleksii Kurochko wrote:
> > > > @@ -81,6 +82,14 @@ static inline void
> > > > flush_page_to_ram(unsigned
> > > > long mfn, bool sync_icache)
> > > >      BUG_ON("unimplemented");
> > > >  }
> > > >  
> > > > +/* Write a pagetable entry. */
> > > > +static inline void write_pte(pte_t *p, pte_t pte)
> > > > +{
> > > > +    RISCV_FENCE(rw, rw);
> > > > +    *p = pte;
> > > > +    RISCV_FENCE(rw, rw);
> > > > +}
> > > 
> > > Why the first of the two fences? 
> > To ensure that writes have completed with the old mapping.
> 
> Wait: There can certainly be uncompleted writes, but those must have
> walked the page tables already, or else a (synchronous) fault could
> not be delivered on the originating store instruction. Or am I
> misunderstanding how paging (and associated faults) work on RISC-V?
I am not sure that I correctly understand the part regarding a
(synchronous) fault. Could you please come up with an example?

If something goes wrong during the page table walk, then a fault will
be raised.

My initial intention was to make sure that if I am writing to an
actively in-use page table that other cores can see at the time, then
the fences above are required. It is not the case for now, as we have
only one CPU, but I assume that it will be the case once SMP is
enabled and more than one CPU is able to work with the same page table.

>>> And isn't having the 2nd here going
>>> to badly affect batch updates of page tables?
>> By batch do you mean updating more than one pte?
>> If yes, then it will definitely have an effect. The fence could be
>> dropped here, but could we do something to make sure that no one
>> forgets to add a fence/barrier?

> Well, imo you first want to spell out where the responsibility for
> fencing lies. Then follow the spelled-out rules in all new code you
> add.
It makes sense. It would be nice if someone could help me with that.

> 
> > > > +    }
> > > > +
> > > > +    BUG_ON(pte_is_valid(*pte));
> > > > +
> > > > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned
> > > > long)&xen_fixmap),
> > > > PTE_TABLE);
> > > 
> > > I'm a little puzzled by the use of LINK_TO_LOAD() (and
> > > LOAD_TO_LINK()
> > > a
> > > little further up) here. Don't you have functioning __pa() and
> > > __va()?
> > No, they haven't been introduced yet.
> 
> So you're building up more technical debt, as the use of said two
> constructs really should be limited to very early setup. Aiui once
> you have functioning __va() / __pa() the code here would want
> changing?

Ideally yes, it would want to be changed.

Would it be a better solution to define __va() and __pa() using
LOAD_TO_LINK()/LINK_TO_LOAD(), so that when the "real" __va() and
__pa() are ready, only the definitions of __va() and __pa() need to
change?
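
A minimal sketch of that interim approach ( assuming the definitions
would later be swapped for directmap-based implementations ):

    /* Interim only: correct just for addresses within the Xen image range. */
    #define __pa(x) (LINK_TO_LOAD((unsigned long)(x)))
    #define __va(x) ((void *)LOAD_TO_LINK((unsigned long)(x)))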

~ Oleksii
Jan Beulich July 30, 2024, 12:11 p.m. UTC | #5
On 30.07.2024 13:25, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-07-30 at 09:49 +0200, Jan Beulich wrote:
>> On 29.07.2024 18:11, oleksii.kurochko@gmail.com wrote:
>>> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
>>>> On 24.07.2024 17:31, Oleksii Kurochko wrote:
>>>>> @@ -81,6 +82,14 @@ static inline void
>>>>> flush_page_to_ram(unsigned
>>>>> long mfn, bool sync_icache)
>>>>>      BUG_ON("unimplemented");
>>>>>  }
>>>>>  
>>>>> +/* Write a pagetable entry. */
>>>>> +static inline void write_pte(pte_t *p, pte_t pte)
>>>>> +{
>>>>> +    RISCV_FENCE(rw, rw);
>>>>> +    *p = pte;
>>>>> +    RISCV_FENCE(rw, rw);
>>>>> +}
>>>>
>>>> Why the first of the two fences? 
>>> To ensure that writes have completed with the old mapping.
>>
>> Wait: There can certainly be uncompleted writes, but those must have
>> walked the page tables already, or else a (synchronous) fault could
>> not be delivered on the originating store instruction. Or am I
>> misunderstanding how paging (and associated faults) work on RISC-V?
> I am not sure that I correctly understand the part regarding a
> (synchronous) fault. Could you please come up with an example?
> 
> If something goes wrong during the page table walk, then a fault will
> be raised.

On the very insn, with subsequent insns not having started executing
(from an architectural perspective, i.e. leaving aside speculation).
That is what my use of "synchronous" meant.

> My initial intention was to make sure that if I am writing to an
> actively in-use page table that other cores can see at the time, then
> the fences above are required. It is not the case for now, as we have
> only one CPU, but I assume that it will be the case once SMP is
> enabled and more than one CPU is able to work with the same page table.

Would that first fence really help there? The other CPU could use
the page tables in the window between the fence and the write. My
understanding of the need for fences is for them to be used at times
where ordering of memory accesses matters. For the moment I don't
see this as an aspect for the 1st fence here, but I may be
overlooking something.

>>>>> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned
>>>>> long)&xen_fixmap),
>>>>> PTE_TABLE);
>>>>
>>>> I'm a little puzzled by the use of LINK_TO_LOAD() (and
>>>> LOAD_TO_LINK()
>>>> a
>>>> little further up) here. Don't you have functioning __pa() and
>>>> __va()?
>>> No, they haven't been introduced yet.
>>
>> So you're building up more technical debt, as the use of said two
>> constructs really should be limited to very early setup. Aiui once
>> you have functioning __va() / __pa() the code here would want
>> changing?
> 
> Ideally yes, it would want to be changed.
> 
> Would it be a better solution to define __va() and __pa() using
> LOAD_TO_LINK()/LINK_TO_LOAD(), so that when the "real" __va() and
> __pa() are ready, only the definitions of __va() and __pa() need to
> change?

Well, that's something you're in a better position to answer, as it
depends on the ordering of subsequent work of yours.

Jan
Oleksii Kurochko July 31, 2024, 9:47 a.m. UTC | #6
On Tue, 2024-07-30 at 14:11 +0200, Jan Beulich wrote:
> On 30.07.2024 13:25, oleksii.kurochko@gmail.com wrote:
> > On Tue, 2024-07-30 at 09:49 +0200, Jan Beulich wrote:
> > > On 29.07.2024 18:11, oleksii.kurochko@gmail.com wrote:
> > > > On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > > > > On 24.07.2024 17:31, Oleksii Kurochko wrote:
> > > > > > @@ -81,6 +82,14 @@ static inline void
> > > > > > flush_page_to_ram(unsigned
> > > > > > long mfn, bool sync_icache)
> > > > > >      BUG_ON("unimplemented");
> > > > > >  }
> > > > > >  
> > > > > > +/* Write a pagetable entry. */
> > > > > > +static inline void write_pte(pte_t *p, pte_t pte)
> > > > > > +{
> > > > > > +    RISCV_FENCE(rw, rw);
> > > > > > +    *p = pte;
> > > > > > +    RISCV_FENCE(rw, rw);
> > > > > > +}
> > > > > 
> > > > > Why the first of the two fences? 
> > > > To ensure that writes have completed with the old mapping.
> > > 
> > > Wait: There can certainly be uncompleted writes, but those must
> > > have
> > > walked the page tables already, or else a (synchronous) fault
> > > could
> > > not be delivered on the originating store instruction. Or am I
> > > misunderstanding how paging (and associated faults) work on RISC-
> > > V?
> > I am not sure that I correctly understand the part regarding a
> > (synchronous) fault. Could you please come up with an example?
> > 
> > If something goes wrong during the page table walk, then a fault
> > will be raised.
> 
> On the very insn, with subsequent insns not having started executing
> (from an architectural perspective, i.e. leaving aside speculation).
> That is what my use of "synchronous" meant.
> 
> > My initial intention was to make sure that if I am writing to an
> > actively in-use page table that other cores can see at the time,
> > then the fences above are required. It is not the case for now, as
> > we have only one CPU, but I assume that it will be the case once
> > SMP is enabled and more than one CPU is able to work with the same
> > page table.
> 
> Would that first fence really help there? The other CPU could use
> the page tables in the window between the fence and the write. My
> understanding of the need for fences is for them to be used at times
> where ordering of memory accesses matters. For the moment I don't
> see this as an aspect for the 1st fence here, but I may be
> overlooking something.
AFAIU there are cases when we need the first fence, i.e. when a
superpage breakup is happening: IIUC in this case we would first do a
fence ( assuming the pages are already allocated ), one fence between
each superpage breakup and its PTE, and one after each regular PTE
write. But in this case the first fence could sit outside the
write_pte() function.

So we could eliminate the first fence in write_pte() and place it
outside the function ( if it is needed for some cases ), thus
eliminating double fences.
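
A minimal sketch of that split ( hypothetical; pte_write_barrier() is
an invented name, not an existing helper ):

    /* Write a pagetable entry; order the store before later accesses. */
    static inline void write_pte(pte_t *p, pte_t pte)
    {
        *p = pte;
        RISCV_FENCE(rw, rw);
    }

    /*
     * Hypothetical helper for callers that need prior writes ( e.g.
     * filling a newly allocated page table during superpage breakup )
     * ordered before the PTE write that hooks the table up.
     */
    static inline void pte_write_barrier(void)
    {
        RISCV_FENCE(rw, rw);
    }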

~ Oleksii
Oleksii Kurochko Aug. 5, 2024, 3:13 p.m. UTC | #7
On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > +    }
> > +
> > +    BUG_ON(pte_is_valid(*pte));
> > +
> > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> > PTE_TABLE);
> 
> I'm a little puzzled by the use of LINK_TO_LOAD() (and LOAD_TO_LINK()
> a
> little further up) here. Don't you have functioning __pa() and
> __va()?
Can __pa() and __va() be used in this case?

According to comments for other architectures, these macros are used
for converting between Xen heap virtual addresses (VA) and machine
addresses (MA). I may have misunderstood what is meant by the Xen heap
in this context, but I'm not sure if xen_fixmap[] and page tables are
considered part of the Xen heap.

~ Oleksii
Oleksii Kurochko Aug. 5, 2024, 3:34 p.m. UTC | #8
On Mon, 2024-08-05 at 17:13 +0200, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > > +    }
> > > +
> > > +    BUG_ON(pte_is_valid(*pte));
> > > +
> > > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> > > PTE_TABLE);
> > 
> > I'm a little puzzled by the use of LINK_TO_LOAD() (and
> > LOAD_TO_LINK()
> > a
> > little further up) here. Don't you have functioning __pa() and
> > __va()?
> Can __pa() and __va() be used in this case?
> 
> According to comments for other architectures, these macros are used
> for converting between Xen heap virtual addresses (VA) and machine
> addresses (MA). I may have misunderstood what is meant by the Xen
> heap
> in this context, but I'm not sure if xen_fixmap[] and page tables are
> considered part of the Xen heap.

One more thing: can we use __pa() and __va() in setup_fixmap()?

As I understand it, to define __pa() and __va(), the DIRECTMAP mapping
should be mapped first. However, this requires information about RAM
banks, which is provided in the device tree file. But we can't map the
device tree without the fixmap. Am I missing something?

~ Oleksii
Jan Beulich Aug. 5, 2024, 3:45 p.m. UTC | #9
On 05.08.2024 17:13, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
>>> +    }
>>> +
>>> +    BUG_ON(pte_is_valid(*pte));
>>> +
>>> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
>>> PTE_TABLE);
>>
>> I'm a little puzzled by the use of LINK_TO_LOAD() (and LOAD_TO_LINK()
>> a
>> little further up) here. Don't you have functioning __pa() and
>> __va()?
> Can __pa() and __va() be used in this case?
> 
> According to comments for other architectures, these macros are used
> for converting between Xen heap virtual addresses (VA) and machine
> addresses (MA). I may have misunderstood what is meant by the Xen heap
> in this context, but I'm not sure if xen_fixmap[] and page tables are
> considered part of the Xen heap.

I didn't check Arm, but on x86 virt_to_maddr() (underlying __pa()) has
special case code to also allow addresses within the Xen image (area).

Jan
Jan Beulich Aug. 5, 2024, 3:48 p.m. UTC | #10
On 05.08.2024 17:34, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-08-05 at 17:13 +0200, oleksii.kurochko@gmail.com wrote:
>> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
>>>> +    }
>>>> +
>>>> +    BUG_ON(pte_is_valid(*pte));
>>>> +
>>>> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
>>>> PTE_TABLE);
>>>
>>> I'm a little puzzled by the use of LINK_TO_LOAD() (and
>>> LOAD_TO_LINK()
>>> a
>>> little further up) here. Don't you have functioning __pa() and
>>> __va()?
>> Can __pa() and __va() be used in this case?
>>
>> According to comments for other architectures, these macros are used
>> for converting between Xen heap virtual addresses (VA) and machine
>> addresses (MA). I may have misunderstood what is meant by the Xen
>> heap
>> in this context, but I'm not sure if xen_fixmap[] and page tables are
>> considered part of the Xen heap.
> 
> One more thing: can we use __pa() and __va() in setup_fixmap()?
> 
> As I understand it, to define __pa() and __va(), the DIRECTMAP mapping
> should be mapped first. However, this requires information about RAM
> banks, which is provided in the device tree file. But we can't map
> device tree without fixmap. Am I missing something?

Depends on further plans. In principle VA <-> PA translation within the
directmap range and within the Xen image range is purely arithmetic, not
requiring any mappings to have been established. You don't access any
of the referenced memory in the course of doing the translation, after
all.
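
To illustrate (a sketch; link_base and load_base are hypothetical
names for a region's link-time and load-time start addresses):

    /*
     * VA -> PA within a fixed-offset region is a constant subtract/add,
     * touching no memory and requiring no established mappings.
     */
    static inline unsigned long region_va_to_pa(unsigned long va)
    {
        return va - link_base + load_base;
    }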

Jan
Oleksii Kurochko Aug. 5, 2024, 4:02 p.m. UTC | #11
On Mon, 2024-08-05 at 17:45 +0200, Jan Beulich wrote:
> On 05.08.2024 17:13, oleksii.kurochko@gmail.com wrote:
> > On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > > > +    }
> > > > +
> > > > +    BUG_ON(pte_is_valid(*pte));
> > > > +
> > > > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned
> > > > long)&xen_fixmap),
> > > > PTE_TABLE);
> > > 
> > > I'm a little puzzled by the use of LINK_TO_LOAD() (and
> > > LOAD_TO_LINK()
> > > a
> > > little further up) here. Don't you have functioning __pa() and
> > > __va()?
> > Can __pa() and __va() be used in this case?
> > 
> > According to comments for other architectures, these macros are
> > used
> > for converting between Xen heap virtual addresses (VA) and machine
> > addresses (MA). I may have misunderstood what is meant by the Xen
> > heap
> > in this context, but I'm not sure if xen_fixmap[] and page tables
> > are
> > considered part of the Xen heap.
> 
> I didn't check Arm, but on x86 virt_to_maddr() (underlying __pa())
> has
> special case code to also allow addresses within the Xen image
> (area).

Yes, it is true for __virt_to_maddr:
   static inline unsigned long __virt_to_maddr(unsigned long va)
   {
       ASSERT(va < DIRECTMAP_VIRT_END);
       if ( va >= DIRECTMAP_VIRT_START )
           va -= DIRECTMAP_VIRT_START;
       else
       {
           BUILD_BUG_ON(XEN_VIRT_END - XEN_VIRT_START != GB(1));
           /* Signed, so ((long)XEN_VIRT_START >> 30) fits in an imm32. */
           ASSERT(((long)va >> (PAGE_ORDER_1G + PAGE_SHIFT)) ==
                  ((long)XEN_VIRT_START >> (PAGE_ORDER_1G + PAGE_SHIFT)));
   
           va += xen_phys_start - XEN_VIRT_START;
       }
       return (va & ma_va_bottom_mask) |
              ((va << pfn_pdx_hole_shift) & ma_top_mask);
   }
   
But in the case of __maddr_to_virt() ( __va() ), the directmap region is used:
   static inline void *__maddr_to_virt(unsigned long ma)
   {
       ASSERT(pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >>
   PAGE_SHIFT));
       return (void *)(DIRECTMAP_VIRT_START +
                       ((ma & ma_va_bottom_mask) |
                        ((ma & ma_top_mask) >> pfn_pdx_hole_shift)));
   }

But I have to use both __va() and __pa().
__va() inside the loop that finds the L1 page table:

    for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
    {
        BUG_ON(!pte_is_valid(*pte));

        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
    }

__pa() to set the physical address of the L0 page table:
    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
PTE_TABLE);
    write_pte(pte, tmp);


~ Oleksii
Jan Beulich Aug. 5, 2024, 4:14 p.m. UTC | #12
On 05.08.2024 18:02, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-08-05 at 17:45 +0200, Jan Beulich wrote:
>> On 05.08.2024 17:13, oleksii.kurochko@gmail.com wrote:
>>> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
>>>>> +    }
>>>>> +
>>>>> +    BUG_ON(pte_is_valid(*pte));
>>>>> +
>>>>> +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned
>>>>> long)&xen_fixmap),
>>>>> PTE_TABLE);
>>>>
>>>> I'm a little puzzled by the use of LINK_TO_LOAD() (and
>>>> LOAD_TO_LINK()
>>>> a
>>>> little further up) here. Don't you have functioning __pa() and
>>>> __va()?
>>> Can __pa() and __va() be used in this case?
>>>
>>> According to comments for other architectures, these macros are
>>> used
>>> for converting between Xen heap virtual addresses (VA) and machine
>>> addresses (MA). I may have misunderstood what is meant by the Xen
>>> heap
>>> in this context, but I'm not sure if xen_fixmap[] and page tables
>>> are
>>> considered part of the Xen heap.
>>
>> I didn't check Arm, but on x86 virt_to_maddr() (underlying __pa())
>> has
>> special case code to also allow addresses within the Xen image
>> (area).
> 
> Yes, it is true for __virt_to_maddr:
>    static inline unsigned long __virt_to_maddr(unsigned long va)
>    {
>        ASSERT(va < DIRECTMAP_VIRT_END);
>        if ( va >= DIRECTMAP_VIRT_START )
>            va -= DIRECTMAP_VIRT_START;
>        else
>        {
>            BUILD_BUG_ON(XEN_VIRT_END - XEN_VIRT_START != GB(1));
>            /* Signed, so ((long)XEN_VIRT_START >> 30) fits in an imm32. */
>            ASSERT(((long)va >> (PAGE_ORDER_1G + PAGE_SHIFT)) ==
>                   ((long)XEN_VIRT_START >> (PAGE_ORDER_1G + PAGE_SHIFT)));
>    
>            va += xen_phys_start - XEN_VIRT_START;
>        }
>        return (va & ma_va_bottom_mask) |
>               ((va << pfn_pdx_hole_shift) & ma_top_mask);
>    }
>    
> But in case of __maddr_to_virt ( __va() ) it is using directmap region:
>    static inline void *__maddr_to_virt(unsigned long ma)
>    {
>        ASSERT(pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >>
>    PAGE_SHIFT));
>        return (void *)(DIRECTMAP_VIRT_START +
>                        ((ma & ma_va_bottom_mask) |
>                         ((ma & ma_top_mask) >> pfn_pdx_hole_shift)));
>    }
> 
> But I have to use both __va() and __pa().
> __va() inside cycle to find L1 page table:
> 
>     for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
>     {
>         BUG_ON(!pte_is_valid(*pte));
> 
>         pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
>         pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
>     }
> 
> __pa() to set a physical address of L0 page table:
>     tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> PTE_TABLE);
>     write_pte(pte, tmp);

Hmm, then using at least LINK_TO_LOAD() is going to be unavoidable for the
time being, I guess. Yet midterm I think they should disappear from here. 

Jan
Oleksii Kurochko Aug. 6, 2024, 9:32 a.m. UTC | #13
On Mon, 2024-08-05 at 18:14 +0200, Jan Beulich wrote:
> On 05.08.2024 18:02, oleksii.kurochko@gmail.com wrote:
> > On Mon, 2024-08-05 at 17:45 +0200, Jan Beulich wrote:
> > > On 05.08.2024 17:13, oleksii.kurochko@gmail.com wrote:
> > > > On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > > > > > +    }
> > > > > > +
> > > > > > +    BUG_ON(pte_is_valid(*pte));
> > > > > > +
> > > > > > +    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned
> > > > > > long)&xen_fixmap),
> > > > > > PTE_TABLE);
> > > > > 
> > > > > I'm a little puzzled by the use of LINK_TO_LOAD() (and
> > > > > LOAD_TO_LINK()
> > > > > a
> > > > > little further up) here. Don't you have functioning __pa()
> > > > > and
> > > > > __va()?
> > > > Can __pa() and __va() be used in this case?
> > > > 
> > > > According to comments for other architectures, these macros are
> > > > used
> > > > for converting between Xen heap virtual addresses (VA) and
> > > > machine
> > > > addresses (MA). I may have misunderstood what is meant by the
> > > > Xen
> > > > heap
> > > > in this context, but I'm not sure if xen_fixmap[] and page
> > > > tables
> > > > are
> > > > considered part of the Xen heap.
> > > 
> > > I didn't check Arm, but on x86 virt_to_maddr() (underlying
> > > __pa())
> > > has
> > > special case code to also allow addresses within the Xen image
> > > (area).
> > 
> > Yes, it is true for __virt_to_maddr:
> >    static inline unsigned long __virt_to_maddr(unsigned long va)
> >    {
> >        ASSERT(va < DIRECTMAP_VIRT_END);
> >        if ( va >= DIRECTMAP_VIRT_START )
> >            va -= DIRECTMAP_VIRT_START;
> >        else
> >        {
> >            BUILD_BUG_ON(XEN_VIRT_END - XEN_VIRT_START != GB(1));
> >            /* Signed, so ((long)XEN_VIRT_START >> 30) fits in an
> > imm32. */
> >            ASSERT(((long)va >> (PAGE_ORDER_1G + PAGE_SHIFT)) ==
> >                   ((long)XEN_VIRT_START >> (PAGE_ORDER_1G +
> > PAGE_SHIFT)));
> >    
> >            va += xen_phys_start - XEN_VIRT_START;
> >        }
> >        return (va & ma_va_bottom_mask) |
> >               ((va << pfn_pdx_hole_shift) & ma_top_mask);
> >    }
> >    
> > But in case of __maddr_to_virt ( __va() ) it is using directmap
> > region:
> >    static inline void *__maddr_to_virt(unsigned long ma)
> >    {
> >        ASSERT(pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >>
> >    PAGE_SHIFT));
> >        return (void *)(DIRECTMAP_VIRT_START +
> >                        ((ma & ma_va_bottom_mask) |
> >                         ((ma & ma_top_mask) >>
> > pfn_pdx_hole_shift)));
> >    }
> > 
> > But I have to use both __va() and __pa().
> > __va() inside cycle to find L1 page table:
> > 
> >     for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
> >     {
> >         BUG_ON(!pte_is_valid(*pte));
> > 
> >         pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> >         pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
> >     }
> > 
> > __pa() to set a physical address of L0 page table:
> >     tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> > PTE_TABLE);
> >     write_pte(pte, tmp);
> 
> Hmm, then using at least LINK_TO_LOAD() is going to be unavoidable
> for the
> time being, I guess. Yet midterm I think they should disappear from
> here.
I think that in this specific case both LINK_TO_LOAD() and
LOAD_TO_LINK() should be used.

__va() -> __maddr_to_virt() can't be used here, as its calculation is
based on DIRECTMAP_VIRT_START, so we would receive an incorrect
virtual address of the page table.

__pa() -> __virt_to_maddr() can't be used either, because the virtual
address of xen_fixmap will always be bigger than DIRECTMAP_VIRT_START
( as XEN_VIRT_START > DIRECTMAP_VIRT_START ), so the physical address
(PA) would be calculated based on DIRECTMAP_VIRT_START and an
incorrect PA of xen_fixmap would be received.

It seems to me that the only choice we have at the moment ( and
probably in the case of fixmap mapping ) is to use
LINK_TO_LOAD()/LOAD_TO_LINK().

~ Oleksii
Oleksii Kurochko Aug. 6, 2024, 9:49 a.m. UTC | #14
On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > +    write_pte(pte, tmp);
> > +
> > +    sfence_vma();
> > +
> > +    printk("(XEN) fixmap is mapped\n");
> 
> Why the (XEN) prefix? And perhaps why the printk() in the first
> place?
printk() is available after the common code started to be buildable,
and it can be used here as we have already introduced early_puts(),
which uses sbi_console_putchar(). So it doesn't matter whether we use
printk() or early_printk() here; the result will be the same: a call
to sbi_console_putchar().

Am I missing something?

~ Oleksii
Jan Beulich Aug. 6, 2024, 9:59 a.m. UTC | #15
On 06.08.2024 11:49, oleksii.kurochko@gmail.com wrote:
> On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
>>> +    write_pte(pte, tmp);
>>> +
>>> +    sfence_vma();
>>> +
>>> +    printk("(XEN) fixmap is mapped\n");
>>
>> Why the (XEN) prefix? And perhaps why the printk() in the first
>> place?
> printk() is available after the common code started to be buildable,
> and it can be used here as we have already introduced early_puts(),
> which uses sbi_console_putchar(). So it doesn't matter whether we use
> printk() or early_printk() here; the result will be the same: a call
> to sbi_console_putchar().
> 
> Am I missing something?

Apparently yes, as you answered neither of my questions. To put them
differently: What value does this log message have, outside of your
own development activities? What value does the explicit (XEN) have
when printk() prepends such a prefix already anyway?

Jan
Oleksii Kurochko Aug. 6, 2024, 10:14 a.m. UTC | #16
On Tue, 2024-08-06 at 11:59 +0200, Jan Beulich wrote:
> On 06.08.2024 11:49, oleksii.kurochko@gmail.com wrote:
> > On Mon, 2024-07-29 at 15:35 +0200, Jan Beulich wrote:
> > > > +    write_pte(pte, tmp);
> > > > +
> > > > +    sfence_vma();
> > > > +
> > > > +    printk("(XEN) fixmap is mapped\n");
> > > 
> > > Why the (XEN) prefix? And perhaps why the printk() in the first
> > > place?
> > printk() is available after the common code started to be
> > buildable, and it can be used here as we have already introduced
> > early_puts(), which uses sbi_console_putchar(). So it doesn't
> > matter whether we use printk() or early_printk() here; the result
> > will be the same: a call to sbi_console_putchar().
> > 
> > Am I missing something?
> 
> Apparently yes, as you answered neither of my questions. To put them
> differently: What value does this log message have, outside of your
> own development activities? 
Probably there is no real value; it just shows that the mapping of the
fixmap has finished. I think that we can really drop this printk.

> What value does the explicit (XEN) have
> when printk() prepends such a prefix already anyway?
I thought that I had answered this question before: there is no sense
in the explicit "(XEN)".

~ Oleksii

Patch

diff --git a/xen/arch/riscv/include/asm/config.h b/xen/arch/riscv/include/asm/config.h
index 50583aafdc..f517684cbb 100644
--- a/xen/arch/riscv/include/asm/config.h
+++ b/xen/arch/riscv/include/asm/config.h
@@ -66,6 +66,14 @@ 
 #define SLOTN_ENTRY_BITS        (HYP_PT_ROOT_LEVEL * VPN_BITS + PAGE_SHIFT)
 #define SLOTN(slot)             (_AT(vaddr_t, slot) << SLOTN_ENTRY_BITS)
 
+#define XEN_VIRT_SIZE           MB(2)
+
+#define BOOT_FDT_VIRT_START     (XEN_VIRT_START + XEN_VIRT_SIZE)
+#define BOOT_FDT_VIRT_SIZE      MB(4)
+
+#define FIXMAP_BASE             (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
+#define FIXMAP_ADDR(n)          (FIXMAP_BASE + (n) * PAGE_SIZE)
+
 #if RV_STAGE1_MODE == SATP_MODE_SV39
 #define XEN_VIRT_START 0xFFFFFFFFC0000000
 #elif RV_STAGE1_MODE == SATP_MODE_SV48
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
new file mode 100644
index 0000000000..d3f5ee4944
--- /dev/null
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -0,0 +1,44 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ */
+#ifndef ASM_FIXMAP_H
+#define ASM_FIXMAP_H
+
+#include <xen/bug.h>
+#include <xen/page-size.h>
+#include <xen/pmap.h>
+
+#include <asm/page.h>
+
+/* Fixmap slots */
+#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
+#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
+#define FIX_MISC (FIX_PMAP_END + 1)  /* Ephemeral mappings of hardware */
+
+#define FIX_LAST FIX_MISC
+
+#define FIXADDR_START FIXMAP_ADDR(0)
+#define FIXADDR_TOP FIXMAP_ADDR(FIX_LAST)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Direct access to xen_fixmap[] should only happen when {set,
+ * clear}_fixmap() is unusable (e.g. where we would end up
+ * recursively calling the helpers).
+ */
+extern pte_t xen_fixmap[];
+
+#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
+
+static inline unsigned int virt_to_fix(vaddr_t vaddr)
+{
+    BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+
+    return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ASM_FIXMAP_H */
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 25af9e1aaa..a0bdc2bc3a 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -255,4 +255,6 @@  static inline unsigned int arch_get_dma_bitsize(void)
     return 32; /* TODO */
 }
 
+void setup_fixmap_mappings(void);
+
 #endif /* _ASM_RISCV_MM_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index c831e16417..0cc2f37cf8 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -11,6 +11,7 @@ 
 
 #include <asm/mm.h>
 #include <asm/page-bits.h>
+#include <asm/system.h>
 
 #define VPN_MASK                    (PAGETABLE_ENTRIES - 1UL)
 
@@ -81,6 +82,14 @@  static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
     BUG_ON("unimplemented");
 }
 
+/* Write a pagetable entry. */
+static inline void write_pte(pte_t *p, pte_t pte)
+{
+    RISCV_FENCE(rw, rw);
+    *p = pte;
+    RISCV_FENCE(rw, rw);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PAGE_H */
diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
index 7d09e781bf..35724505ec 100644
--- a/xen/arch/riscv/mm.c
+++ b/xen/arch/riscv/mm.c
@@ -12,6 +12,7 @@ 
 #include <asm/early_printk.h>
 #include <asm/csr.h>
 #include <asm/current.h>
+#include <asm/fixmap.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 
@@ -49,6 +50,9 @@  stage1_pgtbl_root[PAGETABLE_ENTRIES];
 pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
 stage1_pgtbl_nonroot[PGTBL_INITIAL_COUNT * PAGETABLE_ENTRIES];
 
+pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
+xen_fixmap[PAGETABLE_ENTRIES];
+
 #define HANDLE_PGTBL(curr_lvl_num)                                          \
     index = pt_index(curr_lvl_num, page_addr);                              \
     if ( pte_is_valid(pgtbl[index]) )                                       \
@@ -191,6 +195,49 @@  static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
     return is_mode_supported;
 }
 
+void __init setup_fixmap_mappings(void)
+{
+    pte_t *pte, tmp;
+    unsigned int i;
+
+    BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
+
+    pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
+
+    /*
+     * In RISC-V page table levels are enumerated from Lx to L0, where
+     * x is the highest page table level for the current MMU mode (for
+     * example, Sv39 has 3 page table levels, so x = 2 (L2 -> L1 -> L0)).
+     *
+     * In this cycle we want to find the L1 page table, because
+     * xen_fixmap[] will be used as the L0 page table.
+     *
+     * i is initialized to ( HYP_PT_ROOT_LEVEL - 1 ) because the pte for
+     * L2 ( in case of Sv39 ) has been retrieved above.
+     */
+    for ( i = HYP_PT_ROOT_LEVEL - 1; i != 0; i-- )
+    {
+        BUG_ON(!pte_is_valid(*pte));
+
+        pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
+        pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
+    }
+
+    BUG_ON(pte_is_valid(*pte));
+
+    tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);
+    write_pte(pte, tmp);
+
+    sfence_vma();
+
+    printk("(XEN) fixmap is mapped\n");
+
+    /*
+     * We only need the zeroth table allocated, but not the PTEs set, because
+     * set_fixmap() will set them on the fly.
+     */
+}
+
 /*
  * setup_initial_pagetables:
  *
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 4defad68f4..13f0e8c77d 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -46,6 +46,8 @@  void __init noreturn start_xen(unsigned long bootcpu_id,
     test_macros_from_bug_h();
 #endif
 
+    setup_fixmap_mappings();
+
     printk("All set up\n");
 
     for ( ;; )
diff --git a/xen/arch/riscv/xen.lds.S b/xen/arch/riscv/xen.lds.S
index 070b19d915..7a683f6065 100644
--- a/xen/arch/riscv/xen.lds.S
+++ b/xen/arch/riscv/xen.lds.S
@@ -181,6 +181,6 @@  ASSERT(!SIZEOF(.got.plt),  ".got.plt non-empty")
  * Changing the size of Xen binary can require an update of
  * PGTBL_INITIAL_COUNT.
  */
-ASSERT(_end - _start <= MB(2), "Xen too large for early-boot assumptions")
+ASSERT(_end - _start <= XEN_VIRT_SIZE, "Xen too large for early-boot assumptions")
 
 ASSERT(_ident_end - _ident_start <= IDENT_AREA_SIZE, "identity region is too big");