Message ID | 20230228213738.272178-14-willy@infradead.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | New page table range API | expand |
Hi Willy, On Tue, Feb 28, 2023 at 10:37 PM Matthew Wilcox (Oracle) <willy@infradead.org> wrote: > Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and > flush_dcache_folio(). > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Thanks for your patch! > --- a/arch/m68k/include/asm/cacheflush_mm.h > +++ b/arch/m68k/include/asm/cacheflush_mm.h > @@ -220,24 +220,28 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm > > /* Push the page at kernel virtual address and clear the icache */ > /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */ > -static inline void __flush_page_to_ram(void *vaddr) > +static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr) > { > if (CPU_IS_COLDFIRE) { > unsigned long addr, start, end; > addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1); > start = addr & ICACHE_SET_MASK; > - end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK; > + end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK; > if (start > end) { > flush_cf_bcache(0, end); > end = ICACHE_MAX_ADDR; > } > flush_cf_bcache(start, end); > } else if (CPU_IS_040_OR_060) { > - __asm__ __volatile__("nop\n\t" > - ".chip 68040\n\t" > - "cpushp %%bc,(%0)\n\t" > - ".chip 68k" > - : : "a" (__pa(vaddr))); > + unsigned long paddr = __pa(vaddr); > + > + while (nr--) { > + __asm__ __volatile__("nop\n\t" > + ".chip 68040\n\t" > + "cpushp %%bc,(%0)\n\t" > + ".chip 68k" > + : : "a" (paddr + nr * PAGE_SIZE)); As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE" inside the loop (albeit using a shift instead of a multiplication), please use "paddr" here, followed by "paddr += PAGE_SIZE;". Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote: > > + while (nr--) { > > + __asm__ __volatile__("nop\n\t" > > + ".chip 68040\n\t" > > + "cpushp %%bc,(%0)\n\t" > > + ".chip 68k" > > + : : "a" (paddr + nr * PAGE_SIZE)); > > As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE" > inside the loop (albeit using a shift instead of a multiplication), > please use "paddr" here, followed by "paddr += PAGE_SIZE;". Thanks. So this? +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr) } else if (CPU_IS_040_OR_060) { unsigned long paddr = __pa(vaddr); - while (nr--) { + do { __asm__ __volatile__("nop\n\t" ".chip 68040\n\t" "cpushp %%bc,(%0)\n\t" ".chip 68k" - : : "a" (paddr + nr * PAGE_SIZE)); - } + : : "a" (paddr)); + paddr += PAGE_SIZE; + } while (--nr); } else { unsigned long _tmp; __asm__ __volatile__("movec %%cacr,%0\n\t" Also, I noticed that I broke sun3. It puts the PFN in bits 0-n instead of 12-n. New patch coming soon.
Hi Willy, On Sun, Mar 5, 2023 at 4:28 PM Matthew Wilcox <willy@infradead.org> wrote: > On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote: > > > + while (nr--) { > > > + __asm__ __volatile__("nop\n\t" > > > + ".chip 68040\n\t" > > > + "cpushp %%bc,(%0)\n\t" > > > + ".chip 68k" > > > + : : "a" (paddr + nr * PAGE_SIZE)); > > > > As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE" > > inside the loop (albeit using a shift instead of a multiplication), > > please use "paddr" here, followed by "paddr += PAGE_SIZE;". > > Thanks. So this? > > +++ b/arch/m68k/include/asm/cacheflush_mm.h > @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr) > } else if (CPU_IS_040_OR_060) { > unsigned long paddr = __pa(vaddr); > > - while (nr--) { > + do { > __asm__ __volatile__("nop\n\t" > ".chip 68040\n\t" > "cpushp %%bc,(%0)\n\t" > ".chip 68k" > - : : "a" (paddr + nr * PAGE_SIZE)); > - } > + : : "a" (paddr)); > + paddr += PAGE_SIZE; > + } while (--nr); > } else { > unsigned long _tmp; > __asm__ __volatile__("movec %%cacr,%0\n\t" LGTM. Might be safer to keep the "while (nr--) {", just in case someone ever passes zero. > Also, I noticed that I broke sun3. It puts the PFN in bits 0-n instead > of 12-n. New patch coming soon. Thanks, hadn't noticed (there are no sun3-specific code changes in this series?) Gr{oetje,eeting}s, Geert
Hi Matthew, Geert, sorry, I missed that one when it got posted ... On 6/03/23 04:28, Matthew Wilcox wrote: > On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote: >>> + while (nr--) { >>> + __asm__ __volatile__("nop\n\t" >>> + ".chip 68040\n\t" >>> + "cpushp %%bc,(%0)\n\t" >>> + ".chip 68k" >>> + : : "a" (paddr + nr * PAGE_SIZE)); >> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE" >> inside the loop (albeit using a shift instead of a multiplication), >> please use "paddr" here, followed by "paddr += PAGE_SIZE;". Are we certain that contiguous vaddr always maps to contiguous paddr? If not, I'd suggest we increment vaddr inside the loop, and use __pa() each time: > +++ b/arch/m68k/include/asm/cacheflush_mm.h > @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr) > } else if (CPU_IS_040_OR_060) { > > - while (nr--) { > + do { > __asm__ __volatile__("nop\n\t" > ".chip 68040\n\t" > "cpushp %%bc,(%0)\n\t" > ".chip 68k" > - : : "a" (paddr + nr * PAGE_SIZE)); > - } > + : : "a" __pa(vaddr)); > + vaddr += PAGE_SIZE; > + } while (--nr); > } else { > unsigned long _tmp; > __asm__ __volatile__("movec %%cacr,%0\n\t" > (just edited Matthew's patch in the mail editor, untested, may not apply cleanly ...) Cheers, Michael
Hi Michael, On Sun, Mar 5, 2023 at 9:44 PM Michael Schmitz <schmitzmic@gmail.com> wrote: > On 6/03/23 04:28, Matthew Wilcox wrote: > > On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote: > >>> + while (nr--) { > >>> + __asm__ __volatile__("nop\n\t" > >>> + ".chip 68040\n\t" > >>> + "cpushp %%bc,(%0)\n\t" > >>> + ".chip 68k" > >>> + : : "a" (paddr + nr * PAGE_SIZE)); > >> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE" > >> inside the loop (albeit using a shift instead of a multiplication), > >> please use "paddr" here, followed by "paddr += PAGE_SIZE;". > > Are we certain that contiguous vaddr always maps to contiguous paddr? For a general __flush_pages_to_ram() function, that would not be guaranteed. But as this is meant for folios, it must be true: https://elixir.bootlin.com/linux/latest/source/include/linux/mm_types.h#L320 Gr{oetje,eeting}s, Geert
Hi Geert, On 6/03/23 20:21, Geert Uytterhoeven wrote: >> Are we certain that contiguous vaddr always maps to contiguous paddr? > For a general __flush_pages_to_ram() function, that would not be > guaranteed. But as this is meant for folios, it must be true: > https://elixir.bootlin.com/linux/latest/source/include/linux/mm_types.h#L320 Thanks for explaining - that just leaves the problem of cowboys like myself abusing __flush_pages_to_ram(addr, nr) with nr > 1 for something that isn't a folio. Maybe a comment 'nr > 1 only valid on folios' would help ... Cheers, Michael > > Gr{oetje,eeting}s, > > Geert >
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 1ac55e7b47f0..d43c8bce149b 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -220,24 +220,28 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm /* Push the page at kernel virtual address and clear the icache */ /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */ -static inline void __flush_page_to_ram(void *vaddr) +static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr) { if (CPU_IS_COLDFIRE) { unsigned long addr, start, end; addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1); start = addr & ICACHE_SET_MASK; - end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK; + end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK; if (start > end) { flush_cf_bcache(0, end); end = ICACHE_MAX_ADDR; } flush_cf_bcache(start, end); } else if (CPU_IS_040_OR_060) { - __asm__ __volatile__("nop\n\t" - ".chip 68040\n\t" - "cpushp %%bc,(%0)\n\t" - ".chip 68k" - : : "a" (__pa(vaddr))); + unsigned long paddr = __pa(vaddr); + + while (nr--) { + __asm__ __volatile__("nop\n\t" + ".chip 68040\n\t" + "cpushp %%bc,(%0)\n\t" + ".chip 68k" + : : "a" (paddr + nr * PAGE_SIZE)); + } } else { unsigned long _tmp; __asm__ __volatile__("movec %%cacr,%0\n\t" @@ -249,10 +253,14 @@ static inline void __flush_page_to_ram(void *vaddr) } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -#define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) +#define flush_dcache_page(page) __flush_pages_to_ram(page_address(page), 1) +#define flush_dcache_folio(folio) \ + __flush_pages_to_ram(folio_address(folio), folio_nr_pages(folio)) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -#define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page)) +#define flush_icache_pages(vma, page, nr) \ + __flush_pages_to_ram(page_address(page), nr) +#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1) extern void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index b93c41fe2067..400206c17c97 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -31,8 +31,20 @@ do{ \ *(pteptr) = (pteval); \ } while(0) -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +static inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + for (;;) { + set_pte(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte_val(pte) += PAGE_SIZE; + } +} + +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) /* PMD_SHIFT determines the size of the area a second-level page table can map */ #if CONFIG_PGTABLE_LEVELS == 3 @@ -138,11 +150,14 @@ extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode); * tables contain all the necessary information. The Sun3 does, but * they are updated on demand. */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline void update_mmu_cache_range(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, unsigned int nr) { } +#define update_mmu_cache(vma, addr, ptep) \ + update_mmu_cache_range(vma, addr, ptep, 1) + #endif /* !__ASSEMBLY__ */ /* MMU-specific headers */ diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 2a375637e007..7784d0fcdf6e 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -81,7 +81,7 @@ static inline void cache_page(void *vaddr) void mmu_page_ctor(void *page) { - __flush_page_to_ram(page); + __flush_pages_to_ram(page, 1); flush_tlb_kernel_page(page); nocache_page(page); }
Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and flush_dcache_folio(). Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: linux-m68k@lists.linux-m68k.org --- arch/m68k/include/asm/cacheflush_mm.h | 26 +++++++++++++++++--------- arch/m68k/include/asm/pgtable_mm.h | 21 ++++++++++++++++++--- arch/m68k/mm/motorola.c | 2 +- 3 files changed, 36 insertions(+), 13 deletions(-)