diff mbox series

[v4,5/5] riscv: implement IPI-based remote TLB shootdown

Message ID d1c980f2e49192c7cb2ddd4f3e4af577d1eaf539.1553647082.git.gary@garyguo.net (mailing list archive)
State New, archived
Headers show
Series TLB/I$ flush cleanups and improvements | expand

Commit Message

Gary Guo March 27, 2019, 12:41 a.m. UTC
From: Gary Guo <gary@garyguo.net>

This patch implements IPI-based remote TLB shootdown, which is useful
at this stage for testing because BBL/OpenSBI ignores the operands of
sbi_remote_sfence_vma_asid and always performs a global TLB flush.
The SBI-based remote TLB shootdown can still be opted into using the
boot cmdline "tlbi_method=sbi".

Signed-off-by: Gary Guo <gary@garyguo.net>
Tested-by: Atish Patra <atish.patra@wdc.com>
---
 .../admin-guide/kernel-parameters.txt         |  5 +
 arch/riscv/mm/tlbflush.c                      | 99 +++++++++++++++++--
 2 files changed, 98 insertions(+), 6 deletions(-)

Comments

Christoph Hellwig March 27, 2019, 7:31 a.m. UTC | #1
On Wed, Mar 27, 2019 at 12:41:30AM +0000, Gary Guo wrote:
> From: Gary Guo <gary@garyguo.net>
> 
> This patch implements IPI-based remote TLB shootdown, which is useful
> at this stage for testing because BBL/OpenSBI ignores operands of
> sbi_remote_sfence_vma_asid and always perform a global TLB flush.
> The SBI-based remote TLB shootdown can still be opt-in using boot
> cmdline "tlbi_method=sbi".

I think Anup now fixes OpenSBI.  Do you have benchmarks vs BBL,
old OpenSBI and new OpenSBI?

> +static void ipi_remote_sfence_vma(void *info)
> +{
> +	struct tlbi *data = info;
> +	unsigned long start = data->start;
> +	unsigned long size = data->size;
> +	unsigned long i;
> +
> +	if (size == SFENCE_VMA_FLUSH_ALL) {
> +		local_flush_tlb_all();
> +	}

Doesn't this need a return to skip the later code?  Also it might
be worth splitting the all case into entirely separate helpers,
that way the on_each_cpu calls don't need to pass a private data
argument at all.

> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		__asm__ __volatile__ ("sfence.vma %0"
> +				      : : "r" (start + i)
> +				      : "memory");
> +	}

local_flush_tlb_kernel_page?

> +static void ipi_remote_sfence_vma_asid(void *info)
> +{
> +	struct tlbi *data = info;
> +	unsigned long asid = data->asid;
> +	unsigned long start = data->start;
> +	unsigned long size = data->size;
> +	unsigned long i;
> +
> +	if (size == SFENCE_VMA_FLUSH_ALL) {
> +		__asm__ __volatile__ ("sfence.vma x0, %0"
> +				      : : "r" (asid)
> +				      : "memory");
> +		return;
> +	}
> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		__asm__ __volatile__ ("sfence.vma %0, %1"
> +				      : : "r" (start + i), "r" (asid)
> +				      : "memory");
> +	}

local_flush_tlb_range?
Gary Guo March 27, 2019, 2:03 p.m. UTC | #2
On 27/03/2019 07:31, Christoph Hellwig wrote:
> On Wed, Mar 27, 2019 at 12:41:30AM +0000, Gary Guo wrote:
>> From: Gary Guo <gary@garyguo.net>
>>
>> This patch implements IPI-based remote TLB shootdown, which is useful
>> at this stage for testing because BBL/OpenSBI ignores operands of
>> sbi_remote_sfence_vma_asid and always perform a global TLB flush.
>> The SBI-based remote TLB shootdown can still be opt-in using boot
>> cmdline "tlbi_method=sbi".
> 
> I think Anup now fixes OpenSBI.  Do you have benchmarks vs BBL,
> old OpenSBI and new OpenSBI?
> 
See the cover letter. OpenSBI's code hasn't made it into a stable
release, and it currently has race conditions.
>> +static void ipi_remote_sfence_vma(void *info)
>> +{
>> +	struct tlbi *data = info;
>> +	unsigned long start = data->start;
>> +	unsigned long size = data->size;
>> +	unsigned long i;
>> +
>> +	if (size == SFENCE_VMA_FLUSH_ALL) {
>> +		local_flush_tlb_all();
>> +	}
> 
> Doesn't this need a return to skip the latter code?  Also it might
> we worth to just split the all case into entirely separate helpers,
> that way the on_each_cpu calls don't need to pass a private data
> argument at all.
I managed to drop the return when rebasing... My fault. I don't
think it is necessary to split it into separate helpers, as the
private data argument isn't expensive at all.
> 
>> +
>> +	for (i = 0; i < size; i += PAGE_SIZE) {
>> +		__asm__ __volatile__ ("sfence.vma %0"
>> +				      : : "r" (start + i)
>> +				      : "memory");
>> +	}
> 
> local_flush_tlb_kernel_page?
Good catch
> 
>> +static void ipi_remote_sfence_vma_asid(void *info)
>> +{
>> +	struct tlbi *data = info;
>> +	unsigned long asid = data->asid;
>> +	unsigned long start = data->start;
>> +	unsigned long size = data->size;
>> +	unsigned long i;
>> +
>> +	if (size == SFENCE_VMA_FLUSH_ALL) {
>> +		__asm__ __volatile__ ("sfence.vma x0, %0"
>> +				      : : "r" (asid)
>> +				      : "memory");
>> +		return;
>> +	}
>> +
>> +	for (i = 0; i < size; i += PAGE_SIZE) {
>> +		__asm__ __volatile__ ("sfence.vma %0, %1"
>> +				      : : "r" (start + i), "r" (asid)
>> +				      : "memory");
>> +	}
> 
> local_flush_tlb_range?
> 
We don't have the VMA structures now so no. This is related to future 
ASID patch as well.
Anup Patel March 28, 2019, 6:50 a.m. UTC | #3
On Wed, Mar 27, 2019 at 6:11 AM Gary Guo <gary@garyguo.net> wrote:
>
> From: Gary Guo <gary@garyguo.net>
>
> This patch implements IPI-based remote TLB shootdown, which is useful
> at this stage for testing because BBL/OpenSBI ignores operands of
> sbi_remote_sfence_vma_asid and always perform a global TLB flush.
> The SBI-based remote TLB shootdown can still be opt-in using boot
> cmdline "tlbi_method=sbi".
>
> Signed-off-by: Gary Guo <gary@garyguo.net>
> Tested-by: Atish Patra <atish.patra@wdc.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |  5 +
>  arch/riscv/mm/tlbflush.c                      | 99 +++++++++++++++++--
>  2 files changed, 98 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 7a60edef09d2..afd34fa1db91 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -4552,6 +4552,11 @@
>                         flushed.
>                         See arch/riscv/mm/tlbflush.c
>
> +       tlbi_method=    [RV]
> +                       Format: { "sbi", "ipi" }
> +                       Default: "ipi"
> +                       Specify the method used to perform remote TLB shootdown.
> +
>         tmem            [KNL,XEN]
>                         Enable the Transcendent memory driver if built-in.
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 33083f48a936..ceee76f14a0a 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -72,19 +72,106 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
>
>  #ifdef CONFIG_SMP
>
> +/*
> + * SBI has interfaces for remote TLB shootdown.  If there is no hardware
> + * remote TLB shootdown support, SBI perform IPIs itself instead.  Some SBI
> + * implementations may also ignore ASID and address ranges provided and do a
> + * full TLB flush instead.  In these cases we might want to do IPIs ourselves.
> + *
> + * This parameter allows the approach (IPI/SBI) to be specified using boot
> + * cmdline.
> + */
> +static bool tlbi_ipi = true;
> +
> +static int __init setup_tlbi_method(char *str)
> +{
> +       if (strcmp(str, "ipi") == 0)
> +               tlbi_ipi = true;
> +       else if (strcmp(str, "sbi") == 0)
> +               tlbi_ipi = false;
> +       else
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +early_param("tlbi_method", setup_tlbi_method);
> +
> +
> +struct tlbi {
> +       unsigned long start;
> +       unsigned long size;
> +       unsigned long asid;
> +};
> +
> +static void ipi_remote_sfence_vma(void *info)
> +{
> +       struct tlbi *data = info;
> +       unsigned long start = data->start;
> +       unsigned long size = data->size;
> +       unsigned long i;
> +
> +       if (size == SFENCE_VMA_FLUSH_ALL) {
> +               local_flush_tlb_all();
> +       }

No brace required here.

> +
> +       for (i = 0; i < size; i += PAGE_SIZE) {
> +               __asm__ __volatile__ ("sfence.vma %0"
> +                                     : : "r" (start + i)
> +                                     : "memory");
> +       }

No brace required here as well.

> +}
> +
> +static void ipi_remote_sfence_vma_asid(void *info)
> +{
> +       struct tlbi *data = info;
> +       unsigned long asid = data->asid;
> +       unsigned long start = data->start;
> +       unsigned long size = data->size;
> +       unsigned long i;
> +
> +       if (size == SFENCE_VMA_FLUSH_ALL) {
> +               __asm__ __volatile__ ("sfence.vma x0, %0"
> +                                     : : "r" (asid)
> +                                     : "memory");
> +               return;
> +       }
> +
> +       for (i = 0; i < size; i += PAGE_SIZE) {
> +               __asm__ __volatile__ ("sfence.vma %0, %1"
> +                                     : : "r" (start + i), "r" (asid)
> +                                     : "memory");
> +       }

No brace required here as well.

> +}
> +
>  static void remote_sfence_vma(unsigned long start, unsigned long size)
>  {
> -       sbi_remote_sfence_vma(NULL, start, size);
> +       if (tlbi_ipi) {
> +               struct tlbi info = {
> +                       .start = start,
> +                       .size = size,
> +               };
> +               on_each_cpu(ipi_remote_sfence_vma, &info, 1);
> +       } else
> +               sbi_remote_sfence_vma(NULL, start, size);
>  }
>
>  static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
>                                    unsigned long size, unsigned long asid)
>  {
> -       cpumask_t hmask;
> -
> -       cpumask_clear(&hmask);
> -       riscv_cpuid_to_hartid_mask(mask, &hmask);
> -       sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
> +       if (tlbi_ipi) {
> +               struct tlbi info = {
> +                       .start = start,
> +                       .size = size,
> +                       .asid = asid,
> +               };
> +               on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1);
> +       } else {
> +               cpumask_t hmask;
> +
> +               cpumask_clear(&hmask);
> +               riscv_cpuid_to_hartid_mask(mask, &hmask);
> +               sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
> +       }
>  }
>
>
> --
> 2.17.1
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Regards,
Anup
Christoph Hellwig March 28, 2019, 4:36 p.m. UTC | #4
> > 
> > I think Anup now fixes OpenSBI.  Do you have benchmarks vs BBL,
> > old OpenSBI and new OpenSBI?
> > 
> See the cover letter. OpenSBI's code hasn't been made into stable, and 
> it has race conditions now.

Well, I'd still like to know numbers.  That is how people generally
justify more complex code that claims to be more optimal :)

> > local_flush_tlb_range?
> > 
> We don't have the VMA structures now so no. This is related to future 
> ASID patch as well.

Well, sprinkling inline assembly all over is generally not a good idea,
so I'd like to have some helper.  And as pointed out before, IFF we pass
an asid instead of the vma to local_flush_tlb_page once ASID support
is added local_flush_tlb_page would nicely do that job.  If we really
want another indirection it could be local_flush_tlb_page_asid instead,
but I don't really see the point.
Gary Guo March 28, 2019, 4:47 p.m. UTC | #5
On 28/03/2019 16:36, Christoph Hellwig wrote:
>>>
>>> I think Anup now fixes OpenSBI.  Do you have benchmarks vs BBL,
>>> old OpenSBI and new OpenSBI?
>>>
>> See the cover letter. OpenSBI's code hasn't been made into stable, and
>> it has race conditions now.
> 
> Well, I'd still like to know numbers.  That is how people generally
> justify more complex code that claims to be more optimal :)
> 
No visible difference on QEMU, as all SFENCE.VMAs are global so we
don't save anything, and the added cost of doing the IPI is barely visible.

Don't have a board so can't test it out.

The main gain is to allow hardware devs to test their TLB implementation
with Linux. If OpenSBI implements this properly we probably won't need
the IPI code, I guess.

>>> local_flush_tlb_range?
>>>
>> We don't have the VMA structures now so no. This is related to future
>> ASID patch as well.
> 
> Well, sprinkling inline assembly all over is generally not a good idea,
> so I'd like to have some helper.  And as pointed out before, IFF we pass
> an asid instead of the vma to local_flush_tlb_page once ASID support
> is added local_flush_tlb_page would nicely do that job.  If we really
> want another indirection it could be local_flush_tlb_page_asid instead,
> but I don't really see the point.
> 
Callers of local_flush_tlb_page (and the non-local version) shouldn't
care about ASIDs. They only care about a particular MM. flush_tlb_page
always takes an MM as an argument and I see no reason why we shouldn't
in the local version.
Christoph Hellwig March 28, 2019, 4:57 p.m. UTC | #6
On Thu, Mar 28, 2019 at 04:47:36PM +0000, Gary Guo wrote:
> Caller of local_flush_tlb_page (and the non-local) version shouldn't 
> care about ASID. They only care about a particular MM. flush_tlb_page 
> always takes a MM as argument and I see no point about why we shouldn't 
> in the local version.

There is exactly two callers in the tree with your patches, one of
which is the !SMP version of flush_tlb_page, and the other is
update_mmu_cache, so I'm really not worried about spreading ASID
information too far.  And all that is only a concern once we get
ASID support anyway.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7a60edef09d2..afd34fa1db91 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4552,6 +4552,11 @@ 
 			flushed.
 			See arch/riscv/mm/tlbflush.c
 
+	tlbi_method=	[RV]
+			Format: { "sbi", "ipi" }
+			Default: "ipi"
+			Specify the method used to perform remote TLB shootdown.
+
 	tmem		[KNL,XEN]
 			Enable the Transcendent memory driver if built-in.
 
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 33083f48a936..ceee76f14a0a 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -72,19 +72,107 @@  void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 #ifdef CONFIG_SMP
 
+/*
+ * SBI has interfaces for remote TLB shootdown.  If there is no hardware
+ * remote TLB shootdown support, SBI performs IPIs itself instead.  Some SBI
+ * implementations may also ignore ASID and address ranges provided and do a
+ * full TLB flush instead.  In these cases we might want to do IPIs ourselves.
+ *
+ * This parameter allows the approach (IPI/SBI) to be specified using boot
+ * cmdline.
+ */
+static bool tlbi_ipi = true;
+
+static int __init setup_tlbi_method(char *str)
+{
+	if (strcmp(str, "ipi") == 0)
+		tlbi_ipi = true;
+	else if (strcmp(str, "sbi") == 0)
+		tlbi_ipi = false;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+early_param("tlbi_method", setup_tlbi_method);
+
+
+struct tlbi {
+	unsigned long start;
+	unsigned long size;
+	unsigned long asid;
+};
+
+static void ipi_remote_sfence_vma(void *info)
+{
+	struct tlbi *data = info;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	if (size == SFENCE_VMA_FLUSH_ALL) {
+		local_flush_tlb_all();
+		return;
+	}
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0"
+				      : : "r" (start + i)
+				      : "memory");
+	}
+}
+
+static void ipi_remote_sfence_vma_asid(void *info)
+{
+	struct tlbi *data = info;
+	unsigned long asid = data->asid;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	if (size == SFENCE_VMA_FLUSH_ALL) {
+		__asm__ __volatile__ ("sfence.vma x0, %0"
+				      : : "r" (asid)
+				      : "memory");
+		return;
+	}
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				      : : "r" (start + i), "r" (asid)
+				      : "memory");
+	}
+}
+
 static void remote_sfence_vma(unsigned long start, unsigned long size)
 {
-	sbi_remote_sfence_vma(NULL, start, size);
+	if (tlbi_ipi) {
+		struct tlbi info = {
+			.start = start,
+			.size = size,
+		};
+		on_each_cpu(ipi_remote_sfence_vma, &info, 1);
+	} else
+		sbi_remote_sfence_vma(NULL, start, size);
 }
 
 static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
 				   unsigned long size, unsigned long asid)
 {
-	cpumask_t hmask;
-
-	cpumask_clear(&hmask);
-	riscv_cpuid_to_hartid_mask(mask, &hmask);
-	sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
+	if (tlbi_ipi) {
+		struct tlbi info = {
+			.start = start,
+			.size = size,
+			.asid = asid,
+		};
+		on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1);
+	} else {
+		cpumask_t hmask;
+
+		cpumask_clear(&hmask);
+		riscv_cpuid_to_hartid_mask(mask, &hmask);
+		sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
+	}
 }