diff mbox

[v3] irqchip/gic-v3: Ensure GICR_CTLR.EnableLPI=0 is observed before enabling

Message ID 1521683929-15644-1-git-send-email-shankerd@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Shanker Donthineni March 22, 2018, 1:58 a.m. UTC
The definition of the GICR_CTLR.RWP control bit was expanded to indicate
status of changing GICR_CTLR.EnableLPI from 1 to 0 is being in progress
or completed. Software must observe GICR_CTLR.RWP==0 after clearing
GICR_CTLR.EnableLPI from 1 to 0 and before writing GICR_PENDBASER and/or
GICR_PROPBASER, otherwise behavior is UNPREDICTABLE.

Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
---
Changes since v2:
  -Revert readl_relaxed_poll() usage since it's not usable in GICv3 probe().
  -Changes to pr_xxx messages.

Changes since v1:
  -Moved LPI disable code to a seperate function as Marc suggested.
  -Mark's suggestion to use readl_relaxed_poll_timeout() helper functions.

 drivers/irqchip/irq-gic-v3-its.c   | 75 +++++++++++++++++++++++++++++++-------
 include/linux/irqchip/arm-gic-v3.h |  1 +
 2 files changed, 62 insertions(+), 14 deletions(-)

Comments

Marc Zyngier March 22, 2018, 3:51 p.m. UTC | #1
On 22/03/18 01:58, Shanker Donthineni wrote:
> The definition of the GICR_CTLR.RWP control bit was expanded to indicate
> status of changing GICR_CTLR.EnableLPI from 1 to 0 is being in progress
> or completed. Software must observe GICR_CTLR.RWP==0 after clearing
> GICR_CTLR.EnableLPI from 1 to 0 and before writing GICR_PENDBASER and/or
> GICR_PROPBASER, otherwise behavior is UNPREDICTABLE.
> 
> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
> ---
> Changes since v2:
>   -Revert readl_relaxed_poll() usage since it's not usable in GICv3 probe().
>   -Changes to pr_xxx messages.
> 
> Changes since v1:
>   -Moved LPI disable code to a seperate function as Marc suggested.
>   -Mark's suggestion to use readl_relaxed_poll_timeout() helper functions.
> 
>  drivers/irqchip/irq-gic-v3-its.c   | 75 +++++++++++++++++++++++++++++++-------
>  include/linux/irqchip/arm-gic-v3.h |  1 +
>  2 files changed, 62 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 2cbb19c..c1e8a8e 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -33,6 +33,7 @@
>  #include <linux/of_platform.h>
>  #include <linux/percpu.h>
>  #include <linux/slab.h>
> +#include <linux/time64.h>
>  
>  #include <linux/irqchip.h>
>  #include <linux/irqchip/arm-gic-v3.h>

This hunk doesn't apply to my -next branch, but I don't think it is
actually required either...

> @@ -1875,16 +1876,6 @@ static void its_cpu_init_lpis(void)
>  		gic_data_rdist()->pend_page = pend_page;
>  	}
>  
> -	/* Disable LPIs */
> -	val = readl_relaxed(rbase + GICR_CTLR);
> -	val &= ~GICR_CTLR_ENABLE_LPIS;
> -	writel_relaxed(val, rbase + GICR_CTLR);
> -
> -	/*
> -	 * Make sure any change to the table is observable by the GIC.
> -	 */
> -	dsb(sy);
> -
>  	/* set PROPBASE */
>  	val = (page_to_phys(gic_rdists->prop_page) |
>  	       GICR_PROPBASER_InnerShareable |
> @@ -3287,13 +3278,69 @@ static bool gic_rdists_supports_plpis(void)
>  	return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
>  }
>  
> +static int redist_disable_lpis(void)
> +{
> +	void __iomem *rbase = gic_data_rdist_rd_base();
> +	u64 timeout = USEC_PER_SEC;
> +	u64 val;
> +
> +	if (!gic_rdists_supports_plpis()) {
> +		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
> +		return -ENXIO;
> +	}
> +
> +	val = readl_relaxed(rbase + GICR_CTLR);
> +	if (!(val & GICR_CTLR_ENABLE_LPIS))
> +		return 0;
> +
> +	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
> +		smp_processor_id());
> +	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
> +
> +	/* Disable LPIs */
> +	val &= ~GICR_CTLR_ENABLE_LPIS;
> +	writel_relaxed(val, rbase + GICR_CTLR);
> +
> +	/* Make sure any change to GICR_CTLR is observable by the GIC */
> +	dsb(sy);
> +
> +	/**
> +	 * Software must observe RWP==0 after clearing GICR_CTLR.EnableLPIs
> +	 * from 1 to 0 before programming GICR_PEND{PROP}BASER registers.
> +	 * Bail out the driver probe() in case of timeout.
> +	 */
> +	while (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_RWP) {
> +		if (!timeout) {
> +			pr_err("CPU%d: Failed to observe RWP==0 after disabling LPIs\n",

I think you can simplify the message with something like:

	"Time-out disabling LPIs\n"

Nobody apart from you and I really want to know about RWP...

> +			       smp_processor_id());
> +			return -ETIMEDOUT;
> +		}
> +		udelay(1);
> +		timeout--;
> +	}
> +
> +	/**
> +	 * After it has been written to 1, it is IMPLEMENTATION DEFINED whether
> +	 * the bit GICR_CTLR.EnableLPI becomes RES1 or can be cleared to 0.
> +	 * Bail out the driver probe() on systems where it's RES1.
> +	 */
> +	if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) {
> +		pr_err("CPU%d: Failed to disable LPIs\n", smp_processor_id());
> +		return -EBUSY;
> +	}
> +
> +	return 0;
> +}
> +
>  int its_cpu_init(void)
>  {
>  	if (!list_empty(&its_nodes)) {
> -		if (!gic_rdists_supports_plpis()) {
> -			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
> -			return -ENXIO;
> -		}
> +		int ret;
> +
> +		ret = redist_disable_lpis();
> +		if (ret)
> +			return ret;

Just realised that this is totally broken.

Why do we have this in the loop? Checking the LPI support for each ITS
was admittedly braindead (we only need to check it once per CPU), but
now trying to disable the LPIs each time we encounter an ITS is going to
make it go crazy and taint the kernel for no reason.

> +
>  		its_cpu_init_lpis();
>  		its_cpu_init_collection();
>  	}
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index b26eccc..c6f4c48 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -106,6 +106,7 @@
>  #define GICR_PIDR2			GICD_PIDR2
>  
>  #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
> +#define GICR_CTLR_RWP			(1UL << 3)
>  
>  #define GICR_TYPER_CPU_NUMBER(r)	(((r) >> 8) & 0xffff)
>  
> 

Thanks,

	M.
Shanker Donthineni March 22, 2018, 7:41 p.m. UTC | #2
Hi Marc,

On 03/22/2018 10:51 AM, Marc Zyngier wrote:
> On 22/03/18 01:58, Shanker Donthineni wrote:
>> The definition of the GICR_CTLR.RWP control bit was expanded to indicate
>> status of changing GICR_CTLR.EnableLPI from 1 to 0 is being in progress
>> or completed. Software must observe GICR_CTLR.RWP==0 after clearing
>> GICR_CTLR.EnableLPI from 1 to 0 and before writing GICR_PENDBASER and/or
>> GICR_PROPBASER, otherwise behavior is UNPREDICTABLE.
>>
>> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
>> ---
>> Changes since v2:
>>   -Revert readl_relaxed_poll() usage since it's not usable in GICv3 probe().
>>   -Changes to pr_xxx messages.
>>
>> Changes since v1:
>>   -Moved LPI disable code to a seperate function as Marc suggested.
>>   -Mark's suggestion to use readl_relaxed_poll_timeout() helper functions.
>>
>>  drivers/irqchip/irq-gic-v3-its.c   | 75 +++++++++++++++++++++++++++++++-------
>>  include/linux/irqchip/arm-gic-v3.h |  1 +
>>  2 files changed, 62 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
>> index 2cbb19c..c1e8a8e 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -33,6 +33,7 @@
>>  #include <linux/of_platform.h>
>>  #include <linux/percpu.h>
>>  #include <linux/slab.h>
>> +#include <linux/time64.h>
>>  
>>  #include <linux/irqchip.h>
>>  #include <linux/irqchip/arm-gic-v3.h>
> 
> This hunk doesn't apply to my -next branch, but I don't think it is
> actually required either...
> 

I'll try to drop "#include <linux/time64.h>" in next patch if USEC_PER_SEC
included by other header files or rebase to -next branch.

>> @@ -1875,16 +1876,6 @@ static void its_cpu_init_lpis(void)
>>  		gic_data_rdist()->pend_page = pend_page;
>>  	}
>>  
>> -	/* Disable LPIs */
>> -	val = readl_relaxed(rbase + GICR_CTLR);
>> -	val &= ~GICR_CTLR_ENABLE_LPIS;
>> -	writel_relaxed(val, rbase + GICR_CTLR);
>> -
>> -	/*
>> -	 * Make sure any change to the table is observable by the GIC.
>> -	 */
>> -	dsb(sy);
>> -
>>  	/* set PROPBASE */
>>  	val = (page_to_phys(gic_rdists->prop_page) |
>>  	       GICR_PROPBASER_InnerShareable |
>> @@ -3287,13 +3278,69 @@ static bool gic_rdists_supports_plpis(void)
>>  	return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
>>  }
>>  
>> +static int redist_disable_lpis(void)
>> +{
>> +	void __iomem *rbase = gic_data_rdist_rd_base();
>> +	u64 timeout = USEC_PER_SEC;
>> +	u64 val;
>> +
>> +	if (!gic_rdists_supports_plpis()) {
>> +		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
>> +		return -ENXIO;
>> +	}
>> +
>> +	val = readl_relaxed(rbase + GICR_CTLR);
>> +	if (!(val & GICR_CTLR_ENABLE_LPIS))
>> +		return 0;
>> +
>> +	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
>> +		smp_processor_id());
>> +	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
>> +
>> +	/* Disable LPIs */
>> +	val &= ~GICR_CTLR_ENABLE_LPIS;
>> +	writel_relaxed(val, rbase + GICR_CTLR);
>> +
>> +	/* Make sure any change to GICR_CTLR is observable by the GIC */
>> +	dsb(sy);
>> +
>> +	/**
>> +	 * Software must observe RWP==0 after clearing GICR_CTLR.EnableLPIs
>> +	 * from 1 to 0 before programming GICR_PEND{PROP}BASER registers.
>> +	 * Bail out the driver probe() in case of timeout.
>> +	 */
>> +	while (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_RWP) {
>> +		if (!timeout) {
>> +			pr_err("CPU%d: Failed to observe RWP==0 after disabling LPIs\n",
> 
> I think you can simplify the message with something like:
> 
> 	"Time-out disabling LPIs\n"
> 
> Nobody apart from you and I really want to know about RWP...
> 

I'll change.

>> +			       smp_processor_id());
>> +			return -ETIMEDOUT;
>> +		}
>> +		udelay(1);
>> +		timeout--;
>> +	}
>> +
>> +	/**
>> +	 * After it has been written to 1, it is IMPLEMENTATION DEFINED whether
>> +	 * the bit GICR_CTLR.EnableLPI becomes RES1 or can be cleared to 0.
>> +	 * Bail out the driver probe() on systems where it's RES1.
>> +	 */
>> +	if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) {
>> +		pr_err("CPU%d: Failed to disable LPIs\n", smp_processor_id());
>> +		return -EBUSY;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  int its_cpu_init(void)
>>  {
>>  	if (!list_empty(&its_nodes)) {
>> -		if (!gic_rdists_supports_plpis()) {
>> -			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
>> -			return -ENXIO;
>> -		}
>> +		int ret;
>> +
>> +		ret = redist_disable_lpis();
>> +		if (ret)
>> +			return ret;
> 
> Just realised that this is totally broken.
> 
> Why do we have this in the loop? Checking the LPI support for each ITS
> was admittedly braindead (we only need to check it once per CPU), but
> now trying to disable the LPIs each time we encounter an ITS is going to
> make it go crazy and taint the kernel for no reason.
> 

Sorry, I didn't quite understand suggestions you're recommending. I don't
see any loop here, it just checks the ITS_LIST_EMPTY.

The function its_cpu_init() is being called for each CPU coming online.
We're trying to disable GICR LPI before calling its_cpu_init_lpis() and
its_cpu_init_collection(). Newly added function redist_disable_lpis()
will be called only once per CPU but not per each ITS hardware instance.
Is something I'm missing here?

>> +
>>  		its_cpu_init_lpis();
>>  		its_cpu_init_collection();
>>  	}
>> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
>> index b26eccc..c6f4c48 100644
>> --- a/include/linux/irqchip/arm-gic-v3.h
>> +++ b/include/linux/irqchip/arm-gic-v3.h
>> @@ -106,6 +106,7 @@
>>  #define GICR_PIDR2			GICD_PIDR2
>>  
>>  #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
>> +#define GICR_CTLR_RWP			(1UL << 3)
>>  
>>  #define GICR_TYPER_CPU_NUMBER(r)	(((r) >> 8) & 0xffff)
>>  
>>
> 
> Thanks,
> 
> 	M.
>
Marc Zyngier March 23, 2018, 9:19 a.m. UTC | #3
On Thu, 22 Mar 2018 19:41:09 +0000,
Shanker Donthineni wrote:
> 
> Hi Marc,
> 
> On 03/22/2018 10:51 AM, Marc Zyngier wrote:
> > On 22/03/18 01:58, Shanker Donthineni wrote:
> >> The definition of the GICR_CTLR.RWP control bit was expanded to indicate
> >> status of changing GICR_CTLR.EnableLPI from 1 to 0 is being in progress
> >> or completed. Software must observe GICR_CTLR.RWP==0 after clearing
> >> GICR_CTLR.EnableLPI from 1 to 0 and before writing GICR_PENDBASER and/or
> >> GICR_PROPBASER, otherwise behavior is UNPREDICTABLE.
> >>
> >> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
> >> ---
> >> Changes since v2:
> >>   -Revert readl_relaxed_poll() usage since it's not usable in GICv3 probe().
> >>   -Changes to pr_xxx messages.
> >>
> >> Changes since v1:
> >>   -Moved LPI disable code to a seperate function as Marc suggested.
> >>   -Mark's suggestion to use readl_relaxed_poll_timeout() helper functions.
> >>
> >>  drivers/irqchip/irq-gic-v3-its.c   | 75 +++++++++++++++++++++++++++++++-------
> >>  include/linux/irqchip/arm-gic-v3.h |  1 +
> >>  2 files changed, 62 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> >> index 2cbb19c..c1e8a8e 100644
> >> --- a/drivers/irqchip/irq-gic-v3-its.c
> >> +++ b/drivers/irqchip/irq-gic-v3-its.c
> >> @@ -33,6 +33,7 @@
> >>  #include <linux/of_platform.h>
> >>  #include <linux/percpu.h>
> >>  #include <linux/slab.h>
> >> +#include <linux/time64.h>
> >>  
> >>  #include <linux/irqchip.h>
> >>  #include <linux/irqchip/arm-gic-v3.h>
> > 
> > This hunk doesn't apply to my -next branch, but I don't think it is
> > actually required either...
> > 
> 
> I'll try to drop "#include <linux/time64.h>" in next patch if USEC_PER_SEC
> included by other header files or rebase to -next branch.
> 
> >> @@ -1875,16 +1876,6 @@ static void its_cpu_init_lpis(void)
> >>  		gic_data_rdist()->pend_page = pend_page;
> >>  	}
> >>  
> >> -	/* Disable LPIs */
> >> -	val = readl_relaxed(rbase + GICR_CTLR);
> >> -	val &= ~GICR_CTLR_ENABLE_LPIS;
> >> -	writel_relaxed(val, rbase + GICR_CTLR);
> >> -
> >> -	/*
> >> -	 * Make sure any change to the table is observable by the GIC.
> >> -	 */
> >> -	dsb(sy);
> >> -
> >>  	/* set PROPBASE */
> >>  	val = (page_to_phys(gic_rdists->prop_page) |
> >>  	       GICR_PROPBASER_InnerShareable |
> >> @@ -3287,13 +3278,69 @@ static bool gic_rdists_supports_plpis(void)
> >>  	return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
> >>  }
> >>  
> >> +static int redist_disable_lpis(void)
> >> +{
> >> +	void __iomem *rbase = gic_data_rdist_rd_base();
> >> +	u64 timeout = USEC_PER_SEC;
> >> +	u64 val;
> >> +
> >> +	if (!gic_rdists_supports_plpis()) {
> >> +		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
> >> +		return -ENXIO;
> >> +	}
> >> +
> >> +	val = readl_relaxed(rbase + GICR_CTLR);
> >> +	if (!(val & GICR_CTLR_ENABLE_LPIS))
> >> +		return 0;
> >> +
> >> +	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
> >> +		smp_processor_id());
> >> +	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
> >> +
> >> +	/* Disable LPIs */
> >> +	val &= ~GICR_CTLR_ENABLE_LPIS;
> >> +	writel_relaxed(val, rbase + GICR_CTLR);
> >> +
> >> +	/* Make sure any change to GICR_CTLR is observable by the GIC */
> >> +	dsb(sy);
> >> +
> >> +	/**
> >> +	 * Software must observe RWP==0 after clearing GICR_CTLR.EnableLPIs
> >> +	 * from 1 to 0 before programming GICR_PEND{PROP}BASER registers.
> >> +	 * Bail out the driver probe() in case of timeout.
> >> +	 */
> >> +	while (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_RWP) {
> >> +		if (!timeout) {
> >> +			pr_err("CPU%d: Failed to observe RWP==0 after disabling LPIs\n",
> > 
> > I think you can simplify the message with something like:
> > 
> > 	"Time-out disabling LPIs\n"
> > 
> > Nobody apart from you and I really want to know about RWP...
> > 
> 
> I'll change.
> 
> >> +			       smp_processor_id());
> >> +			return -ETIMEDOUT;
> >> +		}
> >> +		udelay(1);
> >> +		timeout--;
> >> +	}
> >> +
> >> +	/**
> >> +	 * After it has been written to 1, it is IMPLEMENTATION DEFINED whether
> >> +	 * the bit GICR_CTLR.EnableLPI becomes RES1 or can be cleared to 0.
> >> +	 * Bail out the driver probe() on systems where it's RES1.
> >> +	 */
> >> +	if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) {
> >> +		pr_err("CPU%d: Failed to disable LPIs\n", smp_processor_id());
> >> +		return -EBUSY;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>  int its_cpu_init(void)
> >>  {
> >>  	if (!list_empty(&its_nodes)) {
> >> -		if (!gic_rdists_supports_plpis()) {
> >> -			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
> >> -			return -ENXIO;
> >> -		}
> >> +		int ret;
> >> +
> >> +		ret = redist_disable_lpis();
> >> +		if (ret)
> >> +			return ret;
> > 
> > Just realised that this is totally broken.
> > 
> > Why do we have this in the loop? Checking the LPI support for each ITS
> > was admittedly braindead (we only need to check it once per CPU), but
> > now trying to disable the LPIs each time we encounter an ITS is going to
> > make it go crazy and taint the kernel for no reason.
> > 
> 
> Sorry, I didn't quite understand suggestions you're recommending. I don't
> see any loop here, it just checks the ITS_LIST_EMPTY.
> 
> The function its_cpu_init() is being called for each CPU coming online.
> We're trying to disable GICR LPI before calling its_cpu_init_lpis() and
> its_cpu_init_collection(). Newly added function redist_disable_lpis()
> will be called only once per CPU but not per each ITS hardware instance.
> Is something I'm missing here?

No you're not. I just got confused with my own patches and completely
misread yours.

Sorry about that. I'll apply the patch directly with the above
changes.

Thanks,

	M.
Zhang, Lei June 19, 2018, 2:14 p.m. UTC | #4
Hi shankerd, Marc

I have one question.

Does it means after GICR_CTLR.EnableLPI has been written to 1, 
if the bit GICR_CTLR.EnableLPI becomes RES1, we can't use LPIs?
Because its_cpu_init_lpis and its_cpu_init_collections will not be called. 

> whether
> +	 * the bit GICR_CTLR.EnableLPI becomes RES1 or can be cleared to
> 0.
> +	 * Bail out the driver probe() on systems where it's RES1.
> +	 * 
> +	/**
> +	 * After it has been written to 1, it is IMPLEMENTATION DEFINED
> whether
> +	 * the bit GICR_CTLR.EnableLPI becomes RES1 or can be cleared to
> 0.
> +	 * Bail out the driver probe() on systems where it's RES1.
> +	 */
> +	if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) {
> +		pr_err("CPU%d: Failed to disable LPIs\n",
> smp_processor_id());
> +		return -EBUSY;
> +	}

Best Regards,
Lei Zhang
zhang.lei@jp.fujitsu.com
Marc Zyngier June 19, 2018, 2:46 p.m. UTC | #5
Hi Lei,

On Tue, 19 Jun 2018 15:14:27 +0100,
"Zhang, Lei" <zhang.lei@jp.fujitsu.com> wrote:
> 
> Hi shankerd, Marc
> 
> I have one question.
> 
> Does it means after GICR_CTLR.EnableLPI has been written to 1, 
> if the bit GICR_CTLR.EnableLPI becomes RES1, we can't use LPIs?
> Because its_cpu_init_lpis and its_cpu_init_collections will not be called. 

There are two issues:

- If EnableLPI is already set to 1, your redistributors are already
  potentially corrupting the memory by writing to the pending
  tables. Your system is now potentially unstable (single bit
  corruption, depending on what the ITS outputs).

- If EnableLPI has become RES1, you cannot even turn it off to
  reprogram things so that the property and pending tables are under
  your control.

At that stage, your system is in a very bad shape, and LPIs are the
least of your problems.

Thanks,

	M.
Zhang, Lei June 21, 2018, 10:57 a.m. UTC | #6
Hi Marc

> - If EnableLPI is already set to 1, your redistributors are already
>   potentially corrupting the memory by writing to the pending
>   tables. Your system is now potentially unstable (single bit
>   corruption, depending on what the ITS outputs).
> 
> - If EnableLPI has become RES1, you cannot even turn it off to
>   reprogram things so that the property and pending tables are under
>   your control.
Thanks for you reply.

One more question.
If EnableLPIs bit becomes RES1 after EnableLPIs has been written to 1,
DKUMP/KEXEC case will enter kernel with GICR_CTLR.EnableLPIs=1.
In this case I do not think DKUMP/KEXEC can work well even use irqchip.gicv3_nolpi, 
if secondary kernel need to use LPI such as the disk is nvme. Am I right?

I have seen the discussion as below URL, but I do not really understand the conclusion.
https://patchwork.kernel.org/patch/10281325/
Marc Zyngier June 21, 2018, 12:02 p.m. UTC | #7
Hi Lei,

On 21/06/18 11:57, Zhang, Lei wrote:
> Hi Marc
> 
>> - If EnableLPI is already set to 1, your redistributors are already
>>   potentially corrupting the memory by writing to the pending
>>   tables. Your system is now potentially unstable (single bit
>>   corruption, depending on what the ITS outputs).
>>
>> - If EnableLPI has become RES1, you cannot even turn it off to
>>   reprogram things so that the property and pending tables are under
>>   your control.
> Thanks for you reply.
> 
> One more question.
> If EnableLPIs bit becomes RES1 after EnableLPIs has been written to
> 1, DKUMP/KEXEC case will enter kernel with GICR_CTLR.EnableLPIs=1. In
> this case I do not think DKUMP/KEXEC can work well even use
> irqchip.gicv3_nolpi, if secondary kernel need to use LPI such as the
> disk is nvme. Am I right?

Absolutely. If you can only signal interrupts using LPIs, there isn't
much you can do if you cannot disable LPIs the first place to
reconfigure the interrupts. Your NVME device is pretty useless in that case.

What we could potentially do in the kdump case (and only in that case)
would be to try to reuse the programmed tables and IDbits settings if
EnableLPIs is RES1. This is absolutely awful, but it could get you
going. Maybe.

> I have seen the discussion as below URL, but I do not really
> understand the conclusion. 
> https://patchwork.kernel.org/patch/10281325/

The conclusion is that although RES1 is valid for EnableLPIs once it has
been set to 1 (because that is how it was initially specified), it is a
very bad idea to implement it like this. Allowing LPIs to be disabled is
absolutely essential in a modern computing environment.

Thanks,

	M.
Marc Zyngier June 26, 2018, 2:53 p.m. UTC | #8
Hi Lei,

On Thu, 21 Jun 2018 13:02:11 +0100,
Marc Zyngier <marc.zyngier@arm.com> wrote:
> 
> Hi Lei,
> 
> On 21/06/18 11:57, Zhang, Lei wrote:
> > Hi Marc
> > 
> >> - If EnableLPI is already set to 1, your redistributors are already
> >>   potentially corrupting the memory by writing to the pending
> >>   tables. Your system is now potentially unstable (single bit
> >>   corruption, depending on what the ITS outputs).
> >>
> >> - If EnableLPI has become RES1, you cannot even turn it off to
> >>   reprogram things so that the property and pending tables are under
> >>   your control.
> > Thanks for you reply.
> > 
> > One more question.
> > If EnableLPIs bit becomes RES1 after EnableLPIs has been written to
> > 1, DKUMP/KEXEC case will enter kernel with GICR_CTLR.EnableLPIs=1. In
> > this case I do not think DKUMP/KEXEC can work well even use
> > irqchip.gicv3_nolpi, if secondary kernel need to use LPI such as the
> > disk is nvme. Am I right?
> 
> Absolutely. If you can only signal interrupts using LPIs, there isn't
> much you can do if you cannot disable LPIs the first place to
> reconfigure the interrupts. Your NVME device is pretty useless in that case.
> 
> What we could potentially do in the kdump case (and only in that case)
> would be to try to reuse the programmed tables and IDbits settings if
> EnableLPIs is RES1. This is absolutely awful, but it could get you
> going. Maybe.

For what it is worth, I've prototyped this solution, and it is less
horrible than I though:

https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/gicv3-kdump

It should allow a system that reboots into a kdump kernel to use LPIs
with the same limitations (IDbits setting and memory) as the previous
one. It doesn't help for kexec though, as the whole memory map is
relinquished to the new kernel (and by the time we find out, it could
be too late to avoid fatal damage).

It could also be used to build in an hypothetical world where the
firmware allocates tables for the kernel, excluding them from the
memory map.

The whole thing, of course, requires careful thoughts, reviewing and
testing. I'd appreciate your feedback on the matter.

Thanks,

	M.
diff mbox

Patch

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 2cbb19c..c1e8a8e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -33,6 +33,7 @@ 
 #include <linux/of_platform.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/time64.h>
 
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-gic-v3.h>
@@ -1875,16 +1876,6 @@  static void its_cpu_init_lpis(void)
 		gic_data_rdist()->pend_page = pend_page;
 	}
 
-	/* Disable LPIs */
-	val = readl_relaxed(rbase + GICR_CTLR);
-	val &= ~GICR_CTLR_ENABLE_LPIS;
-	writel_relaxed(val, rbase + GICR_CTLR);
-
-	/*
-	 * Make sure any change to the table is observable by the GIC.
-	 */
-	dsb(sy);
-
 	/* set PROPBASE */
 	val = (page_to_phys(gic_rdists->prop_page) |
 	       GICR_PROPBASER_InnerShareable |
@@ -3287,13 +3278,69 @@  static bool gic_rdists_supports_plpis(void)
 	return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
 }
 
+static int redist_disable_lpis(void)
+{
+	void __iomem *rbase = gic_data_rdist_rd_base();
+	u64 timeout = USEC_PER_SEC;
+	u64 val;
+
+	if (!gic_rdists_supports_plpis()) {
+		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
+		return -ENXIO;
+	}
+
+	val = readl_relaxed(rbase + GICR_CTLR);
+	if (!(val & GICR_CTLR_ENABLE_LPIS))
+		return 0;
+
+	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
+		smp_processor_id());
+	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
+
+	/* Disable LPIs */
+	val &= ~GICR_CTLR_ENABLE_LPIS;
+	writel_relaxed(val, rbase + GICR_CTLR);
+
+	/* Make sure any change to GICR_CTLR is observable by the GIC */
+	dsb(sy);
+
+	/**
+	 * Software must observe RWP==0 after clearing GICR_CTLR.EnableLPIs
+	 * from 1 to 0 before programming GICR_PEND{PROP}BASER registers.
+	 * Bail out the driver probe() in case of timeout.
+	 */
+	while (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_RWP) {
+		if (!timeout) {
+			pr_err("CPU%d: Failed to observe RWP==0 after disabling LPIs\n",
+			       smp_processor_id());
+			return -ETIMEDOUT;
+		}
+		udelay(1);
+		timeout--;
+	}
+
+	/**
+	 * After it has been written to 1, it is IMPLEMENTATION DEFINED whether
+	 * the bit GICR_CTLR.EnableLPI becomes RES1 or can be cleared to 0.
+	 * Bail out the driver probe() on systems where it's RES1.
+	 */
+	if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) {
+		pr_err("CPU%d: Failed to disable LPIs\n", smp_processor_id());
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 int its_cpu_init(void)
 {
 	if (!list_empty(&its_nodes)) {
-		if (!gic_rdists_supports_plpis()) {
-			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
-			return -ENXIO;
-		}
+		int ret;
+
+		ret = redist_disable_lpis();
+		if (ret)
+			return ret;
+
 		its_cpu_init_lpis();
 		its_cpu_init_collection();
 	}
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index b26eccc..c6f4c48 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -106,6 +106,7 @@ 
 #define GICR_PIDR2			GICD_PIDR2
 
 #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
+#define GICR_CTLR_RWP			(1UL << 3)
 
 #define GICR_TYPER_CPU_NUMBER(r)	(((r) >> 8) & 0xffff)