diff mbox

regression: OMAP4 (next-20141204) (bisect to: ARM: 8208/1: l2c: Refactor the driver to use commit-like)

Message ID 20141209165746.GA29935@kahuna (mailing list archive)
State New, archived
Headers show

Commit Message

Nishanth Menon Dec. 9, 2014, 4:57 p.m. UTC
On 10:13-20141205, Nishanth Menon wrote:
> On 12/05/2014 10:10 AM, Nishanth Menon wrote:
> > next-20141204 fails to boot, but next-20141203 boots fine with
> > omap2plus_defconfig.
> > 
> > Panda-ES(4460):
> > https://github.com/nmenon/kernel-test-logs/blob/next-20141204/omap2plus_defconfig/pandaboard-es.txt
> > Panda(4430):
> > https://github.com/nmenon/kernel-test-logs/blob/next-20141204/omap2plus_defconfig/pandaboard-vanilla.txt
> > 
> > at the point of hang (JTAG):
> >  pandaboard-es:
> > 	cpu0: http://slexy.org/view/s2eIFqkRd5
> > 	cpu1: http://slexy.org/view/s2Tysb6gpL
> > 
> > Case #1:
> > Disabling CPUIDLE allows boot to proceed. there does not seem to have
> > been any change in drivers/cpuidle and arch/arm/mach-omap2 w.r.t this.
> > 
> > Case #2: Reverting the following allows boot.
> > 
> > From next-20141204
> > 10df7d5 ARM: 8211/1: l2c: Add support for overriding prefetch settings
> > revert this  -> boot still fails
> > 
> > d42ced0 ARM: 8210/1: l2c: Get outer cache .write_sec callback from
> > mach_desc only if not NULL
> > revert this  -> boot still fails
> > 
> > 46b9af8 ARM: 8209/1: l2c: Add interface to ask hypervisor to configure L2C
> > revert this  -> boot still fails
> > 
> > c94e325 ARM: 8208/1: l2c: Refactor the driver to use commit-like
> > revert this  -> boot passed (first bad commit).
> > 
> > 
> 
> + linux-samsung soc and updated Thomaz's mail ID (gmail now).

Spent a few minutes trying to track this down, and it does look like commit
c94e325 does a kmemdup for the data as part of l2x0_of_init->__l2c_init.

This fails since the invocation is in .init_early. Doing it a bit later,
as in the following hack, makes it work:

Comments

Marek Szyprowski Dec. 10, 2014, 9:42 a.m. UTC | #1
Hello,

On 2014-12-09 17:57, Nishanth Menon wrote:
> On 10:13-20141205, Nishanth Menon wrote:
>> On 12/05/2014 10:10 AM, Nishanth Menon wrote:
>>> next-20141204 fails to boot, but next-20141203 boots fine with
>>> omap2plus_defconfig.
>>>
>>> Panda-ES(4460):
>>> https://github.com/nmenon/kernel-test-logs/blob/next-20141204/omap2plus_defconfig/pandaboard-es.txt
>>> Panda(4430):
>>> https://github.com/nmenon/kernel-test-logs/blob/next-20141204/omap2plus_defconfig/pandaboard-vanilla.txt
>>>
>>> at the point of hang (JTAG):
>>>   pandaboard-es:
>>> 	cpu0: http://slexy.org/view/s2eIFqkRd5
>>> 	cpu1: http://slexy.org/view/s2Tysb6gpL
>>>
>>> Case #1:
>>> Disabling CPUIDLE allows boot to proceed. there does not seem to have
>>> been any change in drivers/cpuidle and arch/arm/mach-omap2 w.r.t this.
>>>
>>> Case #2: Reverting the following allows boot.
>>>
>>>  From next-20141204
>>> 10df7d5 ARM: 8211/1: l2c: Add support for overriding prefetch settings
>>> revert this  -> boot still fails
>>>
>>> d42ced0 ARM: 8210/1: l2c: Get outer cache .write_sec callback from
>>> mach_desc only if not NULL
>>> revert this  -> boot still fails
>>>
>>> 46b9af8 ARM: 8209/1: l2c: Add interface to ask hypervisor to configure L2C
>>> revert this  -> boot still fails
>>>
>>> c94e325 ARM: 8208/1: l2c: Refactor the driver to use commit-like
>>> revert this  -> boot passed (first bad commit).
>>>
>>>
>> + linux-samsung soc and updated Thomaz's mail ID (gmail now).
> Spend a few mins trying to track this down and it does look like commit
> c94e325 does a kmemdup for the data as part of l2x0_of_init->__l2c_init
>
> This fails since the invocation is in early_init. doing it a bit later
> as the following hack makes it work
> diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
> index 608079a..0bc6bd9 100644
> --- a/arch/arm/mach-omap2/board-generic.c
> +++ b/arch/arm/mach-omap2/board-generic.c
> @@ -170,12 +170,19 @@ static const char *const omap4_boards_compat[] __initconst = {
>   	NULL,
>   };
>   
> +
> +static void tmp_init_irq(void)
> +{
> +	omap_l2_cache_init();
> +	omap_gic_of_init();
> +}
> +
>   DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)")
>   	.reserve	= omap_reserve,
>   	.smp		= smp_ops(omap4_smp_ops),
>   	.map_io		= omap4_map_io,
>   	.init_early	= omap4430_init_early,
> -	.init_irq	= omap_gic_of_init,
> +	.init_irq	= tmp_init_irq,
>   	.init_machine	= omap_generic_init,
>   	.init_late	= omap4430_init_late,
>   	.init_time	= omap4_local_timer_init,
> diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
> index 03cbb16..f97847d 100644
> --- a/arch/arm/mach-omap2/io.c
> +++ b/arch/arm/mach-omap2/io.c
> @@ -627,7 +627,6 @@ void __init omap4430_init_early(void)
>   	omap44xx_clockdomains_init();
>   	omap44xx_hwmod_init();
>   	omap_hwmod_init_postsetup();
> -	omap_l2_cache_init();
>   	omap_clk_soc_init = omap4xxx_dt_clk_init;
>   }
>   

Please note that am43xx_init_early() also calls omap_l2_cache_init(),
so a similar fix is needed for "Generic AM43 (Flattened Device Tree)"
machines.

I've briefly looked at how the initialization is done on various omap
platforms, but I don't see a good generic place for omap_l2_cache_init().
IMHO the best solution would be to completely switch to generic/common l2c
initialization and provide ".l2c_aux_val" and ".l2c_aux_mask" in the machine
descriptor. For the time being, something like what is proposed above can be used.

I assume that now it won't be possible to get l2c patches back to -next,
so I will resend them (again...) with the omap related fix.

> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
> index e5948c5..0ca90db 100644
> --- a/arch/arm/mm/cache-l2x0.c
> +++ b/arch/arm/mm/cache-l2x0.c
> @@ -848,8 +848,11 @@ static int __init __l2c_init(const struct l2c_init_data *data,
>   	 * context from callers can access the structure.
>   	 */
>   	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
> -	if (!l2x0_data)
> +	if (!l2x0_data) {
> +		pr_err("%s no mem %d\n", __func__, sizeof(*data));
> +		dump_stack();
>   		return -ENOMEM;
> +	}
>   
>   	/*
>   	 * Sanity check the aux values.  aux_mask is the bits we preserve
> @@ -1647,6 +1650,7 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
>   	struct device_node *np;
>   	struct resource res;
>   	u32 cache_id, old_aux;
> +	int r;
>   
>   	np = of_find_matching_node(NULL, l2x0_ids);
>   	if (!np)
> @@ -1693,6 +1697,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
>   	else
>   		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
>   
> -	return __l2c_init(data, aux_val, aux_mask, cache_id);
> +	r = __l2c_init(data, aux_val, aux_mask, cache_id);
> +	pr_err("%s: %d\n", __func__, r);
> +	return r;
>   }
>

Best regards
Russell King - ARM Linux Dec. 11, 2014, 9:29 a.m. UTC | #2
On Wed, Dec 10, 2014 at 10:42:33AM +0100, Marek Szyprowski wrote:
> I assume that now it won't be possible to get l2c patches back to -next,
> so I will resend them (again...) with the omap related fix.

What, you mean you don't know the fundamental rules of kernel development?

No one should ever dump any new code into linux-next during a merge
window which is not a fix for a regression or a bug fix, period.

Linus has in the past taken a snapshot of linux-next at the beginning
of a merge window, and then threatened to refuse to merge anything that
wasn't in his local snapshot, or which doesn't qualify as the above.

So no, it won't be possible, because I play by the community rules when
it comes to what gets merged and at what time in the cycle.
Marek Szyprowski Dec. 11, 2014, 10:42 a.m. UTC | #3
On 2014-12-11 10:29, Russell King - ARM Linux wrote:
> On Wed, Dec 10, 2014 at 10:42:33AM +0100, Marek Szyprowski wrote:
>> I assume that now it won't be possible to get l2c patches back to -next,
>> so I will resend them (again...) with the omap related fix.
> What, you mean you don't know the fundamental rules of kernel development?
>
> No one should ever dump any new code into linux-next during a merge
> window which is not a fix for a regression or a bug fix, period.
>
> Linus has in the past taken a snapshot of linux-next at the beginning
> of a merge window, and then threatened to refuse to merge anything that
> wasn't in his local snapshot, or which doesn't qualify as the above.
>
> So no, it won't be possible, because I play by the community rules when
> it comes to what gets merged and at what time in the cycle.

I know the rules. It was just my whining that yet another release cycle
got missed. It is really disappointing that those patches have been
floating around for months and no one found the issues related to the
different order of initialization. It took way too long to get them
scheduled for testing in -next.

The Exynos4 platform cannot be considered fully functional without
proper l2cache support, but I assume that this is once again our fault,
because we had to modify the common l2c code.

Best regards
Russell King - ARM Linux Dec. 22, 2014, 5:04 p.m. UTC | #4
On Thu, Dec 11, 2014 at 11:42:48AM +0100, Marek Szyprowski wrote:
> On 2014-12-11 10:29, Russell King - ARM Linux wrote:
> >On Wed, Dec 10, 2014 at 10:42:33AM +0100, Marek Szyprowski wrote:
> >>I assume that now it won't be possible to get l2c patches back to -next,
> >>so I will resend them (again...) with the omap related fix.
> >What, you mean you don't know the fundamental rules of kernel development?
> >
> >No one should ever dump any new code into linux-next during a merge
> >window which is not a fix for a regression or a bug fix, period.
> >
> >Linus has in the past taken a snapshot of linux-next at the beginning
> >of a merge window, and then threatened to refuse to merge anything that
> >wasn't in his local snapshot, or which doesn't qualify as the above.
> >
> >So no, it won't be possible, because I play by the community rules when
> >it comes to what gets merged and at what time in the cycle.
> 
> I know the rules. It was just my whining, that it is yet another release
> cycle
> that got missed. It is really disappointing, that those patches have been
> floating for months and noone found issues related to different order of
> initialization. It took way to long to get them scheduled for testing in
> -next.

Right, so - we're now at -rc1, and we should see about queuing this up
sooner rather than later - in its fixed form.  From what I can see,
there's been little progress on the OMAP problem.

Nishanth - can we push OMAP over to using the generic DT L2C
initialisation (the one from init_IRQ in arch/arm/kernel/irq.c) and
kill the SoC specific stuff in arch/arm/mach-omap2/omap4-common.c ?

From what I can see, in the DT case, the only thing which is used
there is the ioremap() to provide omap4_get_l2cache_base() with
something to return.  Everything else - the initialisation of the
l2c_write_sec pointer, and the aux mask and values - can be specified
via the machine_desc struct.

That only leaves the non-DT stuff to worry about this, and from what I
understand, that's going to be removed soon.  If we're going to keep
the non-DT stuff, we should implement a new machine_desc hook for it
instead of hijacking one of the existing callbacks.
Tony Lindgren Dec. 22, 2014, 5:08 p.m. UTC | #5
* Russell King - ARM Linux <linux@arm.linux.org.uk> [141222 09:06]:
> On Thu, Dec 11, 2014 at 11:42:48AM +0100, Marek Szyprowski wrote:
> > On 2014-12-11 10:29, Russell King - ARM Linux wrote:
> > >On Wed, Dec 10, 2014 at 10:42:33AM +0100, Marek Szyprowski wrote:
> > >>I assume that now it won't be possible to get l2c patches back to -next,
> > >>so I will resend them (again...) with the omap related fix.
> > >What, you mean you don't know the fundamental rules of kernel development?
> > >
> > >No one should ever dump any new code into linux-next during a merge
> > >window which is not a fix for a regression or a bug fix, period.
> > >
> > >Linus has in the past taken a snapshot of linux-next at the beginning
> > >of a merge window, and then threatened to refuse to merge anything that
> > >wasn't in his local snapshot, or which doesn't qualify as the above.
> > >
> > >So no, it won't be possible, because I play by the community rules when
> > >it comes to what gets merged and at what time in the cycle.
> > 
> > I know the rules. It was just my whining, that it is yet another release
> > cycle
> > that got missed. It is really disappointing, that those patches have been
> > floating for months and noone found issues related to different order of
> > initialization. It took way to long to get them scheduled for testing in
> > -next.
> 
> Right, so - we're now at -rc1, and we should see about queuing this up
> sooner rather than later - in its fixed form.  From what I can see,
> there's been little progress on the OMAP problem.
> 
> Nishanth - can we push OMAP over to using the generic DT L2C
> initialisation (the one from init_IRQ in arch/arm/kernel/irq.c) and
> kill the SoC specific stuff in arch/arm/mach-omap2/omap4-common.c ?
> 
> From what I can see, in the DT case, the only thing which is used
> there is the ioremap() to provide omap4_get_l2cache_base() with
> something to return.  Everything else - the initialisation of the
> l2c_write_sec pointer, and the aux mask and values - can be specified
> via the machine_desc struct.
> 
> That only leaves the non-DT stuff to worry about this, and from what I
> understand, that's going to be removed soon.  If we're going to keep
> the non-DT stuff, we should implement a new machine_desc hook for it
> instead of hijacking one of the existing callbacks.

For omap4 and later we've been DT only for about 1.5 years now so
that should not be an issue here.

Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-omap" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nishanth Menon Dec. 22, 2014, 5:12 p.m. UTC | #6
On Mon, Dec 22, 2014 at 11:04 AM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
>
>
> Nishanth - can we push OMAP over to using the generic DT L2C
> initialisation (the one from init_IRQ in arch/arm/kernel/irq.c) and
> kill the SoC specific stuff in arch/arm/mach-omap2/omap4-common.c ?
>
> From what I can see, in the DT case, the only thing which is used
> there is the ioremap() to provide omap4_get_l2cache_base() with
> something to return.  Everything else - the initialisation of the
> l2c_write_sec pointer, and the aux mask and values - can be specified
> via the machine_desc struct.

I think this is what Marek proposed. I had requested that the patches be
reposted with linux-omap in CC so that we can test and provide
feedback.

>
> That only leaves the non-DT stuff to worry about this, and from what I
> understand, that's going to be removed soon.  If we're going to keep
> the non-DT stuff, we should implement a new machine_desc hook for it
> instead of hijacking one of the existing callbacks.

none of the PL310 support requires non-DT. PL310 is needed for OMAP4
and AM437x both of which are DT only.
Russell King - ARM Linux Dec. 22, 2014, 5:28 p.m. UTC | #7
On Mon, Dec 22, 2014 at 11:12:42AM -0600, Nishanth Menon wrote:
> On Mon, Dec 22, 2014 at 11:04 AM, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > That only leaves the non-DT stuff to worry about this, and from what I
> > understand, that's going to be removed soon.  If we're going to keep
> > the non-DT stuff, we should implement a new machine_desc hook for it
> > instead of hijacking one of the existing callbacks.
> 
> none of the PL310 support requires non-DT. PL310 is needed for OMAP4
> and AM437x both of which are DT only.

Right, so the simple answer for the time being is to kill most of
omap_l2_cache_init(), leaving just the ioremap() behind.  Everything
else can go into the machine_desc structures, and OMAP4 and AM437x
can both benefit from initialising the L2 cache at exactly the same
point as most other platforms.
Marek Szyprowski Dec. 23, 2014, 11 a.m. UTC | #8
Hello,

On 2014-12-22 18:28, Russell King - ARM Linux wrote:
> On Mon, Dec 22, 2014 at 11:12:42AM -0600, Nishanth Menon wrote:
>> On Mon, Dec 22, 2014 at 11:04 AM, Russell King - ARM Linux
>> <linux@arm.linux.org.uk> wrote:
>>> That only leaves the non-DT stuff to worry about this, and from what I
>>> understand, that's going to be removed soon.  If we're going to keep
>>> the non-DT stuff, we should implement a new machine_desc hook for it
>>> instead of hijacking one of the existing callbacks.
>> none of the PL310 support requires non-DT. PL310 is needed for OMAP4
>> and AM437x both of which are DT only.
> Right, so the simple answer for the time being is to kill most of
> omap_l2_cache_init(), leaving just the ioremap() behind.  Everything
> else can go into the machine_desc structures, and OMAP4 and AM437x
> can both benefit from initialising the L2 cache at exactly the same
> point as most other platforms.

I hope I did it right: https://lkml.org/lkml/2014/12/23/158
Please test, because I have no access to Omap hardware.

Best regards
Russell King - ARM Linux Dec. 23, 2014, 11:10 a.m. UTC | #9
On Tue, Dec 23, 2014 at 12:00:00PM +0100, Marek Szyprowski wrote:
> I hope I did it right: https://lkml.org/lkml/2014/12/23/158
> Please test, because I have no access to Omap hardware.

Patch 1/8 looks like I'd expect it to.  Nishanth, please test with your
failing scenario, thanks.
Nishanth Menon Dec. 23, 2014, 4:05 p.m. UTC | #10
On 12/23/2014 05:10 AM, Russell King - ARM Linux wrote:
> On Tue, Dec 23, 2014 at 12:00:00PM +0100, Marek Szyprowski wrote:
>> I hope I did it right: https://lkml.org/lkml/2014/12/23/158
>> Please test, because I have no access to Omap hardware.
> 
> Patch 1/8 looks like I'd expect it to.  Nishanth, please test with your
> failing scenario, thanks.
> 
3.19-rc1
 1:                      am437x-sk: BOOT: PASS:
http://slexy.org/raw/s2ARFeCcDp
 2:                    am43xx-epos: BOOT: PASS:
http://slexy.org/raw/s2Kzli0GYS
 3:                   am43xx-gpevm: BOOT: PASS:
http://slexy.org/raw/s2DMkJGmdF
 4:                  pandaboard-es: BOOT: PASS:
http://slexy.org/raw/s204jfptrr
 5:             pandaboard-vanilla: BOOT: PASS:
http://slexy.org/raw/s2cbd82pMI
 6:                        sdp4430: BOOT: PASS:
http://slexy.org/raw/s21bzlzUNr
TOTAL = 6 boards, Booted Boards = 6, No Boot boards = 0


against the patch series: (all am437x platforms fail)

testing
 1:                      am437x-sk: BOOT: FAIL:
http://slexy.org/raw/s2yhDXyF7o
 2:                    am43xx-epos: BOOT: FAIL:
http://slexy.org/raw/s2m9cSdt55
 3:                   am43xx-gpevm: BOOT: FAIL:
http://slexy.org/raw/s2MqFFBuIl
 4:                  pandaboard-es: BOOT: PASS:
http://slexy.org/raw/s2XwggyB0a
 5:             pandaboard-vanilla: BOOT: PASS:
http://slexy.org/raw/s25WDvtbob
 6:                        sdp4430: BOOT: PASS:
http://slexy.org/raw/s2gjynR1Co
TOTAL = 6 boards, Booted Boards = 3, No Boot boards = 3

I am trying to understand what is different in AM437x except that it
is a single A9 instead of OMAP4 style dual A9s.
diff mbox

Patch

diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 608079a..0bc6bd9 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -170,12 +170,19 @@  static const char *const omap4_boards_compat[] __initconst = {
 	NULL,
 };
 
+
+static void tmp_init_irq(void)
+{
+	omap_l2_cache_init();
+	omap_gic_of_init();
+}
+
 DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)")
 	.reserve	= omap_reserve,
 	.smp		= smp_ops(omap4_smp_ops),
 	.map_io		= omap4_map_io,
 	.init_early	= omap4430_init_early,
-	.init_irq	= omap_gic_of_init,
+	.init_irq	= tmp_init_irq,
 	.init_machine	= omap_generic_init,
 	.init_late	= omap4430_init_late,
 	.init_time	= omap4_local_timer_init,
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 03cbb16..f97847d 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -627,7 +627,6 @@  void __init omap4430_init_early(void)
 	omap44xx_clockdomains_init();
 	omap44xx_hwmod_init();
 	omap_hwmod_init_postsetup();
-	omap_l2_cache_init();
 	omap_clk_soc_init = omap4xxx_dt_clk_init;
 }
 
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index e5948c5..0ca90db 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -848,8 +848,11 @@  static int __init __l2c_init(const struct l2c_init_data *data,
 	 * context from callers can access the structure.
 	 */
 	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
-	if (!l2x0_data)
+	if (!l2x0_data) {
+		pr_err("%s no mem %d\n", __func__, sizeof(*data));
+		dump_stack();
 		return -ENOMEM;
+	}
 
 	/*
 	 * Sanity check the aux values.  aux_mask is the bits we preserve
@@ -1647,6 +1650,7 @@  int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	struct device_node *np;
 	struct resource res;
 	u32 cache_id, old_aux;
+	int r;
 
 	np = of_find_matching_node(NULL, l2x0_ids);
 	if (!np)
@@ -1693,6 +1697,8 @@  int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	else
 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
 
-	return __l2c_init(data, aux_val, aux_mask, cache_id);
+	r = __l2c_init(data, aux_val, aux_mask, cache_id);
+	pr_err("%s: %d\n", __func__, r);
+	return r;
 }