Message ID | 20240531070719.2138-1-cyndis@kapsi.fi (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | gpu: host1x: Request syncpoint IRQs only during probe | expand |
On Fri, May 31, 2024 at 10:07:18AM GMT, Mikko Perttunen wrote: > From: Mikko Perttunen <mperttunen@nvidia.com> > > Syncpoint IRQs are currently requested in a code path that runs > during resume. Due to this, we get multiple overlapping registered > interrupt handlers as host1x is suspended and resumed. > > Rearrange interrupt code to only request IRQs during initialization. > > Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > --- > drivers/gpu/host1x/dev.h | 2 ++ > drivers/gpu/host1x/hw/intr_hw.c | 37 +++------------------------------ > drivers/gpu/host1x/intr.c | 21 ++++++++++++++++++- > drivers/gpu/host1x/intr.h | 5 +++++ > 4 files changed, 30 insertions(+), 35 deletions(-) Sorry, I only stumbled across this now. Applied, thanks. Thierry
Hi Mikko, On 31/05/2024 08:07, Mikko Perttunen wrote: > From: Mikko Perttunen <mperttunen@nvidia.com> > > Syncpoint IRQs are currently requested in a code path that runs > during resume. Due to this, we get multiple overlapping registered > interrupt handlers as host1x is suspended and resumed. > > Rearrange interrupt code to only request IRQs during initialization. > > Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > --- > drivers/gpu/host1x/dev.h | 2 ++ > drivers/gpu/host1x/hw/intr_hw.c | 37 +++------------------------------ > drivers/gpu/host1x/intr.c | 21 ++++++++++++++++++- > drivers/gpu/host1x/intr.h | 5 +++++ > 4 files changed, 30 insertions(+), 35 deletions(-) > > diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h > index 53af0334c6e1..d3855a1c6b47 100644 > --- a/drivers/gpu/host1x/dev.h > +++ b/drivers/gpu/host1x/dev.h > @@ -9,6 +9,7 @@ > #include <linux/device.h> > #include <linux/iommu.h> > #include <linux/iova.h> > +#include <linux/irqreturn.h> > #include <linux/platform_device.h> > #include <linux/reset.h> > > @@ -81,6 +82,7 @@ struct host1x_intr_ops { > void (*disable_syncpt_intr)(struct host1x *host, unsigned int id); > void (*disable_all_syncpt_intrs)(struct host1x *host); > int (*free_syncpt_irq)(struct host1x *host); > + irqreturn_t (*isr)(int irq, void *dev_id); > }; > > struct host1x_sid_entry { > diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c > index 9880e0c47235..415f8d7e4202 100644 > --- a/drivers/gpu/host1x/hw/intr_hw.c > +++ b/drivers/gpu/host1x/hw/intr_hw.c > @@ -6,18 +6,11 @@ > * Copyright (c) 2010-2013, NVIDIA Corporation. > */ > > -#include <linux/interrupt.h> > -#include <linux/irq.h> > #include <linux/io.h> > > #include "../intr.h" > #include "../dev.h" > > -struct host1x_intr_irq_data { > - struct host1x *host; > - u32 offset; > -}; > - > static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) > { > struct host1x_intr_irq_data *irq_data = dev_id; > @@ -54,7 +47,8 @@ static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host) > } > } > > -static void intr_hw_init(struct host1x *host, u32 cpm) > +static int > +host1x_intr_init_host_sync(struct host1x *host, u32 cpm) > { > #if HOST1X_HW < 6 > /* disable the ip_busy_timeout. this prevents write drops */ > @@ -85,32 +79,6 @@ static void intr_hw_init(struct host1x *host, u32 cpm) > host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); > } > #endif > -} > - > -static int > -host1x_intr_init_host_sync(struct host1x *host, u32 cpm) > -{ > - int err, i; > - struct host1x_intr_irq_data *irq_data; > - > - irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL); > - if (!irq_data) > - return -ENOMEM; > - > - host1x_hw_intr_disable_all_syncpt_intrs(host); > - > - for (i = 0; i < host->num_syncpt_irqs; i++) { > - irq_data[i].host = host; > - irq_data[i].offset = i; > - > - err = devm_request_irq(host->dev, host->syncpt_irqs[i], > - syncpt_thresh_isr, IRQF_SHARED, > - "host1x_syncpt", &irq_data[i]); > - if (err < 0) > - return err; > - } > - > - intr_hw_init(host, cpm); > > return 0; > } > @@ -144,4 +112,5 @@ static const struct host1x_intr_ops host1x_intr_ops = { > .enable_syncpt_intr = host1x_intr_enable_syncpt_intr, > .disable_syncpt_intr = host1x_intr_disable_syncpt_intr, > .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs, > + .isr = syncpt_thresh_isr, > }; > diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c > index 995bfa980837..b3285dd10180 100644 > --- a/drivers/gpu/host1x/intr.c > +++ b/drivers/gpu/host1x/intr.c > @@ -6,7 +6,7 @@ > */ > > #include <linux/clk.h> > - > +#include <linux/interrupt.h> > #include "dev.h" > #include "fence.h" > #include "intr.h" > @@ -100,7 +100,9 @@ void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id) > > int host1x_intr_init(struct host1x *host) > { > + struct host1x_intr_irq_data *irq_data; > unsigned int id; > + int i, err; > > mutex_init(&host->intr_mutex); > > @@ -111,6 +113,23 @@ int host1x_intr_init(struct host1x *host) > INIT_LIST_HEAD(&syncpt->fences.list); > } > > + irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL); > + if (!irq_data) > + return -ENOMEM; > + > + host1x_hw_intr_disable_all_syncpt_intrs(host); > + > + for (i = 0; i < host->num_syncpt_irqs; i++) { > + irq_data[i].host = host; > + irq_data[i].offset = i; > + > + err = devm_request_irq(host->dev, host->syncpt_irqs[i], > + host->intr_op->isr, IRQF_SHARED, > + "host1x_syncpt", &irq_data[i]); > + if (err < 0) > + return err; > + } > + > return 0; > } > > diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h > index 3b5610b525e5..11cdf13e32fe 100644 > --- a/drivers/gpu/host1x/intr.h > +++ b/drivers/gpu/host1x/intr.h > @@ -11,6 +11,11 @@ > struct host1x; > struct host1x_syncpt_fence; > > +struct host1x_intr_irq_data { > + struct host1x *host; > + u32 offset; > +}; > + > /* Initialize host1x sync point interrupt */ > int host1x_intr_init(struct host1x *host); > This change is causing a boot regression on Tegra186 with the latest -next. I have reverted this to confirm that this fixes the problem. I don't see any crash log but the board appears to just hang. Jon
On 06/09/2024 09:38, Jon Hunter wrote: > Hi Mikko, > > On 31/05/2024 08:07, Mikko Perttunen wrote: >> From: Mikko Perttunen <mperttunen@nvidia.com> >> >> Syncpoint IRQs are currently requested in a code path that runs >> during resume. Due to this, we get multiple overlapping registered >> interrupt handlers as host1x is suspended and resumed. >> >> Rearrange interrupt code to only request IRQs during initialization. >> >> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> ... > This change is causing a boot regression on Tegra186 with the latest > -next. I have reverted this to confirm that this fixes the problem. I > don't see any crash log but the board appears to just hang. I had a look at this and I was able to fix this by moving where we initialise the interrupts to after the PM runtime enable ... diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index b62e4f0e8130..ff98d4903cac 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -625,12 +625,6 @@ static int host1x_probe(struct platform_device *pdev) goto free_contexts; } - err = host1x_intr_init(host); - if (err) { - dev_err(&pdev->dev, "failed to initialize interrupts\n"); - goto deinit_syncpt; - } - pm_runtime_enable(&pdev->dev); err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); @@ -642,6 +636,12 @@ static int host1x_probe(struct platform_device *pdev) if (err) goto pm_disable; + err = host1x_intr_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize interrupts\n"); + goto pm_put; + } + host1x_debug_init(host); err = host1x_register(host); @@ -658,14 +658,11 @@ static int host1x_probe(struct platform_device *pdev) host1x_unregister(host); deinit_debugfs: host1x_debug_deinit(host); - + host1x_intr_deinit(host); +pm_put: pm_runtime_put_sync_suspend(&pdev->dev); pm_disable: pm_runtime_disable(&pdev->dev); - - host1x_intr_deinit(host); -deinit_syncpt: - host1x_syncpt_deinit(host); free_contexts: host1x_memory_context_list_free(&host->context_list); free_channels: Thierry, do you want to me to send a fix for the above or do you want to squash with the original (assuming that OK with Mikko)? Jon
On 24/09/2024 19:33, Jon Hunter wrote: > > On 06/09/2024 09:38, Jon Hunter wrote: >> Hi Mikko, >> >> On 31/05/2024 08:07, Mikko Perttunen wrote: >>> From: Mikko Perttunen <mperttunen@nvidia.com> >>> >>> Syncpoint IRQs are currently requested in a code path that runs >>> during resume. Due to this, we get multiple overlapping registered >>> interrupt handlers as host1x is suspended and resumed. >>> >>> Rearrange interrupt code to only request IRQs during initialization. >>> >>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > > ... > >> This change is causing a boot regression on Tegra186 with the latest >> -next. I have reverted this to confirm that this fixes the problem. I >> don't see any crash log but the board appears to just hang. > > > I had a look at this and I was able to fix this by moving where > we initialise the interrupts to after the PM runtime enable ... > > diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c > index b62e4f0e8130..ff98d4903cac 100644 > --- a/drivers/gpu/host1x/dev.c > +++ b/drivers/gpu/host1x/dev.c > @@ -625,12 +625,6 @@ static int host1x_probe(struct platform_device *pdev) > goto free_contexts; > } > > - err = host1x_intr_init(host); > - if (err) { > - dev_err(&pdev->dev, "failed to initialize interrupts\n"); > - goto deinit_syncpt; > - } > - > pm_runtime_enable(&pdev->dev); > > err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); > @@ -642,6 +636,12 @@ static int host1x_probe(struct platform_device *pdev) > if (err) > goto pm_disable; > > + err = host1x_intr_init(host); > + if (err) { > + dev_err(&pdev->dev, "failed to initialize interrupts\n"); > + goto pm_put; > + } > + > host1x_debug_init(host); > > err = host1x_register(host); > @@ -658,14 +658,11 @@ static int host1x_probe(struct platform_device *pdev) > host1x_unregister(host); > deinit_debugfs: > host1x_debug_deinit(host); > - > + host1x_intr_deinit(host); > +pm_put: > pm_runtime_put_sync_suspend(&pdev->dev); > pm_disable: > pm_runtime_disable(&pdev->dev); > - > - host1x_intr_deinit(host); > -deinit_syncpt: > - host1x_syncpt_deinit(host); > free_contexts: > host1x_memory_context_list_free(&host->context_list); > free_channels: > > > Thierry, do you want to me to send a fix for the above or do you > want to squash with the original (assuming that OK with Mikko)? Or can we drop from -next and have Mikko send a V2? Jon
On Tue, Sep 24, 2024 at 07:33:05PM GMT, Jon Hunter wrote: > > On 06/09/2024 09:38, Jon Hunter wrote: > > Hi Mikko, > > > > On 31/05/2024 08:07, Mikko Perttunen wrote: > > > From: Mikko Perttunen <mperttunen@nvidia.com> > > > > > > Syncpoint IRQs are currently requested in a code path that runs > > > during resume. Due to this, we get multiple overlapping registered > > > interrupt handlers as host1x is suspended and resumed. > > > > > > Rearrange interrupt code to only request IRQs during initialization. > > > > > > Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > > ... > > > This change is causing a boot regression on Tegra186 with the latest > > -next. I have reverted this to confirm that this fixes the problem. I > > don't see any crash log but the board appears to just hang. > > > I had a look at this and I was able to fix this by moving where > we initialise the interrupts to after the PM runtime enable ... > > diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c > index b62e4f0e8130..ff98d4903cac 100644 > --- a/drivers/gpu/host1x/dev.c > +++ b/drivers/gpu/host1x/dev.c > @@ -625,12 +625,6 @@ static int host1x_probe(struct platform_device *pdev) > goto free_contexts; > } > - err = host1x_intr_init(host); > - if (err) { > - dev_err(&pdev->dev, "failed to initialize interrupts\n"); > - goto deinit_syncpt; > - } > - > pm_runtime_enable(&pdev->dev); > err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); > @@ -642,6 +636,12 @@ static int host1x_probe(struct platform_device *pdev) > if (err) > goto pm_disable; > + err = host1x_intr_init(host); > + if (err) { > + dev_err(&pdev->dev, "failed to initialize interrupts\n"); > + goto pm_put; > + } > + I think the reason why this might fail now is because host1x_intr_init() ends up writing some registers during the call to the host1x_hw_intr_disable_all_syncpt_intrs() function, which would hang or crash if the device isn't on (which in turn happens during pm_runtime_resume_and_get()). Not sure exactly why this used to work because prior to Mikko's patch because the sequence was the same before. > host1x_debug_init(host); > err = host1x_register(host); > @@ -658,14 +658,11 @@ static int host1x_probe(struct platform_device *pdev) > host1x_unregister(host); > deinit_debugfs: > host1x_debug_deinit(host); > - > + host1x_intr_deinit(host); > +pm_put: > pm_runtime_put_sync_suspend(&pdev->dev); > pm_disable: > pm_runtime_disable(&pdev->dev); > - > - host1x_intr_deinit(host); > -deinit_syncpt: > - host1x_syncpt_deinit(host); > free_contexts: > host1x_memory_context_list_free(&host->context_list); > free_channels: > > > Thierry, do you want to me to send a fix for the above or do you > want to squash with the original (assuming that OK with Mikko)? In any case, this looks like a good fix, so please send a proper patch. This is merged through drm-misc, so squashing this into the original patch is not an option any longer. Thanks, Thierry
On 25/09/2024 13:58, Thierry Reding wrote: > On Tue, Sep 24, 2024 at 07:33:05PM GMT, Jon Hunter wrote: >> >> On 06/09/2024 09:38, Jon Hunter wrote: >>> Hi Mikko, >>> >>> On 31/05/2024 08:07, Mikko Perttunen wrote: >>>> From: Mikko Perttunen <mperttunen@nvidia.com> >>>> >>>> Syncpoint IRQs are currently requested in a code path that runs >>>> during resume. Due to this, we get multiple overlapping registered >>>> interrupt handlers as host1x is suspended and resumed. >>>> >>>> Rearrange interrupt code to only request IRQs during initialization. >>>> >>>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> >> >> ... >> >>> This change is causing a boot regression on Tegra186 with the latest >>> -next. I have reverted this to confirm that this fixes the problem. I >>> don't see any crash log but the board appears to just hang. >> >> >> I had a look at this and I was able to fix this by moving where >> we initialise the interrupts to after the PM runtime enable ... >> >> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c >> index b62e4f0e8130..ff98d4903cac 100644 >> --- a/drivers/gpu/host1x/dev.c >> +++ b/drivers/gpu/host1x/dev.c >> @@ -625,12 +625,6 @@ static int host1x_probe(struct platform_device *pdev) >> goto free_contexts; >> } >> - err = host1x_intr_init(host); >> - if (err) { >> - dev_err(&pdev->dev, "failed to initialize interrupts\n"); >> - goto deinit_syncpt; >> - } >> - >> pm_runtime_enable(&pdev->dev); >> err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); >> @@ -642,6 +636,12 @@ static int host1x_probe(struct platform_device *pdev) >> if (err) >> goto pm_disable; >> + err = host1x_intr_init(host); >> + if (err) { >> + dev_err(&pdev->dev, "failed to initialize interrupts\n"); >> + goto pm_put; >> + } >> + > > I think the reason why this might fail now is because host1x_intr_init() > ends up writing some registers during the call to the > host1x_hw_intr_disable_all_syncpt_intrs() function, which would hang or > crash if the device isn't on (which in turn happens during > pm_runtime_resume_and_get()). Yes that is exactly where it hung! > Not sure exactly why this used to work because prior to Mikko's patch > because the sequence was the same before. Previously host1x_intr_init() did not call host1x_hw_intr_disable_all_syncpt_intrs(). > >> host1x_debug_init(host); >> err = host1x_register(host); >> @@ -658,14 +658,11 @@ static int host1x_probe(struct platform_device *pdev) >> host1x_unregister(host); >> deinit_debugfs: >> host1x_debug_deinit(host); >> - >> + host1x_intr_deinit(host); >> +pm_put: >> pm_runtime_put_sync_suspend(&pdev->dev); >> pm_disable: >> pm_runtime_disable(&pdev->dev); >> - >> - host1x_intr_deinit(host); >> -deinit_syncpt: >> - host1x_syncpt_deinit(host); >> free_contexts: >> host1x_memory_context_list_free(&host->context_list); >> free_channels: >> >> >> Thierry, do you want to me to send a fix for the above or do you >> want to squash with the original (assuming that OK with Mikko)? > > In any case, this looks like a good fix, so please send a proper patch. > This is merged through drm-misc, so squashing this into the original > patch is not an option any longer. Yes will do! Jon
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 53af0334c6e1..d3855a1c6b47 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/iommu.h> #include <linux/iova.h> +#include <linux/irqreturn.h> #include <linux/platform_device.h> #include <linux/reset.h> @@ -81,6 +82,7 @@ struct host1x_intr_ops { void (*disable_syncpt_intr)(struct host1x *host, unsigned int id); void (*disable_all_syncpt_intrs)(struct host1x *host); int (*free_syncpt_irq)(struct host1x *host); + irqreturn_t (*isr)(int irq, void *dev_id); }; struct host1x_sid_entry { diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index 9880e0c47235..415f8d7e4202 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -6,18 +6,11 @@ * Copyright (c) 2010-2013, NVIDIA Corporation. */ -#include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/io.h> #include "../intr.h" #include "../dev.h" -struct host1x_intr_irq_data { - struct host1x *host; - u32 offset; -}; - static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) { struct host1x_intr_irq_data *irq_data = dev_id; @@ -54,7 +47,8 @@ static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host) } } -static void intr_hw_init(struct host1x *host, u32 cpm) +static int +host1x_intr_init_host_sync(struct host1x *host, u32 cpm) { #if HOST1X_HW < 6 /* disable the ip_busy_timeout. this prevents write drops */ @@ -85,32 +79,6 @@ static void intr_hw_init(struct host1x *host, u32 cpm) host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); } #endif -} - -static int -host1x_intr_init_host_sync(struct host1x *host, u32 cpm) -{ - int err, i; - struct host1x_intr_irq_data *irq_data; - - irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - - host1x_hw_intr_disable_all_syncpt_intrs(host); - - for (i = 0; i < host->num_syncpt_irqs; i++) { - irq_data[i].host = host; - irq_data[i].offset = i; - - err = devm_request_irq(host->dev, host->syncpt_irqs[i], - syncpt_thresh_isr, IRQF_SHARED, - "host1x_syncpt", &irq_data[i]); - if (err < 0) - return err; - } - - intr_hw_init(host, cpm); return 0; } @@ -144,4 +112,5 @@ static const struct host1x_intr_ops host1x_intr_ops = { .enable_syncpt_intr = host1x_intr_enable_syncpt_intr, .disable_syncpt_intr = host1x_intr_disable_syncpt_intr, .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs, + .isr = syncpt_thresh_isr, }; diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 995bfa980837..b3285dd10180 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -6,7 +6,7 @@ */ #include <linux/clk.h> - +#include <linux/interrupt.h> #include "dev.h" #include "fence.h" #include "intr.h" @@ -100,7 +100,9 @@ void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id) int host1x_intr_init(struct host1x *host) { + struct host1x_intr_irq_data *irq_data; unsigned int id; + int i, err; mutex_init(&host->intr_mutex); @@ -111,6 +113,23 @@ int host1x_intr_init(struct host1x *host) INIT_LIST_HEAD(&syncpt->fences.list); } + irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + host1x_hw_intr_disable_all_syncpt_intrs(host); + + for (i = 0; i < host->num_syncpt_irqs; i++) { + irq_data[i].host = host; + irq_data[i].offset = i; + + err = devm_request_irq(host->dev, host->syncpt_irqs[i], + host->intr_op->isr, IRQF_SHARED, + "host1x_syncpt", &irq_data[i]); + if (err < 0) + return err; + } + return 0; } diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index 3b5610b525e5..11cdf13e32fe 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -11,6 +11,11 @@ struct host1x; struct host1x_syncpt_fence; +struct host1x_intr_irq_data { + struct host1x *host; + u32 offset; +}; + /* Initialize host1x sync point interrupt */ int host1x_intr_init(struct host1x *host);