diff mbox series

[v3,1/1] soc: fujitsu: Add A64FX diagnostic interrupt driver

Message ID 20220331092235.3000787-2-hasegawa-hitomi@fujitsu.com (mailing list archive)
State Superseded
Headers show
Series soc: fujitsu: Add A64FX diagnostic interrupt driver | expand

Commit Message

hasegawa-hitomi@fujitsu.com March 31, 2022, 9:22 a.m. UTC
Enable diagnostic interrupts for the A64FX.
This is done using a pseudo-NMI.

Signed-off-by: Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>
---
 MAINTAINERS                      |   5 +
 drivers/soc/Kconfig              |   1 +
 drivers/soc/Makefile             |   1 +
 drivers/soc/fujitsu/Kconfig      |  13 +++
 drivers/soc/fujitsu/Makefile     |   3 +
 drivers/soc/fujitsu/a64fx-diag.c | 151 +++++++++++++++++++++++++++++++
 6 files changed, 174 insertions(+)
 create mode 100644 drivers/soc/fujitsu/Kconfig
 create mode 100644 drivers/soc/fujitsu/Makefile
 create mode 100644 drivers/soc/fujitsu/a64fx-diag.c

Comments

Greg Kroah-Hartman March 31, 2022, 11:49 a.m. UTC | #1
On Thu, Mar 31, 2022 at 06:22:35PM +0900, Hitomi Hasegawa wrote:
> Enable diagnostic interrupts for the A64FX.
> This is done using a pseudo-NMI.

I do not understand what this driver is, sorry.  Can you please provide
more information in the changelog text for what this is, who would use
it, and how it will be interacted with.

> 
> Signed-off-by: Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>
> ---
>  MAINTAINERS                      |   5 +
>  drivers/soc/Kconfig              |   1 +
>  drivers/soc/Makefile             |   1 +
>  drivers/soc/fujitsu/Kconfig      |  13 +++
>  drivers/soc/fujitsu/Makefile     |   3 +
>  drivers/soc/fujitsu/a64fx-diag.c | 151 +++++++++++++++++++++++++++++++
>  6 files changed, 174 insertions(+)
>  create mode 100644 drivers/soc/fujitsu/Kconfig
>  create mode 100644 drivers/soc/fujitsu/Makefile
>  create mode 100644 drivers/soc/fujitsu/a64fx-diag.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index cd0f68d4a34a..dc35c81ba917 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -241,6 +241,11 @@ F:	include/trace/events/9p.h
>  F:	include/uapi/linux/virtio_9p.h
>  F:	net/9p/
>  
> +A64FX DIAG DRIVER
> +M:	Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>
> +S:	Supported
> +F:	drivers/soc/fujitsu/a64fx-diag.c
> +
>  A8293 MEDIA DRIVER
>  M:	Antti Palosaari <crope@iki.fi>
>  L:	linux-media@vger.kernel.org
> diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
> index a8562678c437..e10eb27e1e7e 100644
> --- a/drivers/soc/Kconfig
> +++ b/drivers/soc/Kconfig
> @@ -9,6 +9,7 @@ source "drivers/soc/atmel/Kconfig"
>  source "drivers/soc/bcm/Kconfig"
>  source "drivers/soc/canaan/Kconfig"
>  source "drivers/soc/fsl/Kconfig"
> +source "drivers/soc/fujitsu/Kconfig"
>  source "drivers/soc/imx/Kconfig"
>  source "drivers/soc/ixp4xx/Kconfig"
>  source "drivers/soc/litex/Kconfig"
> diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
> index adb30c2d4fea..b12b0b03ad47 100644
> --- a/drivers/soc/Makefile
> +++ b/drivers/soc/Makefile
> @@ -12,6 +12,7 @@ obj-$(CONFIG_SOC_CANAAN)	+= canaan/
>  obj-$(CONFIG_ARCH_DOVE)		+= dove/
>  obj-$(CONFIG_MACH_DOVE)		+= dove/
>  obj-y				+= fsl/
> +obj-y				+= fujitsu/
>  obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
>  obj-y				+= imx/
>  obj-y				+= ixp4xx/
> diff --git a/drivers/soc/fujitsu/Kconfig b/drivers/soc/fujitsu/Kconfig
> new file mode 100644
> index 000000000000..b41cdac67637
> --- /dev/null
> +++ b/drivers/soc/fujitsu/Kconfig
> @@ -0,0 +1,13 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +menu "fujitsu SoC drivers"
> +
> +config A64FX_DIAG
> +	bool "A64FX diag driver"
> +	depends on ARM64

What about ACPI?  Shouldn't this code depend on that?

> +	help
> +	  Say Y here if you want to enable diag interrupt on A64FX.

What is A64FX?

> +	  This driver uses pseudo-NMI if available.

What does this mean?

> +
> +	  If unsure, say N.

No module?  Why not?

> +
> +endmenu
> diff --git a/drivers/soc/fujitsu/Makefile b/drivers/soc/fujitsu/Makefile
> new file mode 100644
> index 000000000000..945bc1c14ad0
> --- /dev/null
> +++ b/drivers/soc/fujitsu/Makefile
> @@ -0,0 +1,3 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +obj-$(CONFIG_A64FX_DIAG)	+= a64fx-diag.o
> diff --git a/drivers/soc/fujitsu/a64fx-diag.c b/drivers/soc/fujitsu/a64fx-diag.c
> new file mode 100644
> index 000000000000..c6f895cf8912
> --- /dev/null
> +++ b/drivers/soc/fujitsu/a64fx-diag.c
> @@ -0,0 +1,151 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * A64FX diag driver.

No copyright information?  Are you sure?

> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/sysrq.h>
> +
> +#define A64FX_DIAG_IRQ 1
> +#define BMC_DIAG_INTERRUPT_STATUS_OFFSET (0x0044)
> +#define BMC_INTERRUPT_STATUS_MASK ((1U) << 31)

BIT()?

> +#define BMC_DIAG_INTERRUPT_ENABLE_OFFSET (0x0040)
> +#define BMC_INTERRUPT_ENABLE_MASK ((1U) << 31)

BIT()?

> +
> +struct a64fx_diag_priv {
> +	int irq;
> +	void __iomem *mmsc_reg_base;
> +	bool has_nmi;
> +};
> +
> +static irqreturn_t a64fx_diag_handler(int irq, void *dev_id)
> +{
> +	handle_sysrq('c');


Why is this calling this sysrq call?  From an interrupt?  Why?

And you are hard-coding "c", are you sure?

> +
> +	return IRQ_HANDLED;
> +}
> +
> +static void a64fx_diag_interrupt_clear(struct a64fx_diag_priv *priv)
> +{
> +	u32 mmsc;
> +	const void __iomem *diag_status_reg_addr;
> +
> +	diag_status_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_STATUS_OFFSET;
> +	mmsc = readl(diag_status_reg_addr);
> +	if (mmsc & BMC_INTERRUPT_STATUS_MASK)
> +		writel(BMC_INTERRUPT_STATUS_MASK, (void *)diag_status_reg_addr);

No need to wait for the write to complete?

You shouldn't have to cast diag_status_reg_addr, right?

> +}
> +
> +static void a64fx_diag_interrupt_enable(struct a64fx_diag_priv *priv)
> +{
> +	u32 mmsc;
> +	const void __iomem *diag_enable_reg_addr;
> +
> +	diag_enable_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_ENABLE_OFFSET;
> +	mmsc = readl(diag_enable_reg_addr);
> +	if (!(mmsc & BMC_INTERRUPT_ENABLE_MASK)) {
> +		mmsc |= BMC_INTERRUPT_STATUS_MASK;
> +		writel(mmsc, (void *)diag_enable_reg_addr);
> +	}
> +}
> +
> +static void a64fx_diag_interrupt_disable(struct a64fx_diag_priv *priv)
> +{
> +	u32 mmsc;
> +	const void __iomem *diag_enable_reg_addr;
> +
> +	diag_enable_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_ENABLE_OFFSET;
> +	mmsc = readl(diag_enable_reg_addr);
> +	if (mmsc & BMC_INTERRUPT_ENABLE_MASK) {
> +		mmsc &= ~BMC_INTERRUPT_ENABLE_MASK;
> +		writel(mmsc, (void *)diag_enable_reg_addr);
> +	}
> +}
> +
> +static int a64fx_diag_probe(struct platform_device *pdev)
> +{
> +	int ret;
> +	unsigned long irq_flags;
> +	struct device *dev = &pdev->dev;
> +	struct a64fx_diag_priv *priv;
> +
> +	priv = devm_kzalloc(dev, sizeof(struct a64fx_diag_priv), GFP_KERNEL);
> +	if (priv == NULL)
> +		return -ENOMEM;
> +
> +	priv->mmsc_reg_base = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(priv->mmsc_reg_base))
> +		return PTR_ERR(priv->mmsc_reg_base);
> +
> +	priv->irq = platform_get_irq(pdev, A64FX_DIAG_IRQ);
> +	if (priv->irq < 0)
> +		return priv->irq;
> +
> +	platform_set_drvdata(pdev, priv);
> +
> +	a64fx_diag_interrupt_clear(priv);
> +	a64fx_diag_interrupt_enable(priv);
> +
> +	irq_flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_AUTOEN |
> +		   IRQF_NO_THREAD;
> +	ret = request_nmi(priv->irq, &a64fx_diag_handler, irq_flags,
> +			"a64fx_diag_nmi", NULL);
> +	if (ret) {
> +		ret = request_irq(priv->irq, &a64fx_diag_handler,
> +				irq_flags, "a64fx_diag_irq", NULL);
> +		if (ret) {
> +			dev_err(dev, "cannot register IRQ %d\n", ret);
> +			return ret;
> +		}
> +		enable_irq(priv->irq);
> +		priv->has_nmi = false;
> +		dev_info(dev, "registered for IRQ %d\n", priv->irq);
> +	} else {
> +		enable_nmi(priv->irq);
> +		priv->has_nmi = true;
> +		dev_info(dev, "registered for NMI %d\n", priv->irq);

When drivers are successful, they are quiet.  No need for the noise
here.

thanks,

greg k-h
Arnd Bergmann March 31, 2022, 3:44 p.m. UTC | #2
On Thu, Mar 31, 2022 at 1:49 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> > +
> > +static irqreturn_t a64fx_diag_handler(int irq, void *dev_id)
> > +{
> > +     handle_sysrq('c');
>
>
> Why is this calling this sysrq call?  From an interrupt?  Why?
>
> And you are hard-coding "c", are you sure?

This is an actual sysrq driver in the traditional sense, where you can send
a single interrupt to the machine from the outside over a side channel.

I suggested sysrq instead of just panic() to make it a bit more flexible.
Unfortunately there is no additional data, so it comes down to always
sending the same character.

It would be possible to make that character configurable with a module
parameter or something like that, but I'm not sure that is an improvement.
Maybe you have another idea for this.

> > +static void a64fx_diag_interrupt_clear(struct a64fx_diag_priv *priv)
> > +{
> > +     u32 mmsc;
> > +     const void __iomem *diag_status_reg_addr;
> > +
> > +     diag_status_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_STATUS_OFFSET;
> > +     mmsc = readl(diag_status_reg_addr);
> > +     if (mmsc & BMC_INTERRUPT_STATUS_MASK)
> > +             writel(BMC_INTERRUPT_STATUS_MASK, (void *)diag_status_reg_addr);
>
> No need to wait for the write to complete?
>
> You shouldn't have to cast diag_status_reg_addr, right?

I think the cast is needed because the declaration of
'diag_status_reg_addr' incorrectly
marks it as 'const'. However, this should still trigger a 'make C=1'
warning with sparse
because it is now missing the __iomem annotation.

The correct solution of course is to remove both the cast and the 'const'.

       Arnd
hasegawa-hitomi@fujitsu.com April 8, 2022, 10:32 a.m. UTC | #3
Hi Greg,

> > Enable diagnostic interrupts for the A64FX.
> > This is done using a pseudo-NMI.
> 
> I do not understand what this driver is, sorry.  Can you please provide more
> information in the changelog text for what this is, who would use it, and how it will
> be interacted with.

I understand. I will add a description in the next version.

> > +config A64FX_DIAG
> > +	bool "A64FX diag driver"
> > +	depends on ARM64
> 
> What about ACPI?  Shouldn't this code depend on that?

Okey. I will make it dependent on ACPI.

> > +	help
> > +	  Say Y here if you want to enable diag interrupt on A64FX.
> 
> What is A64FX?

A64FX is a processor designed by Fujitsu.
For the sake of clarity, I will describe it as "Fujitsu A64FX".

> > +	  This driver uses pseudo-NMI if available.
> 
> What does this mean?

This driver uses the pseudo-NMI feature to perform diagnostic interrupts
for A64FX. However, I felt that it was superfluous to give this explanation
here, so I will delete this sentence.

> > +	  If unsure, say N.
> 
> No module?  Why not?

request_nmi() is not EXPORT_SYMBOL. So this driver cannot be a module.

> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * A64FX diag driver.
> 
> No copyright information?  Are you sure?

I will add copyright information.

> > +#define BMC_DIAG_INTERRUPT_STATUS_OFFSET (0x0044) #define
> > +BMC_INTERRUPT_STATUS_MASK ((1U) << 31)
> 
> BIT()?
> 
> > +#define BMC_DIAG_INTERRUPT_ENABLE_OFFSET (0x0040) #define
> > +BMC_INTERRUPT_ENABLE_MASK ((1U) << 31)
> 
> BIT()?

Okey, I will use BIT().

> > +static irqreturn_t a64fx_diag_handler(int irq, void *dev_id) {
> > +	handle_sysrq('c');
> 
> 
> Why is this calling this sysrq call?  From an interrupt?  Why?
> 
> And you are hard-coding "c", are you sure?

As mentioned by Arnd, I only called panic () at first, but after receiving
his suggestion, I decided to call handle_sysrq ('c').
This driver only supports the function that causes a panic when receiving
a diagnostic interrupt from the outside, so "c" is coded.
Also, no data is added when a diagnostic interrupt is sent.

> > +	if (mmsc & BMC_INTERRUPT_STATUS_MASK)
> > +		writel(BMC_INTERRUPT_STATUS_MASK, (void
> *)diag_status_reg_addr);
> 
> No need to wait for the write to complete?
> 
> You shouldn't have to cast diag_status_reg_addr, right?

Due to the specifications of the machine, there is no problem even
if there is no write wait processing.

I will remove constant and (void *). Thank you for pointing out.

> > +		enable_irq(priv->irq);
> > +		priv->has_nmi = false;
> > +		dev_info(dev, "registered for IRQ %d\n", priv->irq);
> > +	} else {
> > +		enable_nmi(priv->irq);
> > +		priv->has_nmi = true;
> > +		dev_info(dev, "registered for NMI %d\n", priv->irq);
> 
> When drivers are successful, they are quiet.  No need for the noise here.

I will remove the message for a successful NMI/IRQ registration.

Thank you.
Hitomi Hasegawa
Itaru Kitayama April 8, 2022, 10:44 a.m. UTC | #4
Wondering if, Greg you approves Hitomi’s updated series, then you pick up
it whatever the tree you maintain?

Thanks,
Itaru.

On Fri, Apr 8, 2022 at 19:36 hasegawa-hitomi@fujitsu.com <
hasegawa-hitomi@fujitsu.com> wrote:

> Hi Greg,
>
> > > Enable diagnostic interrupts for the A64FX.
> > > This is done using a pseudo-NMI.
> >
> > I do not understand what this driver is, sorry.  Can you please provide
> more
> > information in the changelog text for what this is, who would use it,
> and how it will
> > be interacted with.
>
> I understand. I will add a description in the next version.
>
> > > +config A64FX_DIAG
> > > +   bool "A64FX diag driver"
> > > +   depends on ARM64
> >
> > What about ACPI?  Shouldn't this code depend on that?
>
> Okey. I will make it dependent on ACPI.
>
> > > +   help
> > > +     Say Y here if you want to enable diag interrupt on A64FX.
> >
> > What is A64FX?
>
> A64FX is a processor designed by Fujitsu.
> For the sake of clarity, I will describe it as "Fujitsu A64FX".
>
> > > +     This driver uses pseudo-NMI if available.
> >
> > What does this mean?
>
> This driver uses the pseudo-NMI feature to perform diagnostic interrupts
> for A64FX. However, I felt that it was superfluous to give this explanation
> here, so I will delete this sentence.
>
> > > +     If unsure, say N.
> >
> > No module?  Why not?
>
> request_nmi() is not EXPORT_SYMBOL. So this driver cannot be a module.
>
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * A64FX diag driver.
> >
> > No copyright information?  Are you sure?
>
> I will add copyright information.
>
> > > +#define BMC_DIAG_INTERRUPT_STATUS_OFFSET (0x0044) #define
> > > +BMC_INTERRUPT_STATUS_MASK ((1U) << 31)
> >
> > BIT()?
> >
> > > +#define BMC_DIAG_INTERRUPT_ENABLE_OFFSET (0x0040) #define
> > > +BMC_INTERRUPT_ENABLE_MASK ((1U) << 31)
> >
> > BIT()?
>
> Okey, I will use BIT().
>
> > > +static irqreturn_t a64fx_diag_handler(int irq, void *dev_id) {
> > > +   handle_sysrq('c');
> >
> >
> > Why is this calling this sysrq call?  From an interrupt?  Why?
> >
> > And you are hard-coding "c", are you sure?
>
> As mentioned by Arnd, I only called panic () at first, but after receiving
> his suggestion, I decided to call handle_sysrq ('c').
> This driver only supports the function that causes a panic when receiving
> a diagnostic interrupt from the outside, so "c" is coded.
> Also, no data is added when a diagnostic interrupt is sent.
>
> > > +   if (mmsc & BMC_INTERRUPT_STATUS_MASK)
> > > +           writel(BMC_INTERRUPT_STATUS_MASK, (void
> > *)diag_status_reg_addr);
> >
> > No need to wait for the write to complete?
> >
> > You shouldn't have to cast diag_status_reg_addr, right?
>
> Due to the specifications of the machine, there is no problem even
> if there is no write wait processing.
>
> I will remove constant and (void *). Thank you for pointing out.
>
> > > +           enable_irq(priv->irq);
> > > +           priv->has_nmi = false;
> > > +           dev_info(dev, "registered for IRQ %d\n", priv->irq);
> > > +   } else {
> > > +           enable_nmi(priv->irq);
> > > +           priv->has_nmi = true;
> > > +           dev_info(dev, "registered for NMI %d\n", priv->irq);
> >
> > When drivers are successful, they are quiet.  No need for the noise here.
>
> I will remove the message for a successful NMI/IRQ registration.
>
> Thank you.
> Hitomi Hasegawa
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Daniel Thompson April 8, 2022, 1:32 p.m. UTC | #5
On Thu, Mar 31, 2022 at 05:44:55PM +0200, Arnd Bergmann wrote:
> On Thu, Mar 31, 2022 at 1:49 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> > > +
> > > +static irqreturn_t a64fx_diag_handler(int irq, void *dev_id)
> > > +{
> > > +     handle_sysrq('c');
> >
> >
> > Why is this calling this sysrq call?  From an interrupt?  Why?
> >
> > And you are hard-coding "c", are you sure?
> 
> This is an actual sysrq driver in the traditional sense, where you can send
> a single interrupt to the machine from the outside over a side channel.
> 
> I suggested sysrq instead of just panic() to make it a bit more flexible.
> Unfortunately there is no additional data, so it comes down to always
> sending the same character.
> 
> It would be possible to make that character configurable with a module
> parameter or something like that, but I'm not sure that is an improvement.
> Maybe you have another idea for this.

Given the interrupt can be dismissed then offering non-fatal actions in
response the chassis command seems reasonable.

There is some prior art for this sort of feature. AFAICT SGI UV has a
similar mechanism that can send an NMI-with-no-side-channel to the
kernel. The corresponding driver offers a range of actions using a
module parameter:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180

I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
it is just obfuscation. However it is certainly seems attractive to be
able to reuse handle_sysrq() to provide a more powerful set of actions.


Daniel.
Arnd Bergmann April 8, 2022, 2:17 p.m. UTC | #6
On Fri, Apr 8, 2022 at 3:32 PM Daniel Thompson
<daniel.thompson@linaro.org> wrote:
> On Thu, Mar 31, 2022 at 05:44:55PM +0200, Arnd Bergmann wrote:
>
> There is some prior art for this sort of feature. AFAICT SGI UV has a
> similar mechanism that can send an NMI-with-no-side-channel to the
> kernel. The corresponding driver offers a range of actions using a
> module parameter:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180
>
> I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
> it is just obfuscation. However it is certainly seems attractive to be
> able to reuse handle_sysrq() to provide a more powerful set of actions.

How about a module parameter that allows picking a sysrq character then?

        Arnd
Greg Kroah-Hartman April 8, 2022, 2:21 p.m. UTC | #7
On Fri, Apr 08, 2022 at 04:17:16PM +0200, Arnd Bergmann wrote:
> On Fri, Apr 8, 2022 at 3:32 PM Daniel Thompson
> <daniel.thompson@linaro.org> wrote:
> > On Thu, Mar 31, 2022 at 05:44:55PM +0200, Arnd Bergmann wrote:
> >
> > There is some prior art for this sort of feature. AFAICT SGI UV has a
> > similar mechanism that can send an NMI-with-no-side-channel to the
> > kernel. The corresponding driver offers a range of actions using a
> > module parameter:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180
> >
> > I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
> > it is just obfuscation. However it is certainly seems attractive to be
> > able to reuse handle_sysrq() to provide a more powerful set of actions.
> 
> How about a module parameter that allows picking a sysrq character then?

Module parameters are so 1990, as this is a platform device, why not get
it from DT?

thanks,

greg k-h
Arnd Bergmann April 8, 2022, 2:49 p.m. UTC | #8
On Fri, Apr 8, 2022 at 4:21 PM Greg KH <gregkh@linuxfoundation.org> wrote:
>
> On Fri, Apr 08, 2022 at 04:17:16PM +0200, Arnd Bergmann wrote:
> > On Fri, Apr 8, 2022 at 3:32 PM Daniel Thompson
> > <daniel.thompson@linaro.org> wrote:
> > > On Thu, Mar 31, 2022 at 05:44:55PM +0200, Arnd Bergmann wrote:
> > >
> > > There is some prior art for this sort of feature. AFAICT SGI UV has a
> > > similar mechanism that can send an NMI-with-no-side-channel to the
> > > kernel. The corresponding driver offers a range of actions using a
> > > module parameter:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180
> > >
> > > I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
> > > it is just obfuscation. However it is certainly seems attractive to be
> > > able to reuse handle_sysrq() to provide a more powerful set of actions.
> >
> > How about a module parameter that allows picking a sysrq character then?
>
> Module parameters are so 1990, as this is a platform device, why not get
> it from DT?

This machine doesn't use DT. I suppose the same could be done with an EFI
variable, but with a module parameter you get the added benefit of having both
a boot time kernel command line argument, and the option to override it at
run time.

         Arnd
Greg Kroah-Hartman April 8, 2022, 2:59 p.m. UTC | #9
On Fri, Apr 08, 2022 at 04:49:31PM +0200, Arnd Bergmann wrote:
> On Fri, Apr 8, 2022 at 4:21 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> >
> > On Fri, Apr 08, 2022 at 04:17:16PM +0200, Arnd Bergmann wrote:
> > > On Fri, Apr 8, 2022 at 3:32 PM Daniel Thompson
> > > <daniel.thompson@linaro.org> wrote:
> > > > On Thu, Mar 31, 2022 at 05:44:55PM +0200, Arnd Bergmann wrote:
> > > >
> > > > There is some prior art for this sort of feature. AFAICT SGI UV has a
> > > > similar mechanism that can send an NMI-with-no-side-channel to the
> > > > kernel. The corresponding driver offers a range of actions using a
> > > > module parameter:
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180
> > > >
> > > > I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
> > > > it is just obfuscation. However it is certainly seems attractive to be
> > > > able to reuse handle_sysrq() to provide a more powerful set of actions.
> > >
> > > How about a module parameter that allows picking a sysrq character then?
> >
> > Module parameters are so 1990, as this is a platform device, why not get
> > it from DT?
> 
> This machine doesn't use DT. I suppose the same could be done with an EFI
> variable, but with a module parameter you get the added benefit of having both
> a boot time kernel command line argument, and the option to override it at
> run time.

I thought it was a platform device?  Worst case, make it a sysfs file :)
Daniel Thompson April 8, 2022, 3:02 p.m. UTC | #10
On Fri, Apr 08, 2022 at 04:49:31PM +0200, Arnd Bergmann wrote:
> On Fri, Apr 8, 2022 at 4:21 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> >
> > On Fri, Apr 08, 2022 at 04:17:16PM +0200, Arnd Bergmann wrote:
> > > On Fri, Apr 8, 2022 at 3:32 PM Daniel Thompson
> > > <daniel.thompson@linaro.org> wrote:
> > > > On Thu, Mar 31, 2022 at 05:44:55PM +0200, Arnd Bergmann wrote:
> > > >
> > > > There is some prior art for this sort of feature. AFAICT SGI UV has a
> > > > similar mechanism that can send an NMI-with-no-side-channel to the
> > > > kernel. The corresponding driver offers a range of actions using a
> > > > module parameter:
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180
> > > >
> > > > I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
> > > > it is just obfuscation. However it is certainly seems attractive to be
> > > > able to reuse handle_sysrq() to provide a more powerful set of actions.
> > >
> > > How about a module parameter that allows picking a sysrq character then?
> >
> > Module parameters are so 1990, as this is a platform device, why not get
> > it from DT?
> 
> This machine doesn't use DT. I suppose the same could be done with an EFI
> variable, but with a module parameter you get the added benefit of having both
> a boot time kernel command line argument, and the option to override it at
> run time.

Pushing the decision on what action to take into firmware (whether that
is DT or ACPI) implies that the firmware is well positioned to make a
decision.  I don't think that is true here.

To me, it seems more like an admin choice... and admins are conditioned
to use kernel arguments.

If these type of diagnostics request were more common then perhaps we'd
be looking at a sysctl and call to handle_diagnostic_sysrq().


Daniel.
hasegawa-hitomi@fujitsu.com April 19, 2022, 8:36 a.m. UTC | #11
Hi Greg, Arnd, and Daniel,

> > > > > There is some prior art for this sort of feature. AFAICT SGI UV has a
> > > > > similar mechanism that can send an NMI-with-no-side-channel to the
> > > > > kernel. The corresponding driver offers a range of actions using a
> > > > > module parameter:
> > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/platform/uv/uv_nmi.c#n180
> > > > >
> > > > > I don't think a hardcoded 'c' makes any sense. With a hardcoded argument
> > > > > it is just obfuscation. However it is certainly seems attractive to be
> > > > > able to reuse handle_sysrq() to provide a more powerful set of actions.
> > > >
> > > > How about a module parameter that allows picking a sysrq character then?
> > >
> > > Module parameters are so 1990, as this is a platform device, why not get
> > > it from DT?
> >
> > This machine doesn't use DT. I suppose the same could be done with an EFI
> > variable, but with a module parameter you get the added benefit of having both
> > a boot time kernel command line argument, and the option to override it at
> > run time.
> 
> Pushing the decision on what action to take into firmware (whether that
> is DT or ACPI) implies that the firmware is well positioned to make a
> decision.  I don't think that is true here.
> 
> To me, it seems more like an admin choice... and admins are conditioned
> to use kernel arguments.
> 
> If these type of diagnostics request were more common then perhaps we'd
> be looking at a sysctl and call to handle_diagnostic_sysrq().

I understand that it is not appropriate to hardcode c.
How about using __setup() to add a new kernel parameter and allow the admin
to specify the sysrq command when booting?

Thank you
Hitomi Hasegawa
hasegawa-hitomi@fujitsu.com April 28, 2022, 2:15 a.m. UTC | #12
Hi Greg, Arnd, and Daniel,

> I understand that it is not appropriate to hardcode c.
> How about using __setup() to add a new kernel parameter and allow the admin
> to specify the sysrq command when booting?

I have received a lot of advice regarding sysrq, but after some consideration,
I would like to change to calling panic() directly as in v1 instead of sysrq.

If the administrator wants to request a diagnostic, I think they usually
expect crash with NMI like x86 and take a dump the kernel. It's not common
to handle diagnostic interrupts with sysrq now, so I don't think
it's necessary to make this driver extensible at this time.
Also, A64FX's BMC is not possible to send sideband data with the request,
so it is difficult to take advantage of the flexibility of sysrq.

If you have any comments on this, please reply.

Thank you
Hitomi Hasegawa
Greg Kroah-Hartman April 28, 2022, 5:45 a.m. UTC | #13
On Thu, Apr 28, 2022 at 02:15:52AM +0000, hasegawa-hitomi@fujitsu.com wrote:
> Hi Greg, Arnd, and Daniel,
> 
> > I understand that it is not appropriate to hardcode c.
> > How about using __setup() to add a new kernel parameter and allow the admin
> > to specify the sysrq command when booting?
> 
> I have received a lot of advice regarding sysrq, but after some consideration,
> I would like to change to calling panic() directly as in v1 instead of sysrq.

panic() seems good to me!
Arnd Bergmann April 28, 2022, 7:04 a.m. UTC | #14
On Thu, Apr 28, 2022 at 4:15 AM hasegawa-hitomi@fujitsu.com
<hasegawa-hitomi@fujitsu.com> wrote:
>
> Hi Greg, Arnd, and Daniel,
>
> > I understand that it is not appropriate to hardcode c.
> > How about using __setup() to add a new kernel parameter and allow the admin
> > to specify the sysrq command when booting?
>
> I have received a lot of advice regarding sysrq, but after some consideration,
> I would like to change to calling panic() directly as in v1 instead of sysrq.
>
> If the administrator wants to request a diagnostic, I think they usually
> expect crash with NMI like x86 and take a dump the kernel. It's not common
> to handle diagnostic interrupts with sysrq now, so I don't think
> it's necessary to make this driver extensible at this time.

Ok, fair enough. Matching x86 behavior sounds like a reasonable outcome.
If we want to make this configurable in the future, that can still be done then,
and it should work the same across architectures but adding the logic
in nmi_panic() directly.

        Arnd
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index cd0f68d4a34a..dc35c81ba917 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -241,6 +241,11 @@  F:	include/trace/events/9p.h
 F:	include/uapi/linux/virtio_9p.h
 F:	net/9p/
 
+A64FX DIAG DRIVER
+M:	Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>
+S:	Supported
+F:	drivers/soc/fujitsu/a64fx-diag.c
+
 A8293 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index a8562678c437..e10eb27e1e7e 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -9,6 +9,7 @@  source "drivers/soc/atmel/Kconfig"
 source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/canaan/Kconfig"
 source "drivers/soc/fsl/Kconfig"
+source "drivers/soc/fujitsu/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/ixp4xx/Kconfig"
 source "drivers/soc/litex/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index adb30c2d4fea..b12b0b03ad47 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -12,6 +12,7 @@  obj-$(CONFIG_SOC_CANAAN)	+= canaan/
 obj-$(CONFIG_ARCH_DOVE)		+= dove/
 obj-$(CONFIG_MACH_DOVE)		+= dove/
 obj-y				+= fsl/
+obj-y				+= fujitsu/
 obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-y				+= imx/
 obj-y				+= ixp4xx/
diff --git a/drivers/soc/fujitsu/Kconfig b/drivers/soc/fujitsu/Kconfig
new file mode 100644
index 000000000000..b41cdac67637
--- /dev/null
+++ b/drivers/soc/fujitsu/Kconfig
@@ -0,0 +1,13 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+menu "fujitsu SoC drivers"
+
+config A64FX_DIAG
+	bool "A64FX diag driver"
+	depends on ARM64
+	help
+	  Say Y here if you want to enable diag interrupt on A64FX.
+	  This driver uses pseudo-NMI if available.
+
+	  If unsure, say N.
+
+endmenu
diff --git a/drivers/soc/fujitsu/Makefile b/drivers/soc/fujitsu/Makefile
new file mode 100644
index 000000000000..945bc1c14ad0
--- /dev/null
+++ b/drivers/soc/fujitsu/Makefile
@@ -0,0 +1,3 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_A64FX_DIAG)	+= a64fx-diag.o
diff --git a/drivers/soc/fujitsu/a64fx-diag.c b/drivers/soc/fujitsu/a64fx-diag.c
new file mode 100644
index 000000000000..c6f895cf8912
--- /dev/null
+++ b/drivers/soc/fujitsu/a64fx-diag.c
@@ -0,0 +1,151 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * A64FX diag driver.
+ */
+
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sysrq.h>
+
+#define A64FX_DIAG_IRQ 1
+#define BMC_DIAG_INTERRUPT_STATUS_OFFSET (0x0044)
+#define BMC_INTERRUPT_STATUS_MASK ((1U) << 31)
+#define BMC_DIAG_INTERRUPT_ENABLE_OFFSET (0x0040)
+#define BMC_INTERRUPT_ENABLE_MASK ((1U) << 31)
+
+struct a64fx_diag_priv {
+	int irq;
+	void __iomem *mmsc_reg_base;
+	bool has_nmi;
+};
+
+static irqreturn_t a64fx_diag_handler(int irq, void *dev_id)
+{
+	handle_sysrq('c');
+
+	return IRQ_HANDLED;
+}
+
+static void a64fx_diag_interrupt_clear(struct a64fx_diag_priv *priv)
+{
+	u32 mmsc;
+	const void __iomem *diag_status_reg_addr;
+
+	diag_status_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_STATUS_OFFSET;
+	mmsc = readl(diag_status_reg_addr);
+	if (mmsc & BMC_INTERRUPT_STATUS_MASK)
+		writel(BMC_INTERRUPT_STATUS_MASK, (void *)diag_status_reg_addr);
+}
+
+static void a64fx_diag_interrupt_enable(struct a64fx_diag_priv *priv)
+{
+	u32 mmsc;
+	const void __iomem *diag_enable_reg_addr;
+
+	diag_enable_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_ENABLE_OFFSET;
+	mmsc = readl(diag_enable_reg_addr);
+	if (!(mmsc & BMC_INTERRUPT_ENABLE_MASK)) {
+		mmsc |= BMC_INTERRUPT_STATUS_MASK;
+		writel(mmsc, (void *)diag_enable_reg_addr);
+	}
+}
+
+static void a64fx_diag_interrupt_disable(struct a64fx_diag_priv *priv)
+{
+	u32 mmsc;
+	const void __iomem *diag_enable_reg_addr;
+
+	diag_enable_reg_addr = priv->mmsc_reg_base + BMC_DIAG_INTERRUPT_ENABLE_OFFSET;
+	mmsc = readl(diag_enable_reg_addr);
+	if (mmsc & BMC_INTERRUPT_ENABLE_MASK) {
+		mmsc &= ~BMC_INTERRUPT_ENABLE_MASK;
+		writel(mmsc, (void *)diag_enable_reg_addr);
+	}
+}
+
+static int a64fx_diag_probe(struct platform_device *pdev)
+{
+	int ret;
+	unsigned long irq_flags;
+	struct device *dev = &pdev->dev;
+	struct a64fx_diag_priv *priv;
+
+	priv = devm_kzalloc(dev, sizeof(struct a64fx_diag_priv), GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	priv->mmsc_reg_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->mmsc_reg_base))
+		return PTR_ERR(priv->mmsc_reg_base);
+
+	priv->irq = platform_get_irq(pdev, A64FX_DIAG_IRQ);
+	if (priv->irq < 0)
+		return priv->irq;
+
+	platform_set_drvdata(pdev, priv);
+
+	a64fx_diag_interrupt_clear(priv);
+	a64fx_diag_interrupt_enable(priv);
+
+	irq_flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_AUTOEN |
+		   IRQF_NO_THREAD;
+	ret = request_nmi(priv->irq, &a64fx_diag_handler, irq_flags,
+			"a64fx_diag_nmi", NULL);
+	if (ret) {
+		ret = request_irq(priv->irq, &a64fx_diag_handler,
+				irq_flags, "a64fx_diag_irq", NULL);
+		if (ret) {
+			dev_err(dev, "cannot register IRQ %d\n", ret);
+			return ret;
+		}
+		enable_irq(priv->irq);
+		priv->has_nmi = false;
+		dev_info(dev, "registered for IRQ %d\n", priv->irq);
+	} else {
+		enable_nmi(priv->irq);
+		priv->has_nmi = true;
+		dev_info(dev, "registered for NMI %d\n", priv->irq);
+	}
+
+	return 0;
+}
+
+static int __exit a64fx_diag_remove(struct platform_device *pdev)
+{
+	struct a64fx_diag_priv *priv = platform_get_drvdata(pdev);
+
+	a64fx_diag_interrupt_disable(priv);
+	a64fx_diag_interrupt_clear(priv);
+
+	if (priv->has_nmi)
+		free_nmi(priv->irq, NULL);
+	else
+		free_irq(priv->irq, NULL);
+
+	return 0;
+}
+
+static const struct acpi_device_id a64fx_diag_acpi_match[] = {
+	{ "FUJI2007", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, a64fx_diag_acpi_match);
+
+
+static struct platform_driver a64fx_diag_driver = {
+	.driver = {
+		.name = "a64fx_diag_driver",
+		.acpi_match_table = ACPI_PTR(a64fx_diag_acpi_match),
+	},
+	.probe = a64fx_diag_probe,
+	.remove = a64fx_diag_remove,
+};
+
+module_platform_driver(a64fx_diag_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>");
+MODULE_DESCRIPTION("A64FX diag driver");