diff mbox series

[v3,1/2] gpio: mlxbf2: Introduce IRQ support

Message ID 20210923202216.16091-2-asmaa@nvidia.com (mailing list archive)
State Not Applicable
Headers show
Series gpio: mlxbf2: Introduce proper interrupt handling | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Asmaa Mnebhi Sept. 23, 2021, 8:22 p.m. UTC
Introduce standard IRQ handling in the gpio-mlxbf2.c
driver.

Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
---
 drivers/gpio/gpio-mlxbf2.c | 150 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 148 insertions(+), 2 deletions(-)

Comments

Andrew Lunn Sept. 24, 2021, 11:46 a.m. UTC | #1
> +static int
> +mlxbf2_gpio_irq_set_type(struct irq_data *irqd, unsigned int type)
> +{
> +	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
> +	struct mlxbf2_gpio_context *gs = gpiochip_get_data(gc);
> +	int offset = irqd_to_hwirq(irqd);
> +	unsigned long flags;
> +	bool fall = false;
> +	bool rise = false;
> +	u32 val;
> +
> +	switch (type & IRQ_TYPE_SENSE_MASK) {
> +	case IRQ_TYPE_EDGE_BOTH:
> +	case IRQ_TYPE_LEVEL_MASK:
> +		fall = true;
> +		rise = true;
> +		break;
> +	case IRQ_TYPE_EDGE_RISING:
> +	case IRQ_TYPE_LEVEL_HIGH:
> +		rise = true;
> +		break;
> +	case IRQ_TYPE_EDGE_FALLING:
> +	case IRQ_TYPE_LEVEL_LOW:
> +		fall = true;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}

I'm still not convinced this is correct. Rising edge is different to
high. Rising edge only ever interrupts once, level keeps interrupting
until the source is cleared. You cannot store the four different
options in two bits.

Linus, have you seen anything like this before?

       Andrew
Linus Walleij Sept. 24, 2021, 11:48 p.m. UTC | #2
On Fri, Sep 24, 2021 at 1:46 PM Andrew Lunn <andrew@lunn.ch> wrote:

> > +static int
> > +mlxbf2_gpio_irq_set_type(struct irq_data *irqd, unsigned int type)
> > +{
> > +     struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
> > +     struct mlxbf2_gpio_context *gs = gpiochip_get_data(gc);
> > +     int offset = irqd_to_hwirq(irqd);
> > +     unsigned long flags;
> > +     bool fall = false;
> > +     bool rise = false;
> > +     u32 val;
> > +
> > +     switch (type & IRQ_TYPE_SENSE_MASK) {
> > +     case IRQ_TYPE_EDGE_BOTH:
> > +     case IRQ_TYPE_LEVEL_MASK:
> > +             fall = true;
> > +             rise = true;
> > +             break;
> > +     case IRQ_TYPE_EDGE_RISING:
> > +     case IRQ_TYPE_LEVEL_HIGH:
> > +             rise = true;
> > +             break;
> > +     case IRQ_TYPE_EDGE_FALLING:
> > +     case IRQ_TYPE_LEVEL_LOW:
> > +             fall = true;
> > +             break;
> > +     default:
> > +             return -EINVAL;
> > +     }
>
> I'm still not convinced this is correct. Rising edge is different to
> high. Rising edge only ever interrupts once, level keeps interrupting
> until the source is cleared. You cannot store the four different
> options in two bits.
>
> Linus, have you seen anything like this before?

No, and I agree it looks weird.

There must be some explanation, what does the datasheet say?

Yours,
Linus Walleij
Asmaa Mnebhi Sept. 27, 2021, 2:04 p.m. UTC | #3
> > +static int
> > +mlxbf2_gpio_irq_set_type(struct irq_data *irqd, unsigned int type) 
> > +{
> > +     struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
> > +     struct mlxbf2_gpio_context *gs = gpiochip_get_data(gc);
> > +     int offset = irqd_to_hwirq(irqd);
> > +     unsigned long flags;
> > +     bool fall = false;
> > +     bool rise = false;
> > +     u32 val;
> > +
> > +     switch (type & IRQ_TYPE_SENSE_MASK) {
> > +     case IRQ_TYPE_EDGE_BOTH:
> > +     case IRQ_TYPE_LEVEL_MASK:
> > +             fall = true;
> > +             rise = true;
> > +             break;
> > +     case IRQ_TYPE_EDGE_RISING:
> > +     case IRQ_TYPE_LEVEL_HIGH:
> > +             rise = true;
> > +             break;
> > +     case IRQ_TYPE_EDGE_FALLING:
> > +     case IRQ_TYPE_LEVEL_LOW:
> > +             fall = true;
> > +             break;
> > +     default:
> > +             return -EINVAL;
> > +     }
>
> I'm still not convinced this is correct. Rising edge is different to 
> high. Rising edge only ever interrupts once, level keeps interrupting 
> until the source is cleared. You cannot store the four different 
> options in two bits.
>
> Linus, have you seen anything like this before?

> No, and I agree it looks weird.

> There must be some explanation, what does the datasheet say?

I have consulted the HW folks about this, and they confirmed that
our internal GPIO HW detects the falling edge.
INT_N signal (KSZ9031) ---connected to---- > BlueField GPIO9 (or GPIO12)
Even if INT_N is an active level interrupt, the GPIO HW always detects
the falling edge. This is why the original signal doesn’t really matter.
The BlueField GPIO HW only supports edge interrupts.
Andrew Lunn Sept. 27, 2021, 2:08 p.m. UTC | #4
> The BlueField GPIO HW only support Edge interrupts.

O.K. So please remove all level support from this driver, and return
-EINVAL if requested to do level.

This also means, you cannot use interrupts with the Ethernet PHY. The
PHY is using level interrupts.

    Andrew
Asmaa Mnebhi Sept. 27, 2021, 2:19 p.m. UTC | #5
> The BlueField GPIO HW only support Edge interrupts.

> O.K. So please remove all level support from this driver,
> and return -EINVAL if requested to do level.
> This also means, you cannot use interrupts with the
> Ethernet PHY. The PHY is using level interrupts.

Why not? The HW folks said it is alright because they
Do some internal conversion of PHY signal and we have tested
This extensively.
Asmaa Mnebhi Sept. 27, 2021, 2:26 p.m. UTC | #6
> The BlueField GPIO HW only support Edge interrupts.

O.K. So please remove all level support from this driver,
and return -EINVAL if requested to do level.
This also means, you cannot use interrupts with the
Ethernet PHY. The PHY is using level interrupts.

Why not? The HW folks said it is alright because they
Do some internal conversion of PHY signal and we have
tested This extensively.

Oh sorry I misunderstood what you meant.
In software we don't use the GPIO pin value itself to
Register the interrupt. We use a HW interrupt common
To all GPIO pins. So we should be ok. We only set this
EDGE register because it is required from a HW
Perspective to detect the INT_N signal.
Andrew Lunn Sept. 27, 2021, 2:56 p.m. UTC | #7
On Mon, Sep 27, 2021 at 02:19:45PM +0000, Asmaa Mnebhi wrote:
> 
> > The BlueField GPIO HW only support Edge interrupts.
> 
> O.K. So please remove all level support from this driver,
> and return -EINVAL if requested to do level.
> This also means, you cannot use interrupts with the
> Ethernet PHY. The PHY is using level interrupts.
> 
> Why not? The HW folks said it is alright because they
> Do some internal conversion of PHY signal and we have tested
> This extensively.

So the PHY is level based. The PHY is combining multiple interrupt
sources into one external interrupt. If any of those internal
interrupt sources are active, the external interrupt is active. If
there are multiple active sources at once, the interrupt stays low,
until they are all cleared. This means there is not an edge per
interrupt. There is one edge when the first internal source occurs,
and no more edges, even if there are more internal interrupts.

The general flow in the PHY interrupt handler is to read the interrupt
status register, which tells you which internal interrupts have
fired. You then address these internal interrupts one by one. This can
take some time, MDIO is a slow bus etc. While handling these interrupt
sources, it could be another internal interrupt source triggers. This
new internal interrupt source keeps the external interrupt active. But
there has not been an edge, since the interrupt handler is still
clearing the sources which caused the first interrupt. With level
interrupts, this is not an issue. When the interrupt handler exits,
the interrupt is re-enabled. Since it is still active, due to the
unhandled internal interrupt sources, the level interrupt immediately
fires again. The handler then sees this new interrupt and handles
it. At that point the level interrupt goes inactive.

Now think about what happens if you are using an edge interrupt
controller with a level interrupt. You get the first edge, and call
the interrupt handler. And then there are no more edges, despite there
being more interrupts. You not only lose the new interrupt, you never
see any more interrupts. Your PHY link can go up and down, it can try
to report being over temperature, that it has detected power from the
peer, cable tests have passed, etc. But since there is no edge, there
is never an interrupt.

So you say it has been extensively tested. Has it been extensively
tested with multiple internal interrupt sources at the same time? And
with slight timing variations, so that you trigger this race
condition? It is not going to happen very often, but when it does, it
is going to be very bad.

	Andrew
Asmaa Mnebhi Sept. 27, 2021, 3:52 p.m. UTC | #8
On Mon, Sep 27, 2021 at 02:19:45PM +0000, Asmaa Mnebhi wrote:
> 
> > The BlueField GPIO HW only support Edge interrupts.
> 
> O.K. So please remove all level support from this driver, and return 
> -EINVAL if requested to do level.
> This also means, you cannot use interrupts with the Ethernet PHY. The 
> PHY is using level interrupts.
> 
> Why not? The HW folks said it is alright because they Do some internal 
> conversion of PHY signal and we have tested This extensively.

So the PHY is level based. The PHY is combing multiple interrupt sources 
into one external interrupt. If any of those internal interrupt sources are active,
the external interrupt is active. If there are multiple active sources at once, the
interrupt stays low, until they are all cleared. This means there is not an edge
per interrupt. There is one edge when the first internal source occurs, and no
more edges, even if there are more internal interrupts.

The general flow in the PHY interrupt handler is to read the interrupt status
register, which tells you which internal interrupts have fired.
You then address these internal interrupts one by one. This can take some
time, MDIO is a slow bus etc. While handling these interrupt sources,
it could be another internal interrupt source triggers. This new internal
interrupt source keeps the external interrupt active. But there has not
been an edge, since the interrupt handler is still clearing the sources
which caused the first interrupt. With level interrupts, this is not an
issue. When the interrupt handler exits, the interrupt is re-enabled. Since
it is still active, due to the unhandled internal interrupt sources,
the level interrupt immediately fires again. the handler then sees this
new interrupt and handles it. At that point the level interrupt goes inactive.

Now think about what happens if you are using an edge interrupt
controller with a level interrupt. You get the first edge, and call the
interrupt handler. And then there are no more edges, despite there
being more interrupts. You not only loose the new interrupt, you
never see any more interrupts. You PHY link can go up and down,
it can try to report being over temperature, that it has detected
power from the peer, cable tests have passed, etc. But since there
is no edge, there is never an interrupt.

So you say it has been extensively tested. Has it been extensively
tested with multiple internal interrupt sources at the same time?
And with slight timing variations, so that you trigger this race
condition? It is not going to happen very often, but when it does,
it is going to be very bad.

Asmaa>> Thank you very much for the detailed and clear explanation!
we only enable/support link up/down interrupts. QA has tested
bringing up/down the network interface +200 times in a loop.
I agree with you that the INT_N should be connected to a GPIO
Pin which also supports level interrupt. From a software perspective,
that HW interrupt flow is not visible/accessible to software.
I was instructed by HW designers to enable the interrupt and set it as falling.
The software interrupt and handler is not registered
based on the GPIO interrupt but rather a HW interrupt which is
common to all GPIO pins (irrelevant here, but this is edge triggered):
ret = devm_request_irq(dev, irq, mlxbf2_gpio_irq_handler,
                                        IRQF_SHARED, name, gs);
Andrew Lunn Sept. 27, 2021, 7:10 p.m. UTC | #9
> Asmaa>> Thank you very much for the detailed and clear explanation!
> we only enable/support link up/down interrupts. QA has tested
> bringing up/down the network interface +200 times in a loop.

The micrel driver currently only uses two interrupts of the available
8. So it will be hard to trigger the problem with the current
driver. Your best way to trigger it is going to bring the link down as
soon as it goes up. So you get first a link up, and then a link down
very shortly afterwards.

There is however nothing stopping developers making use of the other
interrupts. That will then increase the likelihood of problems.

What does help you is that the interrupt register is clear on read. So
the race condition window is small.

> The software interrupt and handler is not registered
> based on the GPIO interrupt but rather a HW interrupt which is
> common to all GPIO pins (irrelevant here, but this is edge triggered):
> ret = devm_request_irq(dev, irq, mlxbf2_gpio_irq_handler,
>                                         IRQF_SHARED, name, gs);

IRQF_SHARED implies level. You cannot have a shared interrupt which is
using edges.

      Andrew
Asmaa Mnebhi Sept. 28, 2021, 3:02 p.m. UTC | #10
> So the PHY is level based. The PHY is combing multiple interrupt sources 
> into one external interrupt. If any of those internal interrupt sources are
> active, the external interrupt is active. If there are > multiple active sources
> at once, the interrupt stays low, until they are all cleared. This means
> there is not an edge per interrupt. There is one edge when the first internal
> source occurs, and no more edges, > even if there are more internal interrupts.

> The general flow in the PHY interrupt handler is to read the interrupt status
> register, which tells you which internal interrupts have fired. You then
> address these internal interrupts one by one.

In KSZ9031, Register MII_KSZPHY_INTCS=0x1B reports all interrupt events and
clear on read. So if there are 4 different interrupts, once it is read once, all 4 clear at once.
The micrel.c driver has defined ack_interrupt to read the above reg and is called every time the
interrupt handler phy_interrupt is called. So in this case, we should be good.
The code flow in our case would look like this:
- 2 interrupt sources (for example, link down followed by link up) set in MII_KSZPHY_INTCS
- interrupt handler (phy_interrupt) reads MII_KSZPHY_INT which automatically clears both
interrupts
- another internal source triggers and sets the register.
- The second edge will be caught accordingly by the GPIO.

> This can take some time, MDIO is a slow bus etc. While handling these interrupt sources,
> it could be another internal interrupt source triggers. This new internal interrupt source
> keeps the external interrupt active. But there has not been an edge, since the interrupt 
> handler is still clearing the sources which caused the first interrupt. With level interrupts,
> this is not an issue. When the interrupt handler exits, the interrupt is re-enabled. Since it
> is still active, due to the unhandled internal interrupt sources, the level interrupt
> immediately fires again. the handler then sees this new interrupt and handles it.
> At that point the level interrupt goes inactive.
Asmaa Mnebhi Sept. 29, 2021, 7:14 p.m. UTC | #11
> Asmaa>> Thank you very much for the detailed and clear explanation!
> we only enable/support link up/down interrupts. QA has tested bringing 
> up/down the network interface +200 times in a loop.

The micrel driver currently only uses two interrupts of the available 8. 
So it will be hard to trigger the problem with the current driver. Your 

best way to trigger it is going to bring the link down as soon as it goes up.

 So you get first a link up, and then a link down very shortly afterwards.

There is however nothing stopping developers making use of the other interrupts. 

That will then increase the likelihood of problems.

What does help you is that the interrupt register is clear on read. So the race condition 
window is small.

Asmaa>> Hi Andrew,

I had a meeting today with the HW folks to explain the problem at stake.
The flow for this issue is like this:
1) PHY issues INT_N signal (active low level interrupt)
2) falling edge detected on the GPIO and transmitted to software
3) the first thing mlxbf2_gpio_irq_handler does is to clear the GPIO interrupt.
However even if we clear the GPIO interrupt, the GPIO value itself
will be low as long as the INT_N signal is low. The GPIO HW triggers
the interrupt by detecting the falling edge of the GPIO pin.
4) mlxbf2_gpio_irq_handler triggers phy_interrupt which
calls drv->handler_interrupt.
handle_interrupt in our case = kszphy_handle_interrupt, which reads
MII_KSZPHY_INTCS regs and hence clears all interrupts at once. 

- if no other interrupt happens within this time frame, INT_N goes
back to 1 and the next interrupt will trigger another GPIO falling edge

- if the interrupt happens after the MDIO read, then it is not a problem. The
read would have already cleared the register and INT_N would go back to 1.
So the new interrupt will trigger a new GPIO falling edge interrupt.

Problem:
- however, if there is a second interrupt right before or during the MDIO read of
MII_KSZPHY_INTCS, it might not be detected by our GPIO HW.

Anyways, the HW folks agreed that this is a problem since indeed they do not
support LEVEL interrupts on the GPIOs at the moment.
They suggested to read the GPIO pin value to check if it has returned to high
in mlxbf2_gpio_irq_handler, then trigger the phy_interrupt handler.
But I don't think it is a good workaround because there could be a chain
of interrupts which hold the  LEVEL low for a long time, and we don't want to
be waiting too long in an interrupt handler routine.
I would greatly appreciate some more feedback on what is the best way to deal
With this in the upstreamed version of the driver.
HW folks said they will fix this in future BlueField generations.


> The software interrupt and handler is not registered based on the GPIO 
> interrupt but rather a HW interrupt which is common to all GPIO pins 
> (irrelevant here, but this is edge triggered):
> ret = devm_request_irq(dev, irq, mlxbf2_gpio_irq_handler,
>                                         IRQF_SHARED, name, gs);

IRQF_SHARED implied level. You cannot have a shared interrupt which is using edges.

      Andrew
Andrew Lunn Sept. 29, 2021, 8:24 p.m. UTC | #12
> In KSZ9031, Register MII_KSZPHY_INTCS=0x1B reports all interrupt events and
> clear on read. So if there are 4 different interrupts, once it is read once, all 4 clear at once.
> The micrel.c driver has defined ack_interrupt to read the above reg and is called every time the
> interrupt handler phy_interrupt is called. So in this case, we should be good.
> The code flow in our case would look like this:
> - 2 interrupt sources (for example, link down followed by link up) set in MII_KSZPHY_INTCS
> - interrupt handler (phy_interrupt) reads MII_KSZPHY_INT which automatically clears both
> interrupts
> - another internal source triggers and sets the register.
> - The second edge will be caught accordingly by the GPIO.

I still think there is a small race window. Your product manager needs
to decide if that is acceptable, or if you should poll the PHY.

Anyway, it is clear the hardware only does edge interrupts, so the
GPIO driver should only accept edge interrupts. -EINVAL otherwise.

I also assume you have a ACPI blob which indicates what sort of
interrupts that should be used, level low, falling edge etc. Since
that is outside of the kernel, i will never know what you decide to
put there. Ideally, until the hardware is fixed, you should not list
any interrupt and fallback to polling.

    Andrew
Asmaa Mnebhi Oct. 8, 2021, 2:47 p.m. UTC | #13
> In KSZ9031, Register MII_KSZPHY_INTCS=0x1B reports all interrupt 
> events and clear on read. So if there are 4 different interrupts, once it is read once, all 4 clear at once.
> The micrel.c driver has defined ack_interrupt to read the above reg 
> and is called every time the interrupt handler phy_interrupt is called. So in this case, we should be good.
> The code flow in our case would look like this:
> - 2 interrupt sources (for example, link down followed by link up) set 
> in MII_KSZPHY_INTCS
> - interrupt handler (phy_interrupt) reads MII_KSZPHY_INT which 
> automatically clears both interrupts
> - another internal source triggers and sets the register.
> - The second edge will be caught accordingly by the GPIO.

> I still think there is a small race window. You product manager needs to decide if that is acceptable, or if you should poll the PHY.

I talked to both our managers and the HW team and they said it is ok to use the interrupt for our product.

> Anyway, it is clear the hardware only does level interrupts, so the GPIO driver should only accept level interrupts. -EINVAL otherwise.

There is an on going conversation with HW folks to address this for future BlueField generations.

Thank you.
Asmaa
diff mbox series

Patch

diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c
index 177d03ef4529..21c53d18ecd3 100644
--- a/drivers/gpio/gpio-mlxbf2.c
+++ b/drivers/gpio/gpio-mlxbf2.c
@@ -1,9 +1,14 @@ 
 // SPDX-License-Identifier: GPL-2.0
 
+/*
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
 #include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
@@ -43,9 +48,14 @@ 
 #define YU_GPIO_MODE0			0x0c
 #define YU_GPIO_DATASET			0x14
 #define YU_GPIO_DATACLEAR		0x18
+#define YU_GPIO_CAUSE_RISE_EN		0x44
+#define YU_GPIO_CAUSE_FALL_EN		0x48
 #define YU_GPIO_MODE1_CLEAR		0x50
 #define YU_GPIO_MODE0_SET		0x54
 #define YU_GPIO_MODE0_CLEAR		0x58
+#define YU_GPIO_CAUSE_OR_CAUSE_EVTEN0	0x80
+#define YU_GPIO_CAUSE_OR_EVTEN0		0x94
+#define YU_GPIO_CAUSE_OR_CLRCAUSE	0x98
 
 struct mlxbf2_gpio_context_save_regs {
 	u32 gpio_mode0;
@@ -55,6 +65,7 @@  struct mlxbf2_gpio_context_save_regs {
 /* BlueField-2 gpio block context structure. */
 struct mlxbf2_gpio_context {
 	struct gpio_chip gc;
+	struct irq_chip irq_chip;
 
 	/* YU GPIO blocks address */
 	void __iomem *gpio_io;
@@ -218,15 +229,117 @@  static int mlxbf2_gpio_direction_output(struct gpio_chip *chip,
 	return ret;
 }
 
+/* Clear the cause bit of one GPIO, then set its bit in the event enable. */
+static void mlxbf2_gpio_irq_enable(struct irq_data *irqd)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(irqd);
+	struct mlxbf2_gpio_context *ctx = gpiochip_get_data(chip);
+	u32 pin_mask = BIT(irqd_to_hwirq(irqd));
+	unsigned long irqflags;
+	u32 reg;
+
+	spin_lock_irqsave(&ctx->gc.bgpio_lock, irqflags);
+	/* Clear before enabling; presumably so a stale cause does not fire. */
+	reg = readl(ctx->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE) | pin_mask;
+	writel(reg, ctx->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
+
+	reg = readl(ctx->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0) | pin_mask;
+	writel(reg, ctx->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
+	spin_unlock_irqrestore(&ctx->gc.bgpio_lock, irqflags);
+}
+
+/* Clear the bit of one GPIO in the YU_GPIO_CAUSE_OR_EVTEN0 event enable. */
+static void mlxbf2_gpio_irq_disable(struct irq_data *irqd)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(irqd);
+	struct mlxbf2_gpio_context *ctx = gpiochip_get_data(chip);
+	u32 pin_mask = BIT(irqd_to_hwirq(irqd));
+	unsigned long irqflags;
+	u32 reg;
+
+	spin_lock_irqsave(&ctx->gc.bgpio_lock, irqflags);
+	reg = readl(ctx->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0) & ~pin_mask;
+	writel(reg, ctx->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
+	spin_unlock_irqrestore(&ctx->gc.bgpio_lock, irqflags);
+}
+
+/* Read the cause bits, write them back to clear-cause, dispatch each one. */
+static irqreturn_t mlxbf2_gpio_irq_handler(int irq, void *ptr)
+{
+	struct mlxbf2_gpio_context *ctx = ptr;
+	struct gpio_chip *chip = &ctx->gc;
+	unsigned long causes;
+	u32 pin;
+
+	causes = readl(ctx->gpio_io + YU_GPIO_CAUSE_OR_CAUSE_EVTEN0);
+	writel(causes, ctx->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
+
+	/* Each set bit is a GPIO offset; map it to its Linux IRQ and run it. */
+	for_each_set_bit(pin, &causes, chip->ngpio)
+		generic_handle_irq(irq_find_mapping(chip->irq.domain, pin));
+
+	return IRQ_RETVAL(causes);
+}
+
+/*
+ * Program which edge(s) of a GPIO raise an interrupt.
+ *
+ * The hardware only detects edges (rise/fall enables), so level-triggered
+ * types are rejected with -EINVAL instead of being silently treated as edges.
+ */
+static int
+mlxbf2_gpio_irq_set_type(struct irq_data *irqd, unsigned int type)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
+	struct mlxbf2_gpio_context *gs = gpiochip_get_data(gc);
+	u32 mask = BIT(irqd_to_hwirq(irqd));
+	unsigned long flags;
+	bool fall = false;
+	bool rise = false;
+	u32 val;
+
+	switch (type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_BOTH:
+		fall = true;
+		rise = true;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+		rise = true;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		fall = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&gs->gc.bgpio_lock, flags);
+	/* Clear the unselected edge too, so an earlier type does not linger. */
+	val = readl(gs->gpio_io + YU_GPIO_CAUSE_FALL_EN);
+	val = fall ? (val | mask) : (val & ~mask);
+	writel(val, gs->gpio_io + YU_GPIO_CAUSE_FALL_EN);
+
+	val = readl(gs->gpio_io + YU_GPIO_CAUSE_RISE_EN);
+	val = rise ? (val | mask) : (val & ~mask);
+	writel(val, gs->gpio_io + YU_GPIO_CAUSE_RISE_EN);
+	spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
+
+	return 0;
+}
+
 /* BlueField-2 GPIO driver initialization routine. */
 static int
 mlxbf2_gpio_probe(struct platform_device *pdev)
 {
 	struct mlxbf2_gpio_context *gs;
 	struct device *dev = &pdev->dev;
+	struct gpio_irq_chip *girq;
 	struct gpio_chip *gc;
 	unsigned int npins;
-	int ret;
+	const char *name;
+	int ret, irq;
+
+	name = dev_name(dev);
 
 	gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL);
 	if (!gs)
@@ -256,11 +369,44 @@  mlxbf2_gpio_probe(struct platform_device *pdev)
 			NULL,
 			0);
 
+	if (ret) {
+		dev_err(dev, "bgpio_init failed\n");
+		return ret;
+	}
+
 	gc->direction_input = mlxbf2_gpio_direction_input;
 	gc->direction_output = mlxbf2_gpio_direction_output;
 	gc->ngpio = npins;
 	gc->owner = THIS_MODULE;
 
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0) {
+		gs->irq_chip.name = name;
+		gs->irq_chip.irq_set_type = mlxbf2_gpio_irq_set_type;
+		gs->irq_chip.irq_enable = mlxbf2_gpio_irq_enable;
+		gs->irq_chip.irq_disable = mlxbf2_gpio_irq_disable;
+
+		girq = &gs->gc.irq;
+		girq->chip = &gs->irq_chip;
+		girq->handler = handle_simple_irq;
+		girq->default_type = IRQ_TYPE_NONE;
+		/* This will let us handle the parent IRQ in the driver */
+		girq->num_parents = 0;
+		girq->parents = NULL;
+		girq->parent_handler = NULL;
+
+		/*
+		 * Directly request the irq here instead of passing
+		 * a flow-handler because the irq is shared.
+		 */
+		ret = devm_request_irq(dev, irq, mlxbf2_gpio_irq_handler,
+				       IRQF_SHARED, name, gs);
+		if (ret) {
+			dev_err(dev, "failed to request IRQ");
+			return ret;
+		}
+	}
+
 	platform_set_drvdata(pdev, gs);
 
 	ret = devm_gpiochip_add_data(dev, &gs->gc, gs);
@@ -315,5 +461,5 @@  static struct platform_driver mlxbf2_gpio_driver = {
 module_platform_driver(mlxbf2_gpio_driver);
 
 MODULE_DESCRIPTION("Mellanox BlueField-2 GPIO Driver");
-MODULE_AUTHOR("Mellanox Technologies");
+MODULE_AUTHOR("Asmaa Mnebhi <asmaa@nvidia.com>");
 MODULE_LICENSE("GPL v2");