diff mbox series

[RFC,04/15] drivers/base: Add support for a new IMS irq domain

Message ID 158751205175.36773.1874642824360728883.stgit@djiang5-desk3.ch.intel.com (mailing list archive)
State RFC
Headers show
Series Add VFIO mediated device support and IMS support for the idxd driver. | expand

Commit Message

Dave Jiang April 21, 2020, 11:34 p.m. UTC
From: Megha Dey <megha.dey@linux.intel.com>

Add support for the creation of a new IMS irq domain. It creates a new
irq chip associated with the IMS domain and adds the necessary domain
operations to it.

Also, add a new config option MSI_IMS which must be enabled by any driver
who would want to use the IMS infrastructure.

Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
---
 arch/x86/include/asm/hw_irq.h    |    7 +++
 drivers/base/Kconfig             |    9 +++
 drivers/base/Makefile            |    1 
 drivers/base/ims-msi.c           |  100 ++++++++++++++++++++++++++++++++++++++
 drivers/base/platform-msi.c      |    6 +-
 drivers/vfio/mdev/mdev_core.c    |    6 ++
 drivers/vfio/mdev/mdev_private.h |    1 
 include/linux/mdev.h             |    3 +
 include/linux/msi.h              |    2 +
 9 files changed, 131 insertions(+), 4 deletions(-)
 create mode 100644 drivers/base/ims-msi.c

Comments

Jason Gunthorpe April 23, 2020, 8:11 p.m. UTC | #1
On Tue, Apr 21, 2020 at 04:34:11PM -0700, Dave Jiang wrote:
> diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
> new file mode 100644
> index 000000000000..738f6d153155
> +++ b/drivers/base/ims-msi.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Support for Device Specific IMS interrupts.
> + *
> + * Copyright © 2019 Intel Corporation.
> + *
> + * Author: Megha Dey <megha.dey@intel.com>
> + */
> +
> +#include <linux/dmar.h>
> +#include <linux/irq.h>
> +#include <linux/mdev.h>
> +#include <linux/pci.h>
> +
> +/*
> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
> + * Return mdev's parent dev if success.
> + */
> +static inline struct device *mdev_to_parent(struct device *dev)
> +{
> +	struct device *ret = NULL;
> +	struct device *(*fn)(struct device *dev);
> +	struct bus_type *bus = symbol_get(mdev_bus_type);
> +
> +	if (bus && dev->bus == bus) {
> +		fn = symbol_get(mdev_dev_to_parent_dev);
> +		ret = fn(dev);
> +		symbol_put(mdev_dev_to_parent_dev);
> +		symbol_put(mdev_bus_type);

No, things like this are not OK in the drivers/base

Whatever this is doing needs to be properly architected in some
generic way.

> +static int dev_ims_prepare(struct irq_domain *domain, struct device *dev,
> +			   int nvec, msi_alloc_info_t *arg)
> +{
> +	if (dev_is_mdev(dev))
> +		dev = mdev_to_parent(dev);

Like maybe the caller shouldn't be passing in a mdev in the first
place, or some generic driver layer scheme is needed to go from a
child device (eg a mdev or one of these new virtual bus things) to the
struct device that owns the IRQ interface.

> +	init_irq_alloc_info(arg, NULL);
> +	arg->dev = dev;
> +	arg->type = X86_IRQ_ALLOC_TYPE_IMS;

Also very bewildering to see X86_* in drivers/base

> +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
> +					      const char *name)
> +{
> +	struct fwnode_handle *fn;
> +	struct irq_domain *domain;
> +
> +	fn = irq_domain_alloc_named_fwnode(name);
> +	if (!fn)
> +		return NULL;
> +
> +	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
> +	if (!domain)
> +		return NULL;
> +
> +	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
> +	irq_domain_free_fwnode(fn);
> +
> +	return domain;
> +}

I'm still not really clear why all this is called IMS.. This looks
like the normal boilerplate to setup an IRQ domain? What is actually
'ims' in here?

> diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
> index 7d922950caaf..c21f1305a76b 100644
> +++ b/drivers/vfio/mdev/mdev_private.h
> @@ -36,7 +36,6 @@ struct mdev_device {
>  };
>  
>  #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
> -#define dev_is_mdev(d)		((d)->bus == &mdev_bus_type)
>  
>  struct mdev_type {
>  	struct kobject kobj;
> diff --git a/include/linux/mdev.h b/include/linux/mdev.h
> index 0ce30ca78db0..fa2344e239ef 100644
> +++ b/include/linux/mdev.h
> @@ -144,5 +144,8 @@ void mdev_unregister_driver(struct mdev_driver *drv);
>  struct device *mdev_parent_dev(struct mdev_device *mdev);
>  struct device *mdev_dev(struct mdev_device *mdev);
>  struct mdev_device *mdev_from_dev(struct device *dev);
> +struct device *mdev_dev_to_parent_dev(struct device *dev);
> +
> +#define dev_is_mdev(dev) ((dev)->bus == symbol_get(mdev_bus_type))

NAK on the symbol_get

Jason
Thomas Gleixner April 25, 2020, 9:38 p.m. UTC | #2
Dave Jiang <dave.jiang@intel.com> writes:
> From: Megha Dey <megha.dey@linux.intel.com>
>
> Add support for the creation of a new IMS irq domain. It creates a new
> irq chip associated with the IMS domain and adds the necessary domain
> operations to it.

And how is a X86 specific thingy related to drivers/base?

> diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c

This sits in drivers base because IMS is architecture independent, right?

> new file mode 100644
> index 000000000000..738f6d153155
> --- /dev/null
> +++ b/drivers/base/ims-msi.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Support for Device Specific IMS interrupts.
> + *
> + * Copyright © 2019 Intel Corporation.
> + *
> + * Author: Megha Dey <megha.dey@intel.com>
> + */
> +
> +#include <linux/dmar.h>
> +#include <linux/irq.h>
> +#include <linux/mdev.h>
> +#include <linux/pci.h>
> +
> +/*
> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
> + * Return mdev's parent dev if success.
> + */
> +static inline struct device *mdev_to_parent(struct device *dev)
> +{
> +	struct device *ret = NULL;
> +	struct device *(*fn)(struct device *dev);
> +	struct bus_type *bus = symbol_get(mdev_bus_type);

symbol_get()?

> +
> +	if (bus && dev->bus == bus) {
> +		fn = symbol_get(mdev_dev_to_parent_dev);

What's wrong with simple function calls?

> +		ret = fn(dev);
> +		symbol_put(mdev_dev_to_parent_dev);
> +		symbol_put(mdev_bus_type);
> +	}
> +
> +	return ret;
> +}
> +
> +static irq_hw_number_t dev_ims_get_hwirq(struct msi_domain_info *info,
> +					 msi_alloc_info_t *arg)
> +{
> +	return arg->ims_hwirq;
> +}
> +
> +static int dev_ims_prepare(struct irq_domain *domain, struct device *dev,
> +			   int nvec, msi_alloc_info_t *arg)
> +{
> +	if (dev_is_mdev(dev))
> +		dev = mdev_to_parent(dev);

This makes absolutely no sense. Somewhere you claimed that this is
solely for mdev. Now this interface takes both a regular device and mdev.

Lack of explanation seems to be a common scheme here.

> +	init_irq_alloc_info(arg, NULL);
> +	arg->dev = dev;
> +	arg->type = X86_IRQ_ALLOC_TYPE_IMS;
> +
> +	return 0;
> +}
> +
> +static void dev_ims_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
> +{
> +	arg->ims_hwirq = platform_msi_calc_hwirq(desc);
> +}
> +
> +static struct msi_domain_ops dev_ims_domain_ops = {
> +	.get_hwirq	= dev_ims_get_hwirq,
> +	.msi_prepare	= dev_ims_prepare,
> +	.set_desc	= dev_ims_set_desc,
> +};
> +
> +static struct irq_chip dev_ims_ir_controller = {
> +	.name			= "IR-DEV-IMS",
> +	.irq_ack		= irq_chip_ack_parent,
> +	.irq_retrigger		= irq_chip_retrigger_hierarchy,
> +	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
> +	.flags			= IRQCHIP_SKIP_SET_WAKE,
> +	.irq_write_msi_msg	= platform_msi_write_msg,
> +};
> +
> +static struct msi_domain_info ims_ir_domain_info = {
> +	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
> +	.ops		= &dev_ims_domain_ops,
> +	.chip		= &dev_ims_ir_controller,
> +	.handler	= handle_edge_irq,
> +	.handler_name	= "edge",
> +};
> +
> +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
> +					      const char *name)

arch_create_ ???? In drivers/base ??? 

> +{
> +	struct fwnode_handle *fn;
> +	struct irq_domain *domain;
> +
> +	fn = irq_domain_alloc_named_fwnode(name);
> +	if (!fn)
> +		return NULL;
> +
> +	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
> +	if (!domain)
> +		return NULL;
> +
> +	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
> +	irq_domain_free_fwnode(fn);
> +
> +	return domain;
> +}
> diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
> index 2696aa75983b..59160e8cbfb1 100644
> --- a/drivers/base/platform-msi.c
> +++ b/drivers/base/platform-msi.c
> @@ -31,12 +31,11 @@ struct platform_msi_priv_data {
>  /* The devid allocator */
>  static DEFINE_IDA(platform_msi_devid_ida);
>  
> -#ifdef GENERIC_MSI_DOMAIN_OPS
>  /*
>   * Convert an msi_desc to a globaly unique identifier (per-device
>   * devid + msi_desc position in the msi_list).
>   */
> -static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
> +irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
>  {
>  	u32 devid;
>  
> @@ -45,6 +44,7 @@ static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
>  	return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index;
>  }
>  
> +#ifdef GENERIC_MSI_DOMAIN_OPS
>  static void platform_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
>  {
>  	arg->desc = desc;
> @@ -76,7 +76,7 @@ static void platform_msi_update_dom_ops(struct msi_domain_info *info)
>  		ops->set_desc = platform_msi_set_desc;
>  }
>  
> -static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
> +void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
>  {
>  	struct msi_desc *desc = irq_data_get_msi_desc(data);
>  	struct platform_msi_priv_data *priv_data;
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index b558d4cfd082..cecc6a6bdbef 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -33,6 +33,12 @@ struct device *mdev_parent_dev(struct mdev_device *mdev)
>  }
>  EXPORT_SYMBOL(mdev_parent_dev);
>  
> +struct device *mdev_dev_to_parent_dev(struct device *dev)
> +{
> +	return to_mdev_device(dev)->parent->dev;
> +}
> +EXPORT_SYMBOL(mdev_dev_to_parent_dev);

And this needs to be EXPORT_SYMBOL because this is designed to support
non GPL drivers from the very beginning, right? Ditto for the other
exports in this file.

> diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
> index 7d922950caaf..c21f1305a76b 100644
> --- a/drivers/vfio/mdev/mdev_private.h
> +++ b/drivers/vfio/mdev/mdev_private.h
> @@ -36,7 +36,6 @@ struct mdev_device {
>  };
>  
>  #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
> -#define dev_is_mdev(d)		((d)->bus == &mdev_bus_type)

Moving stuff around 3 patches later makes tons of sense.
  
Thanks,

        tglx
Dey, Megha May 1, 2020, 10:30 p.m. UTC | #3
Hi Jason,

On 4/23/2020 1:11 PM, Jason Gunthorpe wrote:
> On Tue, Apr 21, 2020 at 04:34:11PM -0700, Dave Jiang wrote:
>> diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
>> new file mode 100644
>> index 000000000000..738f6d153155
>> +++ b/drivers/base/ims-msi.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Support for Device Specific IMS interrupts.
>> + *
>> + * Copyright © 2019 Intel Corporation.
>> + *
>> + * Author: Megha Dey <megha.dey@intel.com>
>> + */
>> +
>> +#include <linux/dmar.h>
>> +#include <linux/irq.h>
>> +#include <linux/mdev.h>
>> +#include <linux/pci.h>
>> +
>> +/*
>> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
>> + * Return mdev's parent dev if success.
>> + */
>> +static inline struct device *mdev_to_parent(struct device *dev)
>> +{
>> +	struct device *ret = NULL;
>> +	struct device *(*fn)(struct device *dev);
>> +	struct bus_type *bus = symbol_get(mdev_bus_type);
>> +
>> +	if (bus && dev->bus == bus) {
>> +		fn = symbol_get(mdev_dev_to_parent_dev);
>> +		ret = fn(dev);
>> +		symbol_put(mdev_dev_to_parent_dev);
>> +		symbol_put(mdev_bus_type);
> 
> No, things like this are not OK in the drivers/base
> 
> Whatever this is doing needs to be properly architected in some
> generic way.

Basically what I am trying to do here is to determine if the device is 
an mdev device or not. mdev devices have no IRQ domain associated to it 
and use their parent dev's IRQ domain to allocate interrupts.

The issue is that
1. all the vfio-mdev code today can be compiled as a module
2. None of the mdev macros/functions are being used outside of the 
drivers/vfio/mdev code path (where they are defined).

Hence, these definitions are not visible outside of drivers/vfio/mdev 
when compiled as a module and thus I have used symbol_get/put.

I will try asking the mdev folks if they would have a better solution 
for this or some of this code can be mde more generic.

> 
>> +static int dev_ims_prepare(struct irq_domain *domain, struct device *dev,
>> +			   int nvec, msi_alloc_info_t *arg)
>> +{
>> +	if (dev_is_mdev(dev))
>> +		dev = mdev_to_parent(dev);
> 
> Like maybe the caller shouldn't be passing in a mdev in the first
> place, or some generic driver layer scheme is needed to go from a
> child device (eg a mdev or one of these new virtual bus things) to the
> struct device that owns the IRQ interface.

In our current use case, IMS interrupts are only used by guest (mdev's), 
although they can be used by host as well. So the 'dev' passed by the 
caller of platform_msi_domain_alloc_irqs_group() is effectively an mdev.

I am not sure about how we could have a generic code to convert the 
'child' mdev device to struct device. Do you have any suggestions on how 
we could do this?

> 
>> +	init_irq_alloc_info(arg, NULL);
>> +	arg->dev = dev;
>> +	arg->type = X86_IRQ_ALLOC_TYPE_IMS;
> 
> Also very bewildering to see X86_* in drivers/base

Well, this needs to go for sure. I will replace it with something more 
generic.
> 
>> +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
>> +					      const char *name)
>> +{
>> +	struct fwnode_handle *fn;
>> +	struct irq_domain *domain;
>> +
>> +	fn = irq_domain_alloc_named_fwnode(name);
>> +	if (!fn)
>> +		return NULL;
>> +
>> +	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
>> +	if (!domain)
>> +		return NULL;
>> +
>> +	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
>> +	irq_domain_free_fwnode(fn);
>> +
>> +	return domain;
>> +}
> 
> I'm still not really clear why all this is called IMS.. This looks
> like the normal boilerplate to setup an IRQ domain? What is actually
> 'ims' in here?

It is just a way to create a new domain specifically for IMS interrupts. 
Although, since there is a platform_msi_create_irq_domain already, which 
does something similar, I will use the same for IMS as well.

Also, since there is quite a stir over the name 'IMS' do you have any 
suggestion for a more generic name for this?

> 
>> diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
>> index 7d922950caaf..c21f1305a76b 100644
>> +++ b/drivers/vfio/mdev/mdev_private.h
>> @@ -36,7 +36,6 @@ struct mdev_device {
>>   };
>>   
>>   #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
>> -#define dev_is_mdev(d)		((d)->bus == &mdev_bus_type)
>>   
>>   struct mdev_type {
>>   	struct kobject kobj;
>> diff --git a/include/linux/mdev.h b/include/linux/mdev.h
>> index 0ce30ca78db0..fa2344e239ef 100644
>> +++ b/include/linux/mdev.h
>> @@ -144,5 +144,8 @@ void mdev_unregister_driver(struct mdev_driver *drv);
>>   struct device *mdev_parent_dev(struct mdev_device *mdev);
>>   struct device *mdev_dev(struct mdev_device *mdev);
>>   struct mdev_device *mdev_from_dev(struct device *dev);
>> +struct device *mdev_dev_to_parent_dev(struct device *dev);
>> +
>> +#define dev_is_mdev(dev) ((dev)->bus == symbol_get(mdev_bus_type))
> 
> NAK on the symbol_get

As I mentioned earlier, given the way the current mdev code is 
structured, the only way to use dev_is_mdev or other macros/functions in 
the mdev subsystem outside of drivers/vfio/mdev is to use 
symbol_get/put. Obviously, seems like this is not a correct thing to do, 
I will have to find another way.

> 
> Jason
>
Jason Gunthorpe May 3, 2020, 10:25 p.m. UTC | #4
On Fri, May 01, 2020 at 03:30:02PM -0700, Dey, Megha wrote:
> Hi Jason,
> 
> On 4/23/2020 1:11 PM, Jason Gunthorpe wrote:
> > On Tue, Apr 21, 2020 at 04:34:11PM -0700, Dave Jiang wrote:
> > > diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
> > > new file mode 100644
> > > index 000000000000..738f6d153155
> > > +++ b/drivers/base/ims-msi.c
> > > @@ -0,0 +1,100 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * Support for Device Specific IMS interrupts.
> > > + *
> > > + * Copyright © 2019 Intel Corporation.
> > > + *
> > > + * Author: Megha Dey <megha.dey@intel.com>
> > > + */
> > > +
> > > +#include <linux/dmar.h>
> > > +#include <linux/irq.h>
> > > +#include <linux/mdev.h>
> > > +#include <linux/pci.h>
> > > +
> > > +/*
> > > + * Determine if a dev is mdev or not. Return NULL if not mdev device.
> > > + * Return mdev's parent dev if success.
> > > + */
> > > +static inline struct device *mdev_to_parent(struct device *dev)
> > > +{
> > > +	struct device *ret = NULL;
> > > +	struct device *(*fn)(struct device *dev);
> > > +	struct bus_type *bus = symbol_get(mdev_bus_type);
> > > +
> > > +	if (bus && dev->bus == bus) {
> > > +		fn = symbol_get(mdev_dev_to_parent_dev);
> > > +		ret = fn(dev);
> > > +		symbol_put(mdev_dev_to_parent_dev);
> > > +		symbol_put(mdev_bus_type);
> > 
> > No, things like this are not OK in the drivers/base
> > 
> > Whatever this is doing needs to be properly architected in some
> > generic way.
> 
> Basically what I am trying to do here is to determine if the device is an
> mdev device or not.

Why? mdev devices are virtual they don't have HW elements.

The caller should use the concrete pci_device to allocate
platform_msi? What is preventing this?

> > > +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
> > > +					      const char *name)
> > > +{
> > > +	struct fwnode_handle *fn;
> > > +	struct irq_domain *domain;
> > > +
> > > +	fn = irq_domain_alloc_named_fwnode(name);
> > > +	if (!fn)
> > > +		return NULL;
> > > +
> > > +	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
> > > +	if (!domain)
> > > +		return NULL;
> > > +
> > > +	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
> > > +	irq_domain_free_fwnode(fn);
> > > +
> > > +	return domain;
> > > +}
> > 
> > I'm still not really clear why all this is called IMS.. This looks
> > like the normal boilerplate to setup an IRQ domain? What is actually
> > 'ims' in here?
> 
> It is just a way to create a new domain specifically for IMS interrupts.
> Although, since there is a platform_msi_create_irq_domain already, which
> does something similar, I will use the same for IMS as well.

But this is all code already intended to be used by the platform, why
is it in drivers/base?

> Also, since there is quite a stir over the name 'IMS' do you have any
> suggestion for a more generic name for this?

It seems we have a name, this is called platform_msi in Linux?

Jason
Dey, Megha May 3, 2020, 10:40 p.m. UTC | #5
Hi Jason,

On 5/3/2020 3:25 PM, Jason Gunthorpe wrote:
> On Fri, May 01, 2020 at 03:30:02PM -0700, Dey, Megha wrote:
>> Hi Jason,
>>
>> On 4/23/2020 1:11 PM, Jason Gunthorpe wrote:
>>> On Tue, Apr 21, 2020 at 04:34:11PM -0700, Dave Jiang wrote:
>>>> diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
>>>> new file mode 100644
>>>> index 000000000000..738f6d153155
>>>> +++ b/drivers/base/ims-msi.c
>>>> @@ -0,0 +1,100 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Support for Device Specific IMS interrupts.
>>>> + *
>>>> + * Copyright © 2019 Intel Corporation.
>>>> + *
>>>> + * Author: Megha Dey <megha.dey@intel.com>
>>>> + */
>>>> +
>>>> +#include <linux/dmar.h>
>>>> +#include <linux/irq.h>
>>>> +#include <linux/mdev.h>
>>>> +#include <linux/pci.h>
>>>> +
>>>> +/*
>>>> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
>>>> + * Return mdev's parent dev if success.
>>>> + */
>>>> +static inline struct device *mdev_to_parent(struct device *dev)
>>>> +{
>>>> +	struct device *ret = NULL;
>>>> +	struct device *(*fn)(struct device *dev);
>>>> +	struct bus_type *bus = symbol_get(mdev_bus_type);
>>>> +
>>>> +	if (bus && dev->bus == bus) {
>>>> +		fn = symbol_get(mdev_dev_to_parent_dev);
>>>> +		ret = fn(dev);
>>>> +		symbol_put(mdev_dev_to_parent_dev);
>>>> +		symbol_put(mdev_bus_type);
>>>
>>> No, things like this are not OK in the drivers/base
>>>
>>> Whatever this is doing needs to be properly architected in some
>>> generic way.
>>
>> Basically what I am trying to do here is to determine if the device is an
>> mdev device or not.
> 
> Why? mdev devices are virtual they don't have HW elements.

Hmm yeah exactly, since they are virtual, they do not have an associated 
IRQ domain right? So they use the irq domain of the parent device..

> 
> The caller should use the concrete pci_device to allocate
> platform_msi? What is preventing this?

hmmm do you mean to say all platform-msi adhere to the rules of a PCI 
device? The use case if when we have a device assigned to a guest and we 
want to allocate IMS(platform-msi) interrupts for that guest-assigned 
device. Currently, this is abstracted through a mdev interface.

> 
>>>> +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
>>>> +					      const char *name)
>>>> +{
>>>> +	struct fwnode_handle *fn;
>>>> +	struct irq_domain *domain;
>>>> +
>>>> +	fn = irq_domain_alloc_named_fwnode(name);
>>>> +	if (!fn)
>>>> +		return NULL;
>>>> +
>>>> +	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
>>>> +	if (!domain)
>>>> +		return NULL;
>>>> +
>>>> +	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
>>>> +	irq_domain_free_fwnode(fn);
>>>> +
>>>> +	return domain;
>>>> +}
>>>
>>> I'm still not really clear why all this is called IMS.. This looks
>>> like the normal boilerplate to setup an IRQ domain? What is actually
>>> 'ims' in here?
>>
>> It is just a way to create a new domain specifically for IMS interrupts.
>> Although, since there is a platform_msi_create_irq_domain already, which
>> does something similar, I will use the same for IMS as well.
> 
> But this is all code already intended to be used by the platform, why
> is it in drivers/base?

yeah this code will not exist in the next version anyways..
> 
>> Also, since there is quite a stir over the name 'IMS' do you have any
>> suggestion for a more generic name for this?
> 
> It seems we have a name, this is called platform_msi in Linux?

yeah, ultimately IMS boils down to "Extended platform_msi" looks like ..
> 
> Jason
>
Jason Gunthorpe May 3, 2020, 10:46 p.m. UTC | #6
On Sun, May 03, 2020 at 03:40:44PM -0700, Dey, Megha wrote:
> On 5/3/2020 3:25 PM, Jason Gunthorpe wrote:
> > On Fri, May 01, 2020 at 03:30:02PM -0700, Dey, Megha wrote:
> > > Hi Jason,
> > > 
> > > On 4/23/2020 1:11 PM, Jason Gunthorpe wrote:
> > > > On Tue, Apr 21, 2020 at 04:34:11PM -0700, Dave Jiang wrote:
> > > > > diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
> > > > > new file mode 100644
> > > > > index 000000000000..738f6d153155
> > > > > +++ b/drivers/base/ims-msi.c
> > > > > @@ -0,0 +1,100 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > +/*
> > > > > + * Support for Device Specific IMS interrupts.
> > > > > + *
> > > > > + * Copyright © 2019 Intel Corporation.
> > > > > + *
> > > > > + * Author: Megha Dey <megha.dey@intel.com>
> > > > > + */
> > > > > +
> > > > > +#include <linux/dmar.h>
> > > > > +#include <linux/irq.h>
> > > > > +#include <linux/mdev.h>
> > > > > +#include <linux/pci.h>
> > > > > +
> > > > > +/*
> > > > > + * Determine if a dev is mdev or not. Return NULL if not mdev device.
> > > > > + * Return mdev's parent dev if success.
> > > > > + */
> > > > > +static inline struct device *mdev_to_parent(struct device *dev)
> > > > > +{
> > > > > +	struct device *ret = NULL;
> > > > > +	struct device *(*fn)(struct device *dev);
> > > > > +	struct bus_type *bus = symbol_get(mdev_bus_type);
> > > > > +
> > > > > +	if (bus && dev->bus == bus) {
> > > > > +		fn = symbol_get(mdev_dev_to_parent_dev);
> > > > > +		ret = fn(dev);
> > > > > +		symbol_put(mdev_dev_to_parent_dev);
> > > > > +		symbol_put(mdev_bus_type);
> > > > 
> > > > No, things like this are not OK in the drivers/base
> > > > 
> > > > Whatever this is doing needs to be properly architected in some
> > > > generic way.
> > > 
> > > Basically what I am trying to do here is to determine if the device is an
> > > mdev device or not.
> > 
> > Why? mdev devices are virtual they don't have HW elements.
> 
> Hmm yeah exactly, since they are virtual, they do not have an associated IRQ
> domain right? So they use the irq domain of the parent device..
> 
> > 
> > The caller should use the concrete pci_device to allocate
> > platform_msi? What is preventing this?
> 
> hmmm do you mean to say all platform-msi adhere to the rules of a PCI
> device? 

I mean where a platform-msi can work should be defined by the arch,
and probably is related to things like having an irq_domain attached

So, like pci, drivers must only try to do platfor_msi stuff on
particular devices. eg on pci_device and platform_device types.

Even so it may not even work, but I can't think of any reason why it
should be made to work on a virtual device like mdev.

> The use case if when we have a device assigned to a guest and we
> want to allocate IMS(platform-msi) interrupts for that
> guest-assigned device. Currently, this is abstracted through a mdev
> interface.

And the mdev has the pci_device internally, so it should simply pass
that pci_device to the platform_msi machinery.

This is no different from something like pci_iomap() which must be
used with the pci_device.

Jason
Dey, Megha May 4, 2020, 12:11 a.m. UTC | #7
Hi Thomas,

On 4/25/2020 2:38 PM, Thomas Gleixner wrote:
> Dave Jiang <dave.jiang@intel.com> writes:
>> From: Megha Dey <megha.dey@linux.intel.com>
>>
>> Add support for the creation of a new IMS irq domain. It creates a new
>> irq chip associated with the IMS domain and adds the necessary domain
>> operations to it.
> 
> And how is a X86 specific thingy related to drivers/base?

Well, clearly this file has both arch independent sand dependent code 
which is incorrect. From various discussions, we have now concluded that 
IMS is after all not a X86 specific thingy after all. IMS is just a name 
intel came up with, all it really means is device managed addr/data 
writes to generate interrupts.

> 
>> diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
> 
> This sits in drivers base because IMS is architecture independent, right?

Per my above comment, technically we can call something IMS even if 
device has its own location to store interrupts in non-pci standard 
mechanism, much like platform-msi indeed. We simply need to extend 
platform-msi to its address some of its shortcomings: increase number of 
interrupts to > 2048, enable dynamic allocation of interrupts, add 
mask/unmask callbacks in addition to write_msg etc.

I will be sending out an email shortly outlining the new design for IMS 
and what are the improvements we want to add to the already exisitng 
platform-msi infrastructure.

> 
>> new file mode 100644
>> index 000000000000..738f6d153155
>> --- /dev/null
>> +++ b/drivers/base/ims-msi.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Support for Device Specific IMS interrupts.
>> + *
>> + * Copyright © 2019 Intel Corporation.
>> + *
>> + * Author: Megha Dey <megha.dey@intel.com>
>> + */
>> +
>> +#include <linux/dmar.h>
>> +#include <linux/irq.h>
>> +#include <linux/mdev.h>
>> +#include <linux/pci.h>
>> +
>> +/*
>> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
>> + * Return mdev's parent dev if success.
>> + */
>> +static inline struct device *mdev_to_parent(struct device *dev)
>> +{
>> +	struct device *ret = NULL;
>> +	struct device *(*fn)(struct device *dev);
>> +	struct bus_type *bus = symbol_get(mdev_bus_type);
> 
> symbol_get()?

mdev_bus_type is defined in driver/vfio/mdev/ directory. The entire 
vfio-mdev can be compiled as a module and if so, then this symbol is not 
visible outside of that directory and there are some linker errors. 
Currently, there these symbols sare self-contained and are not used 
outside of the directory where they are defined. I did not know earlier 
that is not advisible to use symbol_get() for this. I will try to come 
up with a better approach.

> 
>> +
>> +	if (bus && dev->bus == bus) {
>> +		fn = symbol_get(mdev_dev_to_parent_dev);
> 
> What's wrong with simple function calls?

Hmmm, same reason as above..
> 
>> +		ret = fn(dev);
>> +		symbol_put(mdev_dev_to_parent_dev);
>> +		symbol_put(mdev_bus_type);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static irq_hw_number_t dev_ims_get_hwirq(struct msi_domain_info *info,
>> +					 msi_alloc_info_t *arg)
>> +{
>> +	return arg->ims_hwirq;
>> +}
>> +
>> +static int dev_ims_prepare(struct irq_domain *domain, struct device *dev,
>> +			   int nvec, msi_alloc_info_t *arg)
>> +{
>> +	if (dev_is_mdev(dev))
>> +		dev = mdev_to_parent(dev);
> 
> This makes absolutely no sense. Somewhere you claimed that this is
> solely for mdev. Now this interface takes both a regular device and mdev.
> 
> Lack of explanation seems to be a common scheme here.

IMS can be used for mdev or a regular device. I do not think it is 
claimed anywhere that IMS is solely for mdev. In the current use case 
for DSA, IMS is used only by the guest (mdev) although it can very well 
be used by the host driver as well.

> 
>> +	init_irq_alloc_info(arg, NULL);
>> +	arg->dev = dev;
>> +	arg->type = X86_IRQ_ALLOC_TYPE_IMS;
>> +
>> +	return 0;
>> +}
>> +
>> +static void dev_ims_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
>> +{
>> +	arg->ims_hwirq = platform_msi_calc_hwirq(desc);
>> +}
>> +
>> +static struct msi_domain_ops dev_ims_domain_ops = {
>> +	.get_hwirq	= dev_ims_get_hwirq,
>> +	.msi_prepare	= dev_ims_prepare,
>> +	.set_desc	= dev_ims_set_desc,
>> +};
>> +
>> +static struct irq_chip dev_ims_ir_controller = {
>> +	.name			= "IR-DEV-IMS",
>> +	.irq_ack		= irq_chip_ack_parent,
>> +	.irq_retrigger		= irq_chip_retrigger_hierarchy,
>> +	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
>> +	.flags			= IRQCHIP_SKIP_SET_WAKE,
>> +	.irq_write_msi_msg	= platform_msi_write_msg,
>> +};
>> +
>> +static struct msi_domain_info ims_ir_domain_info = {
>> +	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
>> +	.ops		= &dev_ims_domain_ops,
>> +	.chip		= &dev_ims_ir_controller,
>> +	.handler	= handle_edge_irq,
>> +	.handler_name	= "edge",
>> +};
>> +
>> +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
>> +					      const char *name)
> 
> arch_create_ ???? In drivers/base ???

Needs to go away. On second thought, per Jason Gunthorpe's comment, this 
is not even required. We can simply use the existing 
platform_msi_create_irq_domain API itself.
> 
>> +{
>> +	struct fwnode_handle *fn;
>> +	struct irq_domain *domain;
>> +
>> +	fn = irq_domain_alloc_named_fwnode(name);
>> +	if (!fn)
>> +		return NULL;
>> +
>> +	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
>> +	if (!domain)
>> +		return NULL;
>> +
>> +	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
>> +	irq_domain_free_fwnode(fn);
>> +
>> +	return domain;
>> +}
>> diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
>> index 2696aa75983b..59160e8cbfb1 100644
>> --- a/drivers/base/platform-msi.c
>> +++ b/drivers/base/platform-msi.c
>> @@ -31,12 +31,11 @@ struct platform_msi_priv_data {
>>   /* The devid allocator */
>>   static DEFINE_IDA(platform_msi_devid_ida);
>>   
>> -#ifdef GENERIC_MSI_DOMAIN_OPS
>>   /*
>>    * Convert an msi_desc to a globaly unique identifier (per-device
>>    * devid + msi_desc position in the msi_list).
>>    */
>> -static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
>> +irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
>>   {
>>   	u32 devid;
>>   
>> @@ -45,6 +44,7 @@ static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
>>   	return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index;
>>   }
>>   
>> +#ifdef GENERIC_MSI_DOMAIN_OPS
>>   static void platform_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
>>   {
>>   	arg->desc = desc;
>> @@ -76,7 +76,7 @@ static void platform_msi_update_dom_ops(struct msi_domain_info *info)
>>   		ops->set_desc = platform_msi_set_desc;
>>   }
>>   
>> -static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
>> +void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
>>   {
>>   	struct msi_desc *desc = irq_data_get_msi_desc(data);
>>   	struct platform_msi_priv_data *priv_data;
>> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
>> index b558d4cfd082..cecc6a6bdbef 100644
>> --- a/drivers/vfio/mdev/mdev_core.c
>> +++ b/drivers/vfio/mdev/mdev_core.c
>> @@ -33,6 +33,12 @@ struct device *mdev_parent_dev(struct mdev_device *mdev)
>>   }
>>   EXPORT_SYMBOL(mdev_parent_dev);
>>   
>> +struct device *mdev_dev_to_parent_dev(struct device *dev)
>> +{
>> +	return to_mdev_device(dev)->parent->dev;
>> +}
>> +EXPORT_SYMBOL(mdev_dev_to_parent_dev);
> 
> And this needs to be EXPORT_SYMBOL because this is designed to support
> non GPL drivers from the very beginning, right? Ditto for the other
> exports in this file.

Hmm, I followed the same convention as the other exports here. Guess I 
would have to change all other exports to EXPORT_SYMBOL_GPL as well.

> 
>> diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
>> index 7d922950caaf..c21f1305a76b 100644
>> --- a/drivers/vfio/mdev/mdev_private.h
>> +++ b/drivers/vfio/mdev/mdev_private.h
>> @@ -36,7 +36,6 @@ struct mdev_device {
>>   };
>>   
>>   #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
>> -#define dev_is_mdev(d)		((d)->bus == &mdev_bus_type)
> 
> Moving stuff around 3 patches later makes tons of sense.

ok will add it earlier then.
>    
> Thanks,
> 
>          tglx
>
Dey, Megha May 4, 2020, 12:25 a.m. UTC | #8
On 5/3/2020 3:46 PM, Jason Gunthorpe wrote:
> On Sun, May 03, 2020 at 03:40:44PM -0700, Dey, Megha wrote:
>> On 5/3/2020 3:25 PM, Jason Gunthorpe wrote:
>>> On Fri, May 01, 2020 at 03:30:02PM -0700, Dey, Megha wrote:
>>>> Hi Jason,
>>>>
>>>> On 4/23/2020 1:11 PM, Jason Gunthorpe wrote:
>>>>> On Tue, Apr 21, 2020 at 04:34:11PM -0700, Dave Jiang wrote:
>>>>>> diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..738f6d153155
>>>>>> +++ b/drivers/base/ims-msi.c
>>>>>> @@ -0,0 +1,100 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>>>> +/*
>>>>>> + * Support for Device Specific IMS interrupts.
>>>>>> + *
>>>>>> + * Copyright © 2019 Intel Corporation.
>>>>>> + *
>>>>>> + * Author: Megha Dey <megha.dey@intel.com>
>>>>>> + */
>>>>>> +
>>>>>> +#include <linux/dmar.h>
>>>>>> +#include <linux/irq.h>
>>>>>> +#include <linux/mdev.h>
>>>>>> +#include <linux/pci.h>
>>>>>> +
>>>>>> +/*
>>>>>> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
>>>>>> + * Return mdev's parent dev if success.
>>>>>> + */
>>>>>> +static inline struct device *mdev_to_parent(struct device *dev)
>>>>>> +{
>>>>>> +	struct device *ret = NULL;
>>>>>> +	struct device *(*fn)(struct device *dev);
>>>>>> +	struct bus_type *bus = symbol_get(mdev_bus_type);
>>>>>> +
>>>>>> +	if (bus && dev->bus == bus) {
>>>>>> +		fn = symbol_get(mdev_dev_to_parent_dev);
>>>>>> +		ret = fn(dev);
>>>>>> +		symbol_put(mdev_dev_to_parent_dev);
>>>>>> +		symbol_put(mdev_bus_type);
>>>>>
>>>>> No, things like this are not OK in the drivers/base
>>>>>
>>>>> Whatever this is doing needs to be properly architected in some
>>>>> generic way.
>>>>
>>>> Basically what I am trying to do here is to determine if the device is an
>>>> mdev device or not.
>>>
>>> Why? mdev devices are virtual they don't have HW elements.
>>
>> Hmm yeah exactly, since they are virtual, they do not have an associated IRQ
>> domain right? So they use the irq domain of the parent device..
>>
>>>
>>> The caller should use the concrete pci_device to allocate
>>> platform_msi? What is preventing this?
>>
>> hmmm do you mean to say all platform-msi adhere to the rules of a PCI
>> device?
> 
> I mean where a platform-msi can work should be defined by the arch,
> and probably is related to things like having an irq_domain attached
> 
> So, like pci, drivers must only try to do platfor_msi stuff on
> particular devices. eg on pci_device and platform_device types.
> 
> Even so it may not even work, but I can't think of any reason why it
> should be made to work on a virtual device like mdev.
> 
>> The use case if when we have a device assigned to a guest and we
>> want to allocate IMS(platform-msi) interrupts for that
>> guest-assigned device. Currently, this is abstracted through a mdev
>> interface.
> 
> And the mdev has the pci_device internally, so it should simply pass
> that pci_device to the platform_msi machinery.

hmm i am not sure I follow this. mdev has a pci_device internally? which 
struct are you referring to here?

mdev is merely a micropartitioned PCI device right, which no real PCI 
resource backing. I am not how else we can find the IRQ domain 
associated with an mdev..

> 
> This is no different from something like pci_iomap() which must be
> used with the pci_device.
> 
> Jason
>
Jason Gunthorpe May 4, 2020, 12:14 p.m. UTC | #9
On Sun, May 03, 2020 at 05:25:28PM -0700, Dey, Megha wrote:
> > > The use case if when we have a device assigned to a guest and we
> > > want to allocate IMS(platform-msi) interrupts for that
> > > guest-assigned device. Currently, this is abstracted through a mdev
> > > interface.
> > 
> > And the mdev has the pci_device internally, so it should simply pass
> > that pci_device to the platform_msi machinery.
> 
> hmm i am not sure I follow this. mdev has a pci_device internally? which
> struct are you referring to here?

mdev in general may not, but any ADI trying to use mdev will
necessarily have access to a struct pci_device.

> mdev is merely a micropartitioned PCI device right, which no real PCI
> resource backing. I am not how else we can find the IRQ domain associated
> with an mdev..

ADI always has real PCI resource backing.

Jason
Tian, Kevin May 6, 2020, 10:27 a.m. UTC | #10
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Monday, May 4, 2020 8:14 PM
> 
> On Sun, May 03, 2020 at 05:25:28PM -0700, Dey, Megha wrote:
> > > > The use case if when we have a device assigned to a guest and we
> > > > want to allocate IMS(platform-msi) interrupts for that
> > > > guest-assigned device. Currently, this is abstracted through a mdev
> > > > interface.
> > >
> > > And the mdev has the pci_device internally, so it should simply pass
> > > that pci_device to the platform_msi machinery.
> >
> > hmm i am not sure I follow this. mdev has a pci_device internally? which
> > struct are you referring to here?
> 
> mdev in general may not, but any ADI trying to use mdev will
> necessarily have access to a struct pci_device.

Agree here. Mdev is just driver internal concept. It doesn't make sense to
expose it in driver/base, just like how we avoided exposing mdev in iommu
layer.

Megha, every mdev/ADI has a parent device, which is the struct pci_device
that Jason refers to. In irq domain level, it only needs to care about the
PCI device and related IMS management. It doesn't matter whether the
allocated IMS entry is used for a mdev or by parent driver itself.

Thanks
Kevin
diff mbox series

Patch

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4154bc5f6a4e..2e355aa6ba50 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -62,6 +62,7 @@  enum irq_alloc_type {
 	X86_IRQ_ALLOC_TYPE_MSIX,
 	X86_IRQ_ALLOC_TYPE_DMAR,
 	X86_IRQ_ALLOC_TYPE_UV,
+	X86_IRQ_ALLOC_TYPE_IMS,
 };
 
 struct irq_alloc_info {
@@ -83,6 +84,12 @@  struct irq_alloc_info {
 			irq_hw_number_t	msi_hwirq;
 		};
 #endif
+#ifdef	CONFIG_MSI_IMS
+		struct {
+			struct device	*dev;
+			irq_hw_number_t	ims_hwirq;
+		};
+#endif
 #ifdef	CONFIG_X86_IO_APIC
 		struct {
 			int		ioapic_id;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 5f0bc74d2409..877e0fdee013 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -209,4 +209,13 @@  config GENERIC_ARCH_TOPOLOGY
 	  appropriate scaling, sysfs interface for reading capacity values at
 	  runtime.
 
+config MSI_IMS
+	bool "Device Specific Interrupt Message Storage (IMS)"
+	depends on X86
+	select GENERIC_MSI_IRQ_DOMAIN
+	select IRQ_REMAP
+	help
+	  This allows device drivers to enable device specific
+	  interrupt message storage (IMS) besides standard MSI-X interrupts.
+
 endmenu
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 157452080f3d..659b9b0c0b8a 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -22,6 +22,7 @@  obj-$(CONFIG_SOC_BUS) += soc.o
 obj-$(CONFIG_PINCTRL) += pinctrl.o
 obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
 obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o
+obj-$(CONFIG_MSI_IMS) += ims-msi.o
 obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o
 
 obj-y			+= test/
diff --git a/drivers/base/ims-msi.c b/drivers/base/ims-msi.c
new file mode 100644
index 000000000000..738f6d153155
--- /dev/null
+++ b/drivers/base/ims-msi.c
@@ -0,0 +1,100 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support for Device Specific IMS interrupts.
+ *
+ * Copyright © 2019 Intel Corporation.
+ *
+ * Author: Megha Dey <megha.dey@intel.com>
+ */
+
+#include <linux/dmar.h>
+#include <linux/irq.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+
+/*
+ * Determine if a dev is mdev or not. Return NULL if not mdev device.
+ * Return mdev's parent dev if success.
+ */
+static inline struct device *mdev_to_parent(struct device *dev)
+{
+	struct device *ret = NULL;
+	struct device *(*fn)(struct device *dev);
+	struct bus_type *bus = symbol_get(mdev_bus_type);
+
+	if (bus && dev->bus == bus) {
+		fn = symbol_get(mdev_dev_to_parent_dev);
+		ret = fn(dev);
+		symbol_put(mdev_dev_to_parent_dev);
+		symbol_put(mdev_bus_type);
+	}
+
+	return ret;
+}
+
+static irq_hw_number_t dev_ims_get_hwirq(struct msi_domain_info *info,
+					 msi_alloc_info_t *arg)
+{
+	return arg->ims_hwirq;
+}
+
+static int dev_ims_prepare(struct irq_domain *domain, struct device *dev,
+			   int nvec, msi_alloc_info_t *arg)
+{
+	if (dev_is_mdev(dev))
+		dev = mdev_to_parent(dev);
+
+	init_irq_alloc_info(arg, NULL);
+	arg->dev = dev;
+	arg->type = X86_IRQ_ALLOC_TYPE_IMS;
+
+	return 0;
+}
+
+static void dev_ims_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+{
+	arg->ims_hwirq = platform_msi_calc_hwirq(desc);
+}
+
+static struct msi_domain_ops dev_ims_domain_ops = {
+	.get_hwirq	= dev_ims_get_hwirq,
+	.msi_prepare	= dev_ims_prepare,
+	.set_desc	= dev_ims_set_desc,
+};
+
+static struct irq_chip dev_ims_ir_controller = {
+	.name			= "IR-DEV-IMS",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+	.irq_write_msi_msg	= platform_msi_write_msg,
+};
+
+static struct msi_domain_info ims_ir_domain_info = {
+	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
+	.ops		= &dev_ims_domain_ops,
+	.chip		= &dev_ims_ir_controller,
+	.handler	= handle_edge_irq,
+	.handler_name	= "edge",
+};
+
+struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent,
+					      const char *name)
+{
+	struct fwnode_handle *fn;
+	struct irq_domain *domain;
+
+	fn = irq_domain_alloc_named_fwnode(name);
+	if (!fn)
+		return NULL;
+
+	domain = msi_create_irq_domain(fn, &ims_ir_domain_info, parent);
+	if (!domain)
+		return NULL;
+
+	irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
+	irq_domain_free_fwnode(fn);
+
+	return domain;
+}
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 2696aa75983b..59160e8cbfb1 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -31,12 +31,11 @@  struct platform_msi_priv_data {
 /* The devid allocator */
 static DEFINE_IDA(platform_msi_devid_ida);
 
-#ifdef GENERIC_MSI_DOMAIN_OPS
 /*
  * Convert an msi_desc to a globaly unique identifier (per-device
  * devid + msi_desc position in the msi_list).
  */
-static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
+irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
 {
 	u32 devid;
 
@@ -45,6 +44,7 @@  static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc)
 	return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index;
 }
 
+#ifdef GENERIC_MSI_DOMAIN_OPS
 static void platform_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
 {
 	arg->desc = desc;
@@ -76,7 +76,7 @@  static void platform_msi_update_dom_ops(struct msi_domain_info *info)
 		ops->set_desc = platform_msi_set_desc;
 }
 
-static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
+void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
 {
 	struct msi_desc *desc = irq_data_get_msi_desc(data);
 	struct platform_msi_priv_data *priv_data;
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index b558d4cfd082..cecc6a6bdbef 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -33,6 +33,12 @@  struct device *mdev_parent_dev(struct mdev_device *mdev)
 }
 EXPORT_SYMBOL(mdev_parent_dev);
 
+struct device *mdev_dev_to_parent_dev(struct device *dev)
+{
+	return to_mdev_device(dev)->parent->dev;
+}
+EXPORT_SYMBOL(mdev_dev_to_parent_dev);
+
 void *mdev_get_drvdata(struct mdev_device *mdev)
 {
 	return mdev->driver_data;
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 7d922950caaf..c21f1305a76b 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -36,7 +36,6 @@  struct mdev_device {
 };
 
 #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
-#define dev_is_mdev(d)		((d)->bus == &mdev_bus_type)
 
 struct mdev_type {
 	struct kobject kobj;
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 0ce30ca78db0..fa2344e239ef 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -144,5 +144,8 @@  void mdev_unregister_driver(struct mdev_driver *drv);
 struct device *mdev_parent_dev(struct mdev_device *mdev);
 struct device *mdev_dev(struct mdev_device *mdev);
 struct mdev_device *mdev_from_dev(struct device *dev);
+struct device *mdev_dev_to_parent_dev(struct device *dev);
+
+#define dev_is_mdev(dev) ((dev)->bus == symbol_get(mdev_bus_type))
 
 #endif /* MDEV_H */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 3890b143b04d..80386468a7bc 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -418,6 +418,8 @@  int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
 void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nvec);
 void *platform_msi_get_host_data(struct irq_domain *domain);
+irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc);
+void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg);
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
 
 #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN