diff mbox

[V3,00/22] genirq, PCI/MSI: Support for per device MSI and PCI/IMS - Part 2 API rework

Message ID 20221124225331.464480443@linutronix.de (mailing list archive)
State Handled Elsewhere
Headers show

Commit Message

Thomas Gleixner Nov. 24, 2022, 11:24 p.m. UTC
This is V3 of the second part of the effort to provide support for per
device MSI interrupt domains.

Version 2 of this second part can be found here:

  https://lore.kernel.org/all/20221121083210.309161925@linutronix.de

The first part is available here:

  https://lore.kernel.org/all/20221111120501.026511281@linutronix.de

and has been merged into:

 git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core

This part on top of the tip irq/core branch is also available here:

   git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git devmsi-v3-part2

This part is the main preparation step for per device MSI domains:

  1) Introduce irqdomain pointer storage space in device::msi::data
     and related helpers.

  2) Convert interfaces to handle multiple per device MSI domains
     based on a domain ID

  3) Provide new interfaces for allocation/free which are domain ID
     based and provide range allocation/free which is a prerequisite
     for post MSI-X enable alloc/free.

  4) Switch all existing call sites of msi allocation/free interfaces
     over to the new interfaces

  5) Remove the old interfaces

Changes vs. V2:

  - Rework the xarray storage into per domain storage (Marc)

@Kevin: I did not pick up your Reviewed-by tag due to these changes.

The delta patch vs. V2 is attached below.

Thanks,

	tglx
---

Comments

Tian, Kevin Nov. 28, 2022, 2:33 a.m. UTC | #1
> From: Thomas Gleixner <tglx@linutronix.de>
> Sent: Friday, November 25, 2022 7:24 AM
> 
> Changes vs. V2:
> 
>   - Rework the xarray storage into per domain storage (Marc)
> 
> @Kevin: I did not pick up your Reviewed-by tag due to these changes.
> 

The incremental change looks good to me:

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

Just one nit.

> 
> -	xa_for_each_range(xa, idx, desc, ctrl->first + base, ctrl->last + base) {
> +	xa = &dev->msi.data->__domains[ctrl->domid].store;

Better to have a macro here, given the multiple references to the store
in the following lines.
Marc Zyngier Dec. 5, 2022, 11:04 a.m. UTC | #2
On Thu, 24 Nov 2022 23:24:07 +0000,
Thomas Gleixner <tglx@linutronix.de> wrote:
> 
> This is V3 of the second part of the effort to provide support for per
> device MSI interrupt domains.
> 
> Version 2 of this second part can be found here:
> 
>   https://lore.kernel.org/all/20221121083210.309161925@linutronix.de

Bandwidth is lacking to review such a series (let alone 3) in detail,
but the direction of travel is the right one (per-device, per-bus MSI
domains are the natural way to deal with resources managed at the
device level).

I'm sure we'll find issues along the way, but this code is better in
the kernel than outside, so:

Acked-by: Marc Zyngier <maz@kernel.org>

	M.
diff mbox

Patch

diff --git a/include/linux/msi.h b/include/linux/msi.h
index af9437ee4d82..43b8866c8431 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -174,24 +174,31 @@  enum msi_desc_filter {
 	MSI_DESC_ASSOCIATED,
 };
 
+
+/**
+ * struct msi_dev_domain - The internals of MSI domain info per device
+ * @store:		Xarray for storing MSI descriptor pointers
+ * @domain:		Pointer to a per device interrupt domain
+ */
+struct msi_dev_domain {
+	struct xarray		store;
+	struct irq_domain	*domain;
+};
+
 /**
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
  * @platform_data:	Platform-MSI specific data
  * @mutex:		Mutex protecting the MSI descriptor store
- * @__store:		Xarray for storing MSI descriptor pointers
+ * @__domains:		Internal data for per device MSI domains
  * @__iter_idx:		Index to search the next entry for iterators
- * @__iter_max:		Index to limit the search
- * @__irqdomains:	Per device interrupt domains
  */
 struct msi_device_data {
 	unsigned long			properties;
 	struct platform_msi_priv_data	*platform_data;
 	struct mutex			mutex;
-	struct xarray			__store;
+	struct msi_dev_domain		__domains[MSI_MAX_DEVICE_IRQDOMAINS];
 	unsigned long			__iter_idx;
-	unsigned long			__iter_max;
-	struct irq_domain		*__irqdomains[MSI_MAX_DEVICE_IRQDOMAINS];
 };
 
 int msi_setup_device_data(struct device *dev);
@@ -219,7 +226,8 @@  static inline struct msi_desc *msi_first_desc(struct device *dev,
 	return msi_domain_first_desc(dev, MSI_DEFAULT_DOMAIN, filter);
 }
 
-struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
+struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
+			       enum msi_desc_filter filter);
 
 /**
  * msi_domain_for_each_desc - Iterate the MSI descriptors in a specific domain
@@ -236,7 +244,7 @@  struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
  */
 #define msi_domain_for_each_desc(desc, dev, domid, filter)			\
 	for ((desc) = msi_domain_first_desc((dev), (domid), (filter)); (desc);	\
-	     (desc) = msi_next_desc((dev), (filter)))
+	     (desc) = msi_next_desc((dev), (domid), (filter)))
 
 /**
  * msi_for_each_desc - Iterate the MSI descriptors in the default irqdomain
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index c98c07ae46c9..695801b3cb68 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -34,37 +34,13 @@  struct msi_ctrl {
 	unsigned int			nirqs;
 };
 
-static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
-static inline int msi_sysfs_create_group(struct device *dev);
-
-/* Invalid XA index which is outside of any searchable range */
+/* Invalid Xarray index which is outside of any searchable range */
 #define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
+/* The maximum domain size */
 #define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)
 
-static inline void msi_setup_default_irqdomain(struct device *dev, struct msi_device_data *md)
-{
-	if (!dev->msi.domain)
-		return;
-	/*
-	 * If @dev::msi::domain is a global MSI domain, copy the pointer
-	 * into the domain array to avoid conditionals all over the place.
-	 */
-	if (!irq_domain_is_msi_parent(dev->msi.domain))
-		md->__irqdomains[MSI_DEFAULT_DOMAIN] = dev->msi.domain;
-}
-
-static int msi_get_domain_base_index(struct device *dev, unsigned int domid)
-{
-	lockdep_assert_held(&dev->msi.data->mutex);
-
-	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
-		return -ENODEV;
-
-	if (WARN_ON_ONCE(!dev->msi.data->__irqdomains[domid]))
-		return -ENODEV;
-
-	return domid * MSI_XA_DOMAIN_SIZE;
-}
+static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
+static inline int msi_sysfs_create_group(struct device *dev);
 
 
 /**
@@ -104,27 +80,16 @@  static void msi_free_desc(struct msi_desc *desc)
 	kfree(desc);
 }
 
-static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
+static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc,
 			   unsigned int domid, unsigned int index)
 {
-	struct msi_device_data *md = dev->msi.data;
-	int baseidx, ret;
-
-	baseidx = msi_get_domain_base_index(dev, domid);
-	if (baseidx < 0) {
-		ret = baseidx;
-		goto fail;
-	}
+	struct xarray *xa = &md->__domains[domid].store;
+	int ret;
 
 	desc->msi_index = index;
-	index += baseidx;
-	ret = xa_insert(&md->__store, index, desc, GFP_KERNEL);
+	ret = xa_insert(xa, index, desc, GFP_KERNEL);
 	if (ret)
-		goto fail;
-	return 0;
-
-fail:
-	msi_free_desc(desc);
+		msi_free_desc(desc);
 	return ret;
 }
 
@@ -152,7 +117,7 @@  int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
 	/* Copy type specific data to the new descriptor. */
 	desc->pci = init_desc->pci;
 
-	return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
+	return msi_insert_desc(dev->msi.data, desc, domid, init_desc->msi_index);
 }
 
 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
@@ -169,9 +134,11 @@  static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
 	return false;
 }
 
-static bool msi_ctrl_range_valid(struct device *dev, struct msi_ctrl *ctrl)
+static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
 {
-	if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
+	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
+			 !dev->msi.data->__domains[ctrl->domid].domain ||
+			 ctrl->first > ctrl->last ||
 			 ctrl->first > MSI_MAX_INDEX ||
 			 ctrl->last > MSI_MAX_INDEX))
 		return false;
@@ -180,21 +147,17 @@  static bool msi_ctrl_range_valid(struct device *dev, struct msi_ctrl *ctrl)
 
 static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
 {
-	struct xarray *xa = &dev->msi.data->__store;
 	struct msi_desc *desc;
+	struct xarray *xa;
 	unsigned long idx;
-	int base;
 
 	lockdep_assert_held(&dev->msi.data->mutex);
 
-	if (!msi_ctrl_range_valid(dev, ctrl))
-		return;
-
-	base = msi_get_domain_base_index(dev, ctrl->domid);
-	if (base < 0)
+	if (!msi_ctrl_valid(dev, ctrl))
 		return;
 
-	xa_for_each_range(xa, idx, desc, ctrl->first + base, ctrl->last + base) {
+	xa = &dev->msi.data->__domains[ctrl->domid].store;
+	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
 		xa_erase(xa, idx);
 
 		/* Leak the descriptor when it is still referenced */
@@ -234,22 +197,18 @@  static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *
 {
 	struct msi_desc *desc;
 	unsigned int idx;
-	int ret, baseidx;
+	int ret;
 
 	lockdep_assert_held(&dev->msi.data->mutex);
 
-	if (!msi_ctrl_range_valid(dev, ctrl))
+	if (!msi_ctrl_valid(dev, ctrl))
 		return -EINVAL;
 
-	baseidx = msi_get_domain_base_index(dev, ctrl->domid);
-	if (baseidx < 0)
-		return baseidx;
-
 	for (idx = ctrl->first; idx <= ctrl->last; idx++) {
 		desc = msi_alloc_desc(dev, 1, NULL);
 		if (!desc)
 			goto fail_mem;
-		ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
+		ret = msi_insert_desc(dev->msi.data, desc, ctrl->domid, idx);
 		if (ret)
 			goto fail;
 	}
@@ -278,9 +237,12 @@  EXPORT_SYMBOL_GPL(get_cached_msi_msg);
 static void msi_device_data_release(struct device *dev, void *res)
 {
 	struct msi_device_data *md = res;
+	int i;
 
-	WARN_ON_ONCE(!xa_empty(&md->__store));
-	xa_destroy(&md->__store);
+	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
+		WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
+		xa_destroy(&md->__domains[i].store);
+	}
 	dev->msi.data = NULL;
 }
 
@@ -297,7 +259,7 @@  static void msi_device_data_release(struct device *dev, void *res)
 int msi_setup_device_data(struct device *dev)
 {
 	struct msi_device_data *md;
-	int ret;
+	int ret, i;
 
 	if (dev->msi.data)
 		return 0;
@@ -312,11 +274,19 @@  int msi_setup_device_data(struct device *dev)
 		return ret;
 	}
 
-	msi_setup_default_irqdomain(dev, md);
+	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
+		xa_init(&md->__domains[i].store);
+
+	/*
+	 * If @dev::msi::domain is set and is a global MSI domain, copy the
+	 * pointer into the domain array so all code can operate on domain
+	 * ids. The NULL pointer check is required to keep the legacy
+	 * architecture specific PCI/MSI support working.
+	 */
+	if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
+		md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
 
-	xa_init(&md->__store);
 	mutex_init(&md->mutex);
-	md->__iter_idx = MSI_XA_MAX_INDEX;
 	dev->msi.data = md;
 	devres_add(dev, md);
 	return 0;
@@ -338,17 +308,19 @@  EXPORT_SYMBOL_GPL(msi_lock_descs);
  */
 void msi_unlock_descs(struct device *dev)
 {
-	/* Invalidate the index wich was cached by the iterator */
+	/* Invalidate the index which was cached by the iterator */
 	dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
 	mutex_unlock(&dev->msi.data->mutex);
 }
 EXPORT_SYMBOL_GPL(msi_unlock_descs);
 
-static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter)
+static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
+				      enum msi_desc_filter filter)
 {
+	struct xarray *xa = &md->__domains[domid].store;
 	struct msi_desc *desc;
 
-	xa_for_each_range(&md->__store, md->__iter_idx, desc, md->__iter_idx, md->__iter_max) {
+	xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
 		if (msi_desc_match(desc, filter))
 			return desc;
 	}
@@ -372,26 +344,21 @@  struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
 				       enum msi_desc_filter filter)
 {
 	struct msi_device_data *md = dev->msi.data;
-	int baseidx;
 
-	if (WARN_ON_ONCE(!md))
+	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
 		return NULL;
 
 	lockdep_assert_held(&md->mutex);
 
-	baseidx = msi_get_domain_base_index(dev, domid);
-	if (baseidx < 0)
-		return NULL;
-
-	md->__iter_idx = baseidx;
-	md->__iter_max = baseidx + MSI_MAX_INDEX;
-	return msi_find_desc(md, filter);
+	md->__iter_idx = 0;
+	return msi_find_desc(md, domid, filter);
 }
 EXPORT_SYMBOL_GPL(msi_domain_first_desc);
 
 /**
  * msi_next_desc - Get the next MSI descriptor of a device
  * @dev:	Device to operate on
+ * @domid:	The id of the interrupt domain which should be walked.
  * @filter:	Descriptor state filter
  *
  * The first invocation of msi_next_desc() has to be preceeded by a
@@ -402,20 +369,21 @@  EXPORT_SYMBOL_GPL(msi_domain_first_desc);
  * Return: Pointer to the next MSI descriptor matching the search
  *	   criteria, NULL if none found.
  */
-struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter)
+struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
+			       enum msi_desc_filter filter)
 {
 	struct msi_device_data *md = dev->msi.data;
 
-	if (WARN_ON_ONCE(!md))
+	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
 		return NULL;
 
 	lockdep_assert_held(&md->mutex);
 
-	if (md->__iter_idx >= md->__iter_max)
+	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
 		return NULL;
 
 	md->__iter_idx++;
-	return msi_find_desc(md, filter);
+	return msi_find_desc(md, domid, filter);
 }
 EXPORT_SYMBOL_GPL(msi_next_desc);
 
@@ -432,12 +400,12 @@  unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
 	struct msi_desc *desc;
 	unsigned int ret = 0;
 	bool pcimsi = false;
-	int base;
+	struct xarray *xa;
 
 	if (!dev->msi.data)
 		return 0;
 
-	if (WARN_ON_ONCE(index > MSI_MAX_INDEX))
+	if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
 		return 0;
 
 	/* This check is only valid for the PCI default MSI domain */
@@ -445,13 +413,8 @@  unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
 		pcimsi = to_pci_dev(dev)->msi_enabled;
 
 	msi_lock_descs(dev);
-
-	base = msi_get_domain_base_index(dev, domid);
-	if (base < 0)
-		goto unlock;
-
-	base += pcimsi ? 0 : index;
-	desc = xa_load(&dev->msi.data->__store, base);
+	xa = &dev->msi.data->__domains[domid].store;
+	desc = xa_load(xa, pcimsi ? 0 : index);
 	if (desc && desc->irq) {
 		/*
 		 * PCI-MSI has only one descriptor for multiple interrupts.
@@ -466,7 +429,6 @@  unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
 		}
 	}
 
-unlock:
 	msi_unlock_descs(dev);
 	return ret;
 }
@@ -595,7 +557,7 @@  static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int
 	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
 		return NULL;
 
-	domain = dev->msi.data->__irqdomains[domid];
+	domain = dev->msi.data->__domains[domid].domain;
 	if (!domain)
 		return NULL;
 
@@ -846,9 +808,10 @@  int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 		.last	= virq_base + nvec - 1,
 	};
 	struct msi_desc *desc;
+	struct xarray *xa;
 	int ret, virq;
 
-	if (!msi_ctrl_range_valid(dev, &ctrl))
+	if (!msi_ctrl_valid(dev, &ctrl))
 		return -EINVAL;
 
 	msi_lock_descs(dev);
@@ -856,8 +819,10 @@  int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 	if (ret)
 		goto unlock;
 
+	xa = &dev->msi.data->__domains[ctrl.domid].store;
+
 	for (virq = virq_base; virq < virq_base + nvec; virq++) {
-		desc = xa_load(&dev->msi.data->__store, virq);
+		desc = xa_load(xa, virq);
 		desc->irq = virq;
 
 		ops->set_desc(arg, desc);
@@ -987,8 +952,8 @@  static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
 				   struct msi_ctrl *ctrl)
 {
+	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
 	struct msi_domain_info *info = domain->host_data;
-	struct xarray *xa = &dev->msi.data->__store;
 	struct msi_domain_ops *ops = info->ops;
 	unsigned int vflags = 0, allocated = 0;
 	msi_alloc_info_t arg = { };
@@ -1074,7 +1039,7 @@  static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
 	struct irq_domain *domain;
 	int ret;
 
-	if (!msi_ctrl_range_valid(dev, ctrl))
+	if (!msi_ctrl_valid(dev, ctrl))
 		return -EINVAL;
 
 	domain = msi_get_device_domain(dev, ctrl->domid);
@@ -1180,16 +1145,14 @@  int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int
 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
 				   struct msi_ctrl *ctrl)
 {
+	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
 	struct msi_domain_info *info = domain->host_data;
-	struct xarray *xa = &dev->msi.data->__store;
 	struct irq_data *irqd;
 	struct msi_desc *desc;
 	unsigned long idx;
-	int i, base;
-
-	base = ctrl->domid * MSI_XA_DOMAIN_SIZE;
+	int i;
 
-	xa_for_each_range(xa, idx, desc, ctrl->first + base, ctrl->last + base) {
+	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
 		/* Only handle MSI entries which have an interrupt associated */
 		if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
 			continue;
@@ -1214,7 +1177,7 @@  static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
 	struct msi_domain_ops *ops;
 	struct irq_domain *domain;
 
-	if (!msi_ctrl_range_valid(dev, ctrl))
+	if (!msi_ctrl_valid(dev, ctrl))
 		return;
 
 	domain = msi_get_device_domain(dev, ctrl->domid);