diff mbox series

[03/32] mlx5: Convert mkey_table to XArray

Message ID 20190221002107.22625-4-willy@infradead.org (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series Convert the Infiniband subsystem to XArray | expand

Commit Message

Matthew Wilcox Feb. 21, 2019, 12:20 a.m. UTC
The lock protecting the data structure does not need to be an rwlock.
The only read access to the lock is in an error path, and if that's
limiting your scalability, you have bigger performance problems.

Eliminate mlx5_mkey_table in favour of using the xarray directly.
Continue to use GFP_ATOMIC for allocating XArray nodes as we may be
called in interrupt context.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 drivers/infiniband/hw/mlx5/cq.c |  4 ++--
 drivers/infiniband/hw/mlx5/mr.c | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

Comments

Jason Gunthorpe Feb. 21, 2019, 6:05 p.m. UTC | #1
On Wed, Feb 20, 2019 at 04:20:38PM -0800, Matthew Wilcox wrote:
> The lock protecting the data structure does not need to be an rwlock.
> The only read access to the lock is in an error path, and if that's
> limiting your scalability, you have bigger performance problems.
> 
> Eliminate mlx5_mkey_table in favour of using the xarray directly.
> Continue to use GFP_ATOMIC for allocating XArray nodes as we may be
> called in interrupt context.
> 
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
>  drivers/infiniband/hw/mlx5/cq.c |  4 ++--
>  drivers/infiniband/hw/mlx5/mr.c | 10 +++++-----
>  2 files changed, 7 insertions(+), 7 deletions(-)

This patch doesn't compile..

drivers/infiniband/hw/mlx5/cq.c: In function ‘mlx5_poll_one’:
./include/linux/xarray.h:444:39: error: ‘struct mlx5_mkey_table’ has no member named ‘xa_lock’; did you mean ‘lock’?
 #define xa_lock(xa)  spin_lock(&(xa)->xa_lock)
                                       ^~~~~~~
drivers/infiniband/hw/mlx5/cq.c:525:3: note: in expansion of macro ‘xa_lock’
   xa_lock(&dev->mdev->priv.mkey_table);
   ^~~~~~~
./include/linux/xarray.h:445:43: error: ‘struct mlx5_mkey_table’ has no member named ‘xa_lock’; did you mean ‘lock’?
 #define xa_unlock(xa)  spin_unlock(&(xa)->xa_lock)
                                           ^~~~~~~
drivers/infiniband/hw/mlx5/cq.c:540:3: note: in expansion of macro ‘xa_unlock’
   xa_unlock(&dev->mdev->priv.mkey_table);
   ^~~~~~~~~
scripts/Makefile.build:276: recipe for target 'drivers/infiniband/hw/mlx5/cq.o' failed
make[4]: *** [drivers/infiniband/hw/mlx5/cq.o] Error 1
make[4]: *** Waiting for unfinished jobs....
drivers/infiniband/hw/mlx5/mr.c: In function ‘reg_mr_callback’:
drivers/infiniband/hw/mlx5/mr.c:133:25: error: initialization of ‘struct xarray *’ from incompatible pointer type ‘struct mlx5_mkey_table *’ [-Werror=incompatible-pointer-types]
  struct xarray *mkeys = &dev->mdev->priv.mkey_table;
                         ^
cc1: some warnings being treated as errors

The change to the header file is missing. I think we will find other
readers than just MLX5_CQE_SIG_ERR when the patch is made complete.

Jason
Matthew Wilcox Feb. 21, 2019, 6:17 p.m. UTC | #2
On Wed, Feb 20, 2019 at 04:20:38PM -0800, Matthew Wilcox wrote:
[patch affected by bad tooling]


From 6bb1a6a443b9738db6af8ce416590d034fb2f4e0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 24 Oct 2018 13:40:19 -0400
Subject: [PATCH 03/32] mlx5: Convert mkey_table to XArray
To: linux-rdma@vger.kernel.org

The lock protecting the data structure does not need to be an rwlock.
The only read access to the lock is in an error path, and if that's
limiting your scalability, you have bigger performance problems.

Eliminate mlx5_mkey_table in favour of using the xarray directly.
Continue to use GFP_ATOMIC for allocating XArray nodes as we may be
called in interrupt context.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 drivers/infiniband/hw/mlx5/cq.c              |  4 +--
 drivers/infiniband/hw/mlx5/mr.c              | 10 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 26 ++++++++------------
 include/linux/mlx5/driver.h                  | 13 ++--------
 include/linux/mlx5/qp.h                      |  2 +-
 5 files changed, 20 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 90f1b0bae5b5..3326e07ab7ae 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -522,7 +522,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	case MLX5_CQE_SIG_ERR:
 		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
 
-		read_lock(&dev->mdev->priv.mkey_table.lock);
+		xa_lock(&dev->mdev->priv.mkey_table);
 		mmkey = __mlx5_mr_lookup(dev->mdev,
 					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
 		mr = to_mibmr(mmkey);
@@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 			     mr->sig->err_item.expected,
 			     mr->sig->err_item.actual);
 
-		read_unlock(&dev->mdev->priv.mkey_table.lock);
+		xa_unlock(&dev->mdev->priv.mkey_table);
 		goto repoll;
 	}
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index fd6ea1f75085..65cf11080bbd 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -132,7 +132,7 @@ static void reg_mr_callback(int status, void *context)
 	struct mlx5_cache_ent *ent = &cache->ent[c];
 	u8 key;
 	unsigned long flags;
-	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+	struct xarray *mkeys = &dev->mdev->priv.mkey_table;
 	int err;
 
 	spin_lock_irqsave(&ent->lock, flags);
@@ -160,12 +160,12 @@ static void reg_mr_callback(int status, void *context)
 	ent->size++;
 	spin_unlock_irqrestore(&ent->lock, flags);
 
-	write_lock_irqsave(&table->lock, flags);
-	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
-				&mr->mmkey);
+	xa_lock_irqsave(mkeys, flags);
+	err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+				&mr->mmkey, GFP_ATOMIC));
 	if (err)
 		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
-	write_unlock_irqrestore(&table->lock, flags);
+	xa_unlock_irqrestore(mkeys, flags);
 
 	if (!completion_done(&ent->compl))
 		complete(&ent->compl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 0670165afd5f..e237759f70ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -38,11 +38,7 @@
 
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
 {
-	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-
-	memset(table, 0, sizeof(*table));
-	rwlock_init(&table->lock);
-	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+	xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ);
 }
 
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
@@ -55,7 +51,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 			     u32 *out, int outlen,
 			     mlx5_cmd_cbk_t callback, void *context)
 {
-	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+	struct xarray *mkeys = &dev->priv.mkey_table;
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
 	u32 mkey_index;
 	void *mkc;
@@ -87,12 +83,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
 		      mkey_index, key, mkey->key);
 
-	/* connect to mkey tree */
-	write_lock_irq(&table->lock);
-	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
-	write_unlock_irq(&table->lock);
+	err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey,
+				GFP_KERNEL));
 	if (err) {
-		mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+		mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n",
 			       mlx5_base_mkey(mkey->key), err);
 		mlx5_core_destroy_mkey(dev, mkey);
 	}
@@ -113,17 +107,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey)
 {
-	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+	struct xarray *mkeys = &dev->priv.mkey_table;
 	u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
 	struct mlx5_core_mkey *deleted_mkey;
 	unsigned long flags;
 
-	write_lock_irqsave(&table->lock, flags);
-	deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
-	write_unlock_irqrestore(&table->lock, flags);
+	xa_lock_irqsave(mkeys, flags);
+	deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+	xa_unlock_irqrestore(mkeys, flags);
 	if (!deleted_mkey) {
-		mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+		mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
 			      mlx5_base_mkey(mkey->key));
 		return -ENOENT;
 	}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 54299251d40d..0d332b5a782d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -41,7 +41,7 @@
 #include <linux/semaphore.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/workqueue.h>
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
@@ -449,13 +449,6 @@ struct mlx5_qp_table {
 	struct radix_tree_root	tree;
 };
 
-struct mlx5_mkey_table {
-	/* protect radix tree
-	 */
-	rwlock_t		lock;
-	struct radix_tree_root	tree;
-};
-
 struct mlx5_vf_context {
 	int	enabled;
 	u64	port_guid;
@@ -533,9 +526,7 @@ struct mlx5_priv {
 	struct dentry	       *cmdif_debugfs;
 	/* end: qp staff */
 
-	/* start: mkey staff */
-	struct mlx5_mkey_table	mkey_table;
-	/* end: mkey staff */
+	struct xarray		mkey_table;
 
 	/* start: alloc staff */
 	/* protect buffer alocation according to numa node */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index b26ea9077384..5b3fb2642d0c 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -552,7 +552,7 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
 
 static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
 {
-	return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
+	return xa_load(&dev->priv.mkey_table, key);
 }
 
 int mlx5_core_create_dct(struct mlx5_core_dev *dev,
Jason Gunthorpe Feb. 21, 2019, 7:21 p.m. UTC | #3
On Thu, Feb 21, 2019 at 10:17:28AM -0800, Matthew Wilcox wrote:
> diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
> index 90f1b0bae5b5..3326e07ab7ae 100644
> +++ b/drivers/infiniband/hw/mlx5/cq.c
> @@ -522,7 +522,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
>  	case MLX5_CQE_SIG_ERR:
>  		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
>  
> -		read_lock(&dev->mdev->priv.mkey_table.lock);
> +		xa_lock(&dev->mdev->priv.mkey_table);
>  		mmkey = __mlx5_mr_lookup(dev->mdev,
>  					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
>  		mr = to_mibmr(mmkey);
> @@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
>  			     mr->sig->err_item.expected,
>  			     mr->sig->err_item.actual);
>  
> -		read_unlock(&dev->mdev->priv.mkey_table.lock);
> +		xa_unlock(&dev->mdev->priv.mkey_table);
>  		goto repoll;
>  	}

I think this xa_lock() should really be 

  srcu_read_lock(&dev->mr_srcu)

Moni?

> diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
> index b26ea9077384..5b3fb2642d0c 100644
> +++ b/include/linux/mlx5/qp.h
> @@ -552,7 +552,7 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
>  
>  static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
>  {
> -	return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
> +	return xa_load(&dev->priv.mkey_table, key);

I think this actually fixes a bug, as pagefault_single_data_segment is
calling radix_tree_lookup only under srcu, which is not compatible
with the radix tree's use of kfree_rcu ?

Jason
Moni Shoua Feb. 24, 2019, 2:11 p.m. UTC | #4
On Thu, Feb 21, 2019 at 9:22 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Thu, Feb 21, 2019 at 10:17:28AM -0800, Matthew Wilcox wrote:
> > diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
> > index 90f1b0bae5b5..3326e07ab7ae 100644
> > +++ b/drivers/infiniband/hw/mlx5/cq.c
> > @@ -522,7 +522,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
> >       case MLX5_CQE_SIG_ERR:
> >               sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
> >
> > -             read_lock(&dev->mdev->priv.mkey_table.lock);
> > +             xa_lock(&dev->mdev->priv.mkey_table);
> >               mmkey = __mlx5_mr_lookup(dev->mdev,
> >                                        mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
> >               mr = to_mibmr(mmkey);
> > @@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
> >                            mr->sig->err_item.expected,
> >                            mr->sig->err_item.actual);
> >
> > -             read_unlock(&dev->mdev->priv.mkey_table.lock);
> > +             xa_unlock(&dev->mdev->priv.mkey_table);
> >               goto repoll;
> >       }
>
> I think this xa_lock() should really be
>
>   srcu_read_lock(&dev->mr_srcu)
>
> Moni?
>
When mkey table was implemented as radix_tree I believe that
read_unlock needed to be replaces with srcu_read_lock() (like in all
other places that lookup mkeys).
When mkey table is implemented as xarray I'm not sure. It's not clear
to me from the documentation what are the requirements.

> > diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
> > index b26ea9077384..5b3fb2642d0c 100644
> > +++ b/include/linux/mlx5/qp.h
> > @@ -552,7 +552,7 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
> >
> >  static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
> >  {
> > -     return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
> > +     return xa_load(&dev->priv.mkey_table, key);
>
> I think this actually fixes a bug, as pagefault_single_data_segment is
> calling radix_tree_lookup only under srcu, which is not compatible
> with the radix tree's use of kfree_rcu ?
>
> Jason
Matthew Wilcox Feb. 24, 2019, 9:06 p.m. UTC | #5
On Sun, Feb 24, 2019 at 04:11:30PM +0200, Moni Shoua wrote:
> On Thu, Feb 21, 2019 at 9:22 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > On Thu, Feb 21, 2019 at 10:17:28AM -0800, Matthew Wilcox wrote:
> > > diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
> > > index 90f1b0bae5b5..3326e07ab7ae 100644
> > > +++ b/drivers/infiniband/hw/mlx5/cq.c
> > > @@ -522,7 +522,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
> > >       case MLX5_CQE_SIG_ERR:
> > >               sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
> > >
> > > -             read_lock(&dev->mdev->priv.mkey_table.lock);
> > > +             xa_lock(&dev->mdev->priv.mkey_table);
> > >               mmkey = __mlx5_mr_lookup(dev->mdev,
> > >                                        mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
> > >               mr = to_mibmr(mmkey);
> > > @@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
> > >                            mr->sig->err_item.expected,
> > >                            mr->sig->err_item.actual);
> > >
> > > -             read_unlock(&dev->mdev->priv.mkey_table.lock);
> > > +             xa_unlock(&dev->mdev->priv.mkey_table);
> > >               goto repoll;
> > >       }
> >
> > I think this xa_lock() should really be
> >
> >   srcu_read_lock(&dev->mr_srcu)
> >
> > Moni?
> >
> When mkey table was implemented as radix_tree I believe that
> read_unlock needed to be replaces with srcu_read_lock() (like in all
> other places that lookup mkeys).
> When mkey table is implemented as xarray I'm not sure. It's not clear
> to me from the documentation what are the requirements.

Can you tell me what else you need from the documentation?
I'd like it to be clear.
https://www.kernel.org/doc/html/latest/core-api/xarray.html#normal-api
(there's unfortunately no anchor for the 'Locking' subsection, so you
have to scroll down a bit).
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 90f1b0bae5b5..3326e07ab7ae 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -522,7 +522,7 @@  static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	case MLX5_CQE_SIG_ERR:
 		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
 
-		read_lock(&dev->mdev->priv.mkey_table.lock);
+		xa_lock(&dev->mdev->priv.mkey_table);
 		mmkey = __mlx5_mr_lookup(dev->mdev,
 					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
 		mr = to_mibmr(mmkey);
@@ -537,7 +537,7 @@  static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 			     mr->sig->err_item.expected,
 			     mr->sig->err_item.actual);
 
-		read_unlock(&dev->mdev->priv.mkey_table.lock);
+		xa_unlock(&dev->mdev->priv.mkey_table);
 		goto repoll;
 	}
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index fd6ea1f75085..65cf11080bbd 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -132,7 +132,7 @@  static void reg_mr_callback(int status, void *context)
 	struct mlx5_cache_ent *ent = &cache->ent[c];
 	u8 key;
 	unsigned long flags;
-	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+	struct xarray *mkeys = &dev->mdev->priv.mkey_table;
 	int err;
 
 	spin_lock_irqsave(&ent->lock, flags);
@@ -160,12 +160,12 @@  static void reg_mr_callback(int status, void *context)
 	ent->size++;
 	spin_unlock_irqrestore(&ent->lock, flags);
 
-	write_lock_irqsave(&table->lock, flags);
-	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
-				&mr->mmkey);
+	xa_lock_irqsave(mkeys, flags);
+	err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+				&mr->mmkey, GFP_ATOMIC));
 	if (err)
 		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
-	write_unlock_irqrestore(&table->lock, flags);
+	xa_unlock_irqrestore(mkeys, flags);
 
 	if (!completion_done(&ent->compl))
 		complete(&ent->compl);