diff mbox

[01/12] target: Convert se_node_acl->device_list[] to RCU hlist

Message ID 1431422736-29125-2-git-send-email-nab@daterainc.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nicholas A. Bellinger May 12, 2015, 9:25 a.m. UTC
From: Nicholas Bellinger <nab@linux-iscsi.org>

This patch converts se_node_acl->device_list[] table for mappedluns
to modern RCU hlist_head usage in order to support an arbitrary number
of node_acl lun mappings.

This changes core_[enable,disable]_device_list_for_node() to use
rcu_assign_pointer() and to invoke call_rcu() for releasing memory, along
with a number of RCU read path conversions in target_core_device.c code.

Required for subsequent conversion of transport_lookup_cmd() to lock-less
RCU read path.

Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c   | 218 +++++++++++++++++++---------------
 drivers/target/target_core_internal.h |   1 +
 drivers/target/target_core_tpg.c      |  23 ++--
 include/target/target_core_base.h     |   7 +-
 4 files changed, 137 insertions(+), 112 deletions(-)

Comments

Andy Grover May 12, 2015, 8:58 p.m. UTC | #1
On 05/12/2015 02:25 AM, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
>
> This patch converts se_node_acl->device_list[] table for mappedluns
> to modern RCU hlist_head usage in order to support an arbitrary number
> of node_acl lun mappings.
>
> This includes changes to core_[enable,disable]_device_list_for_node()
> rcu_assign_pointer() and invokes call_rcu() for releasing memory, along
> with a number of RCU read path conversions in target_core_device.c code.
>
> Required for subsequent conversion of transport_lookup_cmd() to lock-less
> RCU read path.
>
> Cc: Hannes Reinecke <hare@suse.de>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Sagi Grimberg <sagig@mellanox.com>
> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
> ---
>   drivers/target/target_core_device.c   | 218 +++++++++++++++++++---------------
>   drivers/target/target_core_internal.h |   1 +
>   drivers/target/target_core_tpg.c      |  23 ++--
>   include/target/target_core_base.h     |   7 +-
>   4 files changed, 137 insertions(+), 112 deletions(-)
>
> diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
> index 6e58976..1df14ce 100644
> --- a/drivers/target/target_core_device.c
> +++ b/drivers/target/target_core_device.c
> @@ -198,12 +198,9 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(
>   	struct se_lun *lun;
>   	struct se_port *port;
>   	struct se_portal_group *tpg = nacl->se_tpg;
> -	u32 i;
> -
> -	spin_lock_irq(&nacl->device_list_lock);
> -	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
> -		deve = nacl->device_list[i];
>
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
>   		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
>   			continue;
>
> @@ -225,11 +222,11 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(
>   			continue;
>
>   		atomic_inc_mb(&deve->pr_ref_count);
> -		spin_unlock_irq(&nacl->device_list_lock);
> +		rcu_read_unlock();
>
>   		return deve;
>   	}
> -	spin_unlock_irq(&nacl->device_list_lock);
> +	rcu_read_unlock();
>
>   	return NULL;
>   }
> @@ -240,18 +237,12 @@ int core_free_device_list_for_node(
>   {
>   	struct se_dev_entry *deve;
>   	struct se_lun *lun;
> -	u32 i;
> -
> -	if (!nacl->device_list)
> -		return 0;
> -
> -	spin_lock_irq(&nacl->device_list_lock);
> -	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
> -		deve = nacl->device_list[i];
> +	u32 mapped_lun;
>
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
>   		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
>   			continue;
> -
>   		if (!deve->se_lun) {
>   			pr_err("%s device entries device pointer is"
>   				" NULL, but Initiator has access.\n",
> @@ -259,16 +250,14 @@ int core_free_device_list_for_node(
>   			continue;
>   		}
>   		lun = deve->se_lun;
> +		mapped_lun = deve->mapped_lun;
> +		rcu_read_unlock();
>
> -		spin_unlock_irq(&nacl->device_list_lock);
> -		core_disable_device_list_for_node(lun, NULL, deve->mapped_lun,
> -			TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> -		spin_lock_irq(&nacl->device_list_lock);
> +		core_disable_device_list_for_node(lun, NULL, mapped_lun,
> +					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> +		rcu_read_lock();
>   	}
> -	spin_unlock_irq(&nacl->device_list_lock);
> -
> -	array_free(nacl->device_list, TRANSPORT_MAX_LUNS_PER_TPG);
> -	nacl->device_list = NULL;
> +	rcu_read_unlock();
>
>   	return 0;
>   }
> @@ -280,18 +269,44 @@ void core_update_device_list_access(
>   {
>   	struct se_dev_entry *deve;
>
> -	spin_lock_irq(&nacl->device_list_lock);
> -	deve = nacl->device_list[mapped_lun];
> -	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
> -		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
> -		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> -	} else {
> -		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
> -		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> +	spin_lock_irq(&nacl->lun_entry_lock);
> +	deve = target_nacl_find_deve(nacl, mapped_lun);
> +	if (deve) {
> +		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
> +			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
> +			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> +		} else {
> +			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
> +			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> +		}
>   	}
> -	spin_unlock_irq(&nacl->device_list_lock);
> +	spin_unlock_irq(&nacl->lun_entry_lock);
> +
> +	synchronize_rcu();
> +}
> +
> +static void target_nacl_deve_callrcu(struct rcu_head *head)
> +{
> +	struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
> +						 rcu_head);
> +	kfree(deve);
>   }
>
> +/*
> + * Called with rcu_read_lock or nacl->device_list_lock held.
> + */
> +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *nacl, u32 mapped_lun)
> +{
> +	struct se_dev_entry *deve;
> +
> +	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link)
> +		if (deve->mapped_lun == mapped_lun)
> +			return deve;
> +
> +	return NULL;
> +}
> +EXPORT_SYMBOL(target_nacl_find_deve);
> +
>   /*      core_enable_device_list_for_node():
>    *
>    *
> @@ -305,67 +320,61 @@ int core_enable_device_list_for_node(
>   	struct se_portal_group *tpg)
>   {
>   	struct se_port *port = lun->lun_sep;
> -	struct se_dev_entry *deve;
> +	struct se_dev_entry *orig, *new;
>
> -	spin_lock_irq(&nacl->device_list_lock);
> +	new = kzalloc(sizeof(*new), GFP_KERNEL);
> +	if (!new) {
> +		pr_err("Unable to allocate se_dev_entry memory\n");
> +		return -ENOMEM;
> +	}
>
> -	deve = nacl->device_list[mapped_lun];
> +	new->se_node_acl = nacl;
> +	atomic_set(&new->ua_count, 0);
> +	spin_lock_init(&new->ua_lock);
> +	INIT_LIST_HEAD(&new->alua_port_list);
> +	INIT_LIST_HEAD(&new->ua_list);
>
> -	/*
> -	 * Check if the call is handling demo mode -> explicit LUN ACL
> -	 * transition.  This transition must be for the same struct se_lun
> -	 * + mapped_lun that was setup in demo mode..
> -	 */
> -	if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
> -		if (deve->se_lun_acl != NULL) {
> -			pr_err("struct se_dev_entry->se_lun_acl"
> -			       " already set for demo mode -> explicit"
> -			       " LUN ACL transition\n");
> -			spin_unlock_irq(&nacl->device_list_lock);
> -			return -EINVAL;
> -		}
> -		if (deve->se_lun != lun) {
> -			pr_err("struct se_dev_entry->se_lun does"
> -			       " match passed struct se_lun for demo mode"
> -			       " -> explicit LUN ACL transition\n");
> -			spin_unlock_irq(&nacl->device_list_lock);
> -			return -EINVAL;
> -		}
> -		deve->se_lun_acl = lun_acl;
> +	new->mapped_lun = mapped_lun;
> +	new->lun_flags |= TRANSPORT_LUNFLAGS_INITIATOR_ACCESS;
>
> -		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
> -			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
> -			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> -		} else {
> -			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
> -			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> -		}
> +	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE)
> +		new->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> +	else
> +		new->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> +
> +	new->creation_time = get_jiffies_64();
> +	new->attach_count++;
>
> +	spin_lock_irq(&nacl->device_list_lock);
> +	orig = target_nacl_find_deve(nacl, mapped_lun);
> +	if (orig && orig->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
> +		BUG_ON(orig->se_lun_acl != NULL);
> +		BUG_ON(orig->se_lun != lun);
> +
> +		rcu_assign_pointer(new->se_lun, lun);
> +		rcu_assign_pointer(new->se_lun_acl, lun_acl);
> +		hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
>   		spin_unlock_irq(&nacl->device_list_lock);
> -		return 0;
> -	}
>
> -	deve->se_lun = lun;
> -	deve->se_lun_acl = lun_acl;
> -	deve->mapped_lun = mapped_lun;
> -	deve->lun_flags |= TRANSPORT_LUNFLAGS_INITIATOR_ACCESS;
> +		spin_lock_bh(&port->sep_alua_lock);
> +		list_del(&orig->alua_port_list);
> +		list_add_tail(&new->alua_port_list, &port->sep_alua_list);
> +		spin_unlock_bh(&port->sep_alua_lock);
>
> -	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
> -		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
> -		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> -	} else {
> -		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
> -		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> +		call_rcu(&orig->rcu_head, target_nacl_deve_callrcu);
> +		return 0;
>   	}
>
> -	deve->creation_time = get_jiffies_64();
> -	deve->attach_count++;
> +	rcu_assign_pointer(new->se_lun, lun);
> +	rcu_assign_pointer(new->se_lun_acl, lun_acl);
> +	hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
>   	spin_unlock_irq(&nacl->device_list_lock);
>
>   	spin_lock_bh(&port->sep_alua_lock);
> -	list_add_tail(&deve->alua_port_list, &port->sep_alua_list);
> +	list_add_tail(&new->alua_port_list, &port->sep_alua_list);
>   	spin_unlock_bh(&port->sep_alua_lock);
>
> +	synchronize_rcu();
>   	return 0;
>   }
>
> @@ -382,8 +391,14 @@ int core_disable_device_list_for_node(
>   	struct se_portal_group *tpg)
>   {
>   	struct se_port *port = lun->lun_sep;
> -	struct se_dev_entry *deve = nacl->device_list[mapped_lun];
> +	struct se_dev_entry *orig;
>
> +	spin_lock_irq(&nacl->device_list_lock);
> +	orig = target_nacl_find_deve(nacl, mapped_lun);
> +	if (!orig) {
> +		spin_unlock_irq(&nacl->device_list_lock);
> +		return 0;
> +	}
>   	/*
>   	 * If the MappedLUN entry is being disabled, the entry in
>   	 * port->sep_alua_list must be removed now before clearing the
> @@ -398,27 +413,33 @@ int core_disable_device_list_for_node(
>   	 * MappedLUN *deve will be released below..
>   	 */
>   	spin_lock_bh(&port->sep_alua_lock);
> -	list_del(&deve->alua_port_list);
> +	list_del(&orig->alua_port_list);
>   	spin_unlock_bh(&port->sep_alua_lock);
>   	/*
>   	 * Wait for any in process SPEC_I_PT=1 or REGISTER_AND_MOVE
>   	 * PR operation to complete.
>   	 */
> -	while (atomic_read(&deve->pr_ref_count) != 0)
> +	while (atomic_read(&orig->pr_ref_count) != 0)
>   		cpu_relax();
>
> -	spin_lock_irq(&nacl->device_list_lock);
>   	/*
>   	 * Disable struct se_dev_entry LUN ACL mapping
>   	 */
> -	core_scsi3_ua_release_all(deve);
> -	deve->se_lun = NULL;
> -	deve->se_lun_acl = NULL;
> -	deve->lun_flags = 0;
> -	deve->creation_time = 0;
> -	deve->attach_count--;
> +	core_scsi3_ua_release_all(orig);
> +	rcu_assign_pointer(orig->se_lun, NULL);
> +	rcu_assign_pointer(orig->se_lun_acl, NULL);
> +	orig->lun_flags = 0;
> +	orig->creation_time = 0;
> +	orig->attach_count--;
> +	hlist_del_rcu(&orig->link);
>   	spin_unlock_irq(&nacl->device_list_lock);
>
> +	/*
> +	 * Fire off RCU callback to wait for any in process SPEC_I_PT=1
> +	 * or REGISTER_AND_MOVE PR operation to complete.
> +	 */
> +	call_rcu(&orig->rcu_head, target_nacl_deve_callrcu);
> +
>   	core_scsi3_free_pr_reg_from_nacl(lun->lun_se_dev, nacl);
>   	return 0;
>   }
> @@ -431,26 +452,25 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
>   {
>   	struct se_node_acl *nacl;
>   	struct se_dev_entry *deve;
> -	u32 i;
> +	u32 mapped_lun;
>
>   	spin_lock_irq(&tpg->acl_node_lock);
>   	list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
>   		spin_unlock_irq(&tpg->acl_node_lock);
>
> -		spin_lock_irq(&nacl->device_list_lock);
> -		for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
> -			deve = nacl->device_list[i];
> +		rcu_read_lock();
> +		hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
>   			if (lun != deve->se_lun)
>   				continue;
> -			spin_unlock_irq(&nacl->device_list_lock);
>
> -			core_disable_device_list_for_node(lun, NULL,
> -				deve->mapped_lun, TRANSPORT_LUNFLAGS_NO_ACCESS,
> -				nacl, tpg);
> +			mapped_lun = deve->mapped_lun;
> +			rcu_read_unlock();
>
> -			spin_lock_irq(&nacl->device_list_lock);
> +			core_disable_device_list_for_node(lun, NULL, mapped_lun,
> +					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> +			rcu_read_lock();
>   		}
> -		spin_unlock_irq(&nacl->device_list_lock);
> +		rcu_read_unlock();
>
>   		spin_lock_irq(&tpg->acl_node_lock);
>   	}
> diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
> index d0344ad..9c4bce0 100644
> --- a/drivers/target/target_core_internal.h
> +++ b/drivers/target/target_core_internal.h
> @@ -12,6 +12,7 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
>   int	core_free_device_list_for_node(struct se_node_acl *,
>   		struct se_portal_group *);
>   void	core_update_device_list_access(u32, u32, struct se_node_acl *);
> +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u32);
>   int	core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *,
>   		u32, u32, struct se_node_acl *, struct se_portal_group *);
>   int	core_disable_device_list_for_node(struct se_lun *, struct se_lun_acl *,
> diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
> index c0c1f67..dbdd3e3 100644
> --- a/drivers/target/target_core_tpg.c
> +++ b/drivers/target/target_core_tpg.c
> @@ -55,32 +55,29 @@ static void core_clear_initiator_node_from_tpg(
>   	struct se_node_acl *nacl,
>   	struct se_portal_group *tpg)
>   {
> -	int i;
>   	struct se_dev_entry *deve;
>   	struct se_lun *lun;
> +	u32 mapped_lun;
>
> -	spin_lock_irq(&nacl->device_list_lock);
> -	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
> -		deve = nacl->device_list[i];
> -
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
>   		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
>   			continue;
> -
>   		if (!deve->se_lun) {
>   			pr_err("%s device entries device pointer is"
>   				" NULL, but Initiator has access.\n",
>   				tpg->se_tpg_tfo->get_fabric_name());
>   			continue;
>   		}
> -
>   		lun = deve->se_lun;
> -		spin_unlock_irq(&nacl->device_list_lock);
> -		core_disable_device_list_for_node(lun, NULL, deve->mapped_lun,
> -			TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> +		mapped_lun = deve->mapped_lun;
> +		rcu_read_unlock();
>
> -		spin_lock_irq(&nacl->device_list_lock);
> +		core_disable_device_list_for_node(lun, NULL, mapped_lun,
> +					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> +		rcu_read_lock();
>   	}
> -	spin_unlock_irq(&nacl->device_list_lock);
> +	rcu_read_unlock();
>   }
>
>   /*	__core_tpg_get_initiator_node_acl():
> @@ -266,10 +263,12 @@ static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg,
>
>   	INIT_LIST_HEAD(&acl->acl_list);
>   	INIT_LIST_HEAD(&acl->acl_sess_list);
> +	INIT_HLIST_HEAD(&acl->lun_entry_hlist);
>   	kref_init(&acl->acl_kref);
>   	init_completion(&acl->acl_free_comp);
>   	spin_lock_init(&acl->device_list_lock);
>   	spin_lock_init(&acl->nacl_sess_lock);
> +	spin_lock_init(&acl->lun_entry_lock);
>   	atomic_set(&acl->acl_pr_ref_count, 0);
>   	if (tpg->se_tpg_tfo->tpg_get_default_depth)
>   		acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg);
> diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
> index 042a734..6fb38df 100644
> --- a/include/target/target_core_base.h
> +++ b/include/target/target_core_base.h
> @@ -584,10 +584,12 @@ struct se_node_acl {
>   	char			acl_tag[MAX_ACL_TAG_SIZE];
>   	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
>   	atomic_t		acl_pr_ref_count;
> +	struct hlist_head	lun_entry_hlist;
>   	struct se_dev_entry	**device_list;

Very nice to see all this posted!

Patch 6 is where device_list is finally removed. Suggest squashing 1-6, 
maybe after review, I'm guessing they're not bisectable.

-- Andy

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas A. Bellinger May 13, 2015, 5:08 a.m. UTC | #2
On Tue, 2015-05-12 at 13:58 -0700, Andy Grover wrote:
> On 05/12/2015 02:25 AM, Nicholas A. Bellinger wrote:
> > From: Nicholas Bellinger <nab@linux-iscsi.org>
> >
> > This patch converts se_node_acl->device_list[] table for mappedluns
> > to modern RCU hlist_head usage in order to support an arbitrary number
> > of node_acl lun mappings.
> >
> > This includes changes to core_[enable,disable]_device_list_for_node()
> > rcu_assign_pointer() and invokes call_rcu() for releasing memory, along
> > with a number of RCU read path conversions in target_core_device.c code.
> >
> > Required for subsequent conversion of transport_lookup_cmd() to lock-less
> > RCU read path.
> >
> > Cc: Hannes Reinecke <hare@suse.de>
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Sagi Grimberg <sagig@mellanox.com>
> > Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>

<SNIP>

> > diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
> > index 042a734..6fb38df 100644
> > --- a/include/target/target_core_base.h
> > +++ b/include/target/target_core_base.h
> > @@ -584,10 +584,12 @@ struct se_node_acl {
> >   	char			acl_tag[MAX_ACL_TAG_SIZE];
> >   	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
> >   	atomic_t		acl_pr_ref_count;
> > +	struct hlist_head	lun_entry_hlist;
> >   	struct se_dev_entry	**device_list;
> 
> Very nice to see all this posted!
> 
> Patch 6 is where device_list is finally removed. Suggest squashing 1-6, 
> maybe after review, I'm guessing they're not bisectable.
> 

The series is bisectable.  With patch #1 in place ->device_list[] is
still kzalloc()'ed, but new RCU pointer assignments are made into
lun_entry_hlist[].

Squashing the RCU reader paths (#2-6) for merge is OK, but it's still
nice to break up reader / updater changes into separate patches.

--nab

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 13, 2015, 5:32 a.m. UTC | #3
On Tue, May 12, 2015 at 10:08:51PM -0700, Nicholas A. Bellinger wrote:
> The series is bisectable.  With patch #1 in place ->device_list[] is
> still kzalloc()'ed, but new RCU pointer assignments are made into
> lun_entry_hlist[].
> 
> Squashing the RCU reader paths (#2-6) for merge is OK, but it's still
> nice to break up reader / updater changes into separate patches.

Having the full data structure switch over in one patch really makes
reviewing and understanding the change a lot easier.  But changes to,
say, move to a mutex should indeed stay separate.  So patches 1-6
really should be mostly one.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas A. Bellinger May 13, 2015, 5:41 a.m. UTC | #4
On Wed, 2015-05-13 at 07:32 +0200, Christoph Hellwig wrote:
> On Tue, May 12, 2015 at 10:08:51PM -0700, Nicholas A. Bellinger wrote:
> > The series is bisectable.  With patch #1 in place ->device_list[] is
> > still kzalloc()'ed, but new RCU pointer assignments are made into
> > lun_entry_hlist[].
> > 
> > Squashing the RCU reader paths (#2-6) for merge is OK, but it's still
> > nice to break up reader / updater changes into separate patches.
> 
> Having the full data structure switch over in one patch really makes
> reviewing and understanding the change a lot easier.  But changes to
> say move to a mutex should indeed stay separate.  So patches 1-6
> really should be mostly one.

Well, was thinking 1-6 is too big for one patch, but I guess it's not so
bad:

 drivers/target/target_core_device.c          | 268 +++++++++++++++++++++++++++---------------------
 drivers/target/target_core_fabric_configfs.c |  35 ++++---
 drivers/target/target_core_internal.h        |   1 +
 drivers/target/target_core_pr.c              |   1 +
 drivers/target/target_core_pscsi.c           |  17 ++-
 drivers/target/target_core_spc.c             |  27 +++--
 drivers/target/target_core_stat.c            | 180 ++++++++++++++++----------------
 drivers/target/target_core_tpg.c             |  59 +++--------
 drivers/target/target_core_ua.c              |  51 ++++++---
 include/target/target_core_base.h            |   8 +-
 10 files changed, 350 insertions(+), 297 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 13, 2015, 5:46 a.m. UTC | #5
On Tue, May 12, 2015 at 09:25:25AM +0000, Nicholas A. Bellinger wrote:
> @@ -240,18 +237,12 @@ int core_free_device_list_for_node(
>  {
>  	struct se_dev_entry *deve;
>  	struct se_lun *lun;
> -	u32 i;
> -
> -	if (!nacl->device_list)
> -		return 0;
> -
> -	spin_lock_irq(&nacl->device_list_lock);
> -	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
> -		deve = nacl->device_list[i];
> +	u32 mapped_lun;
>  
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
>  		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
>  			continue;
> -
>  		if (!deve->se_lun) {
>  			pr_err("%s device entries device pointer is"
>  				" NULL, but Initiator has access.\n",
> @@ -259,16 +250,14 @@ int core_free_device_list_for_node(
>  			continue;
>  		}
>  		lun = deve->se_lun;
> +		mapped_lun = deve->mapped_lun;
> +		rcu_read_unlock();
>  
> -		spin_unlock_irq(&nacl->device_list_lock);
> -		core_disable_device_list_for_node(lun, NULL, deve->mapped_lun,
> -			TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> -		spin_lock_irq(&nacl->device_list_lock);
> +		core_disable_device_list_for_node(lun, NULL, mapped_lun,
> +					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);

I don't think this change is a good idea.  Now that you've just switched
to a list, call into core_disable_device_list_for_node with the lock
held instead of retaking it, and restart the list walk after it instead
of encoding the previous wrong behavior with the local mapped_lun
variable.  Note that this pattern is the same for all but one of the
callers, and even core_dev_del_initiator_node_lun_acl would benefit
from being called locked and with an already looked up dev entry.

Note that if you cherry picked this patch I posted a while ago
to be before the series one of the callers would already be gone:

http://git.infradead.org/users/hch/scsi.git/commitdiff/dfb7096ba5ea47cb5b7fb5b6e2f8d7d6436af24f

> +	spin_lock_irq(&nacl->lun_entry_lock);
> +	deve = target_nacl_find_deve(nacl, mapped_lun);
> +	if (deve) {
> +		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
> +			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
> +			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> +		} else {
> +			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
> +			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> +		}
>  	}
> -	spin_unlock_irq(&nacl->device_list_lock);
> +	spin_unlock_irq(&nacl->lun_entry_lock);
> +
> +	synchronize_rcu();

This only updates scalar fields, so the synchronize_rcu() call isn't
going to buy you anything.

Btw, it would be good to always document what a synchronize_rcu()
call is for.

> +
> +static void target_nacl_deve_callrcu(struct rcu_head *head)
> +{
> +	struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
> +						 rcu_head);
> +	kfree(deve);
>  }

Just use kfree_rcu instead of open coding it.

> +/*
> + * Called with rcu_read_lock or nacl->device_list_lock held.
> + */

It would be good to assert that.  Paul, is there a good way to assert
we're called under rcu_read_lock?

> +	spin_lock_irq(&nacl->device_list_lock);
> +	orig = target_nacl_find_deve(nacl, mapped_lun);
> +	if (orig && orig->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
> +		BUG_ON(orig->se_lun_acl != NULL);
> +		BUG_ON(orig->se_lun != lun);
> +
> +		rcu_assign_pointer(new->se_lun, lun);
> +		rcu_assign_pointer(new->se_lun_acl, lun_acl);
> +		hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
>  		spin_unlock_irq(&nacl->device_list_lock);
> +		spin_lock_bh(&port->sep_alua_lock);
> +		list_del(&orig->alua_port_list);
> +		list_add_tail(&new->alua_port_list, &port->sep_alua_list);
> +		spin_unlock_bh(&port->sep_alua_lock);
>  
> +		return 0;
>  	}

The case where we have an original one is the demo mode -> explicit
change.  So I don't think we actually need the newly allocated dev
entry here.  Just change lun_flags like in core_update_device_list_access
and do an rcu_assign_pointer for the lun ACLs.

> -	deve->creation_time = get_jiffies_64();
> -	deve->attach_count++;
> +	rcu_assign_pointer(new->se_lun, lun);
> +	rcu_assign_pointer(new->se_lun_acl, lun_acl);
> +	hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
>  	spin_unlock_irq(&nacl->device_list_lock);
>  
>  	spin_lock_bh(&port->sep_alua_lock);
> -	list_add_tail(&deve->alua_port_list, &port->sep_alua_list);
> +	list_add_tail(&new->alua_port_list, &port->sep_alua_list);
>  	spin_unlock_bh(&port->sep_alua_lock);
>  
> +	synchronize_rcu();

Please add a comment why we need the synchronize_rcu here again.  Nothing
is deleted from any list, and nothing is freed, so I don't see any need
to wait for a grace period.

> +	core_scsi3_ua_release_all(orig);
> +	rcu_assign_pointer(orig->se_lun, NULL);
> +	rcu_assign_pointer(orig->se_lun_acl, NULL);

Can you document the life time rules that ensure ->se_lun and ->se_lun_acl
stay around while readers in the RCU grace period may still access them?
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas A. Bellinger May 13, 2015, 6:20 a.m. UTC | #6
On Wed, 2015-05-13 at 07:46 +0200, Christoph Hellwig wrote:
> On Tue, May 12, 2015 at 09:25:25AM +0000, Nicholas A. Bellinger wrote:
> > @@ -240,18 +237,12 @@ int core_free_device_list_for_node(
> >  {
> >  	struct se_dev_entry *deve;
> >  	struct se_lun *lun;
> > -	u32 i;
> > -
> > -	if (!nacl->device_list)
> > -		return 0;
> > -
> > -	spin_lock_irq(&nacl->device_list_lock);
> > -	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
> > -		deve = nacl->device_list[i];
> > +	u32 mapped_lun;
> >  
> > +	rcu_read_lock();
> > +	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
> >  		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
> >  			continue;
> > -
> >  		if (!deve->se_lun) {
> >  			pr_err("%s device entries device pointer is"
> >  				" NULL, but Initiator has access.\n",
> > @@ -259,16 +250,14 @@ int core_free_device_list_for_node(
> >  			continue;
> >  		}
> >  		lun = deve->se_lun;
> > +		mapped_lun = deve->mapped_lun;
> > +		rcu_read_unlock();
> >  
> > -		spin_unlock_irq(&nacl->device_list_lock);
> > -		core_disable_device_list_for_node(lun, NULL, deve->mapped_lun,
> > -			TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> > -		spin_lock_irq(&nacl->device_list_lock);
> > +		core_disable_device_list_for_node(lun, NULL, mapped_lun,
> > +					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
> 
> I don't think this change is a good idea.  Now that you've just switched
> to a list, call into core_disable_device_list_for_node with the lock
> held instead of retaking it, and restart the list walk after it instead
> of encoding the previous wrong behavior with the local mapped_lun
> variable.  Note that this pattern is the same for all but one of the
> callers, and even core_dev_del_initiator_node_lun_acl would benefit
> from being called locked and with an already looked up dev entry.
> 

Ugh, yes.  Fixing up clear_lun_from_tpg + free_device_list_for_node to
use a common caller acquiring se_node_acl->lun_entry_mutex during
se_dev_entry release.

Fixing up target_fabric_mappedlun_unlink() as well.

> Note that if you cherry picked this patch I posted a while ago
> to be before the series one of the callers would already be gone:
> 
> http://git.infradead.org/users/hch/scsi.git/commitdiff/dfb7096ba5ea47cb5b7fb5b6e2f8d7d6436af24f
> 
> > +	spin_lock_irq(&nacl->lun_entry_lock);
> > +	deve = target_nacl_find_deve(nacl, mapped_lun);
> > +	if (deve) {
> > +		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
> > +			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
> > +			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
> > +		} else {
> > +			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
> > +			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
> > +		}
> >  	}
> > -	spin_unlock_irq(&nacl->device_list_lock);
> > +	spin_unlock_irq(&nacl->lun_entry_lock);
> > +
> > +	synchronize_rcu();
> 
> This only updates scalar fields, so the synchronize_rcu() call isn't
> going to buy you anything.
> 
> Btw, it would be good to always document what a synchronize_rcu()
> call is for.

<nod>, dropping synchronize_rcu() here

> 
> > +
> > +static void target_nacl_deve_callrcu(struct rcu_head *head)
> > +{
> > +	struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
> > +						 rcu_head);
> > +	kfree(deve);
> >  }
> 
> Just use kfree_rcu instead of open coding it.
> 

Done

> > +/*
> > + * Called with rcu_read_lock or nacl->device_list_lock held.
> > + */
> 
> It would be good to assert that.  Paul, is there a good way to assert
> we're called under rcu_read_lock?
> 
> > +	spin_lock_irq(&nacl->device_list_lock);
> > +	orig = target_nacl_find_deve(nacl, mapped_lun);
> > +	if (orig && orig->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
> > +		BUG_ON(orig->se_lun_acl != NULL);
> > +		BUG_ON(orig->se_lun != lun);
> > +
> > +		rcu_assign_pointer(new->se_lun, lun);
> > +		rcu_assign_pointer(new->se_lun_acl, lun_acl);
> > +		hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
> >  		spin_unlock_irq(&nacl->device_list_lock);
> > +		spin_lock_bh(&port->sep_alua_lock);
> > +		list_del(&orig->alua_port_list);
> > +		list_add_tail(&new->alua_port_list, &port->sep_alua_list);
> > +		spin_unlock_bh(&port->sep_alua_lock);
> >  
> > +		return 0;
> >  	}
> 
> The case where we have an original one is the demo mode -> explicit
> change.  So I don't think we actually need the newly allocated dev
> entry here.  Just change lun_flags like in core_update_device_list_access
> and do an rcu_assign_pointer for the lun ACLs.

Will take a look at this.

> 
> > -	deve->creation_time = get_jiffies_64();
> > -	deve->attach_count++;
> > +	rcu_assign_pointer(new->se_lun, lun);
> > +	rcu_assign_pointer(new->se_lun_acl, lun_acl);
> > +	hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
> >  	spin_unlock_irq(&nacl->device_list_lock);
> >  
> >  	spin_lock_bh(&port->sep_alua_lock);
> > -	list_add_tail(&deve->alua_port_list, &port->sep_alua_list);
> > +	list_add_tail(&new->alua_port_list, &port->sep_alua_list);
> >  	spin_unlock_bh(&port->sep_alua_lock);
> >  
> > +	synchronize_rcu();
> 
> Please add a comment why we need the synchronize_rcu here again.  Nothing
> is deleted from any list, and nothing is freed, so I don't see any need
> to wait for a grace period.
> 

I don't think it's required either.  Dropping.

> > +	core_scsi3_ua_release_all(orig);
> > +	rcu_assign_pointer(orig->se_lun, NULL);
> > +	rcu_assign_pointer(orig->se_lun_acl, NULL);
> 
> Can you document the life time rules that ensure ->se_lun and ->se_lun_acl
> stay around while readers in the RCU grace period may still access them?

Will do.

Thanks HCH.

--nab

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 13, 2015, 6:35 a.m. UTC | #7
One more comment from looking over the RCU usage with all the patches
applied:

In core_get_se_deve_from_rtpi we dereference lun->lun_sep, so
either struct se_port needs to be switched to kfree_rcu,
or we need to mirror the rtpi value into the se_lun.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 13, 2015, 6:48 a.m. UTC | #8
On Tue, May 12, 2015 at 11:20:12PM -0700, Nicholas A. Bellinger wrote:
> Ugh, yes.  Fixing up clear_lun_from_tpg + free_device_list_for_node to
> use a common caller acquiring se_node_acl->lun_entry_mutex during
> se_dev_entry release.
> 
> Fixing up target_fabric_mappedlun_unlink() as well.

Good prep for that one might be:

http://git.infradead.org/users/hch/scsi.git/commitdiff/111b30a2430c8af492d8e67d18658f60313ad3be

and 

http://git.infradead.org/users/hch/scsi.git/commitdiff/054d9e0cc048f664cde5b13d34742c61ee535a04

> > The case where we have an original one is the demo mode -> explicit
> > change.  So I don't think we actually need the newly allocated dev
> > entry here.  Just change lun_flags like in core_update_device_list_access
> > and do an rcu_assign_pointer for the lun ACLs.
> 
> Will take a look at this.

FYI, this was my idea how to handle the transition from generated
to explicit ACLs:

http://git.infradead.org/users/hch/scsi.git/commitdiff/e3438480c0eaa020d1ad55ec1a0be88f05ae8372

I don't think it's quite correct, though as we need to update ->lun_flags
in the transition case as well.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas A. Bellinger May 13, 2015, 8:46 a.m. UTC | #9
On Wed, 2015-05-13 at 08:35 +0200, Christoph Hellwig wrote:
> One more comment from looking over the RCU usage with all the patches
> applied:
> 
> In core_get_se_deve_from_rtpi we dereference lun->lun_sep, so
> either struct se_port needs to be switched to kfree_rcu,
> or we need to mirror the rtpi value into the se_lun.

Updated to use a mirror lun->lun_rtpi, with the assignment from sep_rtpi
occurring in core_dev_export() code.

Thanks HCH.



--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 17, 2015, 4:51 p.m. UTC | #10
On Wed, May 13, 2015 at 01:46:11AM -0700, Nicholas A. Bellinger wrote:
> Updated to use a mirror lun->lun_rtpi, with the assignment from sep_rtpi
> occurring in core_dev_export() code.

From looking at your current tree I suspect freeing the se_port structure
using kfree_rcu might be a better idea.  Together with dropping the references
to the se_device from call_rcu context this basically means all pointers
in struct se_lun are rcu protected which is much safer if you want
to access struct se_lun under rcu protection, as this avoids having to
deal with special cases.  Additionally that basically allows you to
replace lun_sep_lock with rcu_read_lock for anything remotely like
a fast path.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 6e58976..1df14ce 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -198,12 +198,9 @@  struct se_dev_entry *core_get_se_deve_from_rtpi(
 	struct se_lun *lun;
 	struct se_port *port;
 	struct se_portal_group *tpg = nacl->se_tpg;
-	u32 i;
-
-	spin_lock_irq(&nacl->device_list_lock);
-	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = nacl->device_list[i];
 
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
 
@@ -225,11 +222,11 @@  struct se_dev_entry *core_get_se_deve_from_rtpi(
 			continue;
 
 		atomic_inc_mb(&deve->pr_ref_count);
-		spin_unlock_irq(&nacl->device_list_lock);
+		rcu_read_unlock();
 
 		return deve;
 	}
-	spin_unlock_irq(&nacl->device_list_lock);
+	rcu_read_unlock();
 
 	return NULL;
 }
@@ -240,18 +237,12 @@  int core_free_device_list_for_node(
 {
 	struct se_dev_entry *deve;
 	struct se_lun *lun;
-	u32 i;
-
-	if (!nacl->device_list)
-		return 0;
-
-	spin_lock_irq(&nacl->device_list_lock);
-	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = nacl->device_list[i];
+	u32 mapped_lun;
 
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
-
 		if (!deve->se_lun) {
 			pr_err("%s device entries device pointer is"
 				" NULL, but Initiator has access.\n",
@@ -259,16 +250,14 @@  int core_free_device_list_for_node(
 			continue;
 		}
 		lun = deve->se_lun;
+		mapped_lun = deve->mapped_lun;
+		rcu_read_unlock();
 
-		spin_unlock_irq(&nacl->device_list_lock);
-		core_disable_device_list_for_node(lun, NULL, deve->mapped_lun,
-			TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
-		spin_lock_irq(&nacl->device_list_lock);
+		core_disable_device_list_for_node(lun, NULL, mapped_lun,
+					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
+		rcu_read_lock();
 	}
-	spin_unlock_irq(&nacl->device_list_lock);
-
-	array_free(nacl->device_list, TRANSPORT_MAX_LUNS_PER_TPG);
-	nacl->device_list = NULL;
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -280,18 +269,44 @@  void core_update_device_list_access(
 {
 	struct se_dev_entry *deve;
 
-	spin_lock_irq(&nacl->device_list_lock);
-	deve = nacl->device_list[mapped_lun];
-	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
-		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
-		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
-	} else {
-		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
-		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
+	spin_lock_irq(&nacl->lun_entry_lock);
+	deve = target_nacl_find_deve(nacl, mapped_lun);
+	if (deve) {
+		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
+			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
+			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
+		} else {
+			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
+			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
+		}
 	}
-	spin_unlock_irq(&nacl->device_list_lock);
+	spin_unlock_irq(&nacl->lun_entry_lock);
+
+	synchronize_rcu();
+}
+
+static void target_nacl_deve_callrcu(struct rcu_head *head)
+{
+	struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
+						 rcu_head);
+	kfree(deve);
 }
 
+/*
+ * Called with rcu_read_lock or nacl->device_list_lock held.
+ */
+struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *nacl, u32 mapped_lun)
+{
+	struct se_dev_entry *deve;
+
+	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link)
+		if (deve->mapped_lun == mapped_lun)
+			return deve;
+
+	return NULL;
+}
+EXPORT_SYMBOL(target_nacl_find_deve);
+
 /*      core_enable_device_list_for_node():
  *
  *
@@ -305,67 +320,61 @@  int core_enable_device_list_for_node(
 	struct se_portal_group *tpg)
 {
 	struct se_port *port = lun->lun_sep;
-	struct se_dev_entry *deve;
+	struct se_dev_entry *orig, *new;
 
-	spin_lock_irq(&nacl->device_list_lock);
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new) {
+		pr_err("Unable to allocate se_dev_entry memory\n");
+		return -ENOMEM;
+	}
 
-	deve = nacl->device_list[mapped_lun];
+	new->se_node_acl = nacl;
+	atomic_set(&new->ua_count, 0);
+	spin_lock_init(&new->ua_lock);
+	INIT_LIST_HEAD(&new->alua_port_list);
+	INIT_LIST_HEAD(&new->ua_list);
 
-	/*
-	 * Check if the call is handling demo mode -> explicit LUN ACL
-	 * transition.  This transition must be for the same struct se_lun
-	 * + mapped_lun that was setup in demo mode..
-	 */
-	if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
-		if (deve->se_lun_acl != NULL) {
-			pr_err("struct se_dev_entry->se_lun_acl"
-			       " already set for demo mode -> explicit"
-			       " LUN ACL transition\n");
-			spin_unlock_irq(&nacl->device_list_lock);
-			return -EINVAL;
-		}
-		if (deve->se_lun != lun) {
-			pr_err("struct se_dev_entry->se_lun does"
-			       " match passed struct se_lun for demo mode"
-			       " -> explicit LUN ACL transition\n");
-			spin_unlock_irq(&nacl->device_list_lock);
-			return -EINVAL;
-		}
-		deve->se_lun_acl = lun_acl;
+	new->mapped_lun = mapped_lun;
+	new->lun_flags |= TRANSPORT_LUNFLAGS_INITIATOR_ACCESS;
 
-		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
-			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
-			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
-		} else {
-			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
-			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
-		}
+	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE)
+		new->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
+	else
+		new->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
+
+	new->creation_time = get_jiffies_64();
+	new->attach_count++;
 
+	spin_lock_irq(&nacl->device_list_lock);
+	orig = target_nacl_find_deve(nacl, mapped_lun);
+	if (orig && orig->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
+		BUG_ON(orig->se_lun_acl != NULL);
+		BUG_ON(orig->se_lun != lun);
+
+		rcu_assign_pointer(new->se_lun, lun);
+		rcu_assign_pointer(new->se_lun_acl, lun_acl);
+		hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
 		spin_unlock_irq(&nacl->device_list_lock);
-		return 0;
-	}
 
-	deve->se_lun = lun;
-	deve->se_lun_acl = lun_acl;
-	deve->mapped_lun = mapped_lun;
-	deve->lun_flags |= TRANSPORT_LUNFLAGS_INITIATOR_ACCESS;
+		spin_lock_bh(&port->sep_alua_lock);
+		list_del(&orig->alua_port_list);
+		list_add_tail(&new->alua_port_list, &port->sep_alua_list);
+		spin_unlock_bh(&port->sep_alua_lock);
 
-	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
-		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
-		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
-	} else {
-		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
-		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
+		call_rcu(&orig->rcu_head, target_nacl_deve_callrcu);
+		return 0;
 	}
 
-	deve->creation_time = get_jiffies_64();
-	deve->attach_count++;
+	rcu_assign_pointer(new->se_lun, lun);
+	rcu_assign_pointer(new->se_lun_acl, lun_acl);
+	hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist);
 	spin_unlock_irq(&nacl->device_list_lock);
 
 	spin_lock_bh(&port->sep_alua_lock);
-	list_add_tail(&deve->alua_port_list, &port->sep_alua_list);
+	list_add_tail(&new->alua_port_list, &port->sep_alua_list);
 	spin_unlock_bh(&port->sep_alua_lock);
 
+	synchronize_rcu();
 	return 0;
 }
 
@@ -382,8 +391,14 @@  int core_disable_device_list_for_node(
 	struct se_portal_group *tpg)
 {
 	struct se_port *port = lun->lun_sep;
-	struct se_dev_entry *deve = nacl->device_list[mapped_lun];
+	struct se_dev_entry *orig;
 
+	spin_lock_irq(&nacl->device_list_lock);
+	orig = target_nacl_find_deve(nacl, mapped_lun);
+	if (!orig) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return 0;
+	}
 	/*
 	 * If the MappedLUN entry is being disabled, the entry in
 	 * port->sep_alua_list must be removed now before clearing the
@@ -398,27 +413,33 @@  int core_disable_device_list_for_node(
 	 * MappedLUN *deve will be released below..
 	 */
 	spin_lock_bh(&port->sep_alua_lock);
-	list_del(&deve->alua_port_list);
+	list_del(&orig->alua_port_list);
 	spin_unlock_bh(&port->sep_alua_lock);
 	/*
 	 * Wait for any in process SPEC_I_PT=1 or REGISTER_AND_MOVE
 	 * PR operation to complete.
 	 */
-	while (atomic_read(&deve->pr_ref_count) != 0)
+	while (atomic_read(&orig->pr_ref_count) != 0)
 		cpu_relax();
 
-	spin_lock_irq(&nacl->device_list_lock);
 	/*
 	 * Disable struct se_dev_entry LUN ACL mapping
 	 */
-	core_scsi3_ua_release_all(deve);
-	deve->se_lun = NULL;
-	deve->se_lun_acl = NULL;
-	deve->lun_flags = 0;
-	deve->creation_time = 0;
-	deve->attach_count--;
+	core_scsi3_ua_release_all(orig);
+	rcu_assign_pointer(orig->se_lun, NULL);
+	rcu_assign_pointer(orig->se_lun_acl, NULL);
+	orig->lun_flags = 0;
+	orig->creation_time = 0;
+	orig->attach_count--;
+	hlist_del_rcu(&orig->link);
 	spin_unlock_irq(&nacl->device_list_lock);
 
+	/*
+	 * Fire off RCU callback to wait for any in process SPEC_I_PT=1
+	 * or REGISTER_AND_MOVE PR operation to complete.
+	 */
+	call_rcu(&orig->rcu_head, target_nacl_deve_callrcu);
+
 	core_scsi3_free_pr_reg_from_nacl(lun->lun_se_dev, nacl);
 	return 0;
 }
@@ -431,26 +452,25 @@  void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
 {
 	struct se_node_acl *nacl;
 	struct se_dev_entry *deve;
-	u32 i;
+	u32 mapped_lun;
 
 	spin_lock_irq(&tpg->acl_node_lock);
 	list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
 		spin_unlock_irq(&tpg->acl_node_lock);
 
-		spin_lock_irq(&nacl->device_list_lock);
-		for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-			deve = nacl->device_list[i];
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
 			if (lun != deve->se_lun)
 				continue;
-			spin_unlock_irq(&nacl->device_list_lock);
 
-			core_disable_device_list_for_node(lun, NULL,
-				deve->mapped_lun, TRANSPORT_LUNFLAGS_NO_ACCESS,
-				nacl, tpg);
+			mapped_lun = deve->mapped_lun;
+			rcu_read_unlock();
 
-			spin_lock_irq(&nacl->device_list_lock);
+			core_disable_device_list_for_node(lun, NULL, mapped_lun,
+					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
+			rcu_read_lock();
 		}
-		spin_unlock_irq(&nacl->device_list_lock);
+		rcu_read_unlock();
 
 		spin_lock_irq(&tpg->acl_node_lock);
 	}
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index d0344ad..9c4bce0 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -12,6 +12,7 @@  struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
 int	core_free_device_list_for_node(struct se_node_acl *,
 		struct se_portal_group *);
 void	core_update_device_list_access(u32, u32, struct se_node_acl *);
+struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u32);
 int	core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *,
 		u32, u32, struct se_node_acl *, struct se_portal_group *);
 int	core_disable_device_list_for_node(struct se_lun *, struct se_lun_acl *,
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index c0c1f67..dbdd3e3 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -55,32 +55,29 @@  static void core_clear_initiator_node_from_tpg(
 	struct se_node_acl *nacl,
 	struct se_portal_group *tpg)
 {
-	int i;
 	struct se_dev_entry *deve;
 	struct se_lun *lun;
+	u32 mapped_lun;
 
-	spin_lock_irq(&nacl->device_list_lock);
-	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = nacl->device_list[i];
-
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) {
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
-
 		if (!deve->se_lun) {
 			pr_err("%s device entries device pointer is"
 				" NULL, but Initiator has access.\n",
 				tpg->se_tpg_tfo->get_fabric_name());
 			continue;
 		}
-
 		lun = deve->se_lun;
-		spin_unlock_irq(&nacl->device_list_lock);
-		core_disable_device_list_for_node(lun, NULL, deve->mapped_lun,
-			TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
+		mapped_lun = deve->mapped_lun;
+		rcu_read_unlock();
 
-		spin_lock_irq(&nacl->device_list_lock);
+		core_disable_device_list_for_node(lun, NULL, mapped_lun,
+					TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg);
+		rcu_read_lock();
 	}
-	spin_unlock_irq(&nacl->device_list_lock);
+	rcu_read_unlock();
 }
 
 /*	__core_tpg_get_initiator_node_acl():
@@ -266,10 +263,12 @@  static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg,
 
 	INIT_LIST_HEAD(&acl->acl_list);
 	INIT_LIST_HEAD(&acl->acl_sess_list);
+	INIT_HLIST_HEAD(&acl->lun_entry_hlist);
 	kref_init(&acl->acl_kref);
 	init_completion(&acl->acl_free_comp);
 	spin_lock_init(&acl->device_list_lock);
 	spin_lock_init(&acl->nacl_sess_lock);
+	spin_lock_init(&acl->lun_entry_lock);
 	atomic_set(&acl->acl_pr_ref_count, 0);
 	if (tpg->se_tpg_tfo->tpg_get_default_depth)
 		acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 042a734..6fb38df 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -584,10 +584,12 @@  struct se_node_acl {
 	char			acl_tag[MAX_ACL_TAG_SIZE];
 	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
 	atomic_t		acl_pr_ref_count;
+	struct hlist_head	lun_entry_hlist;
 	struct se_dev_entry	**device_list;
 	struct se_session	*nacl_sess;
 	struct se_portal_group *se_tpg;
 	spinlock_t		device_list_lock;
+	spinlock_t		lun_entry_lock;
 	spinlock_t		nacl_sess_lock;
 	struct config_group	acl_group;
 	struct config_group	acl_attrib_group;
@@ -653,11 +655,14 @@  struct se_dev_entry {
 	atomic_t		ua_count;
 	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
 	atomic_t		pr_ref_count;
-	struct se_lun_acl	*se_lun_acl;
+	struct se_node_acl	*se_node_acl;
+	struct se_lun_acl __rcu	*se_lun_acl;
 	spinlock_t		ua_lock;
 	struct se_lun		*se_lun;
 	struct list_head	alua_port_list;
 	struct list_head	ua_list;
+	struct hlist_node	link;
+	struct rcu_head		rcu_head;
 };
 
 struct se_dev_attrib {