[4/8] ocfs2/dlm: do not purge lockres that is queued for assert master
diff mbox

Message ID 20140609200403.6A34D31C75B@corp2gmr1-1.hot.corp.google.com
State New, archived
Headers show

Commit Message

Andrew Morton June 9, 2014, 8:04 p.m. UTC
From: Xue jiufei <xuejiufei@huawei.com>
Subject: ocfs2/dlm: do not purge lockres that is queued for assert master

When workqueue is delayed, it may occur that a lockres is purged while it
is still queued for master assert.  it may trigger BUG() as follows.

N1                                         N2
dlm_get_lockres()
->dlm_do_master_requery
                                  is the master of lockres,
                                  so queue assert_master work

                                  dlm_thread() start running
                                  and purge the lockres

                                  dlm_assert_master_worker()
                                  send assert master message
                                  to other nodes
receiving the assert_master
message, set master to N2

dlmlock_remote() send create_lock message to N2, but receive DLM_IVLOCKID,
if it is RECOVERY lockres, it triggers the BUG().

Another BUG() is triggered when N3 become the new master and send
assert_master to N1, N1 will trigger the BUG() because owner doesn't
match.  So we should not purge lockres when it is queued for assert
master.

Signed-off-by: joyce.xue <xuejiufei@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/ocfs2/dlm/dlmcommon.h   |    4 +++
 fs/ocfs2/dlm/dlmmaster.c   |   43 ++++++++++++++++++++++++++++++++++-
 fs/ocfs2/dlm/dlmrecovery.c |    3 +-
 fs/ocfs2/dlm/dlmthread.c   |   11 +++++---
 4 files changed, 55 insertions(+), 6 deletions(-)

Comments

Mark Fasheh June 13, 2014, 9:31 p.m. UTC | #1
On Mon, Jun 09, 2014 at 01:04:03PM -0700, Andrew Morton wrote:
> @@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struc
>  	wake_up(&res->wq);
>  }
>  
> +void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
> +		struct dlm_lock_resource *res)
> +{
> +	assert_spin_locked(&res->spinlock);
> +	res->inflight_assert_workers++;
> +	mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
> +			dlm->name, res->lockname.len, res->lockname.name,
> +			res->inflight_assert_workers);
> +}
> +
> +void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
> +		struct dlm_lock_resource *res)
> +{
> +	spin_lock(&res->spinlock);
> +	__dlm_lockres_grab_inflight_worker(dlm, res);
> +	spin_unlock(&res->spinlock);
> +}
> +
> +void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
> +		struct dlm_lock_resource *res)
> +{
> +	assert_spin_locked(&res->spinlock);
> +	BUG_ON(res->inflight_assert_workers == 0);
> +	res->inflight_assert_workers--;
> +	mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
> +			dlm->name, res->lockname.len, res->lockname.name,
> +			res->inflight_assert_workers);
> +}
> +
> +void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
> +		struct dlm_lock_resource *res)
> +{
> +	spin_lock(&res->spinlock);
> +	__dlm_lockres_drop_inflight_worker(dlm, res);
> +	spin_unlock(&res->spinlock);
> +}
> +
>  /*
>   * lookup a lock resource by name.
>   * may already exist in the hashtable.
> @@ -1603,7 +1641,8 @@ send_response:
>  			mlog(ML_ERROR, "failed to dispatch assert master work\n");
>  			response = DLM_MASTER_RESP_ERROR;
>  			dlm_lockres_put(res);
> -		}
> +		} else
> +			dlm_lockres_grab_inflight_worker(dlm, res);
>  	} else {
>  		if (res)
>  			dlm_lockres_put(res);
> @@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(str
>  	dlm_lockres_release_ast(dlm, res);
>  
>  put:
> +	dlm_lockres_drop_inflight_worker(dlm, res);
> +
>  	dlm_lockres_put(res);
>  
>  	mlog(0, "finished with dlm_assert_master_worker\n");
> diff -puN fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmrecovery.c
> --- a/fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
> +++ a/fs/ocfs2/dlm/dlmrecovery.c
> @@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2
>  				mlog_errno(-ENOMEM);
>  				/* retry!? */
>  				BUG();
> -			}
> +			} else
> +				__dlm_lockres_grab_inflight_worker(dlm, res);
>  		} else /* put.. incase we are not the master */
>  			dlm_lockres_put(res);
>  		spin_unlock(&res->spinlock);
> diff -puN fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmthread.c
> --- a/fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
> +++ a/fs/ocfs2/dlm/dlmthread.c
> @@ -259,11 +259,14 @@ static void dlm_run_purge_list(struct dl
>  		 * refs on it. */
>  		unused = __dlm_lockres_unused(lockres);
>  		if (!unused ||
> -		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
> +		    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
> +		    (lockres->inflight_assert_workers != 0)) {

If there's any assert master message we will halt purging *all* lock
resources. That seems extreme to me :/

What about putting a flag on lockres->state to indicate that it's got some
work queued? We would set it before queuing the work, then clear it in the
worker, or if we ever cancel the work.
	--Mark

--
Mark Fasheh
Xue jiufei June 16, 2014, 1:26 a.m. UTC | #2
On 2014/6/14 5:31, Mark Fasheh wrote:
> On Mon, Jun 09, 2014 at 01:04:03PM -0700, Andrew Morton wrote:
>> @@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struc
>>  	wake_up(&res->wq);
>>  }
>>  
>> +void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
>> +		struct dlm_lock_resource *res)
>> +{
>> +	assert_spin_locked(&res->spinlock);
>> +	res->inflight_assert_workers++;
>> +	mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
>> +			dlm->name, res->lockname.len, res->lockname.name,
>> +			res->inflight_assert_workers);
>> +}
>> +
>> +void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
>> +		struct dlm_lock_resource *res)
>> +{
>> +	spin_lock(&res->spinlock);
>> +	__dlm_lockres_grab_inflight_worker(dlm, res);
>> +	spin_unlock(&res->spinlock);
>> +}
>> +
>> +void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
>> +		struct dlm_lock_resource *res)
>> +{
>> +	assert_spin_locked(&res->spinlock);
>> +	BUG_ON(res->inflight_assert_workers == 0);
>> +	res->inflight_assert_workers--;
>> +	mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
>> +			dlm->name, res->lockname.len, res->lockname.name,
>> +			res->inflight_assert_workers);
>> +}
>> +
>> +void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
>> +		struct dlm_lock_resource *res)
>> +{
>> +	spin_lock(&res->spinlock);
>> +	__dlm_lockres_drop_inflight_worker(dlm, res);
>> +	spin_unlock(&res->spinlock);
>> +}
>> +
>>  /*
>>   * lookup a lock resource by name.
>>   * may already exist in the hashtable.
>> @@ -1603,7 +1641,8 @@ send_response:
>>  			mlog(ML_ERROR, "failed to dispatch assert master work\n");
>>  			response = DLM_MASTER_RESP_ERROR;
>>  			dlm_lockres_put(res);
>> -		}
>> +		} else
>> +			dlm_lockres_grab_inflight_worker(dlm, res);
>>  	} else {
>>  		if (res)
>>  			dlm_lockres_put(res);
>> @@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(str
>>  	dlm_lockres_release_ast(dlm, res);
>>  
>>  put:
>> +	dlm_lockres_drop_inflight_worker(dlm, res);
>> +
>>  	dlm_lockres_put(res);
>>  
>>  	mlog(0, "finished with dlm_assert_master_worker\n");
>> diff -puN fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmrecovery.c
>> --- a/fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
>> +++ a/fs/ocfs2/dlm/dlmrecovery.c
>> @@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2
>>  				mlog_errno(-ENOMEM);
>>  				/* retry!? */
>>  				BUG();
>> -			}
>> +			} else
>> +				__dlm_lockres_grab_inflight_worker(dlm, res);
>>  		} else /* put.. incase we are not the master */
>>  			dlm_lockres_put(res);
>>  		spin_unlock(&res->spinlock);
>> diff -puN fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmthread.c
>> --- a/fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
>> +++ a/fs/ocfs2/dlm/dlmthread.c
>> @@ -259,11 +259,14 @@ static void dlm_run_purge_list(struct dl
>>  		 * refs on it. */
>>  		unused = __dlm_lockres_unused(lockres);
>>  		if (!unused ||
>> -		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
>> +		    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
>> +		    (lockres->inflight_assert_workers != 0)) {
> 
> If there's any assert master message we will halt purging *all* lock
> resources. That seems extreme to me :/
> 
Not halt purging *all* lock resource, when one lockres is queued for
master assert, it will be moved to the tail of the purge list, so
dlm_thread can keep purging other lock resources.
   -- joyce.xue
> What about putting a flag on lockres->state to indicate that it's got some
> work queued? We would set it before queuing the work, then clear it in the
> worker, or if we ever cancel the work.
> 	--Mark
> 
> --
> Mark Fasheh
> .
>
Andrew Morton June 20, 2014, 10:33 p.m. UTC | #3
On Mon, 16 Jun 2014 09:26:53 +0800 Xue jiufei <xuejiufei@huawei.com> wrote:

> >>  		spin_unlock(&res->spinlock);
> >> diff -puN fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmthread.c
> >> --- a/fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
> >> +++ a/fs/ocfs2/dlm/dlmthread.c
> >> @@ -259,11 +259,14 @@ static void dlm_run_purge_list(struct dl
> >>  		 * refs on it. */
> >>  		unused = __dlm_lockres_unused(lockres);
> >>  		if (!unused ||
> >> -		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
> >> +		    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
> >> +		    (lockres->inflight_assert_workers != 0)) {
> > 
> > If there's any assert master message we will halt purging *all* lock
> > resources. That seems extreme to me :/
> > 
> Not halt purging *all* lock resource, when one lockres is queued for
> master assert, it will be moved to the tail of the purge list, so
> dlm_thread can keep purging other lock resources.

Where are we up to with this one?

Thanks.
Mark Fasheh June 20, 2014, 10:49 p.m. UTC | #4
On Fri, Jun 20, 2014 at 03:33:46PM -0700, Andrew Morton wrote:
> On Mon, 16 Jun 2014 09:26:53 +0800 Xue jiufei <xuejiufei@huawei.com> wrote:
> 
> > >>  		spin_unlock(&res->spinlock);
> > >> diff -puN fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmthread.c
> > >> --- a/fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
> > >> +++ a/fs/ocfs2/dlm/dlmthread.c
> > >> @@ -259,11 +259,14 @@ static void dlm_run_purge_list(struct dl
> > >>  		 * refs on it. */
> > >>  		unused = __dlm_lockres_unused(lockres);
> > >>  		if (!unused ||
> > >> -		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
> > >> +		    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
> > >> +		    (lockres->inflight_assert_workers != 0)) {
> > > 
> > > If there's any assert master message we will halt purging *all* lock
> > > resources. That seems extreme to me :/
> > > 
> > Not halt purging *all* lock resource, when one lockres is queued for
> > master assert, it will be moved to the tail of the purge list, so
> > dlm_thread can keep purging other lock resources.
> 
> Where are we up to with this one?

I read the code wrong, Xue is correct:

Reviewed-by: Mark Fasheh <mfasheh@suse.de>

Thanks,
	--Mark


--
Mark Fasheh

Patch
diff mbox

diff -puN fs/ocfs2/dlm/dlmcommon.h~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmcommon.h
--- a/fs/ocfs2/dlm/dlmcommon.h~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
+++ a/fs/ocfs2/dlm/dlmcommon.h
@@ -332,6 +332,7 @@  struct dlm_lock_resource
 	u16 state;
 	char lvb[DLM_LVB_LEN];
 	unsigned int inflight_locks;
+	unsigned int inflight_assert_workers;
 	unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 };
 
@@ -911,6 +912,9 @@  void dlm_lockres_drop_inflight_ref(struc
 void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
 				   struct dlm_lock_resource *res);
 
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+		struct dlm_lock_resource *res);
+
 void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff -puN fs/ocfs2/dlm/dlmmaster.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmmaster.c
--- a/fs/ocfs2/dlm/dlmmaster.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
+++ a/fs/ocfs2/dlm/dlmmaster.c
@@ -581,6 +581,7 @@  static void dlm_init_lockres(struct dlm_
 	atomic_set(&res->asts_reserved, 0);
 	res->migration_pending = 0;
 	res->inflight_locks = 0;
+	res->inflight_assert_workers = 0;
 
 	res->dlm = dlm;
 
@@ -683,6 +684,43 @@  void dlm_lockres_drop_inflight_ref(struc
 	wake_up(&res->wq);
 }
 
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+		struct dlm_lock_resource *res)
+{
+	assert_spin_locked(&res->spinlock);
+	res->inflight_assert_workers++;
+	mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
+			dlm->name, res->lockname.len, res->lockname.name,
+			res->inflight_assert_workers);
+}
+
+void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+		struct dlm_lock_resource *res)
+{
+	spin_lock(&res->spinlock);
+	__dlm_lockres_grab_inflight_worker(dlm, res);
+	spin_unlock(&res->spinlock);
+}
+
+void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+		struct dlm_lock_resource *res)
+{
+	assert_spin_locked(&res->spinlock);
+	BUG_ON(res->inflight_assert_workers == 0);
+	res->inflight_assert_workers--;
+	mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
+			dlm->name, res->lockname.len, res->lockname.name,
+			res->inflight_assert_workers);
+}
+
+void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+		struct dlm_lock_resource *res)
+{
+	spin_lock(&res->spinlock);
+	__dlm_lockres_drop_inflight_worker(dlm, res);
+	spin_unlock(&res->spinlock);
+}
+
 /*
  * lookup a lock resource by name.
  * may already exist in the hashtable.
@@ -1603,7 +1641,8 @@  send_response:
 			mlog(ML_ERROR, "failed to dispatch assert master work\n");
 			response = DLM_MASTER_RESP_ERROR;
 			dlm_lockres_put(res);
-		}
+		} else
+			dlm_lockres_grab_inflight_worker(dlm, res);
 	} else {
 		if (res)
 			dlm_lockres_put(res);
@@ -2118,6 +2157,8 @@  static void dlm_assert_master_worker(str
 	dlm_lockres_release_ast(dlm, res);
 
 put:
+	dlm_lockres_drop_inflight_worker(dlm, res);
+
 	dlm_lockres_put(res);
 
 	mlog(0, "finished with dlm_assert_master_worker\n");
diff -puN fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmrecovery.c
--- a/fs/ocfs2/dlm/dlmrecovery.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
+++ a/fs/ocfs2/dlm/dlmrecovery.c
@@ -1708,7 +1708,8 @@  int dlm_master_requery_handler(struct o2
 				mlog_errno(-ENOMEM);
 				/* retry!? */
 				BUG();
-			}
+			} else
+				__dlm_lockres_grab_inflight_worker(dlm, res);
 		} else /* put.. incase we are not the master */
 			dlm_lockres_put(res);
 		spin_unlock(&res->spinlock);
diff -puN fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master fs/ocfs2/dlm/dlmthread.c
--- a/fs/ocfs2/dlm/dlmthread.c~ocfs2-dlm-do-not-purge-lockres-that-is-queued-for-assert-master
+++ a/fs/ocfs2/dlm/dlmthread.c
@@ -259,11 +259,14 @@  static void dlm_run_purge_list(struct dl
 		 * refs on it. */
 		unused = __dlm_lockres_unused(lockres);
 		if (!unused ||
-		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+		    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
+		    (lockres->inflight_assert_workers != 0)) {
 			mlog(0, "%s: res %.*s is in use or being remastered, "
-			     "used %d, state %d\n", dlm->name,
-			     lockres->lockname.len, lockres->lockname.name,
-			     !unused, lockres->state);
+			     "used %d, state %d, assert master workers %u\n",
+			     dlm->name, lockres->lockname.len,
+			     lockres->lockname.name,
+			     !unused, lockres->state,
+			     lockres->inflight_assert_workers);
 			list_move_tail(&dlm->purge_list, &lockres->purge);
 			spin_unlock(&lockres->spinlock);
 			continue;