Possible Race Condition on SIGKILL

Message ID 4FA345DA4F4AE44899BD2B03EEEC2FA911993F1B@SACEXCMBX04-PRD.hq.netapp.com (mailing list archive)
State New, archived

Commit Message

Trond Myklebust Jan. 8, 2013, 11:10 p.m. UTC
On Tue, 2013-01-08 at 17:19 -0500, Chris Perl wrote:
> On Tue, Jan 08, 2013 at 05:16:51PM -0500, Chris Perl wrote:
> > > The lock is associated with the rpc_task. Threads can normally only
> > > access an rpc_task when it is on a wait queue (while holding the wait
> > > queue lock) unless they are given ownership of the rpc_task.
> > > 
> > > IOW: the scenario you describe should not be possible, since it relies
> > > on thread 1 assigning the lock to the rpc_task after it has been removed
> > > from the wait queue.
> > 
> > Hrm.  I guess I'm in over my head here. Apologies if I'm just asking
> > silly bumbling questions.  You can start ignoring me at any time. :)
> > 
> > I was talking about setting (or leaving set) the XPRT_LOCKED bit in
> > rpc_xprt->state.  By "assigning the lock" I really just mean that thread
> > 1 leaves XPRT_LOCKED set in rpc_xprt->state and sets rpc_xprt->snd_task
> > to thread 2.
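
For reference, the transport lock being discussed here works roughly as
follows. This is a simplified sketch of the upstream xprt_reserve_xprt()
logic, not the verbatim code:

	/* XPRT_LOCKED in xprt->state gates access to the transport;
	 * xprt->snd_task records the rpc_task that currently owns it. */
	static int xprt_lock_write_sketch(struct rpc_xprt *xprt,
					  struct rpc_task *task)
	{
		if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
			if (task == xprt->snd_task)
				return 1;	/* we already hold the lock */
			return 0;	/* busy: caller sleeps on xprt->sending */
		}
		xprt->snd_task = task;	/* this task now owns the transport */
		return 1;
	}

On release, the holder either clears XPRT_LOCKED or wakes the next task
on the sending queue and assigns xprt->snd_task to it directly; that
direct assignment is the hand-off under discussion.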
> > 
> > > If you are recompiling the kernel, perhaps you can also add in a patch
> > > to rpc_show_tasks() to display the current value of
> > > clnt->cl_xprt->snd_task?
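
The extra debug lines quoted below presumably come from something along
these lines; a hypothetical sketch rather than Chris's actual patch:

	/* hypothetical: print one line per rpc_clnt, showing its transport
	 * and the task currently holding the transport lock */
	printk(KERN_INFO "client: %p, xprt: %p, snd_task: %p\n",
	       clnt, clnt->cl_xprt, clnt->cl_xprt->snd_task);

The "<6>" prefix in the captured output is the KERN_INFO log level.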
> > 
> > Sure.  This is what 'echo 0 > /proc/sys/sunrpc/rpc_debug' shows after
> > the hang (with my extra prints):
> > 
> > # cat /proc/kmsg
> > ...
> > <6>client: ffff88082b6c9c00, xprt: ffff880824aef800, snd_task: ffff881029c63ec0
> > <6>client: ffff88082b6c9e00, xprt: ffff880824aef800, snd_task: ffff881029c63ec0
> > <6>-pid- flgs status -client- --rqstp- -timeout ---ops--
> > <6>18091 0080    -11 ffff88082b6c9e00   (null)      ffff0770ns3 ACCESS a:call_reserveresult q:xprt_sending
> > <6>client: ffff88082a244600, xprt: ffff88082a343000, snd_task:   (null)
> > <6>client: ffff880829181600, xprt: ffff88082a343000, snd_task:   (null)
> > <6>client: ffff880828170200, xprt: ffff880824aef800, snd_task: ffff881029c63ec0
> 
> Sorry, that output was a little messed up.  Here it is again:
> 
> <6>client: ffff88082b6c9c00, xprt: ffff880824aef800, snd_task: ffff881029c63ec0
> <6>client: ffff88082b6c9e00, xprt: ffff880824aef800, snd_task: ffff881029c63ec0
> <6>-pid- flgs status -client- --rqstp- -timeout ---ops--
> <6>18091 0080    -11 ffff88082b6c9e00   (null)        0 ffffffffa027b7e0 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
> <6>client: ffff88082a244600, xprt: ffff88082a343000, snd_task:   (null)
> <6>client: ffff880829181600, xprt: ffff88082a343000, snd_task:   (null)
> <6>client: ffff880828170200, xprt: ffff880824aef800, snd_task: ffff881029c63ec0

Hi Chris,

It looks as if the problem here is that the rpc_task in question is not
being woken up. I'm aware of at least one problem with priority queues
in RHEL-6.3/CentOS-6.3, and that has been fixed in the upstream kernel.

See attachment.

Cheers
  Trond

Comments

Chris Perl Jan. 9, 2013, 5:55 p.m. UTC | #1
> Hrm.  I guess I'm in over my head here. Apologies if I'm just asking
> silly bumbling questions.  You can start ignoring me at any time. :)

I stared at the code for a while longer and now see why what I
outlined is not possible.  Thanks for helping to clarify!

I decided to pull your git repo and compile with HEAD at
87ed50036b866db2ec2ba16b2a7aec4a2b0b7c39 (linux-next as of this
morning).  Using this kernel, I can no longer induce any hangs.

Interestingly, I tried recompiling the CentOS 6.3 kernel with both the
original patch (v4) and the latest patch you sent to fix the priority
queues.  With both of those in place, I still run into the problem.

echo 0 > /proc/sys/sunrpc/rpc_debug after the hang shows (I left in the
previous additional prints and added printing of the task pointer
itself):

<6>client: ffff88082896c200, xprt: ffff880829011000, snd_task: ffff880829a1aac0
<6>client: ffff8808282b5600, xprt: ffff880829011000, snd_task: ffff880829a1aac0
<6>--task-- -pid- flgs status -client- --rqstp- -timeout ---ops--
<6>ffff88082a463180 22007 0080    -11 ffff8808282b5600   (null)        0 ffffffffa027b7a0 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
<6>client: ffff88082838cc00, xprt: ffff88082b7c5800, snd_task:   (null)
<6>client: ffff8808283db400, xprt: ffff88082b7c5800, snd_task:   (null)
<6>client: ffff8808283db200, xprt: ffff880829011000, snd_task: ffff880829a1aac0

Any thoughts about other patches that might affect this?
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

From c05eecf636101dd4347b2d8fa457626bf0088e0a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 30 Nov 2012 23:59:29 -0500
Subject: [PATCH] SUNRPC: Don't allow low priority tasks to pre-empt higher
 priority ones

Currently, the priority queues attempt to be 'fair' to lower priority
tasks by scheduling them after a certain number of higher priority tasks
have run. The problem is that both the transport send queue and
the NFSv4.1 session slot queue have strong ordering requirements.

This patch therefore removes the fairness code in favour of strong
ordering of task priorities.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 -
 net/sunrpc/sched.c           | 44 ++++++++++++++++++++++----------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index dc0c3cc..b64f8eb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -192,7 +192,6 @@  struct rpc_wait_queue {
 	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
-	unsigned char		count;			/* # task groups remaining serviced so far */
 	unsigned char		nr;			/* # tasks remaining for cookie */
 	unsigned short		qlen;			/* total # tasks waiting in queue */
 	struct rpc_timer	timer_list;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1aefc9f..d17a704 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -98,6 +98,23 @@  __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+	queue->priority = priority;
+}
+
+static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
+{
+	queue->owner = pid;
+	queue->nr = RPC_BATCH_COUNT;
+}
+
+static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+	rpc_set_waitqueue_priority(queue, queue->maxpriority);
+	rpc_set_waitqueue_owner(queue, 0);
+}
+
 /*
  * Add new request to a priority queue.
  */
@@ -109,9 +126,11 @@  static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
 	struct rpc_task *t;
 
 	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	q = &queue->tasks[queue_priority];
 	if (unlikely(queue_priority > queue->maxpriority))
-		q = &queue->tasks[queue->maxpriority];
+		queue_priority = queue->maxpriority;
+	if (queue_priority > queue->priority)
+		rpc_set_waitqueue_priority(queue, queue_priority);
+	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
 			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
@@ -180,24 +199,6 @@  static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
 			task->tk_pid, queue, rpc_qname(queue));
 }
 
-static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
-{
-	queue->priority = priority;
-	queue->count = 1 << (priority * 2);
-}
-
-static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
-static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
-{
-	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
-}
-
 static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 {
 	int i;
@@ -464,8 +465,7 @@  static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		/*
 		 * Check if we need to switch queues.
 		 */
-		if (--queue->count)
-			goto new_owner;
+		goto new_owner;
 	}
 
 	/*
-- 
1.7.11.7
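
To put the removed fairness logic in concrete terms: before this patch,
rpc_set_waitqueue_priority() primed a per-queue counter,

	queue->count = 1 << (priority * 2);

and __rpc_find_next_queued_priority() decremented it on every wakeup,
only rotating to a lower-priority queue once it hit zero. So a queue
running at priority 2 would service 1 << 4 = 16 task groups before
lower-priority tasks got a turn. With the counter gone, the scheduler
always takes the "new_owner" path, i.e. tasks are strictly ordered by
priority and a lower-priority task can no longer run ahead of a queued
higher-priority one.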