Message ID | 20120719211629.GC32763@google.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
On Thu, 2012-07-19 at 14:16 -0700, Tejun Heo wrote: > From 06f9a06f4aeecdb9d07014713ab41b548ae219b5 Mon Sep 17 00:00:00 2001 > From: Tejun Heo <tj@kernel.org> > Date: Thu, 19 Jul 2012 13:52:53 -0700 > > kthread_worker provides minimalistic workqueue-like interface for > users which need a dedicated worker thread (e.g. for realtime > priority). It has basic queue, flush_work, flush_worker operations > which mostly match the workqueue counterparts; however, due to the way > flush_work() is implemented, it has a noticeable difference of not > allowing work items to be freed while being executed. > > While the current users of kthread_worker are okay with the current > behavior, the restriction does impede some valid use cases. Also, > removing this difference isn't difficult and actually makes the code > easier to understand. > > This patch reimplements flush_kthread_work() such that it uses a > flush_work item instead of queue/done sequence numbers. > > Signed-off-by: Tejun Heo <tj@kernel.org> > --- > include/linux/kthread.h | 8 +----- > kernel/kthread.c | 48 ++++++++++++++++++++++++++-------------------- > 2 files changed, 29 insertions(+), 27 deletions(-) Hi Tejun, I have a question and comment below. > diff --git a/include/linux/kthread.h b/include/linux/kthread.h > index 0714b24..22ccf9d 100644 > --- a/include/linux/kthread.h > +++ b/include/linux/kthread.h > @@ -49,8 +49,6 @@ extern int tsk_fork_get_node(struct task_struct *tsk); > * can be queued and flushed using queue/flush_kthread_work() > * respectively. Queued kthread_works are processed by a kthread > * running kthread_worker_fn(). > - * > - * A kthread_work can't be freed while it is executing. > */ > struct kthread_work; > typedef void (*kthread_work_func_t)(struct kthread_work *work); > @@ -59,15 +57,14 @@ struct kthread_worker { > spinlock_t lock; > struct list_head work_list; > struct task_struct *task; > + struct kthread_work *current_work; > }; > > struct kthread_work { > struct list_head node; > kthread_work_func_t func; > wait_queue_head_t done; > - atomic_t flushing; > - int queue_seq; > - int done_seq; > + struct kthread_worker *worker; > }; > > #define KTHREAD_WORKER_INIT(worker) { \ > @@ -79,7 +76,6 @@ struct kthread_work { > .node = LIST_HEAD_INIT((work).node), \ > .func = (fn), \ > .done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done), \ > - .flushing = ATOMIC_INIT(0), \ > } > > #define DEFINE_KTHREAD_WORKER(worker) \ > diff --git a/kernel/kthread.c b/kernel/kthread.c > index 7b8a678..4034b2b 100644 > --- a/kernel/kthread.c > +++ b/kernel/kthread.c > @@ -360,16 +360,12 @@ repeat: > struct kthread_work, node); > list_del_init(&work->node); > } > + worker->current_work = work; > spin_unlock_irq(&worker->lock); > > if (work) { > __set_current_state(TASK_RUNNING); > work->func(work); If the call to 'work->func(work);' frees the memory pointed to by 'work', 'worker->current_work' points to deallocated memory. So 'worker->current_work' will only ever used as a unique 'work' identifier to handle, correct? > - smp_wmb(); /* wmb worker-b0 paired with flush-b1 */ > - work->done_seq = work->queue_seq; > - smp_mb(); /* mb worker-b1 paired with flush-b0 */ > - if (atomic_read(&work->flushing)) > - wake_up_all(&work->done); > } else if (!freezing(current)) > schedule(); > > @@ -384,7 +380,7 @@ static void insert_kthread_work(struct kthread_worker *worker, > struct list_head *pos) > { > list_add_tail(&work->node, pos); > - work->queue_seq++; > + work->worker = worker; > if (likely(worker->task)) > wake_up_process(worker->task); > } > @@ -434,25 +430,35 @@ static void kthread_flush_work_fn(struct kthread_work *work) > */ > void flush_kthread_work(struct kthread_work *work) > { > - int seq = work->queue_seq; > + struct kthread_flush_work fwork = { > + KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), > + COMPLETION_INITIALIZER_ONSTACK(fwork.done), > + }; > + struct kthread_worker *worker; > + bool noop = false; > + You might want a check for 'work == NULL' here, to gracefully handle code like the following: void driver_work_handler(struct kthread_work *work) { ... kfree(work); } struct kthread_work *driver_queue_batch(void) { struct kthread_work *work = NULL; ... while (driver_more_stuff_todo()) { work = kzalloc(sizeof(struct kthread work), GFP_WHATEVER); ... queue_kthread_work(&driver_worker, work); } return work; } void driver_foobar(void) { ... flush_kthread_work(driver_queue_batch()); ... } Otherwise, things look OK to me. Regards, Andy > +retry: > + worker = work->worker; > + if (!worker) > + return; > > - atomic_inc(&work->flushing); > + spin_lock_irq(&worker->lock); > + if (work->worker != worker) { > + spin_unlock_irq(&worker->lock); > + goto retry; > + } > > - /* > - * mb flush-b0 paired with worker-b1, to make sure either > - * worker sees the above increment or we see done_seq update. > - */ > - smp_mb__after_atomic_inc(); > + if (!list_empty(&work->node)) > + insert_kthread_work(worker, &fwork.work, work->node.next); > + else if (worker->current_work == work) > + insert_kthread_work(worker, &fwork.work, worker->work_list.next); > + else > + noop = true; > > - /* A - B <= 0 tests whether B is in front of A regardless of overflow */ > - wait_event(work->done, seq - work->done_seq <= 0); > - atomic_dec(&work->flushing); > + spin_unlock_irq(&worker->lock); > > - /* > - * rmb flush-b1 paired with worker-b0, to make sure our caller > - * sees every change made by work->func(). > - */ > - smp_mb__after_atomic_dec(); > + if (!noop) > + wait_for_completion(&fwork.done); > } > EXPORT_SYMBOL_GPL(flush_kthread_work); > ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
Hello, On Sat, Jul 21, 2012 at 02:20:06PM -0400, Andy Walls wrote: > > + worker->current_work = work; > > spin_unlock_irq(&worker->lock); > > > > if (work) { > > __set_current_state(TASK_RUNNING); > > work->func(work); > > If the call to 'work->func(work);' frees the memory pointed to by > 'work', 'worker->current_work' points to deallocated memory. > So 'worker->current_work' will only ever used as a unique 'work' > identifier to handle, correct? Yeah. flush_kthread_work(@work) which can only be called if @work is known to be alive looks at the pointer to determine whether it's the current work item on the worker. > > void flush_kthread_work(struct kthread_work *work) > > { > > - int seq = work->queue_seq; > > + struct kthread_flush_work fwork = { > > + KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), > > + COMPLETION_INITIALIZER_ONSTACK(fwork.done), > > + }; > > + struct kthread_worker *worker; > > + bool noop = false; > > + > > You might want a check for 'work == NULL' here, to gracefully handle > code like the following: > > void driver_work_handler(struct kthread_work *work) > { > ... > kfree(work); > } > > struct kthread_work *driver_queue_batch(void) > { > struct kthread_work *work = NULL; > ... > while (driver_more_stuff_todo()) { > work = kzalloc(sizeof(struct kthread work), GFP_WHATEVER); > ... > queue_kthread_work(&driver_worker, work); > } > return work; > } > > void driver_foobar(void) > { > ... > flush_kthread_work(driver_queue_batch()); > ... > } workqueue's flush_work() doesn't allow %NULL pointer. I don't want to make the behaviors deviate and don't see much point in changing workqueue's behavior at this point. Thanks.
Hi Tejun, Thanks for responding to my previous questions. I have one more. On Sat, 2012-07-21 at 14:20 -0400, Andy Walls wrote: > On Thu, 2012-07-19 at 14:16 -0700, Tejun Heo wrote: > > From 06f9a06f4aeecdb9d07014713ab41b548ae219b5 Mon Sep 17 00:00:00 2001 > > From: Tejun Heo <tj@kernel.org> > > Date: Thu, 19 Jul 2012 13:52:53 -0700 > > > > kthread_worker provides minimalistic workqueue-like interface for > > users which need a dedicated worker thread (e.g. for realtime > > priority). It has basic queue, flush_work, flush_worker operations > > which mostly match the workqueue counterparts; however, due to the way > > flush_work() is implemented, it has a noticeable difference of not > > allowing work items to be freed while being executed. [snip] > > @@ -434,25 +430,35 @@ static void kthread_flush_work_fn(struct kthread_work *work) > > */ > > void flush_kthread_work(struct kthread_work *work) > > { > > - int seq = work->queue_seq; > > + struct kthread_flush_work fwork = { > > + KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), > > + COMPLETION_INITIALIZER_ONSTACK(fwork.done), > > + }; > > + struct kthread_worker *worker; > > + bool noop = false; > > + > > You might want a check for 'work == NULL' here, to gracefully handle > code like the following: > > void driver_work_handler(struct kthread_work *work) > { > ... > kfree(work); > } > > struct kthread_work *driver_queue_batch(void) > { > struct kthread_work *work = NULL; > ... > while (driver_more_stuff_todo()) { > work = kzalloc(sizeof(struct kthread work), GFP_WHATEVER); > ... > queue_kthread_work(&driver_worker, work); > } > return work; > } > > void driver_foobar(void) > { > ... > flush_kthread_work(driver_queue_batch()); > ... > } [snip] > > +retry: > > + worker = work->worker; > > + if (!worker) > > + return; > > > > - atomic_inc(&work->flushing); > > + spin_lock_irq(&worker->lock); > > + if (work->worker != worker) { > > + spin_unlock_irq(&worker->lock); > > + goto retry; > > + } > > > > - /* > > - * mb flush-b0 paired with worker-b1, to make sure either > > - * worker sees the above increment or we see done_seq update. > > - */ > > - smp_mb__after_atomic_inc(); > > + if (!list_empty(&work->node)) > > + insert_kthread_work(worker, &fwork.work, work->node.next); > > + else if (worker->current_work == work) > > + insert_kthread_work(worker, &fwork.work, worker->work_list.next); > > + else > > + noop = true; The objective is "allowing work items to be freed while being executed", to me, it does not seem safe to me to allow flush_kthread_work() to actually dereference the passed in work pointer. flush_kthread_work() could theoretically be executed after the work function was executed by the worker kthread which frees the 'work' object, and that the memory 'work' points to could theoretically already be reallocated for something else. (I admit the above likely has very low probability of occuring.) Is there a way to avoid dereferencing 'work' here? Regards, Andy > > > > - /* A - B <= 0 tests whether B is in front of A regardless of overflow */ > > - wait_event(work->done, seq - work->done_seq <= 0); > > - atomic_dec(&work->flushing); > > + spin_unlock_irq(&worker->lock); > > > > - /* > > - * rmb flush-b1 paired with worker-b0, to make sure our caller > > - * sees every change made by work->func(). > > - */ > > - smp_mb__after_atomic_dec(); > > + if (!noop) > > + wait_for_completion(&fwork.done); > > } > > EXPORT_SYMBOL_GPL(flush_kthread_work); > > > ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
On Sun, 2012-07-22 at 09:49 -0700, Tejun Heo wrote: > Hello, > > On Sat, Jul 21, 2012 at 02:20:06PM -0400, Andy Walls wrote: > > > + worker->current_work = work; > > > spin_unlock_irq(&worker->lock); > > > > > > if (work) { > > > __set_current_state(TASK_RUNNING); > > > work->func(work); > > > > If the call to 'work->func(work);' frees the memory pointed to by > > 'work', 'worker->current_work' points to deallocated memory. > > So 'worker->current_work' will only ever used as a unique 'work' > > identifier to handle, correct? > > Yeah. flush_kthread_work(@work) which can only be called if @work is > known to be alive looks at the pointer to determine whether it's the > current work item on the worker. OK. Thanks. Hmmm, I didn't know about the constraint about 'known to be alive' in the other email I just sent. That might make calling flush_kthread_work() hard for a user to use, if the user lets the work get freed by another thread executing the work. > > > void flush_kthread_work(struct kthread_work *work) > > > { > > > - int seq = work->queue_seq; > > > + struct kthread_flush_work fwork = { > > > + KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), > > > + COMPLETION_INITIALIZER_ONSTACK(fwork.done), > > > + }; > > > + struct kthread_worker *worker; > > > + bool noop = false; > > > + > > > > You might want a check for 'work == NULL' here, to gracefully handle > > code like the following: > workqueue's flush_work() doesn't allow %NULL pointer. I don't want to > make the behaviors deviate and don't see much point in changing > workqueue's behavior at this point. OK. Fair enough. Thanks. Regards, Andy ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
Hello, On Sun, Jul 22, 2012 at 04:46:54PM -0400, Andy Walls wrote: > Hmmm, I didn't know about the constraint about 'known to be alive' in > the other email I just sent. > > That might make calling flush_kthread_work() hard for a user to use, if > the user lets the work get freed by another thread executing the work. Umm... flushing a freed work item doesn't make any sense at all. The pointer itself loses the ability to identify anything. What if it gets recycled to another work item which happens to depend on the flusher to make forward progress? You now have a circular dependency through a recycled memory area. Good luck hunting that down. For pretty much any API, allowing dangling pointers as argument is insane. If you want to flush self-freeing work items, flush the kthread_worker. That's how it is with workqueue and how it should be with kthread_worker too. Thanks.
On Mon, 2012-07-23 at 10:12 -0700, Tejun Heo wrote: > Hello, > > On Sun, Jul 22, 2012 at 04:46:54PM -0400, Andy Walls wrote: > > Hmmm, I didn't know about the constraint about 'known to be alive' in > > the other email I just sent. > > > > That might make calling flush_kthread_work() hard for a user to use, if > > the user lets the work get freed by another thread executing the work. > > Umm... flushing a freed work item doesn't make any sense at all. The > pointer itself loses the ability to identify anything. What if it > gets recycled to another work item which happens to depend on the > flusher to make forward progress? You now have a circular dependency > through a recycled memory area. Good luck hunting that down. > > For pretty much any API, allowing dangling pointers as argument is > insane. If you want to flush self-freeing work items, flush the > kthread_worker. That's how it is with workqueue and how it should be > with kthread_worker too. Hi, Ah. My problem was that I mentally assigned the wrong rationale for why you reworked flush_kthread_work(). Thank you for your patience and explanations. Sorry for the noise. For patch 2/2: Reviewed-by: Andy Walls <awalls@md.metrocast.net> Regards, Andy ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 0714b24..22ccf9d 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -49,8 +49,6 @@ extern int tsk_fork_get_node(struct task_struct *tsk); * can be queued and flushed using queue/flush_kthread_work() * respectively. Queued kthread_works are processed by a kthread * running kthread_worker_fn(). - * - * A kthread_work can't be freed while it is executing. */ struct kthread_work; typedef void (*kthread_work_func_t)(struct kthread_work *work); @@ -59,15 +57,14 @@ struct kthread_worker { spinlock_t lock; struct list_head work_list; struct task_struct *task; + struct kthread_work *current_work; }; struct kthread_work { struct list_head node; kthread_work_func_t func; wait_queue_head_t done; - atomic_t flushing; - int queue_seq; - int done_seq; + struct kthread_worker *worker; }; #define KTHREAD_WORKER_INIT(worker) { \ @@ -79,7 +76,6 @@ struct kthread_work { .node = LIST_HEAD_INIT((work).node), \ .func = (fn), \ .done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done), \ - .flushing = ATOMIC_INIT(0), \ } #define DEFINE_KTHREAD_WORKER(worker) \ diff --git a/kernel/kthread.c b/kernel/kthread.c index 7b8a678..4034b2b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -360,16 +360,12 @@ repeat: struct kthread_work, node); list_del_init(&work->node); } + worker->current_work = work; spin_unlock_irq(&worker->lock); if (work) { __set_current_state(TASK_RUNNING); work->func(work); - smp_wmb(); /* wmb worker-b0 paired with flush-b1 */ - work->done_seq = work->queue_seq; - smp_mb(); /* mb worker-b1 paired with flush-b0 */ - if (atomic_read(&work->flushing)) - wake_up_all(&work->done); } else if (!freezing(current)) schedule(); @@ -384,7 +380,7 @@ static void insert_kthread_work(struct kthread_worker *worker, struct list_head *pos) { list_add_tail(&work->node, pos); - work->queue_seq++; + work->worker = worker; if (likely(worker->task)) wake_up_process(worker->task); } @@ -434,25 +430,35 @@ static void kthread_flush_work_fn(struct kthread_work *work) */ void flush_kthread_work(struct kthread_work *work) { - int seq = work->queue_seq; + struct kthread_flush_work fwork = { + KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), + COMPLETION_INITIALIZER_ONSTACK(fwork.done), + }; + struct kthread_worker *worker; + bool noop = false; + +retry: + worker = work->worker; + if (!worker) + return; - atomic_inc(&work->flushing); + spin_lock_irq(&worker->lock); + if (work->worker != worker) { + spin_unlock_irq(&worker->lock); + goto retry; + } - /* - * mb flush-b0 paired with worker-b1, to make sure either - * worker sees the above increment or we see done_seq update. - */ - smp_mb__after_atomic_inc(); + if (!list_empty(&work->node)) + insert_kthread_work(worker, &fwork.work, work->node.next); + else if (worker->current_work == work) + insert_kthread_work(worker, &fwork.work, worker->work_list.next); + else + noop = true; - /* A - B <= 0 tests whether B is in front of A regardless of overflow */ - wait_event(work->done, seq - work->done_seq <= 0); - atomic_dec(&work->flushing); + spin_unlock_irq(&worker->lock); - /* - * rmb flush-b1 paired with worker-b0, to make sure our caller - * sees every change made by work->func(). - */ - smp_mb__after_atomic_dec(); + if (!noop) + wait_for_completion(&fwork.done); } EXPORT_SYMBOL_GPL(flush_kthread_work);