Message ID | 1638780405-38026-1-git-send-email-quic_pragalla@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [V1] fuse: give wakeup hints to the scheduler | expand |
On Mon, 6 Dec 2021 at 09:47, Pradeep P V K <quic_pragalla@quicinc.com> wrote: > > The synchronous wakeup interface is available only for the > interruptible wakeup. Add it for normal wakeup and use this > synchronous wakeup interface to wakeup the userspace daemon. > Scheduler can make use of this hint to find a better CPU for > the waker task. Ingo, Peter, What exactly does WF_SYNC do? Does it try to give the waker's CPU immediately to the waked? If that doesn't work (e.g. in this patch the wake up is done with a spin lock held) does it do anything? Does it give a hint that the waked task should be scheduled on this CPU at the next scheduling point? Thanks, Miklos > > With this change the performance numbers for compress, decompress > and copy use-cases on /sdcard path has improved by ~30%. > > Use-case details: > 1. copy 10000 files of each 4k size into /sdcard path > 2. use any File explorer application that has compress/decompress > support > 3. start compress/decompress and capture the time. 
> > ------------------------------------------------- > | Default | wakeup support | Improvement/Diff | > ------------------------------------------------- > | 13.8 sec | 9.9 sec | 3.9 sec (28.26%) | > ------------------------------------------------- > > Co-developed-by: Pavankumar Kondeti <quic_pkondeti@quicinc.com> > Signed-off-by: Pradeep P V K <quic_pragalla@quicinc.com> > --- > fs/fuse/dev.c | 22 +++++++++++++--------- > fs/fuse/fuse_i.h | 6 +++--- > fs/fuse/virtio_fs.c | 8 +++++--- > include/linux/wait.h | 1 + > 4 files changed, 22 insertions(+), 15 deletions(-) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index cd54a52..fef2e23 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -207,10 +207,13 @@ static unsigned int fuse_req_hash(u64 unique) > /** > * A new request is available, wake fiq->waitq > */ > -static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) > +static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock) > { > - wake_up(&fiq->waitq); > + if (sync) > + wake_up_sync(&fiq->waitq); > + else > + wake_up(&fiq->waitq); > kill_fasync(&fiq->fasync, SIGIO, POLL_IN); > spin_unlock(&fiq->lock); > } > @@ -223,14 +226,15 @@ const struct fuse_iqueue_ops fuse_dev_fiq_ops = { > EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); > > static void queue_request_and_unlock(struct fuse_iqueue *fiq, > - struct fuse_req *req) > + struct fuse_req *req, > + bool sync) > __releases(fiq->lock) > { > req->in.h.len = sizeof(struct fuse_in_header) + > fuse_len_args(req->args->in_numargs, > (struct fuse_arg *) req->args->in_args); > list_add_tail(&req->list, &fiq->pending); > - fiq->ops->wake_pending_and_unlock(fiq); > + fiq->ops->wake_pending_and_unlock(fiq, sync); > } > > void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, > @@ -245,7 +249,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, > if (fiq->connected) { > fiq->forget_list_tail->next = forget; > fiq->forget_list_tail = 
forget; > - fiq->ops->wake_forget_and_unlock(fiq); > + fiq->ops->wake_forget_and_unlock(fiq, 0); > } else { > kfree(forget); > spin_unlock(&fiq->lock); > @@ -265,7 +269,7 @@ static void flush_bg_queue(struct fuse_conn *fc) > fc->active_background++; > spin_lock(&fiq->lock); > req->in.h.unique = fuse_get_unique(fiq); > - queue_request_and_unlock(fiq, req); > + queue_request_and_unlock(fiq, req, 0); > } > } > > @@ -358,7 +362,7 @@ static int queue_interrupt(struct fuse_req *req) > spin_unlock(&fiq->lock); > return 0; > } > - fiq->ops->wake_interrupt_and_unlock(fiq); > + fiq->ops->wake_interrupt_and_unlock(fiq, 0); > } else { > spin_unlock(&fiq->lock); > } > @@ -425,7 +429,7 @@ static void __fuse_request_send(struct fuse_req *req) > /* acquire extra reference, since request is still needed > after fuse_request_end() */ > __fuse_get_request(req); > - queue_request_and_unlock(fiq, req); > + queue_request_and_unlock(fiq, req, 1); > > request_wait_answer(req); > /* Pairs with smp_wmb() in fuse_request_end() */ > @@ -600,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm, > > spin_lock(&fiq->lock); > if (fiq->connected) { > - queue_request_and_unlock(fiq, req); > + queue_request_and_unlock(fiq, req, 0); > } else { > err = -ENODEV; > spin_unlock(&fiq->lock); > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index c1a8b31..363e0ba 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -389,19 +389,19 @@ struct fuse_iqueue_ops { > /** > * Signal that a forget has been queued > */ > - void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) > + void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock); > > /** > * Signal that an INTERRUPT request has been queued > */ > - void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) > + void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock); > > /** > * Signal that a request has been queued > */ > - void 
(*wake_pending_and_unlock)(struct fuse_iqueue *fiq) > + void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock); > > /** > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c > index 4cfa4bc..bdc3dcc 100644 > --- a/fs/fuse/virtio_fs.c > +++ b/fs/fuse/virtio_fs.c > @@ -972,7 +972,7 @@ static struct virtio_driver virtio_fs_driver = { > #endif > }; > > -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) > +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock) > { > struct fuse_forget_link *link; > @@ -1007,7 +1007,8 @@ __releases(fiq->lock) > kfree(link); > } > > -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) > +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq, > + bool sync) > __releases(fiq->lock) > { > /* > @@ -1222,7 +1223,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, > return ret; > } > > -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) > +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq, > + bool sync) > __releases(fiq->lock) > { > unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ > diff --git a/include/linux/wait.h b/include/linux/wait.h > index 2d0df57..690572ee 100644 > --- a/include/linux/wait.h > +++ b/include/linux/wait.h > @@ -228,6 +228,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); > #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) > #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) > #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) > +#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL) > > /* > * Wakeup macros to be used to report events to the targets. > -- > 2.7.4 >
On Tue, Dec 07, 2021 at 10:07:45AM +0100, Miklos Szeredi wrote: > On Mon, 6 Dec 2021 at 09:47, Pradeep P V K <quic_pragalla@quicinc.com> wrote: > > > > The synchronous wakeup interface is available only for the > > interruptible wakeup. Add it for normal wakeup and use this > > synchronous wakeup interface to wakeup the userspace daemon. > > Scheduler can make use of this hint to find a better CPU for > > the waker task. That's a horrendously bad changelog :-/ Also, if you need it for UNINTERRUPTIBLE that's trivial to do ofc. > Ingo, Peter, > > What exactly does WF_SYNC do? Does it try to give the waker's CPU > immediately to the waked? > > If that doesn't work (e.g. in this patch the wake up is done with a > spin lock held) does it do anything? > > Does it give a hint that the waked task should be scheduled on this > CPU at the next scheduling point? WF_SYNC is a hint to the scheduler that the waker is about to go sleep and as such it is reasonable to stack the woken thread on this CPU instead of going to find an idle CPU for it. Typically it also means that the waker and wakee share data, and thus having them share the CPU is beneficial for cache locality. That said, WF_SYNC is 'difficult' since not all users of the hint keep their promise, so there's a bit of heuristics sprinkled on :/
On Tue, 7 Dec 2021 at 11:07, Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Dec 07, 2021 at 10:07:45AM +0100, Miklos Szeredi wrote: > > On Mon, 6 Dec 2021 at 09:47, Pradeep P V K <quic_pragalla@quicinc.com> wrote: > > > > > > The synchronous wakeup interface is available only for the > > > interruptible wakeup. Add it for normal wakeup and use this > > > synchronous wakeup interface to wakeup the userspace daemon. > > > Scheduler can make use of this hint to find a better CPU for > > > the waker task. > > That's a horrendoubly bad changelog :-/ Also, if you need it for > UNINTERRUPTIBLE that's trivial to do ofc. > > > Ingo, Peter, > > > > What exactly does WF_SYNC do? Does it try to give the waker's CPU > > immediately to the waked? > > > > If that doesn't work (e.g. in this patch the wake up is done with a > > spin lock held) does it do anything? > > > > Does it give a hint that the waked task should be scheduled on this > > CPU at the next scheduling point? > > WF_SYNC is a hint to the scheduler that the waker is about to go sleep > and as such it is reasonable to stack the woken thread on this CPU > instead of going to find an idle CPU for it. > > Typically it also means that the waker and wakee share data, and thus > having them share the CPU is beneficial for cache locality. Okay, so it doesn't give up the CPU immediately to the woken task, just marks the woken task as a "successor" when the current task goes to sleep. Right? That may be good for fuse as the data is indeed shared. It would be even better if the woken task had already a known affinity to this CPU, since that would mean thread local data wouldn't have to be migrated each time... So I'm not sure sync wakeup alone is worth it, needs real life benchmarking. Thanks, Miklos
On Tue, Dec 07, 2021 at 11:20:59AM +0100, Miklos Szeredi wrote: > On Tue, 7 Dec 2021 at 11:07, Peter Zijlstra <peterz@infradead.org> wrote: > > > > On Tue, Dec 07, 2021 at 10:07:45AM +0100, Miklos Szeredi wrote: > > > On Mon, 6 Dec 2021 at 09:47, Pradeep P V K <quic_pragalla@quicinc.com> wrote: > > > > > > > > The synchronous wakeup interface is available only for the > > > > interruptible wakeup. Add it for normal wakeup and use this > > > > synchronous wakeup interface to wakeup the userspace daemon. > > > > Scheduler can make use of this hint to find a better CPU for > > > > the waker task. > > > > That's a horrendoubly bad changelog :-/ Also, if you need it for > > UNINTERRUPTIBLE that's trivial to do ofc. > > > > > Ingo, Peter, > > > > > > What exactly does WF_SYNC do? Does it try to give the waker's CPU > > > immediately to the waked? > > > > > > If that doesn't work (e.g. in this patch the wake up is done with a > > > spin lock held) does it do anything? > > > > > > Does it give a hint that the waked task should be scheduled on this > > > CPU at the next scheduling point? > > > > WF_SYNC is a hint to the scheduler that the waker is about to go sleep > > and as such it is reasonable to stack the woken thread on this CPU > > instead of going to find an idle CPU for it. > > > > Typically it also means that the waker and wakee share data, and thus > > having them share the CPU is beneficial for cache locality. > > Okay, so it doesn't give up the CPU immediately to the woken task, > just marks the woken task as a "successor" when the current task goes > to sleep. Right? More or less. > That may be good for fuse as the data is indeed shared. It would be > even better if the woken task had already a known affinity to this > CPU, since that would mean thread local data wouldn't have to be > migrated each time... So I'm not sure sync wakeup alone is worth it, > needs real life benchmarking. Hard affinities have other down-sides.. 
occasional migrations shouldn't be a problem, constant migrations are bad.
On Tue, 7 Dec 2021 at 11:42, Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Dec 07, 2021 at 11:20:59AM +0100, Miklos Szeredi wrote: > > That may be good for fuse as the data is indeed shared. It would be > > even better if the woken task had already a known affinity to this > > CPU, since that would mean thread local data wouldn't have to be > > migrated each time... So I'm not sure sync wakeup alone is worth it, > > needs real life benchmarking. > > Hard affinities have other down-sides.. occasional migrations shouldn't > be a problem, constant migrations are bad. Currently fuse readers are sleeping in wait_event_interruptible_exclusive(). wake_up_interruptible_sync() will pick the first thread in the queue and wake that up. That will likely result in migration, right? What would be much nicer, is to look at all the threads on the waitq and pick one that previously ran on the current CPU if there's one. Could this be implemented? Thanks, Miklos
On Tue, Dec 07, 2021 at 01:44:49PM +0100, Miklos Szeredi wrote: > On Tue, 7 Dec 2021 at 11:42, Peter Zijlstra <peterz@infradead.org> wrote: > > > > On Tue, Dec 07, 2021 at 11:20:59AM +0100, Miklos Szeredi wrote: > > > > That may be good for fuse as the data is indeed shared. It would be > > > even better if the woken task had already a known affinity to this > > > CPU, since that would mean thread local data wouldn't have to be > > > migrated each time... So I'm not sure sync wakeup alone is worth it, > > > needs real life benchmarking. > > > > Hard affinities have other down-sides.. occasional migrations shouldn't > > be a problem, constant migrations are bad. > > Currently fuse readers are sleeping in > wait_event_interruptible_exclusive(). wake_up_interruptible_sync() > will pick the first thread in the queue and wake that up. That will > likely result in migration, right? Per the _sync it will try harder to place the thread on the waking CPU. Which is what you want right? For the worker to do the work on the CPU that wakes it, since that CPU has the data cache-hot etc.. > What would be much nicer, is to look at all the threads on the waitq > and pick one that previously ran on the current CPU if there's one. > Could this be implemented? It would violate the FIFO semantics of _exclusive.
On Tue, Dec 07, 2021 at 02:45:49PM +0100, Peter Zijlstra wrote: > > What would be much nicer, is to look at all the threads on the waitq > > and pick one that previously ran on the current CPU if there's one. > > Could this be implemented? > > It would violate the FIFO semantics of _exclusive. That said, look at kernel/locking/percpu-rwsem.c:percpu_rwsem_wake_function() for how to do really terrible things with waitqueues, possibly including what you suggest.
On Tue, 7 Dec 2021 at 14:51, Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Dec 07, 2021 at 02:45:49PM +0100, Peter Zijlstra wrote: > > > > What would be much nicer, is to look at all the threads on the waitq > > > and pick one that previously ran on the current CPU if there's one. > > > Could this be implemented? > > > > It would violate the FIFO semantics of _exclusive. > > That said, look at > kernel/locking/percpu-rwsem.c:percpu_rwsem_wake_function() for how to do > really terrible things with waitqueues, possibly including what you > suggest. Okay, so it looks doable, but rather more involved than just sticking that _sync onto the wake helper. FIFO is used so that we always wake the most recently used thread, right? That makes sense if it doesn't involve migration, but if the hot thread is going to be moved to another CPU then we'd lose most of the advantages. Am I missing something? Thanks, Miklos
On Tue, Dec 07, 2021 at 03:03:01PM +0100, Miklos Szeredi wrote: > On Tue, 7 Dec 2021 at 14:51, Peter Zijlstra <peterz@infradead.org> wrote: > > > > On Tue, Dec 07, 2021 at 02:45:49PM +0100, Peter Zijlstra wrote: > > > > > > What would be much nicer, is to look at all the threads on the waitq > > > > and pick one that previously ran on the current CPU if there's one. > > > > Could this be implemented? > > > > > > It would violate the FIFO semantics of _exclusive. > > > > That said, look at > > kernel/locking/percpu-rwsem.c:percpu_rwsem_wake_function() for how to do > > really terrible things with waitqueues, possibly including what you > > suggest. > > Okay, so it looks doable, but rather more involved than just sticking > that _sync onto the wake helper. > > FIFO is used so that we always wake the most recently used thread, right? > > That makes sense if it doesn't involve migration, but if the hot > thread is going to be moved to another CPU then we'd lost most of the > advantages. Am I missing something? FIFO means the thread used longest ago gets to go first. If your threads are idempotent workers, FIFO might not be the best option. But I'm not much familiar with the FUSE code or its design.
On Tue, 7 Dec 2021 at 15:25, Peter Zijlstra <peterz@infradead.org> wrote: > FIFO means the thread used longest ago gets to go first. If your threads > are an idempotent workers, FIFO might not be the best option. But I'm > not much familiar with the FUSE code or it's design. Okay. Did some experiments, but couldn't see wake_up_interruptible_sync() actually migrate the woken task, the behavior was identical to wake_up_interruptible(). I guess this is the "less" part in "more or less", but it would be good to see more clearly what is happening. I'll try to describe the design to give more context: - FUSE is similar to network filesystem in that there's a server and a client, except both are on the same host. The client lives in the kernel and the server lives in userspace. - Communication between them is done with read and write syscalls. - Usually the server has multiple threads. When a server thread is idle it is blocking in sys_read -> ... -> fuse_dev_do_read -> wait_event_interruptible_exclusive(fiq->waitq,...). - When a filesystem request comes in (e.g. mkdir) a request is constructed, put on the input queue (fiq->pending) and fiq->waitq woken up. After this the client task goes to sleep in request_wait_answer -> wait_event_interruptible(req->waitq, ...). - The server thread takes the request off the pending list, copies the data to the userspace buffer and puts the request on the processing list. - The userspace part interprets the read buffer, performs the fs operation, and writes the reply. - During the write(2) the reply is now copied to the kernel and the request is looked up on the processing list. The client is woken up through req->waitq. After returning from write(2) the server thread again calls read(2) to get the next request. - After being woken up, the client task now returns with the result of the operation. - The above example is for synchronous requests. There are async requests like readahead or buffered writes. 
In that case the client does not call request_wait_answer() but returns immediately and the result is processed from the server thread using a callback function (req->args->end()). From a scheduling perspective it would be ideal if the server thread's CPU was matched to the client thread's CPU, since that would make the data stay local, and for synchronous requests a _sync type wakeup is perfect, since the client goes to sleep just as the server starts processing and vice versa. Always migrating the woken server thread to the client's CPU is not going to be good, since this would result in too many migrations and would lose locality for the server's stack. Another idea is to add per-cpu input queues. The client then would queue the request on the pending queue corresponding to its CPU and wake up the server thread blocked on that queue. What happens though if this particular queue has no servers? Or if a queue is starved because it's served by fewer threads than another? Handling these cases seems really complicated. Is there a simpler way? Thanks, Miklos
On Wed, Dec 08, 2021 at 04:46:06PM +0100, Miklos Szeredi wrote: >On Tue, 7 Dec 2021 at 15:25, Peter Zijlstra <peterz@infradead.org> wrote: > >> FIFO means the thread used longest ago gets to go first. If your threads >> are an idempotent workers, FIFO might not be the best option. But I'm >> not much familiar with the FUSE code or it's design. > >Okay. Did some experiments, but couldn't see >wake_up_interruptible_sync() actually migrate the woken task, the >behavior was identical to wake_up_interruptible(). I guess this is >the "less" part in "more or less", but it would be good to see more >clearly what is happening. > >I'll try to describe the design to give more context: > >- FUSE is similar to network filesystem in that there's a server and a >client, except both are on the same host. The client lives in the >kernel and the server lives in userspace. > >- Communication between them is done with read and write syscalls. > >- Usually the server has multiple threads. When a server thread is >idle it is blocking in sys_read -> ... -> fuse_dev_do_read -> >wait_event_interruptible_exclusive(fiq->waitq,...). > >- When a filesystem request comes in (e.g. mkdir) a request is >constructed, put on the input queue (fiq->pending) and fiq->waitq >woken up. After this the client task goes to sleep in >request_wait_answer -> wait_event_interruptible(req->waitq, ...). > >- The server thread takes the request off the pending list, copies the >data to the userspace buffer and puts the request on the processing >list. > >- The userspace part interprets the read buffer, performs the fs >operation, and writes the reply. > >- During the write(2) the reply is now copied to the kernel and the >request is looked up on the processing list. The client is woken up >through req->waitq. After returning from write(2) the server thread >again calls read(2) to get the next request. > >- After being woken up, the client task now returns with the result of >the operation. 
> >- The above example is for synchronous requests. There are async >requests like readahead or buffered writes. In that case the client >does not call request_wait_answer() but returns immediately and the >result is processed from the server thread using a callback function >(req->args->end()). > >From a scheduling prospective it would be ideal if the server thread's >CPU was matched to the client thread's CPU, since that would make the >data stay local, and for synchronous requests a _sync type wakeup is >perfect, since the client goes to sleep just as the server starts >processing and vice versa. > >Always migrating the woken server thread to the client's CPU is not >going to be good, since this would result in too many migrations and >would loose locality for the server's stack. > >Another idea is to add per-cpu input queues. The client then would >queue the request on the pending queue corresponding to its CPU and >wake up the server thread blocked on that queue. > >What happens though if this particular queue has no servers? Or if a >queue is starved because it's served by less threads than another? >Handing these cases seems really complicated. > Per-CPU input queue is a great idea. It was a key concept in ZUFS[1] design. However, describing it as complicated would be an understatement, to say the least. >Is there a simper way? Here is an idea: hint the userspace server on which cpu to execute the _next_ request via the (unused) fuse_in_header.padding field in current request. Naturally, this method would be effective only for cases where queue-size > 1. > >Thanks, >Miklos [1] https://lwn.net/Articles/795996/
Hi Pradeep, Miklos, We have found this patch benefits the decompress performance up to ~46% on our Android smart devices. Use-case details: 1. prepare a zip file with 2000 pictures into /sdcard path 2. use unzip command to decompress the zip file ------------------------------------------------- | Default | patch | Improvement/Diff | ------------------------------------------------- | 13 sec | 7 sec | 6 sec (46%) | ------------------------------------------------- On Android platform, the server thread runs in MediaProvider as a background service, and it may run on little cores. However, applications usually run on big cores, for they are foreground. With this patch, the server threads can run much faster and benefit the decompress performance. ------------------------------------------------------------------------ | Total Wall duration of server threads (ms) | ------------------------------------------------------------------------ | | Wall duration | Occurrences | Occurrences on big core | ------------------------------------------------------------------------ | Default | 3583 (ms) | 13926 | 5% | ------------------------------------------------------------------------ | patch | 1276 (ms) | 12996 | 79% | ------------------------------------------------------------------------ Thanks, Xiaoyu On Mon, Dec 06, 2021 at 02:16:45PM +0530, Pradeep P V K wrote: > The synchronous wakeup interface is available only for the > interruptible wakeup. Add it for normal wakeup and use this > synchronous wakeup interface to wakeup the userspace daemon. > Scheduler can make use of this hint to find a better CPU for > the waker task. > > With this change the performance numbers for compress, decompress > and copy use-cases on /sdcard path has improved by ~30%. > > Use-case details: > 1. copy 10000 files of each 4k size into /sdcard path > 2. use any File explorer application that has compress/decompress > support > 3. start compress/decompress and capture the time. 
> > ------------------------------------------------- > | Default | wakeup support | Improvement/Diff | > ------------------------------------------------- > | 13.8 sec | 9.9 sec | 3.9 sec (28.26%) | > ------------------------------------------------- > > Co-developed-by: Pavankumar Kondeti <quic_pkondeti@quicinc.com> > Signed-off-by: Pradeep P V K <quic_pragalla@quicinc.com> > --- > fs/fuse/dev.c | 22 +++++++++++++--------- > fs/fuse/fuse_i.h | 6 +++--- > fs/fuse/virtio_fs.c | 8 +++++--- > include/linux/wait.h | 1 + > 4 files changed, 22 insertions(+), 15 deletions(-) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index cd54a52..fef2e23 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -207,10 +207,13 @@ static unsigned int fuse_req_hash(u64 unique) > /** > * A new request is available, wake fiq->waitq > */ > -static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) > +static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock) > { > - wake_up(&fiq->waitq); > + if (sync) > + wake_up_sync(&fiq->waitq); > + else > + wake_up(&fiq->waitq); > kill_fasync(&fiq->fasync, SIGIO, POLL_IN); > spin_unlock(&fiq->lock); > } > @@ -223,14 +226,15 @@ const struct fuse_iqueue_ops fuse_dev_fiq_ops = { > EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); > > static void queue_request_and_unlock(struct fuse_iqueue *fiq, > - struct fuse_req *req) > + struct fuse_req *req, > + bool sync) > __releases(fiq->lock) > { > req->in.h.len = sizeof(struct fuse_in_header) + > fuse_len_args(req->args->in_numargs, > (struct fuse_arg *) req->args->in_args); > list_add_tail(&req->list, &fiq->pending); > - fiq->ops->wake_pending_and_unlock(fiq); > + fiq->ops->wake_pending_and_unlock(fiq, sync); > } > > void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, > @@ -245,7 +249,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, > if (fiq->connected) { > fiq->forget_list_tail->next = forget; > fiq->forget_list_tail = 
forget; > - fiq->ops->wake_forget_and_unlock(fiq); > + fiq->ops->wake_forget_and_unlock(fiq, 0); > } else { > kfree(forget); > spin_unlock(&fiq->lock); > @@ -265,7 +269,7 @@ static void flush_bg_queue(struct fuse_conn *fc) > fc->active_background++; > spin_lock(&fiq->lock); > req->in.h.unique = fuse_get_unique(fiq); > - queue_request_and_unlock(fiq, req); > + queue_request_and_unlock(fiq, req, 0); > } > } > > @@ -358,7 +362,7 @@ static int queue_interrupt(struct fuse_req *req) > spin_unlock(&fiq->lock); > return 0; > } > - fiq->ops->wake_interrupt_and_unlock(fiq); > + fiq->ops->wake_interrupt_and_unlock(fiq, 0); > } else { > spin_unlock(&fiq->lock); > } > @@ -425,7 +429,7 @@ static void __fuse_request_send(struct fuse_req *req) > /* acquire extra reference, since request is still needed > after fuse_request_end() */ > __fuse_get_request(req); > - queue_request_and_unlock(fiq, req); > + queue_request_and_unlock(fiq, req, 1); > > request_wait_answer(req); > /* Pairs with smp_wmb() in fuse_request_end() */ > @@ -600,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm, > > spin_lock(&fiq->lock); > if (fiq->connected) { > - queue_request_and_unlock(fiq, req); > + queue_request_and_unlock(fiq, req, 0); > } else { > err = -ENODEV; > spin_unlock(&fiq->lock); > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index c1a8b31..363e0ba 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -389,19 +389,19 @@ struct fuse_iqueue_ops { > /** > * Signal that a forget has been queued > */ > - void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) > + void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock); > > /** > * Signal that an INTERRUPT request has been queued > */ > - void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) > + void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock); > > /** > * Signal that a request has been queued > */ > - void 
(*wake_pending_and_unlock)(struct fuse_iqueue *fiq) > + void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock); > > /** > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c > index 4cfa4bc..bdc3dcc 100644 > --- a/fs/fuse/virtio_fs.c > +++ b/fs/fuse/virtio_fs.c > @@ -972,7 +972,7 @@ static struct virtio_driver virtio_fs_driver = { > #endif > }; > > -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) > +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq, bool sync) > __releases(fiq->lock) > { > struct fuse_forget_link *link; > @@ -1007,7 +1007,8 @@ __releases(fiq->lock) > kfree(link); > } > > -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) > +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq, > + bool sync) > __releases(fiq->lock) > { > /* > @@ -1222,7 +1223,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, > return ret; > } > > -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) > +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq, > + bool sync) > __releases(fiq->lock) > { > unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ > diff --git a/include/linux/wait.h b/include/linux/wait.h > index 2d0df57..690572ee 100644 > --- a/include/linux/wait.h > +++ b/include/linux/wait.h > @@ -228,6 +228,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); > #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) > #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) > #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) > +#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL) > > /* > * Wakeup macros to be used to report events to the targets. > -- > 2.7.4 >
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cd54a52..fef2e23 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -207,10 +207,13 @@ static unsigned int fuse_req_hash(u64 unique) /** * A new request is available, wake fiq->waitq */ -static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) +static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock) { - wake_up(&fiq->waitq); + if (sync) + wake_up_sync(&fiq->waitq); + else + wake_up(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); spin_unlock(&fiq->lock); } @@ -223,14 +226,15 @@ const struct fuse_iqueue_ops fuse_dev_fiq_ops = { EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); static void queue_request_and_unlock(struct fuse_iqueue *fiq, - struct fuse_req *req) + struct fuse_req *req, + bool sync) __releases(fiq->lock) { req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); list_add_tail(&req->list, &fiq->pending); - fiq->ops->wake_pending_and_unlock(fiq); + fiq->ops->wake_pending_and_unlock(fiq, sync); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, @@ -245,7 +249,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, if (fiq->connected) { fiq->forget_list_tail->next = forget; fiq->forget_list_tail = forget; - fiq->ops->wake_forget_and_unlock(fiq); + fiq->ops->wake_forget_and_unlock(fiq, 0); } else { kfree(forget); spin_unlock(&fiq->lock); @@ -265,7 +269,7 @@ static void flush_bg_queue(struct fuse_conn *fc) fc->active_background++; spin_lock(&fiq->lock); req->in.h.unique = fuse_get_unique(fiq); - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, 0); } } @@ -358,7 +362,7 @@ static int queue_interrupt(struct fuse_req *req) spin_unlock(&fiq->lock); return 0; } - fiq->ops->wake_interrupt_and_unlock(fiq); + fiq->ops->wake_interrupt_and_unlock(fiq, 0); } else { spin_unlock(&fiq->lock); } @@ -425,7 +429,7 @@ static void 
__fuse_request_send(struct fuse_req *req) /* acquire extra reference, since request is still needed after fuse_request_end() */ __fuse_get_request(req); - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, 1); request_wait_answer(req); /* Pairs with smp_wmb() in fuse_request_end() */ @@ -600,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm, spin_lock(&fiq->lock); if (fiq->connected) { - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, 0); } else { err = -ENODEV; spin_unlock(&fiq->lock); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c1a8b31..363e0ba 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -389,19 +389,19 @@ struct fuse_iqueue_ops { /** * Signal that a forget has been queued */ - void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** * Signal that an INTERRUPT request has been queued */ - void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** * Signal that a request has been queued */ - void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 4cfa4bc..bdc3dcc 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -972,7 +972,7 @@ static struct virtio_driver virtio_fs_driver = { #endif }; -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock) { struct fuse_forget_link *link; @@ -1007,7 +1007,8 @@ __releases(fiq->lock) kfree(link); } -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq, + bool sync) 
__releases(fiq->lock) { /* @@ -1222,7 +1223,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, return ret; } -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq, + bool sync) __releases(fiq->lock) { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 2d0df57..690572ee 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -228,6 +228,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) +#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL) /* * Wakeup macros to be used to report events to the targets.
The synchronous wakeup interface is available only for the interruptible wakeup. Add it for normal wakeup and use this synchronous wakeup interface to wake up the userspace daemon. The scheduler can make use of this hint to find a better CPU for the waker task. With this change the performance numbers for compress, decompress and copy use-cases on the /sdcard path have improved by ~30%. Use-case details: 1. copy 10000 files of each 4k size into /sdcard path 2. use any File explorer application that has compress/decompress support 3. start compress/decompress and capture the time. ------------------------------------------------- | Default | wakeup support | Improvement/Diff | ------------------------------------------------- | 13.8 sec | 9.9 sec | 3.9 sec (28.26%) | ------------------------------------------------- Co-developed-by: Pavankumar Kondeti <quic_pkondeti@quicinc.com> Signed-off-by: Pradeep P V K <quic_pragalla@quicinc.com> --- fs/fuse/dev.c | 22 +++++++++++++--------- fs/fuse/fuse_i.h | 6 +++--- fs/fuse/virtio_fs.c | 8 +++++--- include/linux/wait.h | 1 + 4 files changed, 22 insertions(+), 15 deletions(-)