diff mbox series

[RFC,net-next,5/5] eventpoll: Control irq suspension for prefer_busy_poll

Message ID 20240812125717.413108-6-jdamato@fastly.com (mailing list archive)
State New
Headers show
Series Suspend IRQs during preferred busy poll | expand

Commit Message

Joe Damato Aug. 12, 2024, 12:57 p.m. UTC
From: Martin Karsten <mkarsten@uwaterloo.ca>

When events are reported to userland and prefer_busy_poll is set, irqs are
temporarily suspended using napi_suspend_irqs.

If no events are found and ep_poll would go to sleep, irq suspension is
cancelled using napi_resume_irqs.

Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Co-developed-by: Joe Damato <jdamato@fastly.com>
Signed-off-by: Joe Damato <jdamato@fastly.com>
Tested-by: Joe Damato <jdamato@fastly.com>
Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
---
 fs/eventpoll.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

Comments

Stanislav Fomichev Aug. 12, 2024, 8:20 p.m. UTC | #1
On 08/12, Joe Damato wrote:
> From: Martin Karsten <mkarsten@uwaterloo.ca>
> 
> When events are reported to userland and prefer_busy_poll is set, irqs are
> temporarily suspended using napi_suspend_irqs.
> 
> If no events are found and ep_poll would go to sleep, irq suspension is
> cancelled using napi_resume_irqs.
> 
> Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
> Co-developed-by: Joe Damato <jdamato@fastly.com>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> Tested-by: Joe Damato <jdamato@fastly.com>
> Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
> ---
>  fs/eventpoll.c | 22 +++++++++++++++++++++-
>  1 file changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index cc47f72005ed..d74b5b9c1f51 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -457,6 +457,8 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
>  		 * it back in when we have moved a socket with a valid NAPI
>  		 * ID onto the ready list.
>  		 */
> +		if (prefer_busy_poll)
> +			napi_resume_irqs(napi_id);
>  		ep->napi_id = 0;
>  		return false;
>  	}
> @@ -540,6 +542,14 @@ static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
>  	}
>  }
>  
> +static void ep_suspend_napi_irqs(struct eventpoll *ep)
> +{
> +	unsigned int napi_id = READ_ONCE(ep->napi_id);
> +
> +	if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
> +		napi_suspend_irqs(napi_id);
> +}
> +
>  #else
>  
>  static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
> @@ -557,6 +567,10 @@ static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
>  	return -EOPNOTSUPP;
>  }
>  
> +static void ep_suspend_napi_irqs(struct eventpoll *ep)
> +{
> +}
> +
>  #endif /* CONFIG_NET_RX_BUSY_POLL */
>  
>  /*
> @@ -788,6 +802,10 @@ static bool ep_refcount_dec_and_test(struct eventpoll *ep)
>  
>  static void ep_free(struct eventpoll *ep)
>  {
> +	unsigned int napi_id = READ_ONCE(ep->napi_id);
> +
> +	if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
> +		napi_resume_irqs(napi_id);
>  	mutex_destroy(&ep->mtx);
>  	free_uid(ep->user);
>  	wakeup_source_unregister(ep->ws);
> @@ -2005,8 +2023,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  			 * trying again in search of more luck.
>  			 */
>  			res = ep_send_events(ep, events, maxevents);
> -			if (res)
> +			if (res) {
> +				ep_suspend_napi_irqs(ep);

Aren't we already doing defer in the busy_poll_stop? (or in napi_poll
when it's complete/done). Why do we need another rearming here?
Martin Karsten Aug. 12, 2024, 9:47 p.m. UTC | #2
On 2024-08-12 16:20, Stanislav Fomichev wrote:
> On 08/12, Joe Damato wrote:
>> From: Martin Karsten <mkarsten@uwaterloo.ca>

[snip]

>> @@ -2005,8 +2023,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>>   			 * trying again in search of more luck.
>>   			 */
>>   			res = ep_send_events(ep, events, maxevents);
>> -			if (res)
>> +			if (res) {
>> +				ep_suspend_napi_irqs(ep);
> 
> Aren't we already doing defer in the busy_poll_stop? (or in napi_poll
> when it's complete/done). Why do we need another rearming here?

If ep_poll finds data present due to previous softirq activity during a 
sleep (or application bootstrap), the timer is armed with the shorter 
gro-flush-timeout. This timer rearming with irq-suspend-timeout 
kickstarts the suspend mechanism by injecting the larger timer.

Thanks,
Martin
diff mbox series

Patch

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc47f72005ed..d74b5b9c1f51 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -457,6 +457,8 @@  static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 		 * it back in when we have moved a socket with a valid NAPI
 		 * ID onto the ready list.
 		 */
+		if (prefer_busy_poll)
+			napi_resume_irqs(napi_id);
 		ep->napi_id = 0;
 		return false;
 	}
@@ -540,6 +542,14 @@  static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
 	}
 }
 
+static void ep_suspend_napi_irqs(struct eventpoll *ep)
+{
+	unsigned int napi_id = READ_ONCE(ep->napi_id);
+
+	if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
+		napi_suspend_irqs(napi_id);
+}
+
 #else
 
 static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -557,6 +567,10 @@  static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
 	return -EOPNOTSUPP;
 }
 
+static void ep_suspend_napi_irqs(struct eventpoll *ep)
+{
+}
+
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
 /*
@@ -788,6 +802,10 @@  static bool ep_refcount_dec_and_test(struct eventpoll *ep)
 
 static void ep_free(struct eventpoll *ep)
 {
+	unsigned int napi_id = READ_ONCE(ep->napi_id);
+
+	if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
+		napi_resume_irqs(napi_id);
 	mutex_destroy(&ep->mtx);
 	free_uid(ep->user);
 	wakeup_source_unregister(ep->ws);
@@ -2005,8 +2023,10 @@  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 			 * trying again in search of more luck.
 			 */
 			res = ep_send_events(ep, events, maxevents);
-			if (res)
+			if (res) {
+				ep_suspend_napi_irqs(ep);
 				return res;
+			}
 		}
 
 		if (timed_out)