diff mbox

[4/5] kvm,rcu,nohz: use RCU extended quiescent state when running KVM guest

Message ID 1423167832-17609-5-git-send-email-riel@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Rik van Riel Feb. 5, 2015, 8:23 p.m. UTC
From: Rik van Riel <riel@redhat.com>

The host kernel is not doing anything while the CPU is executing
a KVM guest VCPU, so it can be marked as being in an extended
quiescent state, identical to that used when running user space
code.

The only exception to that rule is when the host handles an
interrupt, which is already handled by the irq code, which
calls rcu_irq_enter and rcu_irq_exit.

The guest_enter and guest_exit functions already switch vtime
accounting independent of context tracking, so leave those calls
where they are, instead of moving them into the context tracking
code.

Signed-off-by: Rik van Riel <riel@redhat.com>
---
 include/linux/context_tracking.h       | 8 +++++++-
 include/linux/context_tracking_state.h | 1 +
 include/linux/kvm_host.h               | 3 ++-
 3 files changed, 10 insertions(+), 2 deletions(-)

Comments

Paul E. McKenney Feb. 5, 2015, 11:56 p.m. UTC | #1
On Thu, Feb 05, 2015 at 03:23:51PM -0500, riel@redhat.com wrote:
> From: Rik van Riel <riel@redhat.com>
> 
> The host kernel is not doing anything while the CPU is executing
> a KVM guest VCPU, so it can be marked as being in an extended
> quiescent state, identical to that used when running user space
> code.
> 
> The only exception to that rule is when the host handles an
> interrupt, which is already handled by the irq code, which
> calls rcu_irq_enter and rcu_irq_exit.
> 
> The guest_enter and guest_exit functions already switch vtime
> accounting independent of context tracking, so leave those calls
> where they are, instead of moving them into the context tracking
> code.
> 
> Signed-off-by: Rik van Riel <riel@redhat.com>

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> ---
>  include/linux/context_tracking.h       | 8 +++++++-
>  include/linux/context_tracking_state.h | 1 +
>  include/linux/kvm_host.h               | 3 ++-
>  3 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
> index bd9f000fc98d..a5d3bb44b897 100644
> --- a/include/linux/context_tracking.h
> +++ b/include/linux/context_tracking.h
> @@ -43,7 +43,7 @@ static inline enum ctx_state exception_enter(void)
>  static inline void exception_exit(enum ctx_state prev_ctx)
>  {
>  	if (context_tracking_is_enabled()) {
> -		if (prev_ctx == IN_USER)
> +		if (prev_ctx == IN_USER || prev_ctx == IN_GUEST)
>  			context_tracking_user_enter(prev_ctx);
>  	}
>  }
> @@ -78,6 +78,9 @@ static inline void guest_enter(void)
>  		vtime_guest_enter(current);
>  	else
>  		current->flags |= PF_VCPU;
> +
> +	if (context_tracking_is_enabled())
> +		context_tracking_user_enter(IN_GUEST);
>  }
> 
>  static inline void guest_exit(void)
> @@ -86,6 +89,9 @@ static inline void guest_exit(void)
>  		vtime_guest_exit(current);
>  	else
>  		current->flags &= ~PF_VCPU;
> +
> +	if (context_tracking_is_enabled())
> +		context_tracking_user_exit(IN_GUEST);
>  }
> 
>  #else
> diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
> index 97a81225d037..f3ef027af749 100644
> --- a/include/linux/context_tracking_state.h
> +++ b/include/linux/context_tracking_state.h
> @@ -15,6 +15,7 @@ struct context_tracking {
>  	enum ctx_state {
>  		IN_KERNEL = 0,
>  		IN_USER,
> +		IN_GUEST,
>  	} state;
>  };
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 26f106022c88..c7828a6a9614 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -772,7 +772,8 @@ static inline void kvm_guest_enter(void)
>  	 * one time slice). Lets treat guest mode as quiescent state, just like
>  	 * we do with user-mode execution.
>  	 */
> -	rcu_virt_note_context_switch(smp_processor_id());
> +	if (!context_tracking_cpu_is_enabled())
> +		rcu_virt_note_context_switch(smp_processor_id());
>  }
> 
>  static inline void kvm_guest_exit(void)
> -- 
> 1.9.3
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Frederic Weisbecker Feb. 6, 2015, 6:01 p.m. UTC | #2
On Thu, Feb 05, 2015 at 03:23:51PM -0500, riel@redhat.com wrote:
> From: Rik van Riel <riel@redhat.com>
> 
> The host kernel is not doing anything while the CPU is executing
> a KVM guest VCPU, so it can be marked as being in an extended
> quiescent state, identical to that used when running user space
> code.
> 
> The only exception to that rule is when the host handles an
> interrupt, which is already handled by the irq code, which
> calls rcu_irq_enter and rcu_irq_exit.
> 
> The guest_enter and guest_exit functions already switch vtime
> accounting independent of context tracking, so leave those calls
> where they are, instead of moving them into the context tracking
> code.
> 
> Signed-off-by: Rik van Riel <riel@redhat.com>
> ---
>  include/linux/context_tracking.h       | 8 +++++++-
>  include/linux/context_tracking_state.h | 1 +
>  include/linux/kvm_host.h               | 3 ++-
>  3 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
> index bd9f000fc98d..a5d3bb44b897 100644
> --- a/include/linux/context_tracking.h
> +++ b/include/linux/context_tracking.h
> @@ -43,7 +43,7 @@ static inline enum ctx_state exception_enter(void)
>  static inline void exception_exit(enum ctx_state prev_ctx)
>  {
>  	if (context_tracking_is_enabled()) {
> -		if (prev_ctx == IN_USER)
> +		if (prev_ctx == IN_USER || prev_ctx == IN_GUEST)
>  			context_tracking_user_enter(prev_ctx);
>  	}
>  }
> @@ -78,6 +78,9 @@ static inline void guest_enter(void)
>  		vtime_guest_enter(current);
>  	else
>  		current->flags |= PF_VCPU;
> +
> +	if (context_tracking_is_enabled())
> +		context_tracking_user_enter(IN_GUEST);

So you should probably just call rcu_user_enter() directly from
there. context_tracking_user_enter() is really about userspace
boundaries.

>  }
>  
>  static inline void guest_exit(void)
> @@ -86,6 +89,9 @@ static inline void guest_exit(void)
>  		vtime_guest_exit(current);
>  	else
>  		current->flags &= ~PF_VCPU;
> +
> +	if (context_tracking_is_enabled())
> +		context_tracking_user_exit(IN_GUEST);
>  }
>  
>  #else
> diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
> index 97a81225d037..f3ef027af749 100644
> --- a/include/linux/context_tracking_state.h
> +++ b/include/linux/context_tracking_state.h
> @@ -15,6 +15,7 @@ struct context_tracking {
>  	enum ctx_state {
>  		IN_KERNEL = 0,
>  		IN_USER,
> +		IN_GUEST,
>  	} state;
>  };
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 26f106022c88..c7828a6a9614 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -772,7 +772,8 @@ static inline void kvm_guest_enter(void)
>  	 * one time slice). Lets treat guest mode as quiescent state, just like
>  	 * we do with user-mode execution.
>  	 */
> -	rcu_virt_note_context_switch(smp_processor_id());
> +	if (!context_tracking_cpu_is_enabled())
> +		rcu_virt_note_context_switch(smp_processor_id());

Should we have a specific CONFIG for this feature? Or is relying on full dynticks
being enabled (and thus context tracking being enabled) enough?

Thanks.

>  }
>  
>  static inline void kvm_guest_exit(void)
> -- 
> 1.9.3
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Frederic Weisbecker Feb. 6, 2015, 11:24 p.m. UTC | #3
On Thu, Feb 05, 2015 at 03:23:51PM -0500, riel@redhat.com wrote:
> From: Rik van Riel <riel@redhat.com>
> 
> The host kernel is not doing anything while the CPU is executing
> a KVM guest VCPU, so it can be marked as being in an extended
> quiescent state, identical to that used when running user space
> code.
> 
> The only exception to that rule is when the host handles an
> interrupt, which is already handled by the irq code, which
> calls rcu_irq_enter and rcu_irq_exit.
> 
> The guest_enter and guest_exit functions already switch vtime
> accounting independent of context tracking, so leave those calls
> where they are, instead of moving them into the context tracking
> code.
> 
> Signed-off-by: Rik van Riel <riel@redhat.com>
> ---
>  include/linux/context_tracking.h       | 8 +++++++-
>  include/linux/context_tracking_state.h | 1 +
>  include/linux/kvm_host.h               | 3 ++-
>  3 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
> index bd9f000fc98d..a5d3bb44b897 100644
> --- a/include/linux/context_tracking.h
> +++ b/include/linux/context_tracking.h
> @@ -43,7 +43,7 @@ static inline enum ctx_state exception_enter(void)
>  static inline void exception_exit(enum ctx_state prev_ctx)
>  {
>  	if (context_tracking_is_enabled()) {
> -		if (prev_ctx == IN_USER)
> +		if (prev_ctx == IN_USER || prev_ctx == IN_GUEST)

That's nitpicking, but != IN_KERNEL would be more generic. We are exiting an exception
and we know that the exception executes IN_KERNEL, so we want to restore any context
(whether IN_USER, IN_GUEST, or anything added in the future) prior to the exception if that
was anything other than IN_KERNEL.

>  			context_tracking_user_enter(prev_ctx);
>  	}
>  }
> @@ -78,6 +78,9 @@ static inline void guest_enter(void)
>  		vtime_guest_enter(current);
>  	else
>  		current->flags |= PF_VCPU;
> +
> +	if (context_tracking_is_enabled())
> +		context_tracking_user_enter(IN_GUEST);
>  }
>  
>  static inline void guest_exit(void)
> @@ -86,6 +89,9 @@ static inline void guest_exit(void)
>  		vtime_guest_exit(current);
>  	else
>  		current->flags &= ~PF_VCPU;
> +
> +	if (context_tracking_is_enabled())
> +		context_tracking_user_exit(IN_GUEST);

I suggest you restore RCU before anything else. I believe cputime
accounting doesn't use RCU, but we never know with all the debug/tracing
code behind it, the acct accounting...

Thanks.

>  }
>  
>  #else
> diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
> index 97a81225d037..f3ef027af749 100644
> --- a/include/linux/context_tracking_state.h
> +++ b/include/linux/context_tracking_state.h
> @@ -15,6 +15,7 @@ struct context_tracking {
>  	enum ctx_state {
>  		IN_KERNEL = 0,
>  		IN_USER,
> +		IN_GUEST,
>  	} state;
>  };
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 26f106022c88..c7828a6a9614 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -772,7 +772,8 @@ static inline void kvm_guest_enter(void)
>  	 * one time slice). Lets treat guest mode as quiescent state, just like
>  	 * we do with user-mode execution.
>  	 */
> -	rcu_virt_note_context_switch(smp_processor_id());
> +	if (!context_tracking_cpu_is_enabled())
> +		rcu_virt_note_context_switch(smp_processor_id());
>  }
>  
>  static inline void kvm_guest_exit(void)
> -- 
> 1.9.3
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bd9f000fc98d..a5d3bb44b897 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -43,7 +43,7 @@  static inline enum ctx_state exception_enter(void)
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
 	if (context_tracking_is_enabled()) {
-		if (prev_ctx == IN_USER)
+		if (prev_ctx == IN_USER || prev_ctx == IN_GUEST)
 			context_tracking_user_enter(prev_ctx);
 	}
 }
@@ -78,6 +78,9 @@  static inline void guest_enter(void)
 		vtime_guest_enter(current);
 	else
 		current->flags |= PF_VCPU;
+
+	if (context_tracking_is_enabled())
+		context_tracking_user_enter(IN_GUEST);
 }
 
 static inline void guest_exit(void)
@@ -86,6 +89,9 @@  static inline void guest_exit(void)
 		vtime_guest_exit(current);
 	else
 		current->flags &= ~PF_VCPU;
+
+	if (context_tracking_is_enabled())
+		context_tracking_user_exit(IN_GUEST);
 }
 
 #else
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 97a81225d037..f3ef027af749 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -15,6 +15,7 @@  struct context_tracking {
 	enum ctx_state {
 		IN_KERNEL = 0,
 		IN_USER,
+		IN_GUEST,
 	} state;
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 26f106022c88..c7828a6a9614 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -772,7 +772,8 @@  static inline void kvm_guest_enter(void)
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.
 	 */
-	rcu_virt_note_context_switch(smp_processor_id());
+	if (!context_tracking_cpu_is_enabled())
+		rcu_virt_note_context_switch(smp_processor_id());
 }
 
 static inline void kvm_guest_exit(void)