diff mbox series

[RFC,v2,3/5] kvm: interface for managing pvsched driver for guest VMs

Message ID 20240403140116.3002809-4-vineeth@bitbyteword.org (mailing list archive)
State New
Headers show
Series Paravirt Scheduling (Dynamic vcpu priority management) | expand

Commit Message

Vineeth Remanan Pillai April 3, 2024, 2:01 p.m. UTC
Implement an ioctl for assigning and unassigning a pvsched driver for a
guest. VMMs would need to adopt these ioctls to support the feature.
Also add a temporary debugfs interface for managing this.

Ideally, the hypervisor would be able to determine the pvsched driver
based on the information received from the guest. Guest VMs with the
feature enabled would request the hypervisor to select a pvsched driver.
The ioctl API is an override mechanism that gives more control to the admin.

Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/uapi/linux/kvm.h |   6 ++
 virt/kvm/kvm_main.c      | 117 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)

Comments

Vineeth Remanan Pillai April 8, 2024, 1:59 p.m. UTC | #1
Adding sched_ext folks

On Wed, Apr 3, 2024 at 10:01 AM Vineeth Pillai (Google)
<vineeth@bitbyteword.org> wrote:
>
> Implement ioctl for assigning and unassigning pvsched driver for a
> guest. VMMs would need to adopt this ioctls for supporting the feature.
> Also add a temporary debugfs interface for managing this.
>
> Ideally, the hypervisor would be able to determine the pvsched driver
> based on the information received from the guest. Guest VMs with the
> feature enabled would request hypervisor to select a pvsched driver.
> ioctl api is an override mechanism to give more control to the admin.
>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>  include/uapi/linux/kvm.h |   6 ++
>  virt/kvm/kvm_main.c      | 117 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 123 insertions(+)
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index c3308536482b..4b29bdad4188 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -2227,4 +2227,10 @@ struct kvm_create_guest_memfd {
>         __u64 reserved[6];
>  };
>
> +struct kvm_pvsched_ops {
> +       __u8 ops_name[32]; /* PVSCHED_NAME_MAX */
> +};
> +
> +#define KVM_GET_PVSCHED_OPS            _IOR(KVMIO, 0xe4, struct kvm_pvsched_ops)
> +#define KVM_REPLACE_PVSCHED_OPS                _IOWR(KVMIO, 0xe5, struct kvm_pvsched_ops)
>  #endif /* __LINUX_KVM_H */
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0546814e4db7..b3d9c362d2e3 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1223,6 +1223,79 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
>         }
>  }
>
> +#ifdef CONFIG_PARAVIRT_SCHED_KVM
> +static int pvsched_vcpu_ops_show(struct seq_file *m, void *data)
> +{
> +       char ops_name[PVSCHED_NAME_MAX];
> +       struct pvsched_vcpu_ops *ops;
> +       struct kvm *kvm = (struct kvm *) m->private;
> +
> +       rcu_read_lock();
> +       ops = rcu_dereference(kvm->pvsched_ops);
> +       if (ops)
> +               strncpy(ops_name, ops->name, PVSCHED_NAME_MAX);
> +       rcu_read_unlock();
> +
> +       seq_printf(m, "%s\n", ops_name);
> +
> +       return 0;
> +}
> +
> +static ssize_t
> +pvsched_vcpu_ops_write(struct file *filp, const char __user *ubuf,
> +               size_t cnt, loff_t *ppos)
> +{
> +       int ret;
> +       char *cmp;
> +       char buf[PVSCHED_NAME_MAX];
> +       struct inode *inode;
> +       struct kvm *kvm;
> +
> +       if (cnt > PVSCHED_NAME_MAX)
> +               return -EINVAL;
> +
> +       if (copy_from_user(&buf, ubuf, cnt))
> +               return -EFAULT;
> +
> +       cmp = strstrip(buf);
> +
> +       inode = file_inode(filp);
> +       inode_lock(inode);
> +       kvm = (struct kvm *)inode->i_private;
> +       ret = kvm_replace_pvsched_ops(kvm, cmp);
> +       inode_unlock(inode);
> +
> +       if (ret)
> +               return ret;
> +
> +       *ppos += cnt;
> +       return cnt;
> +}
> +
> +static int pvsched_vcpu_ops_open(struct inode *inode, struct file *filp)
> +{
> +       return single_open(filp, pvsched_vcpu_ops_show, inode->i_private);
> +}
> +
> +static const struct file_operations pvsched_vcpu_ops_fops = {
> +       .open           = pvsched_vcpu_ops_open,
> +       .write          = pvsched_vcpu_ops_write,
> +       .read           = seq_read,
> +       .llseek         = seq_lseek,
> +       .release        = single_release,
> +};
> +
> +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm)
> +{
> +       debugfs_create_file("pvsched_vcpu_ops", 0644, kvm->debugfs_dentry, kvm,
> +                           &pvsched_vcpu_ops_fops);
> +}
> +#else
> +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm)
> +{
> +}
> +#endif
> +
>  static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
>  {
>         static DEFINE_MUTEX(kvm_debugfs_lock);
> @@ -1288,6 +1361,8 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
>                                     &stat_fops_per_vm);
>         }
>
> +       kvm_create_vm_pvsched_debugfs(kvm);
> +
>         ret = kvm_arch_create_vm_debugfs(kvm);
>         if (ret)
>                 goto out_err;
> @@ -5474,6 +5549,48 @@ static long kvm_vm_ioctl(struct file *filp,
>                 r = kvm_gmem_create(kvm, &guest_memfd);
>                 break;
>         }
> +#endif
> +#ifdef CONFIG_PARAVIRT_SCHED_KVM
> +       case KVM_REPLACE_PVSCHED_OPS:
> +               struct pvsched_vcpu_ops *ops;
> +               struct kvm_pvsched_ops in_ops, out_ops;
> +
> +               r = -EFAULT;
> +               if (copy_from_user(&in_ops, argp, sizeof(in_ops)))
> +                       goto out;
> +
> +               out_ops.ops_name[0] = 0;
> +
> +               rcu_read_lock();
> +               ops = rcu_dereference(kvm->pvsched_ops);
> +               if (ops)
> +                       strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX);
> +               rcu_read_unlock();
> +
> +               r = kvm_replace_pvsched_ops(kvm, (char *)in_ops.ops_name);
> +               if (r)
> +                       goto out;
> +
> +               r = -EFAULT;
> +               if (copy_to_user(argp, &out_ops, sizeof(out_ops)))
> +                       goto out;
> +
> +               r = 0;
> +               break;
> +       case KVM_GET_PVSCHED_OPS:
> +               out_ops.ops_name[0] = 0;
> +               rcu_read_lock();
> +               ops = rcu_dereference(kvm->pvsched_ops);
> +               if (ops)
> +                       strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX);
> +               rcu_read_unlock();
> +
> +               r = -EFAULT;
> +               if (copy_to_user(argp, &out_ops, sizeof(out_ops)))
> +                       goto out;
> +
> +               r = 0;
> +               break;
>  #endif
>         default:
>                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> --
> 2.40.1
>
diff mbox series

Patch

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c3308536482b..4b29bdad4188 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2227,4 +2227,10 @@  struct kvm_create_guest_memfd {
 	__u64 reserved[6];
 };
 
+struct kvm_pvsched_ops {
+	__u8 ops_name[32]; /* PVSCHED_NAME_MAX */
+};
+
+#define KVM_GET_PVSCHED_OPS		_IOR(KVMIO, 0xe4, struct kvm_pvsched_ops)
+#define KVM_REPLACE_PVSCHED_OPS		_IOWR(KVMIO, 0xe5, struct kvm_pvsched_ops)
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0546814e4db7..b3d9c362d2e3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1223,6 +1223,79 @@  static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 	}
 }
 
+#ifdef CONFIG_PARAVIRT_SCHED_KVM
+static int pvsched_vcpu_ops_show(struct seq_file *m, void *data)
+{
+	char ops_name[PVSCHED_NAME_MAX];
+	struct pvsched_vcpu_ops *ops;
+	struct kvm *kvm = (struct kvm *) m->private;
+
+	rcu_read_lock();
+	ops = rcu_dereference(kvm->pvsched_ops);
+	if (ops)
+		strncpy(ops_name, ops->name, PVSCHED_NAME_MAX);
+	rcu_read_unlock();
+
+	seq_printf(m, "%s\n", ops_name);
+
+	return 0;
+}
+
+static ssize_t
+pvsched_vcpu_ops_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	int ret;
+	char *cmp;
+	char buf[PVSCHED_NAME_MAX];
+	struct inode *inode;
+	struct kvm *kvm;
+
+	if (cnt > PVSCHED_NAME_MAX)
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	cmp = strstrip(buf);
+
+	inode = file_inode(filp);
+	inode_lock(inode);
+	kvm = (struct kvm *)inode->i_private;
+	ret = kvm_replace_pvsched_ops(kvm, cmp);
+	inode_unlock(inode);
+
+	if (ret)
+		return ret;
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static int pvsched_vcpu_ops_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, pvsched_vcpu_ops_show, inode->i_private);
+}
+
+static const struct file_operations pvsched_vcpu_ops_fops = {
+	.open		= pvsched_vcpu_ops_open,
+	.write		= pvsched_vcpu_ops_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm)
+{
+	debugfs_create_file("pvsched_vcpu_ops", 0644, kvm->debugfs_dentry, kvm,
+			    &pvsched_vcpu_ops_fops);
+}
+#else
+static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm)
+{
+}
+#endif
+
 static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
 {
 	static DEFINE_MUTEX(kvm_debugfs_lock);
@@ -1288,6 +1361,8 @@  static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
 				    &stat_fops_per_vm);
 	}
 
+	kvm_create_vm_pvsched_debugfs(kvm);
+
 	ret = kvm_arch_create_vm_debugfs(kvm);
 	if (ret)
 		goto out_err;
@@ -5474,6 +5549,48 @@  static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_gmem_create(kvm, &guest_memfd);
 		break;
 	}
+#endif
+#ifdef CONFIG_PARAVIRT_SCHED_KVM
+	case KVM_REPLACE_PVSCHED_OPS:
+		struct pvsched_vcpu_ops *ops;
+		struct kvm_pvsched_ops in_ops, out_ops;
+
+		r = -EFAULT;
+		if (copy_from_user(&in_ops, argp, sizeof(in_ops)))
+			goto out;
+
+		out_ops.ops_name[0] = 0;
+
+		rcu_read_lock();
+		ops = rcu_dereference(kvm->pvsched_ops);
+		if (ops)
+			strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX);
+		rcu_read_unlock();
+
+		r = kvm_replace_pvsched_ops(kvm, (char *)in_ops.ops_name);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &out_ops, sizeof(out_ops)))
+			goto out;
+
+		r = 0;
+		break;
+	case KVM_GET_PVSCHED_OPS:
+		out_ops.ops_name[0] = 0;
+		rcu_read_lock();
+		ops = rcu_dereference(kvm->pvsched_ops);
+		if (ops)
+			strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX);
+		rcu_read_unlock();
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &out_ops, sizeof(out_ops)))
+			goto out;
+
+		r = 0;
+		break;
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);