Message ID | 20240403140116.3002809-4-vineeth@bitbyteword.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Paravirt Scheduling (Dynamic vcpu priority management) | expand |
Adding sched_ext folks On Wed, Apr 3, 2024 at 10:01 AM Vineeth Pillai (Google) <vineeth@bitbyteword.org> wrote: > > Implement ioctl for assigning and unassigning pvsched driver for a > guest. VMMs would need to adopt these ioctls for supporting the feature. > Also add a temporary debugfs interface for managing this. > > Ideally, the hypervisor would be able to determine the pvsched driver > based on the information received from the guest. Guest VMs with the > feature enabled would request the hypervisor to select a pvsched driver. > The ioctl API is an override mechanism to give more control to the admin. > > Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> > --- > include/uapi/linux/kvm.h | 6 ++ > virt/kvm/kvm_main.c | 117 +++++++++++++++++++++++++++++++++++++++ > 2 files changed, 123 insertions(+) > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index c3308536482b..4b29bdad4188 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -2227,4 +2227,10 @@ struct kvm_create_guest_memfd { > __u64 reserved[6]; > }; > > +struct kvm_pvsched_ops { > + __u8 ops_name[32]; /* PVSCHED_NAME_MAX */ > +}; > + > +#define KVM_GET_PVSCHED_OPS _IOR(KVMIO, 0xe4, struct kvm_pvsched_ops) > +#define KVM_REPLACE_PVSCHED_OPS _IOWR(KVMIO, 0xe5, struct kvm_pvsched_ops) > #endif /* __LINUX_KVM_H */ > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 0546814e4db7..b3d9c362d2e3 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -1223,6 +1223,79 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) > } > } > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > +static int pvsched_vcpu_ops_show(struct seq_file *m, void *data) > +{ > + char ops_name[PVSCHED_NAME_MAX]; > + struct pvsched_vcpu_ops *ops; > + struct kvm *kvm = (struct kvm *) m->private; > + > + rcu_read_lock(); > + ops = rcu_dereference(kvm->pvsched_ops); > + if (ops) > + strncpy(ops_name, ops->name, 
PVSCHED_NAME_MAX); > + rcu_read_unlock(); > + > + seq_printf(m, "%s\n", ops_name); > + > + return 0; > +} > + > +static ssize_t > +pvsched_vcpu_ops_write(struct file *filp, const char __user *ubuf, > + size_t cnt, loff_t *ppos) > +{ > + int ret; > + char *cmp; > + char buf[PVSCHED_NAME_MAX]; > + struct inode *inode; > + struct kvm *kvm; > + > + if (cnt > PVSCHED_NAME_MAX) > + return -EINVAL; > + > + if (copy_from_user(&buf, ubuf, cnt)) > + return -EFAULT; > + > + cmp = strstrip(buf); > + > + inode = file_inode(filp); > + inode_lock(inode); > + kvm = (struct kvm *)inode->i_private; > + ret = kvm_replace_pvsched_ops(kvm, cmp); > + inode_unlock(inode); > + > + if (ret) > + return ret; > + > + *ppos += cnt; > + return cnt; > +} > + > +static int pvsched_vcpu_ops_open(struct inode *inode, struct file *filp) > +{ > + return single_open(filp, pvsched_vcpu_ops_show, inode->i_private); > +} > + > +static const struct file_operations pvsched_vcpu_ops_fops = { > + .open = pvsched_vcpu_ops_open, > + .write = pvsched_vcpu_ops_write, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = single_release, > +}; > + > +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm) > +{ > + debugfs_create_file("pvsched_vcpu_ops", 0644, kvm->debugfs_dentry, kvm, > + &pvsched_vcpu_ops_fops); > +} > +#else > +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm) > +{ > +} > +#endif > + > static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) > { > static DEFINE_MUTEX(kvm_debugfs_lock); > @@ -1288,6 +1361,8 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) > &stat_fops_per_vm); > } > > + kvm_create_vm_pvsched_debugfs(kvm); > + > ret = kvm_arch_create_vm_debugfs(kvm); > if (ret) > goto out_err; > @@ -5474,6 +5549,48 @@ static long kvm_vm_ioctl(struct file *filp, > r = kvm_gmem_create(kvm, &guest_memfd); > break; > } > +#endif > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > + case KVM_REPLACE_PVSCHED_OPS: > + struct pvsched_vcpu_ops *ops; > + struct 
kvm_pvsched_ops in_ops, out_ops; > + > + r = -EFAULT; > + if (copy_from_user(&in_ops, argp, sizeof(in_ops))) > + goto out; > + > + out_ops.ops_name[0] = 0; > + > + rcu_read_lock(); > + ops = rcu_dereference(kvm->pvsched_ops); > + if (ops) > + strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX); > + rcu_read_unlock(); > + > + r = kvm_replace_pvsched_ops(kvm, (char *)in_ops.ops_name); > + if (r) > + goto out; > + > + r = -EFAULT; > + if (copy_to_user(argp, &out_ops, sizeof(out_ops))) > + goto out; > + > + r = 0; > + break; > + case KVM_GET_PVSCHED_OPS: > + out_ops.ops_name[0] = 0; > + rcu_read_lock(); > + ops = rcu_dereference(kvm->pvsched_ops); > + if (ops) > + strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX); > + rcu_read_unlock(); > + > + r = -EFAULT; > + if (copy_to_user(argp, &out_ops, sizeof(out_ops))) > + goto out; > + > + r = 0; > + break; > #endif > default: > r = kvm_arch_vm_ioctl(filp, ioctl, arg); > -- > 2.40.1 >
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c3308536482b..4b29bdad4188 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2227,4 +2227,10 @@ struct kvm_create_guest_memfd { __u64 reserved[6]; }; +struct kvm_pvsched_ops { + __u8 ops_name[32]; /* PVSCHED_NAME_MAX */ +}; + +#define KVM_GET_PVSCHED_OPS _IOR(KVMIO, 0xe4, struct kvm_pvsched_ops) +#define KVM_REPLACE_PVSCHED_OPS _IOWR(KVMIO, 0xe5, struct kvm_pvsched_ops) #endif /* __LINUX_KVM_H */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0546814e4db7..b3d9c362d2e3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1223,6 +1223,79 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) } } +#ifdef CONFIG_PARAVIRT_SCHED_KVM +static int pvsched_vcpu_ops_show(struct seq_file *m, void *data) +{ + char ops_name[PVSCHED_NAME_MAX]; + struct pvsched_vcpu_ops *ops; + struct kvm *kvm = (struct kvm *) m->private; + + rcu_read_lock(); + ops = rcu_dereference(kvm->pvsched_ops); + if (ops) + strncpy(ops_name, ops->name, PVSCHED_NAME_MAX); + rcu_read_unlock(); + + seq_printf(m, "%s\n", ops_name); + + return 0; +} + +static ssize_t +pvsched_vcpu_ops_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char *cmp; + char buf[PVSCHED_NAME_MAX]; + struct inode *inode; + struct kvm *kvm; + + if (cnt > PVSCHED_NAME_MAX) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + cmp = strstrip(buf); + + inode = file_inode(filp); + inode_lock(inode); + kvm = (struct kvm *)inode->i_private; + ret = kvm_replace_pvsched_ops(kvm, cmp); + inode_unlock(inode); + + if (ret) + return ret; + + *ppos += cnt; + return cnt; +} + +static int pvsched_vcpu_ops_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, pvsched_vcpu_ops_show, inode->i_private); +} + +static const struct file_operations pvsched_vcpu_ops_fops = { + .open = pvsched_vcpu_ops_open, + .write = pvsched_vcpu_ops_write, + .read = 
seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm) +{ + debugfs_create_file("pvsched_vcpu_ops", 0644, kvm->debugfs_dentry, kvm, + &pvsched_vcpu_ops_fops); +} +#else +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm) +{ +} +#endif + static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) { static DEFINE_MUTEX(kvm_debugfs_lock); @@ -1288,6 +1361,8 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) &stat_fops_per_vm); } + kvm_create_vm_pvsched_debugfs(kvm); + ret = kvm_arch_create_vm_debugfs(kvm); if (ret) goto out_err; @@ -5474,6 +5549,48 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_gmem_create(kvm, &guest_memfd); break; } +#endif +#ifdef CONFIG_PARAVIRT_SCHED_KVM + case KVM_REPLACE_PVSCHED_OPS: + struct pvsched_vcpu_ops *ops; + struct kvm_pvsched_ops in_ops, out_ops; + + r = -EFAULT; + if (copy_from_user(&in_ops, argp, sizeof(in_ops))) + goto out; + + out_ops.ops_name[0] = 0; + + rcu_read_lock(); + ops = rcu_dereference(kvm->pvsched_ops); + if (ops) + strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX); + rcu_read_unlock(); + + r = kvm_replace_pvsched_ops(kvm, (char *)in_ops.ops_name); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(argp, &out_ops, sizeof(out_ops))) + goto out; + + r = 0; + break; + case KVM_GET_PVSCHED_OPS: + out_ops.ops_name[0] = 0; + rcu_read_lock(); + ops = rcu_dereference(kvm->pvsched_ops); + if (ops) + strncpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX); + rcu_read_unlock(); + + r = -EFAULT; + if (copy_to_user(argp, &out_ops, sizeof(out_ops))) + goto out; + + r = 0; + break; #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg);