Message ID | 20240403140116.3002809-2-vineeth@bitbyteword.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Paravirt Scheduling (Dynamic vcpu priority management) | expand |
Adding sched_ext folks On Wed, Apr 3, 2024 at 10:01 AM Vineeth Pillai (Google) <vineeth@bitbyteword.org> wrote: > > Implement a paravirt scheduling framework for linux kernel. > > The framework allows for pvsched driver to register to the kernel and > receive callbacks from hypervisor(eg: kvm) for interested vcpu events > like VMENTER, VMEXIT etc. > > The framework also allows hypervisor to select a pvsched driver (from > the available list of registered drivers) for each guest. > > Also implement a sysctl for listing the available pvsched drivers. > > Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> > --- > Kconfig | 2 + > include/linux/pvsched.h | 102 +++++++++++++++++++ > kernel/sysctl.c | 27 +++++ > virt/Makefile | 2 +- > virt/pvsched/Kconfig | 12 +++ > virt/pvsched/Makefile | 2 + > virt/pvsched/pvsched.c | 215 ++++++++++++++++++++++++++++++++++++++++ > 7 files changed, 361 insertions(+), 1 deletion(-) > create mode 100644 include/linux/pvsched.h > create mode 100644 virt/pvsched/Kconfig > create mode 100644 virt/pvsched/Makefile > create mode 100644 virt/pvsched/pvsched.c > > diff --git a/Kconfig b/Kconfig > index 745bc773f567..4a52eaa21166 100644 > --- a/Kconfig > +++ b/Kconfig > @@ -29,4 +29,6 @@ source "lib/Kconfig" > > source "lib/Kconfig.debug" > > +source "virt/pvsched/Kconfig" > + > source "Documentation/Kconfig" > diff --git a/include/linux/pvsched.h b/include/linux/pvsched.h > new file mode 100644 > index 000000000000..59df6b44aacb > --- /dev/null > +++ b/include/linux/pvsched.h > @@ -0,0 +1,102 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* Copyright (c) 2024 Google */ > + > +#ifndef _LINUX_PVSCHED_H > +#define _LINUX_PVSCHED_H 1 > + > +/* > + * List of events for which hypervisor calls back into pvsched driver. > + * Driver can specify the events it is interested in. 
> + */ > +enum pvsched_vcpu_events { > + PVSCHED_VCPU_VMENTER = 0x1, > + PVSCHED_VCPU_VMEXIT = 0x2, > + PVSCHED_VCPU_HALT = 0x4, > + PVSCHED_VCPU_INTR_INJ = 0x8, > +}; > + > +#define PVSCHED_NAME_MAX 32 > +#define PVSCHED_MAX 8 > +#define PVSCHED_DRV_BUF_MAX (PVSCHED_NAME_MAX * PVSCHED_MAX + PVSCHED_MAX) > + > +/* > + * pvsched driver callbacks. > + * TODO: versioning support for better compatibility with the guest > + * component implementing this feature. > + */ > +struct pvsched_vcpu_ops { > + /* > + * pvsched_vcpu_register() - Register the vcpu with pvsched driver. > + * @pid: pid of the vcpu task. > + * > + * pvsched driver can store the pid internally and initialize > + * itself to prepare for receiving callbacks from this vcpu. > + */ > + int (*pvsched_vcpu_register)(struct pid *pid); > + > + /* > + * pvsched_vcpu_unregister() - Un-register the vcpu with pvsched driver. > + * @pid: pid of the vcpu task. > + */ > + void (*pvsched_vcpu_unregister)(struct pid *pid); > + > + /* > + * pvsched_vcpu_notify_event() - Callback for pvsched events > + * @addr: Address of the memory region shared with guest > + * @pid: pid of the vcpu task. > + * @events: bit mask of the events that hypervisor wants to notify. > + */ > + void (*pvsched_vcpu_notify_event)(void *addr, struct pid *pid, u32 event); > + > + char name[PVSCHED_NAME_MAX]; > + struct module *owner; > + struct list_head list; > + u32 events; > + u32 key; > +}; > + > +#ifdef CONFIG_PARAVIRT_SCHED_HOST > +int pvsched_get_available_drivers(char *buf, size_t maxlen); > + > +int pvsched_register_vcpu_ops(struct pvsched_vcpu_ops *ops); > +void pvsched_unregister_vcpu_ops(struct pvsched_vcpu_ops *ops); > + > +struct pvsched_vcpu_ops *pvsched_get_vcpu_ops(char *name); > +void pvsched_put_vcpu_ops(struct pvsched_vcpu_ops *ops); > + > +static inline int pvsched_validate_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > + /* > + * All callbacks are mandatory. 
> + */ > + if (!ops->pvsched_vcpu_register || !ops->pvsched_vcpu_unregister || > + !ops->pvsched_vcpu_notify_event) > + return -EINVAL; > + > + return 0; > +} > +#else > +static inline void pvsched_get_available_drivers(char *buf, size_t maxlen) > +{ > +} > + > +static inline int pvsched_register_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > + return -ENOTSUPP; > +} > + > +static inline void pvsched_unregister_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > +} > + > +static inline struct pvsched_vcpu_ops *pvsched_get_vcpu_ops(char *name) > +{ > + return NULL; > +} > + > +static inline void pvsched_put_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > +} > +#endif > + > +#endif > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 157f7ce2942d..10a18a791b4f 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -63,6 +63,7 @@ > #include <linux/mount.h> > #include <linux/userfaultfd_k.h> > #include <linux/pid.h> > +#include <linux/pvsched.h> > > #include "../lib/kstrtox.h" > > @@ -1615,6 +1616,24 @@ int proc_do_static_key(struct ctl_table *table, int write, > return ret; > } > > +#ifdef CONFIG_PARAVIRT_SCHED_HOST > +static int proc_pvsched_available_drivers(struct ctl_table *ctl, > + int write, void *buffer, > + size_t *lenp, loff_t *ppos) > +{ > + struct ctl_table tbl = { .maxlen = PVSCHED_DRV_BUF_MAX, }; > + int ret; > + > + tbl.data = kmalloc(tbl.maxlen, GFP_USER); > + if (!tbl.data) > + return -ENOMEM; > + pvsched_get_available_drivers(tbl.data, PVSCHED_DRV_BUF_MAX); > + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); > + kfree(tbl.data); > + return ret; > +} > +#endif > + > static struct ctl_table kern_table[] = { > { > .procname = "panic", > @@ -2033,6 +2052,14 @@ static struct ctl_table kern_table[] = { > .extra1 = SYSCTL_ONE, > .extra2 = SYSCTL_INT_MAX, > }, > +#endif > +#ifdef CONFIG_PARAVIRT_SCHED_HOST > + { > + .procname = "pvsched_available_drivers", > + .maxlen = PVSCHED_DRV_BUF_MAX, > + .mode = 0444, > + .proc_handler = 
proc_pvsched_available_drivers, > + }, > #endif > { } > }; > diff --git a/virt/Makefile b/virt/Makefile > index 1cfea9436af9..9d0f32d775a1 100644 > --- a/virt/Makefile > +++ b/virt/Makefile > @@ -1,2 +1,2 @@ > # SPDX-License-Identifier: GPL-2.0-only > -obj-y += lib/ > +obj-y += lib/ pvsched/ > diff --git a/virt/pvsched/Kconfig b/virt/pvsched/Kconfig > new file mode 100644 > index 000000000000..5ca2669060cb > --- /dev/null > +++ b/virt/pvsched/Kconfig > @@ -0,0 +1,12 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +config PARAVIRT_SCHED_HOST > + bool "Paravirt scheduling framework in the host kernel" > + default n > + help > + Paravirtualized scheduling facilitates the exchange of scheduling > + related information between the host and guest through shared memory, > + enhancing the efficiency of vCPU thread scheduling by the hypervisor. > + An illustrative use case involves dynamically boosting the priority of > + a vCPU thread when the guest is executing a latency-sensitive workload > + on that specific vCPU. > + This config enables paravirt scheduling framework in the host kernel. > diff --git a/virt/pvsched/Makefile b/virt/pvsched/Makefile > new file mode 100644 > index 000000000000..4ca38e30479b > --- /dev/null > +++ b/virt/pvsched/Makefile > @@ -0,0 +1,2 @@ > + > +obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o > diff --git a/virt/pvsched/pvsched.c b/virt/pvsched/pvsched.c > new file mode 100644 > index 000000000000..610c85cf90d2 > --- /dev/null > +++ b/virt/pvsched/pvsched.c > @@ -0,0 +1,215 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (c) 2024 Google */ > + > +/* > + * Paravirt scheduling framework > + * > + */ > + > +/* > + * Heavily inspired from tcp congestion avoidance implementation. 
> + * (net/ipv4/tcp_cong.c) > + */ > + > +#define pr_fmt(fmt) "PVSCHED: " fmt > + > +#include <linux/module.h> > +#include <linux/bpf.h> > +#include <linux/gfp.h> > +#include <linux/types.h> > +#include <linux/list.h> > +#include <linux/jhash.h> > +#include <linux/pvsched.h> > + > +static DEFINE_SPINLOCK(pvsched_drv_list_lock); > +static int nr_pvsched_drivers = 0; > +static LIST_HEAD(pvsched_drv_list); > + > +/* > + * Retrieve pvsched_vcpu_ops given the name. > + */ > +static struct pvsched_vcpu_ops *pvsched_find_vcpu_ops_name(char *name) > +{ > + struct pvsched_vcpu_ops *ops; > + > + list_for_each_entry_rcu(ops, &pvsched_drv_list, list) { > + if (strcmp(ops->name, name) == 0) > + return ops; > + } > + > + return NULL; > +} > + > +/* > + * Retrieve pvsched_vcpu_ops given the hash key. > + */ > +static struct pvsched_vcpu_ops *pvsched_find_vcpu_ops_key(u32 key) > +{ > + struct pvsched_vcpu_ops *ops; > + > + list_for_each_entry_rcu(ops, &pvsched_drv_list, list) { > + if (ops->key == key) > + return ops; > + } > + > + return NULL; > +} > + > +/* > + * pvsched_get_available_drivers() - Copy space separated list of pvsched > + * driver names. > + * @buf: buffer to store the list of driver names > + * @maxlen: size of the buffer > + * > + * Return: 0 on success, negative value on error. > + */ > +int pvsched_get_available_drivers(char *buf, size_t maxlen) > +{ > + struct pvsched_vcpu_ops *ops; > + size_t offs = 0; > + > + if (!buf) > + return -EINVAL; > + > + if (maxlen > PVSCHED_DRV_BUF_MAX) > + maxlen = PVSCHED_DRV_BUF_MAX; > + > + rcu_read_lock(); > + list_for_each_entry_rcu(ops, &pvsched_drv_list, list) { > + offs += snprintf(buf + offs, maxlen - offs, > + "%s%s", > + offs == 0 ? "" : " ", ops->name); > + > + if (WARN_ON_ONCE(offs >= maxlen)) > + break; > + } > + rcu_read_unlock(); > + > + return 0; > +} > +EXPORT_SYMBOL_GPL(pvsched_get_available_drivers); > + > +/* > + * pvsched_register_vcpu_ops() - Register the driver in the kernel. 
> + * @ops: Driver data(callbacks) > + * > + * After the registration, driver will be exposed to the hypervisor > + * for assignment to the guest VMs. > + * > + * Return: 0 on success, negative value on error. > + */ > +int pvsched_register_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > + int ret = 0; > + > + ops->key = jhash(ops->name, sizeof(ops->name), strlen(ops->name)); > + spin_lock(&pvsched_drv_list_lock); > + if (nr_pvsched_drivers > PVSCHED_MAX) { > + ret = -ENOSPC; > + } if (pvsched_find_vcpu_ops_key(ops->key)) { > + ret = -EEXIST; > + } else if (!(ret = pvsched_validate_vcpu_ops(ops))) { > + list_add_tail_rcu(&ops->list, &pvsched_drv_list); > + nr_pvsched_drivers++; > + } > + spin_unlock(&pvsched_drv_list_lock); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(pvsched_register_vcpu_ops); > + > +/* > + * pvsched_unregister_vcpu_ops() - Un-register the driver from the kernel. > + * @ops: Driver data(callbacks) > + * > + * After un-registration, driver will not be visible to hypervisor. > + */ > +void pvsched_unregister_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > + spin_lock(&pvsched_drv_list_lock); > + list_del_rcu(&ops->list); > + nr_pvsched_drivers--; > + spin_unlock(&pvsched_drv_list_lock); > + > + synchronize_rcu(); > +} > +EXPORT_SYMBOL_GPL(pvsched_unregister_vcpu_ops); > + > +/* > + * pvsched_get_vcpu_ops: Acquire the driver. > + * @name: Name of the driver to be acquired. > + * > + * Hypervisor can use this API to get the driver structure for > + * assigning it to guest VMs. This API takes a reference on the > + * module/bpf program so that driver doesn't vanish under the > + * hypervisor. > + * > + * Return: driver structure if found, else NULL. 
> + */ > +struct pvsched_vcpu_ops *pvsched_get_vcpu_ops(char *name) > +{ > + struct pvsched_vcpu_ops *ops; > + > + if (!name || (strlen(name) >= PVSCHED_NAME_MAX)) > + return NULL; > + > + rcu_read_lock(); > + ops = pvsched_find_vcpu_ops_name(name); > + if (!ops) > + goto out; > + > + if (unlikely(!bpf_try_module_get(ops, ops->owner))) { > + ops = NULL; > + goto out; > + } > + > +out: > + rcu_read_unlock(); > + return ops; > +} > +EXPORT_SYMBOL_GPL(pvsched_get_vcpu_ops); > + > +/* > + * pvsched_put_vcpu_ops: Release the driver. > + * @ops: Driver to be released. > + * > + * Hypervisor can use this API to release the driver. > + */ > +void pvsched_put_vcpu_ops(struct pvsched_vcpu_ops *ops) > +{ > + bpf_module_put(ops, ops->owner); > +} > +EXPORT_SYMBOL_GPL(pvsched_put_vcpu_ops); > + > +/* > + * NOP vm_ops Sample implementation. > + * This driver doesn't do anything other than registering itself. > + * Placeholder for adding some default logic when the feature is > + * complete. > + */ > +static int nop_pvsched_vcpu_register(struct pid *pid) > +{ > + return 0; > +} > +static void nop_pvsched_vcpu_unregister(struct pid *pid) > +{ > +} > +static void nop_pvsched_notify_event(void *addr, struct pid *pid, u32 event) > +{ > +} > + > +struct pvsched_vcpu_ops nop_vcpu_ops = { > + .events = PVSCHED_VCPU_VMENTER | PVSCHED_VCPU_VMEXIT | PVSCHED_VCPU_HALT, > + .pvsched_vcpu_register = nop_pvsched_vcpu_register, > + .pvsched_vcpu_unregister = nop_pvsched_vcpu_unregister, > + .pvsched_vcpu_notify_event = nop_pvsched_notify_event, > + .name = "pvsched_nop", > + .owner = THIS_MODULE, > +}; > + > +static int __init pvsched_init(void) > +{ > + return WARN_ON(pvsched_register_vcpu_ops(&nop_vcpu_ops)); > +} > + > +late_initcall(pvsched_init); > -- > 2.40.1 >
diff --git a/Kconfig b/Kconfig index 745bc773f567..4a52eaa21166 100644 --- a/Kconfig +++ b/Kconfig @@ -29,4 +29,6 @@ source "lib/Kconfig" source "lib/Kconfig.debug" +source "virt/pvsched/Kconfig" + source "Documentation/Kconfig" diff --git a/include/linux/pvsched.h b/include/linux/pvsched.h new file mode 100644 index 000000000000..59df6b44aacb --- /dev/null +++ b/include/linux/pvsched.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024 Google */ + +#ifndef _LINUX_PVSCHED_H +#define _LINUX_PVSCHED_H 1 + +/* + * List of events for which hypervisor calls back into pvsched driver. + * Driver can specify the events it is interested in. + */ +enum pvsched_vcpu_events { + PVSCHED_VCPU_VMENTER = 0x1, + PVSCHED_VCPU_VMEXIT = 0x2, + PVSCHED_VCPU_HALT = 0x4, + PVSCHED_VCPU_INTR_INJ = 0x8, +}; + +#define PVSCHED_NAME_MAX 32 +#define PVSCHED_MAX 8 +#define PVSCHED_DRV_BUF_MAX (PVSCHED_NAME_MAX * PVSCHED_MAX + PVSCHED_MAX) + +/* + * pvsched driver callbacks. + * TODO: versioning support for better compatibility with the guest + * component implementing this feature. + */ +struct pvsched_vcpu_ops { + /* + * pvsched_vcpu_register() - Register the vcpu with pvsched driver. + * @pid: pid of the vcpu task. + * + * pvsched driver can store the pid internally and initialize + * itself to prepare for receiving callbacks from this vcpu. + */ + int (*pvsched_vcpu_register)(struct pid *pid); + + /* + * pvsched_vcpu_unregister() - Un-register the vcpu with pvsched driver. + * @pid: pid of the vcpu task. + */ + void (*pvsched_vcpu_unregister)(struct pid *pid); + + /* + * pvsched_vcpu_notify_event() - Callback for pvsched events + * @addr: Address of the memory region shared with guest + * @pid: pid of the vcpu task. + * @events: bit mask of the events that hypervisor wants to notify. 
+ */ + void (*pvsched_vcpu_notify_event)(void *addr, struct pid *pid, u32 event); + + char name[PVSCHED_NAME_MAX]; + struct module *owner; + struct list_head list; + u32 events; + u32 key; +}; + +#ifdef CONFIG_PARAVIRT_SCHED_HOST +int pvsched_get_available_drivers(char *buf, size_t maxlen); + +int pvsched_register_vcpu_ops(struct pvsched_vcpu_ops *ops); +void pvsched_unregister_vcpu_ops(struct pvsched_vcpu_ops *ops); + +struct pvsched_vcpu_ops *pvsched_get_vcpu_ops(char *name); +void pvsched_put_vcpu_ops(struct pvsched_vcpu_ops *ops); + +static inline int pvsched_validate_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ + /* + * All callbacks are mandatory. + */ + if (!ops->pvsched_vcpu_register || !ops->pvsched_vcpu_unregister || + !ops->pvsched_vcpu_notify_event) + return -EINVAL; + + return 0; +} +#else +static inline void pvsched_get_available_drivers(char *buf, size_t maxlen) +{ +} + +static inline int pvsched_register_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ + return -ENOTSUPP; +} + +static inline void pvsched_unregister_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ +} + +static inline struct pvsched_vcpu_ops *pvsched_get_vcpu_ops(char *name) +{ + return NULL; +} + +static inline void pvsched_put_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ +} +#endif + +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 157f7ce2942d..10a18a791b4f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -63,6 +63,7 @@ #include <linux/mount.h> #include <linux/userfaultfd_k.h> #include <linux/pid.h> +#include <linux/pvsched.h> #include "../lib/kstrtox.h" @@ -1615,6 +1616,24 @@ int proc_do_static_key(struct ctl_table *table, int write, return ret; } +#ifdef CONFIG_PARAVIRT_SCHED_HOST +static int proc_pvsched_available_drivers(struct ctl_table *ctl, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tbl = { .maxlen = PVSCHED_DRV_BUF_MAX, }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + 
pvsched_get_available_drivers(tbl.data, PVSCHED_DRV_BUF_MAX); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + kfree(tbl.data); + return ret; +} +#endif + static struct ctl_table kern_table[] = { { .procname = "panic", @@ -2033,6 +2052,14 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, +#endif +#ifdef CONFIG_PARAVIRT_SCHED_HOST + { + .procname = "pvsched_available_drivers", + .maxlen = PVSCHED_DRV_BUF_MAX, + .mode = 0444, + .proc_handler = proc_pvsched_available_drivers, + }, #endif { } }; diff --git a/virt/Makefile b/virt/Makefile index 1cfea9436af9..9d0f32d775a1 100644 --- a/virt/Makefile +++ b/virt/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += lib/ +obj-y += lib/ pvsched/ diff --git a/virt/pvsched/Kconfig b/virt/pvsched/Kconfig new file mode 100644 index 000000000000..5ca2669060cb --- /dev/null +++ b/virt/pvsched/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +config PARAVIRT_SCHED_HOST + bool "Paravirt scheduling framework in the host kernel" + default n + help + Paravirtualized scheduling facilitates the exchange of scheduling + related information between the host and guest through shared memory, + enhancing the efficiency of vCPU thread scheduling by the hypervisor. + An illustrative use case involves dynamically boosting the priority of + a vCPU thread when the guest is executing a latency-sensitive workload + on that specific vCPU. + This config enables paravirt scheduling framework in the host kernel. 
diff --git a/virt/pvsched/Makefile b/virt/pvsched/Makefile new file mode 100644 index 000000000000..4ca38e30479b --- /dev/null +++ b/virt/pvsched/Makefile @@ -0,0 +1,2 @@ + +obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o diff --git a/virt/pvsched/pvsched.c b/virt/pvsched/pvsched.c new file mode 100644 index 000000000000..610c85cf90d2 --- /dev/null +++ b/virt/pvsched/pvsched.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Google */ + +/* + * Paravirt scheduling framework + * + */ + +/* + * Heavily inspired from tcp congestion avoidance implementation. + * (net/ipv4/tcp_cong.c) + */ + +#define pr_fmt(fmt) "PVSCHED: " fmt + +#include <linux/module.h> +#include <linux/bpf.h> +#include <linux/gfp.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/pvsched.h> + +static DEFINE_SPINLOCK(pvsched_drv_list_lock); +static int nr_pvsched_drivers = 0; +static LIST_HEAD(pvsched_drv_list); + +/* + * Retrieve pvsched_vcpu_ops given the name. + */ +static struct pvsched_vcpu_ops *pvsched_find_vcpu_ops_name(char *name) +{ + struct pvsched_vcpu_ops *ops; + + list_for_each_entry_rcu(ops, &pvsched_drv_list, list) { + if (strcmp(ops->name, name) == 0) + return ops; + } + + return NULL; +} + +/* + * Retrieve pvsched_vcpu_ops given the hash key. + */ +static struct pvsched_vcpu_ops *pvsched_find_vcpu_ops_key(u32 key) +{ + struct pvsched_vcpu_ops *ops; + + list_for_each_entry_rcu(ops, &pvsched_drv_list, list) { + if (ops->key == key) + return ops; + } + + return NULL; +} + +/* + * pvsched_get_available_drivers() - Copy space separated list of pvsched + * driver names. + * @buf: buffer to store the list of driver names + * @maxlen: size of the buffer + * + * Return: 0 on success, negative value on error. 
+ */ +int pvsched_get_available_drivers(char *buf, size_t maxlen) +{ + struct pvsched_vcpu_ops *ops; + size_t offs = 0; + + if (!buf) + return -EINVAL; + + if (maxlen > PVSCHED_DRV_BUF_MAX) + maxlen = PVSCHED_DRV_BUF_MAX; + + rcu_read_lock(); + list_for_each_entry_rcu(ops, &pvsched_drv_list, list) { + offs += snprintf(buf + offs, maxlen - offs, + "%s%s", + offs == 0 ? "" : " ", ops->name); + + if (WARN_ON_ONCE(offs >= maxlen)) + break; + } + rcu_read_unlock(); + + return 0; +} +EXPORT_SYMBOL_GPL(pvsched_get_available_drivers); + +/* + * pvsched_register_vcpu_ops() - Register the driver in the kernel. + * @ops: Driver data(callbacks) + * + * After the registration, driver will be exposed to the hypervisor + * for assignment to the guest VMs. + * + * Return: 0 on success, negative value on error. + */ +int pvsched_register_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ + int ret = 0; + + ops->key = jhash(ops->name, sizeof(ops->name), strlen(ops->name)); + spin_lock(&pvsched_drv_list_lock); + if (nr_pvsched_drivers > PVSCHED_MAX) { + ret = -ENOSPC; + } if (pvsched_find_vcpu_ops_key(ops->key)) { + ret = -EEXIST; + } else if (!(ret = pvsched_validate_vcpu_ops(ops))) { + list_add_tail_rcu(&ops->list, &pvsched_drv_list); + nr_pvsched_drivers++; + } + spin_unlock(&pvsched_drv_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pvsched_register_vcpu_ops); + +/* + * pvsched_unregister_vcpu_ops() - Un-register the driver from the kernel. + * @ops: Driver data(callbacks) + * + * After un-registration, driver will not be visible to hypervisor. + */ +void pvsched_unregister_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ + spin_lock(&pvsched_drv_list_lock); + list_del_rcu(&ops->list); + nr_pvsched_drivers--; + spin_unlock(&pvsched_drv_list_lock); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(pvsched_unregister_vcpu_ops); + +/* + * pvsched_get_vcpu_ops: Acquire the driver. + * @name: Name of the driver to be acquired. 
+ * + * Hypervisor can use this API to get the driver structure for + * assigning it to guest VMs. This API takes a reference on the + * module/bpf program so that driver doesn't vanish under the + * hypervisor. + * + * Return: driver structure if found, else NULL. + */ +struct pvsched_vcpu_ops *pvsched_get_vcpu_ops(char *name) +{ + struct pvsched_vcpu_ops *ops; + + if (!name || (strlen(name) >= PVSCHED_NAME_MAX)) + return NULL; + + rcu_read_lock(); + ops = pvsched_find_vcpu_ops_name(name); + if (!ops) + goto out; + + if (unlikely(!bpf_try_module_get(ops, ops->owner))) { + ops = NULL; + goto out; + } + +out: + rcu_read_unlock(); + return ops; +} +EXPORT_SYMBOL_GPL(pvsched_get_vcpu_ops); + +/* + * pvsched_put_vcpu_ops: Release the driver. + * @ops: Driver to be released. + * + * Hypervisor can use this API to release the driver. + */ +void pvsched_put_vcpu_ops(struct pvsched_vcpu_ops *ops) +{ + bpf_module_put(ops, ops->owner); +} +EXPORT_SYMBOL_GPL(pvsched_put_vcpu_ops); + +/* + * NOP vm_ops Sample implementation. + * This driver doesn't do anything other than registering itself. + * Placeholder for adding some default logic when the feature is + * complete. + */ +static int nop_pvsched_vcpu_register(struct pid *pid) +{ + return 0; +} +static void nop_pvsched_vcpu_unregister(struct pid *pid) +{ +} +static void nop_pvsched_notify_event(void *addr, struct pid *pid, u32 event) +{ +} + +struct pvsched_vcpu_ops nop_vcpu_ops = { + .events = PVSCHED_VCPU_VMENTER | PVSCHED_VCPU_VMEXIT | PVSCHED_VCPU_HALT, + .pvsched_vcpu_register = nop_pvsched_vcpu_register, + .pvsched_vcpu_unregister = nop_pvsched_vcpu_unregister, + .pvsched_vcpu_notify_event = nop_pvsched_notify_event, + .name = "pvsched_nop", + .owner = THIS_MODULE, +}; + +static int __init pvsched_init(void) +{ + return WARN_ON(pvsched_register_vcpu_ops(&nop_vcpu_ops)); +} + +late_initcall(pvsched_init);