diff mbox series

[RFC,v2,4/5] pvsched: bpf support for pvsched

Message ID 20240403140116.3002809-5-vineeth@bitbyteword.org (mailing list archive)
State New, archived
Headers show
Series Paravirt Scheduling (Dynamic vcpu priority management) | expand

Commit Message

Vineeth Remanan Pillai April 3, 2024, 2:01 p.m. UTC
Add support for implementing BPF pvsched drivers. BPF programs can use
struct_ops to define the callbacks of pvsched drivers.

This is only a skeleton of the BPF framework for pvsched; some
verification details are not implemented yet.

Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/bpf/bpf_struct_ops_types.h |   4 +
 virt/pvsched/Makefile             |   2 +-
 virt/pvsched/pvsched_bpf.c        | 141 ++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100644 virt/pvsched/pvsched_bpf.c

Comments

Vineeth Remanan Pillai April 8, 2024, 2 p.m. UTC | #1
Adding sched_ext folks

On Wed, Apr 3, 2024 at 10:01 AM Vineeth Pillai (Google)
<vineeth@bitbyteword.org> wrote:
>
> Add support for implementing bpf pvsched drivers. bpf programs can use
> the struct_ops to define the callbacks of pvsched drivers.
>
> This is only a skeleton of the bpf framework for pvsched. Some
> verification details are not implemented yet.
>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>  kernel/bpf/bpf_struct_ops_types.h |   4 +
>  virt/pvsched/Makefile             |   2 +-
>  virt/pvsched/pvsched_bpf.c        | 141 ++++++++++++++++++++++++++++++
>  3 files changed, 146 insertions(+), 1 deletion(-)
>  create mode 100644 virt/pvsched/pvsched_bpf.c
>
> diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
> index 5678a9ddf817..9d5e4d1a331a 100644
> --- a/kernel/bpf/bpf_struct_ops_types.h
> +++ b/kernel/bpf/bpf_struct_ops_types.h
> @@ -9,4 +9,8 @@ BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
>  #include <net/tcp.h>
>  BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
>  #endif
> +#ifdef CONFIG_PARAVIRT_SCHED_HOST
> +#include <linux/pvsched.h>
> +BPF_STRUCT_OPS_TYPE(pvsched_vcpu_ops)
> +#endif
>  #endif
> diff --git a/virt/pvsched/Makefile b/virt/pvsched/Makefile
> index 4ca38e30479b..02bc072cd806 100644
> --- a/virt/pvsched/Makefile
> +++ b/virt/pvsched/Makefile
> @@ -1,2 +1,2 @@
>
> -obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o
> +obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o pvsched_bpf.o
> diff --git a/virt/pvsched/pvsched_bpf.c b/virt/pvsched/pvsched_bpf.c
> new file mode 100644
> index 000000000000..b125089abc3b
> --- /dev/null
> +++ b/virt/pvsched/pvsched_bpf.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Google  */
> +
> +#include <linux/types.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <linux/filter.h>
> +#include <linux/pvsched.h>
> +
> +
> +/* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
> +extern struct bpf_struct_ops bpf_pvsched_vcpu_ops;
> +
> +static int bpf_pvsched_vcpu_init(struct btf *btf)
> +{
> +       return 0;
> +}
> +
> +static bool bpf_pvsched_vcpu_is_valid_access(int off, int size,
> +                                      enum bpf_access_type type,
> +                                      const struct bpf_prog *prog,
> +                                      struct bpf_insn_access_aux *info)
> +{
> +       if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
> +               return false;
> +       if (type != BPF_READ)
> +               return false;
> +       if (off % size != 0)
> +               return false;
> +
> +       if (!btf_ctx_access(off, size, type, prog, info))
> +               return false;
> +
> +       return true;
> +}
> +
> +static int bpf_pvsched_vcpu_btf_struct_access(struct bpf_verifier_log *log,
> +                                       const struct bpf_reg_state *reg,
> +                                       int off, int size)
> +{
> +       /*
> +        * TODO: Enable write access to Guest shared mem.
> +        */
> +       return -EACCES;
> +}
> +
> +static const struct bpf_func_proto *
> +bpf_pvsched_vcpu_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +       return bpf_base_func_proto(func_id);
> +}
> +
> +static const struct bpf_verifier_ops bpf_pvsched_vcpu_verifier_ops = {
> +       .get_func_proto         = bpf_pvsched_vcpu_get_func_proto,
> +       .is_valid_access        = bpf_pvsched_vcpu_is_valid_access,
> +       .btf_struct_access      = bpf_pvsched_vcpu_btf_struct_access,
> +};
> +
> +static int bpf_pvsched_vcpu_init_member(const struct btf_type *t,
> +                                 const struct btf_member *member,
> +                                 void *kdata, const void *udata)
> +{
> +       const struct pvsched_vcpu_ops *uvm_ops;
> +       struct pvsched_vcpu_ops *vm_ops;
> +       u32 moff;
> +
> +       uvm_ops = (const struct pvsched_vcpu_ops *)udata;
> +       vm_ops = (struct pvsched_vcpu_ops *)kdata;
> +
> +       moff = __btf_member_bit_offset(t, member) / 8;
> +       switch (moff) {
> +       case offsetof(struct pvsched_vcpu_ops, events):
> +               vm_ops->events = *(u32 *)(udata + moff);
> +               return 1;
> +       case offsetof(struct pvsched_vcpu_ops, name):
> +               if (bpf_obj_name_cpy(vm_ops->name, uvm_ops->name,
> +                                       sizeof(vm_ops->name)) <= 0)
> +                       return -EINVAL;
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +static int bpf_pvsched_vcpu_check_member(const struct btf_type *t,
> +                                  const struct btf_member *member,
> +                                  const struct bpf_prog *prog)
> +{
> +       return 0;
> +}
> +
> +static int bpf_pvsched_vcpu_reg(void *kdata)
> +{
> +       return pvsched_register_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
> +}
> +
> +static void bpf_pvsched_vcpu_unreg(void *kdata)
> +{
> +       pvsched_unregister_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
> +}
> +
> +static int bpf_pvsched_vcpu_validate(void *kdata)
> +{
> +       return pvsched_validate_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
> +}
> +
> +static int bpf_pvsched_vcpu_update(void *kdata, void *old_kdata)
> +{
> +       return -EOPNOTSUPP;
> +}
> +
> +static int __pvsched_vcpu_register(struct pid *pid)
> +{
> +       return 0;
> +}
> +static void __pvsched_vcpu_unregister(struct pid *pid)
> +{
> +}
> +static void __pvsched_notify_event(void *addr, struct pid *pid, u32 event)
> +{
> +}
> +
> +static struct pvsched_vcpu_ops __bpf_ops_pvsched_vcpu_ops = {
> +       .pvsched_vcpu_register = __pvsched_vcpu_register,
> +       .pvsched_vcpu_unregister = __pvsched_vcpu_unregister,
> +       .pvsched_vcpu_notify_event = __pvsched_notify_event,
> +};
> +
> +struct bpf_struct_ops bpf_pvsched_vcpu_ops = {
> +       .init = &bpf_pvsched_vcpu_init,
> +       .validate = bpf_pvsched_vcpu_validate,
> +       .update = bpf_pvsched_vcpu_update,
> +       .verifier_ops = &bpf_pvsched_vcpu_verifier_ops,
> +       .reg = bpf_pvsched_vcpu_reg,
> +       .unreg = bpf_pvsched_vcpu_unreg,
> +       .check_member = bpf_pvsched_vcpu_check_member,
> +       .init_member = bpf_pvsched_vcpu_init_member,
> +       .name = "pvsched_vcpu_ops",
> +       .cfi_stubs = &__bpf_ops_pvsched_vcpu_ops,
> +};
> --
> 2.40.1
>
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 5678a9ddf817..9d5e4d1a331a 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -9,4 +9,8 @@  BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
 #include <net/tcp.h>
 BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
 #endif
+#ifdef CONFIG_PARAVIRT_SCHED_HOST
+#include <linux/pvsched.h>
+BPF_STRUCT_OPS_TYPE(pvsched_vcpu_ops)
+#endif
 #endif
diff --git a/virt/pvsched/Makefile b/virt/pvsched/Makefile
index 4ca38e30479b..02bc072cd806 100644
--- a/virt/pvsched/Makefile
+++ b/virt/pvsched/Makefile
@@ -1,2 +1,2 @@ 
 
-obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o
+obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o pvsched_bpf.o
diff --git a/virt/pvsched/pvsched_bpf.c b/virt/pvsched/pvsched_bpf.c
new file mode 100644
index 000000000000..b125089abc3b
--- /dev/null
+++ b/virt/pvsched/pvsched_bpf.c
@@ -0,0 +1,141 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google  */
+
+#include <linux/types.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/pvsched.h>
+
+
+/* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_pvsched_vcpu_ops;
+
+static int bpf_pvsched_vcpu_init(struct btf *btf)
+{
+	return 0;
+}
+
+static bool bpf_pvsched_vcpu_is_valid_access(int off, int size,
+				       enum bpf_access_type type,
+				       const struct bpf_prog *prog,
+				       struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+
+	if (!btf_ctx_access(off, size, type, prog, info))
+		return false;
+
+	return true;
+}
+
+static int bpf_pvsched_vcpu_btf_struct_access(struct bpf_verifier_log *log,
+					const struct bpf_reg_state *reg,
+					int off, int size)
+{
+	/*
+	 * TODO: Enable write access to Guest shared mem.
+	 */
+	return -EACCES;
+}
+
+static const struct bpf_func_proto *
+bpf_pvsched_vcpu_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_verifier_ops bpf_pvsched_vcpu_verifier_ops = {
+	.get_func_proto		= bpf_pvsched_vcpu_get_func_proto,
+	.is_valid_access	= bpf_pvsched_vcpu_is_valid_access,
+	.btf_struct_access	= bpf_pvsched_vcpu_btf_struct_access,
+};
+
+static int bpf_pvsched_vcpu_init_member(const struct btf_type *t,
+				  const struct btf_member *member,
+				  void *kdata, const void *udata)
+{
+	const struct pvsched_vcpu_ops *uvm_ops;
+	struct pvsched_vcpu_ops *vm_ops;
+	u32 moff;
+
+	uvm_ops = (const struct pvsched_vcpu_ops *)udata;
+	vm_ops = (struct pvsched_vcpu_ops *)kdata;
+
+	moff = __btf_member_bit_offset(t, member) / 8;
+	switch (moff) {
+	case offsetof(struct pvsched_vcpu_ops, events):
+		vm_ops->events = *(u32 *)(udata + moff);
+		return 1;
+	case offsetof(struct pvsched_vcpu_ops, name):
+		if (bpf_obj_name_cpy(vm_ops->name, uvm_ops->name,
+					sizeof(vm_ops->name)) <= 0)
+			return -EINVAL;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int bpf_pvsched_vcpu_check_member(const struct btf_type *t,
+				   const struct btf_member *member,
+				   const struct bpf_prog *prog)
+{
+	return 0;
+}
+
+static int bpf_pvsched_vcpu_reg(void *kdata)
+{
+	return pvsched_register_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
+}
+
+static void bpf_pvsched_vcpu_unreg(void *kdata)
+{
+	pvsched_unregister_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
+}
+
+static int bpf_pvsched_vcpu_validate(void *kdata)
+{
+	return pvsched_validate_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
+}
+
+static int bpf_pvsched_vcpu_update(void *kdata, void *old_kdata)
+{
+	return -EOPNOTSUPP;
+}
+
+static int __pvsched_vcpu_register(struct pid *pid)
+{
+	return 0;
+}
+static void __pvsched_vcpu_unregister(struct pid *pid)
+{
+}
+static void __pvsched_notify_event(void *addr, struct pid *pid, u32 event)
+{
+}
+
+static struct pvsched_vcpu_ops __bpf_ops_pvsched_vcpu_ops = {
+	.pvsched_vcpu_register = __pvsched_vcpu_register,
+	.pvsched_vcpu_unregister = __pvsched_vcpu_unregister,
+	.pvsched_vcpu_notify_event = __pvsched_notify_event,
+};
+
+struct bpf_struct_ops bpf_pvsched_vcpu_ops = {
+	.init = &bpf_pvsched_vcpu_init,
+	.validate = bpf_pvsched_vcpu_validate,
+	.update = bpf_pvsched_vcpu_update,
+	.verifier_ops = &bpf_pvsched_vcpu_verifier_ops,
+	.reg = bpf_pvsched_vcpu_reg,
+	.unreg = bpf_pvsched_vcpu_unreg,
+	.check_member = bpf_pvsched_vcpu_check_member,
+	.init_member = bpf_pvsched_vcpu_init_member,
+	.name = "pvsched_vcpu_ops",
+	.cfi_stubs = &__bpf_ops_pvsched_vcpu_ops,
+};