diff mbox series

[RFC,1/2] block: add support for redirecting IO completion through eBPF

Message ID 20191014122833.64908-2-houtao1@huawei.com (mailing list archive)
State New, archived
Headers show
Series block: use eBPF to redirect IO completion | expand

Commit Message

Hou Tao Oct. 14, 2019, 12:28 p.m. UTC
For the network stack, RPS (Receive Packet Steering) is used to
distribute network protocol processing from the hardware-interrupted CPU
to specific CPUs, alleviating the soft-irq load of the interrupted CPU.

For block layer, soft-irq (for single queue device) or hard-irq
(for multiple queue device) is used to handle IO completion, so
RPS will be useful when the soft-irq load or the hard-irq load
of a specific CPU is too high, or a specific CPU set is required
to handle IO completion.

Instead of setting the CPU set used for handling IO completion
through sysfs or procfs, we can attach an eBPF program to the
request-queue, provide some useful info (e.g., the CPU
which submits the request) to the program, and let the program
decide the proper CPU for IO completion handling.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 block/Makefile             |   2 +-
 block/blk-bpf.c            | 127 +++++++++++++++++++++++++++++++++++++
 block/blk-mq.c             |  22 +++++--
 block/blk-softirq.c        |  27 ++++++--
 include/linux/blkdev.h     |   3 +
 include/linux/bpf_blkdev.h |   9 +++
 include/linux/bpf_types.h  |   1 +
 include/uapi/linux/bpf.h   |   2 +
 kernel/bpf/syscall.c       |   9 +++
 9 files changed, 190 insertions(+), 12 deletions(-)
 create mode 100644 block/blk-bpf.c
 create mode 100644 include/linux/bpf_blkdev.h

Comments

Alexei Starovoitov Oct. 15, 2019, 9:04 p.m. UTC | #1
On Mon, Oct 14, 2019 at 5:21 AM Hou Tao <houtao1@huawei.com> wrote:
>
> For network stack, RPS, namely Receive Packet Steering, is used to
> distribute network protocol processing from hardware-interrupted CPU
> to specific CPUs and alleviating soft-irq load of the interrupted CPU.
>
> For block layer, soft-irq (for single queue device) or hard-irq
> (for multiple queue device) is used to handle IO completion, so
> RPS will be useful when the soft-irq load or the hard-irq load
> of a specific CPU is too high, or a specific CPU set is required
> to handle IO completion.
>
> Instead of setting the CPU set used for handling IO completion
> through sysfs or procfs, we can attach an eBPF program to the
> request-queue, provide some useful info (e.g., the CPU
> which submits the request) to the program, and let the program
> decides the proper CPU for IO completion handling.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
...
>
> +       rcu_read_lock();
> +       prog = rcu_dereference_protected(q->prog, 1);
> +       if (prog)
> +               bpf_ccpu = BPF_PROG_RUN(q->prog, NULL);
> +       rcu_read_unlock();
> +
>         cpu = get_cpu();
> -       if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
> -               shared = cpus_share_cache(cpu, ctx->cpu);
> +       if (bpf_ccpu < 0 || !cpu_online(bpf_ccpu)) {
> +               ccpu = ctx->cpu;
> +               if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
> +                       shared = cpus_share_cache(cpu, ctx->cpu);
> +       } else
> +               ccpu = bpf_ccpu;
>
> -       if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
> +       if (cpu != ccpu && !shared && cpu_online(ccpu)) {
>                 rq->csd.func = __blk_mq_complete_request_remote;
>                 rq->csd.info = rq;
>                 rq->csd.flags = 0;
> -               smp_call_function_single_async(ctx->cpu, &rq->csd);
> +               smp_call_function_single_async(ccpu, &rq->csd);

Interesting idea.
Not sure whether such programability makes sense from
block layer point of view.

From bpf side having a program with NULL input context is
a bit odd. We never had such things in the past, so this patchset
won't work as-is.
Also no-input means that the program choices are quite limited.
Other than round robin and random I cannot come up with other
cpu selection ideas.
I suggest to do writable tracepoint here instead.
Take a look at trace_nbd_send_request.
BPF prog can write into 'request'.
For your use case it will be able to write into 'bpf_ccpu' local variable.
If you keep it as raw tracepoint and don't add the actual tracepoint
with TP_STRUCT__entry and TP_fast_assign then it won't be abi
and you can change it later or remove it altogether.
Hannes Reinecke Oct. 16, 2019, 7:05 a.m. UTC | #2
On 10/15/19 11:04 PM, Alexei Starovoitov wrote:
> On Mon, Oct 14, 2019 at 5:21 AM Hou Tao <houtao1@huawei.com> wrote:
>>
>> For network stack, RPS, namely Receive Packet Steering, is used to
>> distribute network protocol processing from hardware-interrupted CPU
>> to specific CPUs and alleviating soft-irq load of the interrupted CPU.
>>
>> For block layer, soft-irq (for single queue device) or hard-irq
>> (for multiple queue device) is used to handle IO completion, so
>> RPS will be useful when the soft-irq load or the hard-irq load
>> of a specific CPU is too high, or a specific CPU set is required
>> to handle IO completion.
>>
>> Instead of setting the CPU set used for handling IO completion
>> through sysfs or procfs, we can attach an eBPF program to the
>> request-queue, provide some useful info (e.g., the CPU
>> which submits the request) to the program, and let the program
>> decides the proper CPU for IO completion handling.
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ...
>>
>> +       rcu_read_lock();
>> +       prog = rcu_dereference_protected(q->prog, 1);
>> +       if (prog)
>> +               bpf_ccpu = BPF_PROG_RUN(q->prog, NULL);
>> +       rcu_read_unlock();
>> +
>>         cpu = get_cpu();
>> -       if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
>> -               shared = cpus_share_cache(cpu, ctx->cpu);
>> +       if (bpf_ccpu < 0 || !cpu_online(bpf_ccpu)) {
>> +               ccpu = ctx->cpu;
>> +               if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
>> +                       shared = cpus_share_cache(cpu, ctx->cpu);
>> +       } else
>> +               ccpu = bpf_ccpu;
>>
>> -       if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
>> +       if (cpu != ccpu && !shared && cpu_online(ccpu)) {
>>                 rq->csd.func = __blk_mq_complete_request_remote;
>>                 rq->csd.info = rq;
>>                 rq->csd.flags = 0;
>> -               smp_call_function_single_async(ctx->cpu, &rq->csd);
>> +               smp_call_function_single_async(ccpu, &rq->csd);
> 
> Interesting idea.
> Not sure whether such programability makes sense from
> block layer point of view.
> 
> From bpf side having a program with NULL input context is
> a bit odd. We never had such things in the past, so this patchset
> won't work as-is.
> Also no-input means that the program choices are quite limited.
> Other than round robin and random I cannot come up with other
> cpu selection ideas.
> I suggest to do writable tracepoint here instead.
> Take a look at trace_nbd_send_request.
> BPF prog can write into 'request'.
> For your use case it will be able to write into 'bpf_ccpu' local variable.
> If you keep it as raw tracepoint and don't add the actual tracepoint
> with TP_STRUCT__entry and TP_fast_assign then it won't be abi
> and you can change it later or remove it altogether.
> 
That basically was my idea, too.

Actually I was coming from a different angle, namely trying to figure
out how we could do generic error injection in the block layer.
eBPF would be one way of doing it, kprobes another.

But writable trace events ... I'll have to check if we can leverage that
here, too.

Cheers,

Hannes
Hou Tao Oct. 21, 2019, 1:42 p.m. UTC | #3
Hi,

On 2019/10/16 5:04, Alexei Starovoitov wrote:
> On Mon, Oct 14, 2019 at 5:21 AM Hou Tao <houtao1@huawei.com> wrote:
>>
>> For network stack, RPS, namely Receive Packet Steering, is used to
>> distribute network protocol processing from hardware-interrupted CPU
>> to specific CPUs and alleviating soft-irq load of the interrupted CPU.
>>
>> For block layer, soft-irq (for single queue device) or hard-irq
>> (for multiple queue device) is used to handle IO completion, so
>> RPS will be useful when the soft-irq load or the hard-irq load
>> of a specific CPU is too high, or a specific CPU set is required
>> to handle IO completion.
>>
>> Instead of setting the CPU set used for handling IO completion
>> through sysfs or procfs, we can attach an eBPF program to the
>> request-queue, provide some useful info (e.g., the CPU
>> which submits the request) to the program, and let the program
>> decides the proper CPU for IO completion handling.
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ...
>>
>> +       rcu_read_lock();
>> +       prog = rcu_dereference_protected(q->prog, 1);
>> +       if (prog)
>> +               bpf_ccpu = BPF_PROG_RUN(q->prog, NULL);
>> +       rcu_read_unlock();
>> +
>>         cpu = get_cpu();
>> -       if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
>> -               shared = cpus_share_cache(cpu, ctx->cpu);
>> +       if (bpf_ccpu < 0 || !cpu_online(bpf_ccpu)) {
>> +               ccpu = ctx->cpu;
>> +               if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
>> +                       shared = cpus_share_cache(cpu, ctx->cpu);
>> +       } else
>> +               ccpu = bpf_ccpu;
>>
>> -       if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
>> +       if (cpu != ccpu && !shared && cpu_online(ccpu)) {
>>                 rq->csd.func = __blk_mq_complete_request_remote;
>>                 rq->csd.info = rq;
>>                 rq->csd.flags = 0;
>> -               smp_call_function_single_async(ctx->cpu, &rq->csd);
>> +               smp_call_function_single_async(ccpu, &rq->csd);
> 
> Interesting idea.
> Not sure whether such programability makes sense from
> block layer point of view.
> 
> From bpf side having a program with NULL input context is
> a bit odd. We never had such things in the past, so this patchset
> won't work as-is.
No, it just works.

> Also no-input means that the program choices are quite limited.
> Other than round robin and random I cannot come up with other
> cpu selection ideas.
> I suggest to do writable tracepoint here instead.
> Take a look at trace_nbd_send_request.
> BPF prog can write into 'request'.
> For your use case it will be able to write into 'bpf_ccpu' local variable.
> If you keep it as raw tracepoint and don't add the actual tracepoint
> with TP_STRUCT__entry and TP_fast_assign then it won't be abi
> and you can change it later or remove it altogether.
> 
Your suggestion is much simpler, so there will be no need to add a new
program type; all that needs to be done is adding a raw tracepoint,
moving bpf_ccpu into struct request, and letting a BPF program modify it.

I will try and thanks for your suggestions.

Regards,
Tao

> .
>
Bart Van Assche Oct. 21, 2019, 1:48 p.m. UTC | #4
On 10/21/19 6:42 AM, Hou Tao wrote:
> Your suggestion is much simpler, so there will be no need for adding a new
> program type, and all things need to be done are adding a raw tracepoint,
> moving bpf_ccpu into struct request, and letting a BPF program to modify it.

blk-mq already supports processing completions on the CPU that submitted
a request so it's not clear to me why any changes in the block layer are
being proposed for redirecting I/O completions?

Thanks,

Bart.
Jens Axboe Oct. 21, 2019, 2:45 p.m. UTC | #5
On 10/21/19 7:48 AM, Bart Van Assche wrote:
> On 10/21/19 6:42 AM, Hou Tao wrote:
>> Your suggestion is much simpler, so there will be no need for adding a new
>> program type, and all things need to be done are adding a raw tracepoint,
>> moving bpf_ccpu into struct request, and letting a BPF program to modify it.
> 
> blk-mq already supports processing completions on the CPU that submitted
> a request so it's not clear to me why any changes in the block layer are
> being proposed for redirecting I/O completions?

That's where I'm getting confused as well. I'm not against adding BPF
functionality to the block layer, but this one seems a bit contrived.
diff mbox series

Patch

diff --git a/block/Makefile b/block/Makefile
index 9ef57ace90d4..0adb0f655e8c 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@  obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o partition-generic.o ioprio.o \
-			badblocks.o partitions/ blk-rq-qos.o
+			badblocks.o partitions/ blk-rq-qos.o blk-bpf.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
diff --git a/block/blk-bpf.c b/block/blk-bpf.c
new file mode 100644
index 000000000000..d9e3b1caead4
--- /dev/null
+++ b/block/blk-bpf.c
@@ -0,0 +1,127 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Hou Tao <houtao1@huawei.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/fs.h>
+#include <linux/bpf_blkdev.h>
+#include <linux/blkdev.h>
+
+extern const struct file_operations def_blk_fops;
+
+static DEFINE_SPINLOCK(blkdev_bpf_lock);
+
+const struct bpf_prog_ops blkdev_prog_ops = {
+};
+
+static const struct bpf_func_proto *
+blkdev_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_get_numa_node_id:
+		return &bpf_get_numa_node_id_proto;
+	default:
+		return NULL;
+	}
+}
+
+const struct bpf_verifier_ops blkdev_verifier_ops = {
+	.get_func_proto = blkdev_prog_func_proto,
+};
+
+static struct request_queue *blkdev_rq_by_file(struct file *filp)
+{
+	struct block_device *bdev;
+
+	if (filp->f_op != &def_blk_fops)
+		return ERR_PTR(-EINVAL);
+
+	bdev = I_BDEV(filp->f_mapping->host);
+
+	return bdev->bd_queue;
+}
+
+int blkdev_bpf_prog_attach(const union bpf_attr *attr,
+		enum bpf_prog_type ptype, struct bpf_prog *prog)
+{
+	int ret = 0;
+	struct file *filp;
+	struct request_queue *rq;
+
+	filp = fget(attr->target_fd);
+	if (!filp) {
+		ret = -EINVAL;
+		goto fget_err;
+	}
+
+	rq = blkdev_rq_by_file(filp);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto to_rq_err;
+	}
+
+	spin_lock(&blkdev_bpf_lock);
+	if (rq->prog) {
+		ret = -EBUSY;
+		goto set_prog_err;
+	}
+
+	rcu_assign_pointer(rq->prog, prog);
+
+set_prog_err:
+	spin_unlock(&blkdev_bpf_lock);
+to_rq_err:
+	fput(filp);
+fget_err:
+	return ret;
+}
+
+int blkdev_bpf_prog_detach(const union bpf_attr *attr)
+{
+	int ret = 0;
+	struct file *filp;
+	struct request_queue *rq;
+	struct bpf_prog *old_prog;
+
+	filp = fget(attr->target_fd);
+	if (!filp) {
+		ret = -EINVAL;
+		goto fget_err;
+	}
+
+	rq = blkdev_rq_by_file(filp);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto to_rq_err;
+	}
+
+	old_prog = NULL;
+	spin_lock(&blkdev_bpf_lock);
+	if (!rq->prog) {
+		ret = -ENODATA;
+		goto clr_prog_err;
+	}
+	rcu_swap_protected(rq->prog, old_prog, 1);
+
+clr_prog_err:
+	spin_unlock(&blkdev_bpf_lock);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+to_rq_err:
+	fput(filp);
+fget_err:
+	return ret;
+}
+
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 20a49be536b5..5ac6fe6dbcd0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -26,6 +26,7 @@ 
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/prefetch.h>
+#include <linux/filter.h>
 
 #include <trace/events/block.h>
 
@@ -584,6 +585,9 @@  static void __blk_mq_complete_request(struct request *rq)
 	struct request_queue *q = rq->q;
 	bool shared = false;
 	int cpu;
+	int ccpu;
+	int bpf_ccpu = -1;
+	struct bpf_prog *prog;
 
 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 	/*
@@ -610,15 +614,25 @@  static void __blk_mq_complete_request(struct request *rq)
 		return;
 	}
 
+	rcu_read_lock();
+	prog = rcu_dereference_protected(q->prog, 1);
+	if (prog)
+		bpf_ccpu = BPF_PROG_RUN(q->prog, NULL);
+	rcu_read_unlock();
+
 	cpu = get_cpu();
-	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-		shared = cpus_share_cache(cpu, ctx->cpu);
+	if (bpf_ccpu < 0 || !cpu_online(bpf_ccpu)) {
+		ccpu = ctx->cpu;
+		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
+			shared = cpus_share_cache(cpu, ctx->cpu);
+	} else
+		ccpu = bpf_ccpu;
 
-	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+	if (cpu != ccpu && !shared && cpu_online(ccpu)) {
 		rq->csd.func = __blk_mq_complete_request_remote;
 		rq->csd.info = rq;
 		rq->csd.flags = 0;
-		smp_call_function_single_async(ctx->cpu, &rq->csd);
+		smp_call_function_single_async(ccpu, &rq->csd);
 	} else {
 		q->mq_ops->complete(rq);
 	}
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 457d9ba3eb20..1139a5352a59 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -11,6 +11,7 @@ 
 #include <linux/cpu.h>
 #include <linux/sched.h>
 #include <linux/sched/topology.h>
+#include <linux/filter.h>
 
 #include "blk.h"
 
@@ -101,20 +102,32 @@  void __blk_complete_request(struct request *req)
 	int cpu, ccpu = req->mq_ctx->cpu;
 	unsigned long flags;
 	bool shared = false;
+	int bpf_ccpu = -1;
+	struct bpf_prog *prog;
 
 	BUG_ON(!q->mq_ops->complete);
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
+	rcu_read_lock();
+	prog = rcu_dereference_protected(q->prog, 1);
+	if (prog)
+		bpf_ccpu = BPF_PROG_RUN(q->prog, NULL);
+	rcu_read_unlock();
 
 	/*
-	 * Select completion CPU
+	 * Select completion CPU.
+	 * If a valid CPU number is returned by eBPF program, use it directly.
 	 */
-	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
-		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-			shared = cpus_share_cache(cpu, ccpu);
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (bpf_ccpu < 0 || !cpu_online(bpf_ccpu)) {
+		if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) &&
+			ccpu != -1) {
+			if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
+				shared = cpus_share_cache(cpu, ccpu);
+		} else
+			ccpu = cpu;
 	} else
-		ccpu = cpu;
+		ccpu = bpf_ccpu;
 
 	/*
 	 * If current CPU and requested CPU share a cache, run the softirq on
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d9db32fb75ee..849589c3c51c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -397,6 +397,8 @@  static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
 
 #endif /* CONFIG_BLK_DEV_ZONED */
 
+struct bpf_prog;
+
 struct request_queue {
 	struct request		*last_merge;
 	struct elevator_queue	*elevator;
@@ -590,6 +592,7 @@  struct request_queue {
 
 #define BLK_MAX_WRITE_HINTS	5
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
+	struct bpf_prog __rcu *prog;
 };
 
 #define QUEUE_FLAG_STOPPED	0	/* queue is stopped */
diff --git a/include/linux/bpf_blkdev.h b/include/linux/bpf_blkdev.h
new file mode 100644
index 000000000000..0777428bc6e2
--- /dev/null
+++ b/include/linux/bpf_blkdev.h
@@ -0,0 +1,9 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_BLKDEV_H__
+#define __BPF_BLKDEV_H__
+
+extern int blkdev_bpf_prog_attach(const union bpf_attr *attr,
+		enum bpf_prog_type ptype, struct bpf_prog *prog);
+extern int blkdev_bpf_prog_detach(const union bpf_attr *attr);
+
+#endif /* !__BPF_BLKDEV_H__ */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 36a9c2325176..008facd336e5 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -38,6 +38,7 @@  BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_BLKDEV, blkdev)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77c6be96d676..36aa35e29be2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+	BPF_PROG_TYPE_BLKDEV,
 };
 
 enum bpf_attach_type {
@@ -199,6 +200,7 @@  enum bpf_attach_type {
 	BPF_CGROUP_UDP6_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
+	BPF_BLKDEV_IOC_CPU,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 82eabd4e38ad..9724c0809f21 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4,6 +4,7 @@ 
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
+#include <linux/bpf_blkdev.h>
 #include <linux/btf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
@@ -1942,6 +1943,9 @@  static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_SETSOCKOPT:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
 		break;
+	case BPF_BLKDEV_IOC_CPU:
+		ptype = BPF_PROG_TYPE_BLKDEV;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1966,6 +1970,9 @@  static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_BLKDEV:
+		ret = blkdev_bpf_prog_attach(attr, ptype, prog);
+		break;
 	default:
 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 	}
@@ -2029,6 +2036,8 @@  static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_SETSOCKOPT:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
 		break;
+	case BPF_BLKDEV_IOC_CPU:
+		return blkdev_bpf_prog_detach(attr);
 	default:
 		return -EINVAL;
 	}