
[UBLKSRV] Add ebpf support.

Message ID 20230215004618.35503-1-xiaoguang.wang@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: BPF
Series [UBLKSRV] Add ebpf support.

Checks

Context Check Description
netdev/tree_selection success Not a local patch, async
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16

Commit Message

Xiaoguang Wang Feb. 15, 2023, 12:46 a.m. UTC
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
---
 bpf/ublk.bpf.c         | 168 +++++++++++++++++++++++++++++++++++++++++
 include/ublk_cmd.h     |   2 +
 include/ublksrv.h      |   8 ++
 include/ublksrv_priv.h |   1 +
 include/ublksrv_tgt.h  |   1 +
 lib/ublksrv.c          |   4 +
 lib/ublksrv_cmd.c      |  21 ++++++
 tgt_loop.cpp           |  31 +++++++-
 ublksrv_tgt.cpp        |  33 ++++++++
 9 files changed, 268 insertions(+), 1 deletion(-)
 create mode 100644 bpf/ublk.bpf.c

Comments

Ming Lei Feb. 16, 2023, 8:28 a.m. UTC | #1
On Wed, Feb 15, 2023 at 08:46:18AM +0800, Xiaoguang Wang wrote:
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
> ---
>  bpf/ublk.bpf.c         | 168 +++++++++++++++++++++++++++++++++++++++++
>  include/ublk_cmd.h     |   2 +
>  include/ublksrv.h      |   8 ++
>  include/ublksrv_priv.h |   1 +
>  include/ublksrv_tgt.h  |   1 +
>  lib/ublksrv.c          |   4 +
>  lib/ublksrv_cmd.c      |  21 ++++++
>  tgt_loop.cpp           |  31 +++++++-
>  ublksrv_tgt.cpp        |  33 ++++++++
>  9 files changed, 268 insertions(+), 1 deletion(-)
>  create mode 100644 bpf/ublk.bpf.c
> 
> diff --git a/bpf/ublk.bpf.c b/bpf/ublk.bpf.c
> new file mode 100644
> index 0000000..80e79de
> --- /dev/null
> +++ b/bpf/ublk.bpf.c
> @@ -0,0 +1,168 @@
> +#include "vmlinux.h"

Where is vmlinux.h?

[...]

> diff --git a/tgt_loop.cpp b/tgt_loop.cpp
> index 79a65d3..b1568fe 100644
> --- a/tgt_loop.cpp
> +++ b/tgt_loop.cpp
> @@ -4,7 +4,11 @@
>  
>  #include <poll.h>
>  #include <sys/epoll.h>
> +#include <linux/bpf.h>
> +#include <bpf/bpf.h>
> +#include <bpf/libbpf.h>
>  #include "ublksrv_tgt.h"
> +#include "bpf/.tmp/ublk.skel.h"

Where is bpf/.tmp/ublk.skel.h?

[...]

> diff --git a/ublksrv_tgt.cpp b/ublksrv_tgt.cpp
> index 5ed328d..d3796cf 100644
> --- a/ublksrv_tgt.cpp
> +++ b/ublksrv_tgt.cpp
> @@ -2,6 +2,7 @@
>  
>  #include "config.h"
>  #include "ublksrv_tgt.h"
> +#include "bpf/.tmp/ublk.skel.h"

Same with above


Thanks, 
Ming
Xiaoguang Wang Feb. 16, 2023, 9:17 a.m. UTC | #2
hello,

> On Wed, Feb 15, 2023 at 08:46:18AM +0800, Xiaoguang Wang wrote:
>> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
>> ---
>>  bpf/ublk.bpf.c         | 168 +++++++++++++++++++++++++++++++++++++++++
>>  include/ublk_cmd.h     |   2 +
>>  include/ublksrv.h      |   8 ++
>>  include/ublksrv_priv.h |   1 +
>>  include/ublksrv_tgt.h  |   1 +
>>  lib/ublksrv.c          |   4 +
>>  lib/ublksrv_cmd.c      |  21 ++++++
>>  tgt_loop.cpp           |  31 +++++++-
>>  ublksrv_tgt.cpp        |  33 ++++++++
>>  9 files changed, 268 insertions(+), 1 deletion(-)
>>  create mode 100644 bpf/ublk.bpf.c
>>
>> diff --git a/bpf/ublk.bpf.c b/bpf/ublk.bpf.c
>> new file mode 100644
>> index 0000000..80e79de
>> --- /dev/null
>> +++ b/bpf/ublk.bpf.c
>> @@ -0,0 +1,168 @@
>> +#include "vmlinux.h"
> Where is vmlinux.h?
Sorry, I forgot to attach the Makefile in this commit, which shows
how to generate vmlinux.h and how to compile the ebpf prog object.
I'll prepare a v2 patch set to fix this issue soon.
Thanks for the review.

Regards,
Xiaoguang Wang
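
For reference, vmlinux.h is normally dumped from the running kernel's BTF
with bpftool, and ublk.skel.h is the libbpf skeleton generated from the
compiled BPF object. A minimal sketch of the steps such a Makefile would
drive, assuming clang, bpftool and the libbpf headers are installed, and
using the bpf/.tmp paths that the includes quoted above expect (the actual
rules will only appear in the promised v2):

  # Sketch only; paths and flags are assumptions based on the includes quoted above.
  mkdir -p bpf/.tmp
  # Needs a kernel built with CONFIG_DEBUG_INFO_BTF=y
  bpftool btf dump file /sys/kernel/btf/vmlinux format c > bpf/.tmp/vmlinux.h
  # -I lets '#include "vmlinux.h"' in bpf/ublk.bpf.c resolve
  clang -g -O2 -target bpf -Ibpf/.tmp -c bpf/ublk.bpf.c -o bpf/.tmp/ublk.bpf.o
  # Skeleton header consumed by tgt_loop.cpp and ublksrv_tgt.cpp
  bpftool gen skeleton bpf/.tmp/ublk.bpf.o name ublk_bpf > bpf/.tmp/ublk.skel.h

The "name ublk_bpf" argument keeps the generated ublk_bpf__open()/ublk_bpf__load()
symbols consistent with what ublksrv_tgt_load_bpf_prog() in the patch calls.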


Patch

diff --git a/bpf/ublk.bpf.c b/bpf/ublk.bpf.c
new file mode 100644
index 0000000..80e79de
--- /dev/null
+++ b/bpf/ublk.bpf.c
@@ -0,0 +1,168 @@ 
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+
+static long (*bpf_ublk_queue_sqe)(void *ctx, struct io_uring_sqe *sqe,
+		u32 sqe_len, u32 fd) = (void *) 212;
+
+int target_fd = -1;
+
+struct sqe_key {
+	u16 q_id;
+	u16 tag;
+	u32 res;
+	u64 offset;
+};
+
+struct sqe_data {
+	char data[128];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 8192);
+	__type(key, struct sqe_key);
+	__type(value, struct sqe_data);
+} sqes_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 128);
+	__type(key, int);
+	__type(value, int);
+} uring_fd_map SEC(".maps");
+
+static inline void io_uring_prep_rw(__u8 op, struct io_uring_sqe *sqe, int fd,
+				    const void *addr, unsigned len,
+				    __u64 offset)
+{
+	sqe->opcode = op;
+	sqe->flags = 0;
+	sqe->ioprio = 0;
+	sqe->fd = fd;
+	sqe->off = offset;
+	sqe->addr = (unsigned long) addr;
+	sqe->len = len;
+	sqe->fsync_flags = 0;
+	sqe->buf_index = 0;
+	sqe->personality = 0;
+	sqe->splice_fd_in = 0;
+	sqe->addr3 = 0;
+	sqe->__pad2[0] = 0;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, 0, 0, 0);
+}
+
+static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
+			void *buf, unsigned nbytes, off_t offset)
+{
+	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
+	const void *buf, unsigned nbytes, off_t offset)
+{
+	io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
+}
+
+/*
+static u64 submit_sqe(struct bpf_map *map, void *key, void *value, void *data)
+{
+	struct io_uring_sqe *sqe = (struct io_uring_sqe *)value;
+	struct ublk_bpf_ctx *ctx = ((struct callback_ctx *)data)->ctx;
+	struct sqe_key *skey = (struct sqe_key *)key;
+	char fmt[] ="submit sqe for req[qid:%u tag:%u]\n";
+	char fmt2[] ="submit sqe test prep\n";
+	u16 qid, tag;
+	int q_id = skey->q_id, *ring_fd;
+
+	bpf_trace_printk(fmt2, sizeof(fmt2));
+	ring_fd = bpf_map_lookup_elem(&uring_fd_map, &q_id);
+	if (ring_fd) {
+		bpf_trace_printk(fmt, sizeof(fmt), qid, skey->tag);
+		bpf_ublk_queue_sqe(ctx, sqe, 128, *ring_fd);
+		bpf_map_delete_elem(map, key);
+	}
+	return 0;
+}
+*/
+
+static inline __u64 build_user_data(unsigned tag, unsigned op,
+			unsigned tgt_data, unsigned is_target_io,
+			unsigned is_bpf_io)
+{
+	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63 |
+		(__u64)is_bpf_io << 60;
+}
+
+SEC("ublk.s/")
+int ublk_io_prep_prog(struct ublk_bpf_ctx *ctx)
+{
+	struct io_uring_sqe *sqe;
+	struct sqe_data sd = {0};
+	struct sqe_key key;
+	u16 q_id = ctx->q_id;
+	u8 op; // = ctx->op;
+	u32 nr_sectors = ctx->nr_sectors;
+	u64 start_sector = ctx->start_sector;
+	char fmt_1[] ="ublk_io_prep_prog %d %d\n";
+
+	key.q_id = ctx->q_id;
+	key.tag = ctx->tag;
+	key.offset = 0;
+	key.res = 0;
+
+	bpf_probe_read_kernel(&op, 1, &ctx->op);
+	bpf_trace_printk(fmt_1, sizeof(fmt_1), q_id, op);
+	sqe = (struct io_uring_sqe *)&sd;
+	if (op == REQ_OP_READ) {
+		char fmt[] ="add read sqe\n";
+
+		bpf_trace_printk(fmt, sizeof(fmt));
+		io_uring_prep_read(sqe, target_fd, 0, nr_sectors << 9,
+				   start_sector << 9);
+		sqe->user_data = build_user_data(ctx->tag, op, 0, 1, 1);
+		bpf_map_update_elem(&sqes_map, &key, &sd, BPF_NOEXIST);
+	} else if (op == REQ_OP_WRITE) {
+		char fmt[] ="add write sqe\n";
+
+		bpf_trace_printk(fmt, sizeof(fmt));
+
+		io_uring_prep_write(sqe, target_fd, 0, nr_sectors << 9,
+				    start_sector << 9);
+		sqe->user_data = build_user_data(ctx->tag, op, 0, 1, 1);
+		bpf_map_update_elem(&sqes_map, &key, &sd, BPF_NOEXIST);
+	} else {
+		;
+	}
+	return 0;
+}
+
+SEC("ublk.s/")
+int ublk_io_submit_prog(struct ublk_bpf_ctx *ctx)
+{
+	struct io_uring_sqe *sqe;
+	char fmt[] ="submit sqe for req[qid:%u tag:%u]\n";
+	int q_id = ctx->q_id, *ring_fd;
+	struct sqe_key key;
+
+	key.q_id = ctx->q_id;
+	key.tag = ctx->tag;
+	key.offset = 0;
+	key.res = 0;
+
+	sqe = bpf_map_lookup_elem(&sqes_map, &key);
+	ring_fd = bpf_map_lookup_elem(&uring_fd_map, &q_id);
+	if (ring_fd) {
+		bpf_trace_printk(fmt, sizeof(fmt), key.q_id, key.tag);
+		bpf_ublk_queue_sqe(ctx, sqe, 128, *ring_fd);
+		bpf_map_delete_elem(&sqes_map, &key);
+	}
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/include/ublk_cmd.h b/include/ublk_cmd.h
index f6238cc..893ba8c 100644
--- a/include/ublk_cmd.h
+++ b/include/ublk_cmd.h
@@ -17,6 +17,8 @@ 
 #define	UBLK_CMD_STOP_DEV	0x07
 #define	UBLK_CMD_SET_PARAMS	0x08
 #define	UBLK_CMD_GET_PARAMS	0x09
+#define UBLK_CMD_REG_BPF_PROG		0x0a
+#define UBLK_CMD_UNREG_BPF_PROG		0x0b
 #define	UBLK_CMD_START_USER_RECOVERY	0x10
 #define	UBLK_CMD_END_USER_RECOVERY	0x11
 #define	UBLK_CMD_GET_DEV_INFO2		0x12
diff --git a/include/ublksrv.h b/include/ublksrv.h
index d38bd46..f5deddb 100644
--- a/include/ublksrv.h
+++ b/include/ublksrv.h
@@ -106,6 +106,7 @@  struct ublksrv_tgt_info {
 	unsigned int nr_fds;
 	int fds[UBLKSRV_TGT_MAX_FDS];
 	void *tgt_data;
+	void *tgt_bpf_obj;
 
 	/*
 	 * Extra IO slots for each queue, target code can reserve some
@@ -263,6 +264,8 @@  struct ublksrv_tgt_type {
 	int (*init_queue)(const struct ublksrv_queue *, void **queue_data_ptr);
 	void (*deinit_queue)(const struct ublksrv_queue *);
 
+	int (*init_queue_bpf)(const struct ublksrv_dev *dev, const struct ublksrv_queue *q);
+
 	unsigned long reserved[5];
 };
 
@@ -318,6 +321,11 @@  extern void ublksrv_ctrl_prep_recovery(struct ublksrv_ctrl_dev *dev,
 		const char *recovery_jbuf);
 extern const char *ublksrv_ctrl_get_recovery_jbuf(const struct ublksrv_ctrl_dev *dev);
 
+extern void ublksrv_ctrl_set_bpf_obj_info(struct ublksrv_ctrl_dev *dev,
+					  void *obj);
+extern int ublksrv_ctrl_reg_bpf_prog(struct ublksrv_ctrl_dev *dev,
+				     int io_prep_fd, int io_submit_fd);
+
 /* ublksrv device ("/dev/ublkcN") level APIs */
 extern const struct ublksrv_dev *ublksrv_dev_init(const struct ublksrv_ctrl_dev *
 		ctrl_dev);
diff --git a/include/ublksrv_priv.h b/include/ublksrv_priv.h
index 2996baa..8da8866 100644
--- a/include/ublksrv_priv.h
+++ b/include/ublksrv_priv.h
@@ -42,6 +42,7 @@  struct ublksrv_ctrl_dev {
 
 	const char *tgt_type;
 	const struct ublksrv_tgt_type *tgt_ops;
+	void *bpf_obj;
 
 	/*
 	 * default is UBLKSRV_RUN_DIR but can be specified via command line,
diff --git a/include/ublksrv_tgt.h b/include/ublksrv_tgt.h
index 234d31e..e0db7d9 100644
--- a/include/ublksrv_tgt.h
+++ b/include/ublksrv_tgt.h
@@ -9,6 +9,7 @@ 
 #include <getopt.h>
 #include <string.h>
 #include <stdarg.h>
+#include <limits.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
diff --git a/lib/ublksrv.c b/lib/ublksrv.c
index 96bed95..110ccb3 100644
--- a/lib/ublksrv.c
+++ b/lib/ublksrv.c
@@ -603,6 +603,9 @@  skip_alloc_buf:
 		goto fail;
 	}
 
+	if (dev->tgt.ops->init_queue_bpf)
+		dev->tgt.ops->init_queue_bpf(tdev, local_to_tq(q));
+
 	ublksrv_dev_init_io_cmds(dev, q);
 
 	/*
@@ -723,6 +726,7 @@  const struct ublksrv_dev *ublksrv_dev_init(const struct ublksrv_ctrl_dev *ctrl_d
 	}
 
 	tgt->fds[0] = dev->cdev_fd;
+	tgt->tgt_bpf_obj = ctrl_dev->bpf_obj;
 
 	ret = ublksrv_tgt_init(dev, ctrl_dev->tgt_type, ctrl_dev->tgt_ops,
 			ctrl_dev->tgt_argc, ctrl_dev->tgt_argv);
diff --git a/lib/ublksrv_cmd.c b/lib/ublksrv_cmd.c
index 0d7265d..0101cb9 100644
--- a/lib/ublksrv_cmd.c
+++ b/lib/ublksrv_cmd.c
@@ -502,6 +502,27 @@  int ublksrv_ctrl_end_recovery(struct ublksrv_ctrl_dev *dev, int daemon_pid)
 	return ret;
 }
 
+int ublksrv_ctrl_reg_bpf_prog(struct ublksrv_ctrl_dev *dev,
+			      int io_prep_fd, int io_submit_fd)
+{
+	struct ublksrv_ctrl_cmd_data data = {
+		.cmd_op = UBLK_CMD_REG_BPF_PROG,
+		.flags = CTRL_CMD_HAS_DATA,
+	};
+	int ret;
+
+	data.data[0] = io_prep_fd;
+	data.data[1] = io_submit_fd;
+
+	ret = __ublksrv_ctrl_cmd(dev, &data);
+	return ret;
+}
+
+void ublksrv_ctrl_set_bpf_obj_info(struct ublksrv_ctrl_dev *dev,  void *obj)
+{
+	dev->bpf_obj = obj;
+}
+
 const struct ublksrv_ctrl_dev_info *ublksrv_ctrl_get_dev_info(
 		const struct ublksrv_ctrl_dev *dev)
 {
diff --git a/tgt_loop.cpp b/tgt_loop.cpp
index 79a65d3..b1568fe 100644
--- a/tgt_loop.cpp
+++ b/tgt_loop.cpp
@@ -4,7 +4,11 @@ 
 
 #include <poll.h>
 #include <sys/epoll.h>
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include "ublksrv_tgt.h"
+#include "bpf/.tmp/ublk.skel.h"
 
 static bool backing_supports_discard(char *name)
 {
@@ -88,6 +92,20 @@  static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
 	return 0;
 }
 
+static int loop_init_queue_bpf(const struct ublksrv_dev *dev,
+			       const struct ublksrv_queue *q)
+{
+	int ret, q_id, ring_fd;
+	const struct ublksrv_tgt_info *tgt = &dev->tgt;
+	struct ublk_bpf *obj = (struct ublk_bpf*)tgt->tgt_bpf_obj;
+
+	q_id = q->q_id;
+	ring_fd = q->ring_ptr->ring_fd;
+	ret = bpf_map_update_elem(bpf_map__fd(obj->maps.uring_fd_map), &q_id,
+				  &ring_fd,  0);
+	return ret;
+}
+
 static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
 		*argv[])
 {
@@ -125,6 +143,7 @@  static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
 		},
 	};
 	bool can_discard = false;
+	struct ublk_bpf *bpf_obj;
 
 	strcpy(tgt_json.name, "loop");
 
@@ -218,6 +237,10 @@  static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
 			jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
 	} while (ret < 0);
 
+	if (tgt->tgt_bpf_obj) {
+		bpf_obj = (struct ublk_bpf *)tgt->tgt_bpf_obj;
+		bpf_obj->data->target_fd = tgt->fds[1];
+	}
 	return 0;
 }
 
@@ -252,9 +275,14 @@  static int loop_queue_tgt_io(const struct ublksrv_queue *q,
 		const struct ublk_io_data *data, int tag)
 {
 	const struct ublksrv_io_desc *iod = data->iod;
-	struct io_uring_sqe *sqe = io_uring_get_sqe(q->ring_ptr);
+	struct io_uring_sqe *sqe;
 	unsigned ublk_op = ublksrv_get_op(iod);
 
+	/* ebpf prog will handle read/write requests. */
+	if ((ublk_op == UBLK_IO_OP_READ) || (ublk_op == UBLK_IO_OP_WRITE))
+		return 1;
+
+	sqe = io_uring_get_sqe(q->ring_ptr);
 	if (!sqe)
 		return 0;
 
@@ -374,6 +402,7 @@  struct ublksrv_tgt_type  loop_tgt_type = {
 	.type	= UBLKSRV_TGT_TYPE_LOOP,
 	.name	=  "loop",
 	.recovery_tgt = loop_recovery_tgt,
+	.init_queue_bpf = loop_init_queue_bpf,
 };
 
 static void tgt_loop_init() __attribute__((constructor));
diff --git a/ublksrv_tgt.cpp b/ublksrv_tgt.cpp
index 5ed328d..d3796cf 100644
--- a/ublksrv_tgt.cpp
+++ b/ublksrv_tgt.cpp
@@ -2,6 +2,7 @@ 
 
 #include "config.h"
 #include "ublksrv_tgt.h"
+#include "bpf/.tmp/ublk.skel.h"
 
 /* per-task variable */
 static pthread_mutex_t jbuf_lock;
@@ -575,6 +576,31 @@  static void ublksrv_tgt_set_params(struct ublksrv_ctrl_dev *cdev,
 	}
 }
 
+static int ublksrv_tgt_load_bpf_prog(struct ublksrv_ctrl_dev *cdev)
+{
+	struct ublk_bpf *obj;
+	int ret, io_prep_fd, io_submit_fd;
+
+	obj = ublk_bpf__open();
+	if (!obj) {
+		fprintf(stderr, "failed to open BPF object\n");
+		return -1;
+	}
+	ret = ublk_bpf__load(obj);
+	if (ret) {
+		fprintf(stderr, "failed to load BPF object\n");
+		return -1;
+	}
+
+
+	io_prep_fd = bpf_program__fd(obj->progs.ublk_io_prep_prog);
+	io_submit_fd = bpf_program__fd(obj->progs.ublk_io_submit_prog);
+	ret = ublksrv_ctrl_reg_bpf_prog(cdev, io_prep_fd, io_submit_fd);
+	if (!ret)
+		ublksrv_ctrl_set_bpf_obj_info(cdev, obj);
+	return ret;
+}
+
 static int cmd_dev_add(int argc, char *argv[])
 {
 	static const struct option longopts[] = {
@@ -696,6 +722,13 @@  static int cmd_dev_add(int argc, char *argv[])
 		goto fail;
 	}
 
+	ret = ublksrv_tgt_load_bpf_prog(dev);
+	if (ret < 0) {
+		fprintf(stderr, "dev %d load bpf prog failed, ret %d\n",
+			data.dev_id, ret);
+		goto fail_stop_daemon;
+	}
+
 	{
 		const struct ublksrv_ctrl_dev_info *info =
 			ublksrv_ctrl_get_dev_info(dev);