@@ -5,6 +5,6 @@ ccflags-y += -I$(src)
ublk_drv-$(CONFIG_BLK_DEV_UBLK) := main.o
ifeq ($(CONFIG_UBLK_BPF), y)
-ublk_drv-$(CONFIG_BLK_DEV_UBLK) += bpf_ops.o bpf.o
+ublk_drv-$(CONFIG_BLK_DEV_UBLK) += bpf_ops.o bpf.o bpf_aio.o
endif
obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o
@@ -155,8 +155,23 @@ BTF_ID_FLAGS(func, ublk_bpf_get_iod, KF_TRUSTED_ARGS | KF_RET_NULL)
BTF_ID_FLAGS(func, ublk_bpf_get_io_tag, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, ublk_bpf_get_queue_id, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, ublk_bpf_get_dev_id, KF_TRUSTED_ARGS)
+
+/* bpf aio kfuncs */
+BTF_ID_FLAGS(func, bpf_aio_alloc, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_aio_alloc_sleepable, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_aio_release)
+BTF_ID_FLAGS(func, bpf_aio_submit)
BTF_KFUNCS_END(ublk_bpf_kfunc_ids)
+__bpf_kfunc void bpf_aio_release_dtor(void *aio)
+{
+ bpf_aio_release(aio);
+}
+CFI_NOSEAL(bpf_aio_release_dtor);
+BTF_ID_LIST(bpf_aio_dtor_ids)
+BTF_ID(struct, bpf_aio)
+BTF_ID(func, bpf_aio_release_dtor)
+
static const struct btf_kfunc_id_set ublk_bpf_kfunc_set = {
.owner = THIS_MODULE,
.set = &ublk_bpf_kfunc_ids,
@@ -164,6 +179,12 @@ static const struct btf_kfunc_id_set ublk_bpf_kfunc_set = {
int __init ublk_bpf_init(void)
{
+ const struct btf_id_dtor_kfunc aio_dtors[] = {
+ {
+ .btf_id = bpf_aio_dtor_ids[0],
+ .kfunc_btf_id = bpf_aio_dtor_ids[1]
+ },
+ };
int err;
err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
@@ -172,5 +193,22 @@ int __init ublk_bpf_init(void)
pr_warn("error while setting UBLK BPF tracing kfuncs: %d", err);
return err;
}
- return ublk_bpf_struct_ops_init();
+
+ err = ublk_bpf_struct_ops_init();
+ if (err) {
+ pr_warn("error while initializing ublk bpf struct_ops: %d", err);
+ return err;
+ }
+
+ err = register_btf_id_dtor_kfuncs(aio_dtors, ARRAY_SIZE(aio_dtors),
+ THIS_MODULE);
+ if (err) {
+ pr_warn("error while registering aio destructor: %d", err);
+ return err;
+ }
+
+ err = bpf_aio_init();
+ if (err)
+ pr_warn("error while initializing bpf aio kfunc: %d", err);
+ return err;
}
@@ -3,6 +3,7 @@
#define UBLK_INT_BPF_HEADER
#include "bpf_reg.h"
+#include "bpf_aio.h"
typedef unsigned long ublk_bpf_return_t;
typedef ublk_bpf_return_t (*queue_io_cmd_t)(struct ublk_bpf_io *io, unsigned int);
new file mode 100644
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Red Hat */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <linux/bpf_mem_alloc.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <linux/filter.h>
+
+#include "bpf_aio.h"
+
+static int __bpf_aio_submit(struct bpf_aio *aio);
+
+static struct kmem_cache *bpf_aio_cachep;
+static struct kmem_cache *bpf_aio_work_cachep;
+static struct workqueue_struct *bpf_aio_wq;
+
+static inline bool bpf_aio_is_rw(int op)
+{
+ return op == BPF_AIO_OP_FS_READ || op == BPF_AIO_OP_FS_WRITE;
+}
+
+/* check if this is a short read */
+static bool bpf_aio_is_short_read(const struct bpf_aio *aio, long ret)
+{
+ return ret >= 0 && ret < aio->bytes &&
+ bpf_aio_get_op(aio) == BPF_AIO_OP_FS_READ;
+}
+
+/* zero the remaining bytes from `off` to the end */
+static void bpf_aio_zero_remained(const struct bpf_aio *aio, long off)
+{
+ struct iov_iter iter;
+
+ iov_iter_bvec(&iter, ITER_DEST, aio->buf.bvec, aio->buf.nr_bvec, aio->bytes);
+ iter.iov_offset = aio->buf.bvec_off;
+
+ iov_iter_advance(&iter, off);
+ iov_iter_zero(aio->bytes - off, &iter);
+}
+
+static void bpf_aio_do_completion(struct bpf_aio *aio)
+{
+ if (aio->iocb.ki_filp)
+ fput(aio->iocb.ki_filp);
+ if (aio->work)
+ kmem_cache_free(bpf_aio_work_cachep, aio->work);
+}
+
+/* ->ki_complete callback */
+static void bpf_aio_complete(struct kiocb *iocb, long ret)
+{
+ struct bpf_aio *aio = container_of(iocb, struct bpf_aio, iocb);
+
+ if (unlikely(ret == -EAGAIN)) {
+ aio->opf |= BPF_AIO_FORCE_WQ;
+ ret = __bpf_aio_submit(aio);
+ if (!ret)
+ return;
+ }
+
+ /* zero the remaining bytes in case of a short read */
+ if (bpf_aio_is_short_read(aio, ret))
+ bpf_aio_zero_remained(aio, ret);
+
+ bpf_aio_do_completion(aio);
+ aio->ops->bpf_aio_complete_cb(aio, ret);
+}
+
+static void bpf_aio_prep_rw(struct bpf_aio *aio, unsigned int rw,
+ struct iov_iter *iter)
+{
+ iov_iter_bvec(iter, rw, aio->buf.bvec, aio->buf.nr_bvec, aio->bytes);
+ iter->iov_offset = aio->buf.bvec_off;
+
+ if (unlikely(aio->opf & BPF_AIO_FORCE_WQ)) {
+ aio->iocb.ki_flags &= ~IOCB_NOWAIT;
+ aio->iocb.ki_complete = NULL;
+ } else {
+ aio->iocb.ki_flags |= IOCB_NOWAIT;
+ aio->iocb.ki_complete = bpf_aio_complete;
+ }
+}
+
+static int bpf_aio_do_submit(struct bpf_aio *aio)
+{
+ int op = bpf_aio_get_op(aio);
+ struct iov_iter iter;
+ struct file *file = aio->iocb.ki_filp;
+ int ret;
+
+ switch (op) {
+ case BPF_AIO_OP_FS_READ:
+ bpf_aio_prep_rw(aio, ITER_DEST, &iter);
+ if (file->f_op->read_iter)
+ ret = file->f_op->read_iter(&aio->iocb, &iter);
+ else
+ ret = -EOPNOTSUPP;
+ break;
+ case BPF_AIO_OP_FS_WRITE:
+ bpf_aio_prep_rw(aio, ITER_SOURCE, &iter);
+ if (file->f_op->write_iter)
+ ret = file->f_op->write_iter(&aio->iocb, &iter);
+ else
+ ret = -EOPNOTSUPP;
+ break;
+ case BPF_AIO_OP_FS_FSYNC:
+ ret = vfs_fsync_range(aio->iocb.ki_filp, aio->iocb.ki_pos,
+ aio->iocb.ki_pos + aio->bytes - 1, 0);
+ if (unlikely(ret && ret != -EINVAL))
+ ret = -EIO;
+ break;
+ case BPF_AIO_OP_FS_FALLOCATE:
+ ret = vfs_fallocate(aio->iocb.ki_filp, aio->iocb.ki_flags,
+ aio->iocb.ki_pos, aio->bytes);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret == -EIOCBQUEUED) {
+ ret = 0;
+ } else if (ret != -EAGAIN) {
+ bpf_aio_complete(&aio->iocb, ret);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static void bpf_aio_submit_work(struct work_struct *work)
+{
+ struct bpf_aio_work *aio_work = container_of(work, struct bpf_aio_work, work);
+
+ bpf_aio_do_submit(aio_work->aio);
+}
+
+static int __bpf_aio_submit(struct bpf_aio *aio)
+{
+ struct work_struct *work;
+
+do_submit:
+ if (likely(!(aio->opf & BPF_AIO_FORCE_WQ))) {
+ int ret = bpf_aio_do_submit(aio);
+
+ /* retry via workqueue in case of -EAGAIN */
+ if (ret != -EAGAIN)
+ return ret;
+ aio->opf |= BPF_AIO_FORCE_WQ;
+ }
+
+ if (!aio->work) {
+ bool in_irq = in_interrupt();
+ gfp_t gfpflags = in_irq ? GFP_ATOMIC : GFP_NOIO;
+
+ aio->work = kmem_cache_alloc(bpf_aio_work_cachep, gfpflags);
+ if (unlikely(!aio->work)) {
+ if (in_irq)
+ return -ENOMEM;
+ aio->opf &= ~BPF_AIO_FORCE_WQ;
+ goto do_submit;
+ }
+ }
+
+ aio->work->aio = aio;
+ work = &aio->work->work;
+ INIT_WORK(work, bpf_aio_submit_work);
+ queue_work(bpf_aio_wq, work);
+
+ return 0;
+}
+
+static struct bpf_aio *__bpf_aio_alloc(gfp_t gfpflags, unsigned op,
+ enum bpf_aio_flag aio_flags)
+{
+ struct bpf_aio *aio;
+
+ if (op >= BPF_AIO_OP_LAST)
+ return NULL;
+
+ if (aio_flags & BPF_AIO_OP_MASK)
+ return NULL;
+
+ aio = kmem_cache_alloc(bpf_aio_cachep, gfpflags);
+ if (!aio)
+ return NULL;
+
+ memset(aio, 0, sizeof(*aio));
+ aio->opf = op | (unsigned int)aio_flags;
+ return aio;
+}
+
+__bpf_kfunc struct bpf_aio *bpf_aio_alloc(unsigned int op, enum bpf_aio_flag aio_flags)
+{
+ return __bpf_aio_alloc(GFP_ATOMIC, op, aio_flags);
+}
+
+__bpf_kfunc struct bpf_aio *bpf_aio_alloc_sleepable(unsigned int op, enum bpf_aio_flag aio_flags)
+{
+ return __bpf_aio_alloc(GFP_NOIO, op, aio_flags);
+}
+
+__bpf_kfunc void bpf_aio_release(struct bpf_aio *aio)
+{
+ kmem_cache_free(bpf_aio_cachep, aio);
+}
+
+/* Submit AIO from bpf prog */
+__bpf_kfunc int bpf_aio_submit(struct bpf_aio *aio, int fd, loff_t pos,
+ unsigned bytes, unsigned io_flags)
+{
+ struct file *file;
+
+ if (!aio->ops)
+ return -EINVAL;
+
+ file = fget(fd);
+ if (!file)
+ return -EINVAL;
+
+ /* we may be called from an io completion handler */
+ if (in_interrupt())
+ aio->opf |= BPF_AIO_FORCE_WQ;
+
+ aio->iocb.ki_pos = pos;
+ aio->iocb.ki_filp = file;
+ aio->iocb.ki_flags = io_flags;
+ aio->bytes = bytes;
+ if (bpf_aio_is_rw(bpf_aio_get_op(aio))) {
+ if (file->f_flags & O_DIRECT)
+ aio->iocb.ki_flags |= IOCB_DIRECT;
+ else
+ aio->opf |= BPF_AIO_FORCE_WQ;
+ aio->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ } else {
+ aio->opf |= BPF_AIO_FORCE_WQ;
+ }
+
+ return __bpf_aio_submit(aio);
+}
+
+int __init bpf_aio_init(void)
+{
+ bpf_aio_cachep = KMEM_CACHE(bpf_aio, SLAB_PANIC);
+ bpf_aio_work_cachep = KMEM_CACHE(bpf_aio_work, SLAB_PANIC);
+ bpf_aio_wq = alloc_workqueue("bpf_aio", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) 2024 Red Hat */
+#ifndef UBLK_BPF_AIO_HEADER
+#define UBLK_BPF_AIO_HEADER
+
+#define BPF_AIO_OP_BITS 8
+#define BPF_AIO_OP_MASK ((1 << BPF_AIO_OP_BITS) - 1)
+
+enum bpf_aio_op {
+ BPF_AIO_OP_FS_READ = 0,
+ BPF_AIO_OP_FS_WRITE,
+ BPF_AIO_OP_FS_FSYNC,
+ BPF_AIO_OP_FS_FALLOCATE,
+ BPF_AIO_OP_LAST,
+};
+
+enum bpf_aio_flag_bits {
+ /* force io submission via the workqueue */
+ __BPF_AIO_FORCE_WQ = BPF_AIO_OP_BITS,
+ __BPF_AIO_NR_BITS, /* stops here */
+};
+
+enum bpf_aio_flag {
+ BPF_AIO_FORCE_WQ = (1 << __BPF_AIO_FORCE_WQ),
+};
+
+struct bpf_aio_work {
+ struct bpf_aio *aio;
+ struct work_struct work;
+};
+
+/* TODO: support ubuf and iovec in the future */
+struct bpf_aio_buf {
+ unsigned int bvec_off;
+ int nr_bvec;
+ const struct bio_vec *bvec;
+};
+
+struct bpf_aio {
+ unsigned int opf;
+ unsigned int bytes;
+ struct bpf_aio_buf buf;
+ struct bpf_aio_work *work;
+ const struct bpf_aio_complete_ops *ops;
+ struct kiocb iocb;
+};
+
+typedef void (*bpf_aio_complete_t)(struct bpf_aio *io, long ret);
+
+struct bpf_aio_complete_ops {
+ unsigned int id;
+ bpf_aio_complete_t bpf_aio_complete_cb;
+};
+
+static inline unsigned int bpf_aio_get_op(const struct bpf_aio *aio)
+{
+ return aio->opf & BPF_AIO_OP_MASK;
+}
+
+int bpf_aio_init(void);
+struct bpf_aio *bpf_aio_alloc(unsigned int op, enum bpf_aio_flag aio_flags);
+struct bpf_aio *bpf_aio_alloc_sleepable(unsigned int op, enum bpf_aio_flag aio_flags);
+void bpf_aio_release(struct bpf_aio *aio);
+int bpf_aio_submit(struct bpf_aio *aio, int fd, loff_t pos, unsigned bytes,
+ unsigned io_flags);
+#endif
Define bpf aio kfuncs for bpf progs to submit AIO. For now this covers filesystem IO only; in the future it may be extended to network IO. Only a bvec buffer is supported for the FS IO, but covering UBUF is easy because we have iov_iter. With bpf aio, not only is the user-kernel context switch avoided, but the user-kernel buffer copy is saved as well. It is very similar to loop's direct IO implementation.

These kfuncs can be used by other subsystems and should belong in lib/, but let's start with ublk first. Once they become mature or gain more use cases, they can be moved to lib/.

Also define the bpf_aio_complete_ops struct_ops, which needs to be implemented by the caller to complete bpf aio via a bpf prog; this will be done in the following patches.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
---
 drivers/block/ublk/Makefile  |   2 +-
 drivers/block/ublk/bpf.c     |  40 +++++-
 drivers/block/ublk/bpf.h     |   1 +
 drivers/block/ublk/bpf_aio.c | 251 +++++++++++++++++++++++++++++++++++
 drivers/block/ublk/bpf_aio.h |  66 +++++++++
 5 files changed, 358 insertions(+), 2 deletions(-)
 create mode 100644 drivers/block/ublk/bpf_aio.c
 create mode 100644 drivers/block/ublk/bpf_aio.h
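
For illustration, below is a minimal BPF-side sketch of how these kfuncs and the bpf_aio_complete_ops struct_ops might be consumed once the registration wiring from the following patches is in place. The SEC names, the program skeleton and the sample_* identifiers are assumptions made for this sketch only and are not part of this patch; a real caller is also expected to attach the bvec buffer and the ops before submitting.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical BPF-side sketch; not part of this series */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* kfuncs exported by this patch, as declared in bpf_aio.h */
struct bpf_aio *bpf_aio_alloc(unsigned int op, enum bpf_aio_flag aio_flags) __ksym;
void bpf_aio_release(struct bpf_aio *aio) __ksym;
int bpf_aio_submit(struct bpf_aio *aio, int fd, loff_t pos,
		   unsigned int bytes, unsigned int io_flags) __ksym;

/* illustrative only: allocate one read aio and hand it to the kernel;
 * a real caller (e.g. a ublk prog from later patches) would also set up
 * aio->buf and aio->ops before bpf_aio_submit(), otherwise the submit
 * fails with -EINVAL
 */
static inline int sample_submit_read(int fd, loff_t pos, unsigned int bytes)
{
	struct bpf_aio *aio = bpf_aio_alloc(BPF_AIO_OP_FS_READ, 0);

	if (!aio)	/* bpf_aio_alloc may return NULL (KF_RET_NULL) */
		return -1;
	return bpf_aio_submit(aio, fd, pos, bytes, 0);
}

/* completion callback wired up through bpf_aio_complete_ops */
SEC("struct_ops/bpf_aio_complete_cb")
void BPF_PROG(sample_aio_done, struct bpf_aio *aio, long ret)
{
	/* the IO is finished; release the aio */
	bpf_aio_release(aio);
}

SEC(".struct_ops")
struct bpf_aio_complete_ops sample_aio_ops = {
	.id = 1,
	.bpf_aio_complete_cb = (void *)sample_aio_done,
};

char LICENSE[] SEC("license") = "GPL";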