From patchwork Tue Aug 2 19:36:27 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935006
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 1/7] blk-mq: add ops to dma map bvec
Date: Tue, 2 Aug 2022 12:36:27 -0700
Message-ID: <20220802193633.289796-2-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

The same buffer may be used for many subsequent IOs. Instead of setting
up the mapping per-IO, provide an interface that allows a buffer to be
premapped just once and referenced again later.
Signed-off-by: Keith Busch
---
 block/bdev.c           | 20 ++++++++++++++++++++
 include/linux/blk-mq.h | 13 +++++++++++++
 include/linux/blkdev.h | 16 ++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/block/bdev.c b/block/bdev.c
index ce05175e71ce..c3d73ad86fae 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1069,3 +1069,23 @@ void sync_bdevs(bool wait)
 		spin_unlock(&blockdev_superblock->s_inode_list_lock);
 	iput(old_inode);
 }
+
+#ifdef CONFIG_HAS_DMA
+void *block_dma_map(struct block_device *bdev, struct bio_vec *bvec,
+		    int nr_vecs)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q->mq_ops && q->mq_ops->dma_map)
+		return q->mq_ops->dma_map(q, bvec, nr_vecs);
+	return ERR_PTR(-EINVAL);
+}
+
+void block_dma_unmap(struct block_device *bdev, void *dma_tag)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q->mq_ops && q->mq_ops->dma_unmap)
+		return q->mq_ops->dma_unmap(q, dma_tag);
+}
+#endif
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index effee1dc715a..e10aabb36c2c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -639,6 +639,19 @@ struct blk_mq_ops {
 	 */
 	void (*show_rq)(struct seq_file *m, struct request *rq);
 #endif
+
+#ifdef CONFIG_HAS_DMA
+	/**
+	 * @dma_map: Create a dma mapping. On success, returns an opaque cookie
+	 * that can be referenced by the driver in future requests.
+	 */
+	void *(*dma_map)(struct request_queue *q, struct bio_vec *bvec, int nr_vecs);
+
+	/**
+	 * @dma_unmap: Tear down a previously created dma mapping.
+	 */
+	void (*dma_unmap)(struct request_queue *q, void *dma_tag);
+#endif
 };

 enum {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49dcd31e283e..efc5e805a46e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1527,4 +1527,20 @@ struct io_comp_batch {

 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }

+#ifdef CONFIG_HAS_DMA
+void *block_dma_map(struct block_device *bdev, struct bio_vec *bvec,
+		    int nr_vecs);
+void block_dma_unmap(struct block_device *bdev, void *dma_tag);
+#else
+static inline void *block_dma_map(struct block_device *bdev,
+				  struct bio_vec *bvec, int nr_vecs)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void block_dma_unmap(struct block_device *bdev, void *dma_tag)
+{
+}
+#endif
+
 #endif /* _LINUX_BLKDEV_H */
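[Editorial illustration] To see how a driver would consume this interface, here is a
minimal sketch of a hypothetical blk-mq driver wiring up the two new callbacks. The
driver name, the use of q->queuedata as a DMA device handle, and the cookie layout
are assumptions for illustration only, not part of the series:

/* Hypothetical cookie: one dma address per bvec. */
struct mydrv_dma_tag {
	int nr_vecs;
	struct bio_vec *bvec;	/* caller's table; assumed to outlive the tag */
	dma_addr_t addrs[];
};

static void *mydrv_dma_map(struct request_queue *q, struct bio_vec *bvec,
			   int nr_vecs)
{
	struct device *dma_dev = q->queuedata;	/* assumed device handle */
	struct mydrv_dma_tag *tag;
	int i;

	tag = kzalloc(struct_size(tag, addrs, nr_vecs), GFP_KERNEL);
	if (!tag)
		return ERR_PTR(-ENOMEM);

	tag->nr_vecs = nr_vecs;
	tag->bvec = bvec;
	for (i = 0; i < nr_vecs; i++) {
		/* map each segment once, up front */
		tag->addrs[i] = dma_map_bvec(dma_dev, &bvec[i],
					     DMA_BIDIRECTIONAL, 0);
		if (dma_mapping_error(dma_dev, tag->addrs[i])) {
			while (i--)
				dma_unmap_page(dma_dev, tag->addrs[i],
					       bvec[i].bv_len,
					       DMA_BIDIRECTIONAL);
			kfree(tag);
			return ERR_PTR(-EIO);
		}
	}
	/* opaque cookie handed back to callers of block_dma_map() */
	return tag;
}

static void mydrv_dma_unmap(struct request_queue *q, void *dma_tag)
{
	struct device *dma_dev = q->queuedata;
	struct mydrv_dma_tag *tag = dma_tag;
	int i;

	for (i = 0; i < tag->nr_vecs; i++)
		dma_unmap_page(dma_dev, tag->addrs[i], tag->bvec[i].bv_len,
			       DMA_BIDIRECTIONAL);
	kfree(tag);
}

static const struct blk_mq_ops mydrv_mq_ops = {
	/* .queue_rq and the other mandatory ops omitted */
	.dma_map	= mydrv_dma_map,
	.dma_unmap	= mydrv_dma_unmap,
};

Patch 7 of the series shows the real implementation of this contract for nvme-pci.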
From patchwork Tue Aug 2 19:36:28 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935003
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 2/7] file: add ops to dma map bvec
Date: Tue, 2 Aug 2022 12:36:28 -0700
Message-ID: <20220802193633.289796-3-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

The same buffer may be used for many subsequent IOs. Instead of setting
up the mapping per-IO, provide an interface that allows a buffer to be
premapped just once and referenced again later, and implement it for the
block device file.
Signed-off-by: Keith Busch
---
 block/fops.c       | 20 ++++++++++++++++++++
 fs/file.c          | 15 +++++++++++++++
 include/linux/fs.h | 20 ++++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/block/fops.c b/block/fops.c
index 29066ac5a2fa..db2d1e848f4b 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -670,6 +670,22 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
 	return error;
 }

+#ifdef CONFIG_HAS_DMA
+void *blkdev_dma_map(struct file *filp, struct bio_vec *bvec, int nr_vecs)
+{
+	struct block_device *bdev = filp->private_data;
+
+	return block_dma_map(bdev, bvec, nr_vecs);
+}
+
+void blkdev_dma_unmap(struct file *filp, void *dma_tag)
+{
+	struct block_device *bdev = filp->private_data;
+
+	return block_dma_unmap(bdev, dma_tag);
+}
+#endif
+
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
@@ -686,6 +702,10 @@ const struct file_operations def_blk_fops = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= blkdev_fallocate,
+#ifdef CONFIG_HAS_DMA
+	.dma_map	= blkdev_dma_map,
+	.dma_unmap	= blkdev_dma_unmap,
+#endif
 };

 static __init int blkdev_init(void)
diff --git a/fs/file.c b/fs/file.c
index 3bcc1ecc314a..767bf9d3205e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1307,3 +1307,18 @@ int iterate_fd(struct files_struct *files, unsigned n,
 	return res;
 }
 EXPORT_SYMBOL(iterate_fd);
+
+#ifdef CONFIG_HAS_DMA
+void *file_dma_map(struct file *file, struct bio_vec *bvec, int nr_vecs)
+{
+	if (file->f_op->dma_map)
+		return file->f_op->dma_map(file, bvec, nr_vecs);
+	return ERR_PTR(-EINVAL);
+}
+
+void file_dma_unmap(struct file *file, void *dma_tag)
+{
+	if (file->f_op->dma_unmap)
+		return file->f_op->dma_unmap(file, dma_tag);
+}
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c0d99b5a166b..befd8ea5821a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1964,6 +1964,10 @@ struct dir_context {
 struct iov_iter;
 struct io_uring_cmd;

+#ifdef CONFIG_HAS_DMA
+struct bio_vec;
+#endif
+
 struct file_operations {
 	struct module *owner;
 	loff_t (*llseek) (struct file *, loff_t, int);
@@ -2006,6 +2010,10 @@ struct file_operations {
 			loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
+#ifdef CONFIG_HAS_DMA
+	void *(*dma_map)(struct file *, struct bio_vec *, int);
+	void (*dma_unmap)(struct file *, void *);
+#endif
 } __randomize_layout;

 struct inode_operations {
@@ -3467,4 +3475,16 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
 			   int advice);

+#ifdef CONFIG_HAS_DMA
+void *file_dma_map(struct file *file, struct bio_vec *bvec, int nr_vecs);
+void file_dma_unmap(struct file *file, void *dma_tag);
+#else
+static inline void *file_dma_map(struct file *file, struct bio_vec *bvec,
+				 int nr_vecs)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+static inline void file_dma_unmap(struct file *file, void *dma_tag) {}
+#endif
+
 #endif /* _LINUX_FS_H */
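[Editorial illustration] A short sketch of the call pattern this enables for an
in-kernel user; the pinned page and the surrounding error handling are illustrative
assumptions, not code from the series:

/* Premap one pinned page through the file's hook, reuse the cookie for
 * many IOs, then tear it down. */
struct bio_vec bv = {
	.bv_page	= page,		/* assumed already pinned */
	.bv_len		= PAGE_SIZE,
	.bv_offset	= 0,
};
void *tag;

tag = file_dma_map(file, &bv, 1);
if (IS_ERR(tag))
	return PTR_ERR(tag);	/* -EINVAL if f_op has no ->dma_map */

/* ... submit any number of IOs that reference 'tag' ... */

file_dma_unmap(file, tag);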
From patchwork Tue Aug 2 19:36:29 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935005
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 3/7] iov_iter: introduce type for preregistered dma tags
Date: Tue, 2 Aug 2022 12:36:29 -0700
Message-ID: <20220802193633.289796-4-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

Introduce a new iov_iter type representing a pre-registered DMA address
tag. The tag is an opaque cookie specific to the lower level driver that
created it, and can be referenced at any arbitrary offset.
Signed-off-by: Keith Busch
---
 include/linux/uio.h |  9 +++++++++
 lib/iov_iter.c      | 24 +++++++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 34ba4a731179..a55e4b86413a 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,7 @@ enum iter_type {
 	ITER_PIPE,
 	ITER_XARRAY,
 	ITER_DISCARD,
+	ITER_DMA_TAG,
 };

 struct iov_iter_state {
@@ -46,6 +47,7 @@ struct iov_iter {
 			const struct bio_vec *bvec;
 			struct xarray *xarray;
 			struct pipe_inode_info *pipe;
+			void *dma_tag;
 		};
 		union {
 			unsigned long nr_segs;
@@ -85,6 +87,11 @@ static inline bool iov_iter_is_bvec(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_BVEC;
 }

+static inline bool iov_iter_is_dma_tag(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_DMA_TAG;
+}
+
 static inline bool iov_iter_is_pipe(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_PIPE;
@@ -229,6 +236,8 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec
 		unsigned long nr_segs, size_t count);
 void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
 		unsigned long nr_segs, size_t count);
+void iov_iter_dma_tag(struct iov_iter *i, unsigned int direction, void *dma_tag,
+		unsigned int dma_offset, size_t count);
 void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
 		size_t count);
 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 507e732ef7cf..d370b45d7f1b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1077,6 +1077,9 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 		i->count -= size;
 	} else if (iov_iter_is_discard(i)) {
 		i->count -= size;
+	} else if (iov_iter_is_dma_tag(i)) {
+		i->iov_offset += size;
+		i->count -= size;
 	}
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -1201,6 +1204,21 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);

+void iov_iter_dma_tag(struct iov_iter *i, unsigned int direction,
+			void *dma_tag, unsigned int dma_offset,
+			size_t count)
+{
+	WARN_ON(direction & ~(READ | WRITE));
+	*i = (struct iov_iter){
+		.iter_type = ITER_DMA_TAG,
+		.data_source = direction,
+		.dma_tag = dma_tag,
+		.iov_offset = dma_offset,
+		.count = count
+	};
+}
+EXPORT_SYMBOL(iov_iter_dma_tag);
+
 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 			struct pipe_inode_info *pipe,
 			size_t count)
@@ -2124,8 +2142,8 @@ EXPORT_SYMBOL(import_single_range);
  */
 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 {
-	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
-			 !iov_iter_is_kvec(i))
+	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
+			 !iov_iter_is_dma_tag(i)) && !iov_iter_is_kvec(i))
 		return;
 	i->iov_offset = state->iov_offset;
 	i->count = state->count;
@@ -2141,7 +2159,7 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
 	if (iov_iter_is_bvec(i))
 		i->bvec -= state->nr_segs - i->nr_segs;
-	else
+	else if (!iov_iter_is_dma_tag(i))
 		i->iov -= state->nr_segs - i->nr_segs;
 	i->nr_segs = state->nr_segs;
 }
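[Editorial illustration] A small sketch of constructing and consuming the new
iterator type; submit_premapped_io() is a hypothetical stand-in for whatever
ultimately consumes the cookie:

struct iov_iter iter;

/* 'tag' is the opaque cookie a driver's ->dma_map() returned. Start
 * 4KB into the premapped buffer and cover 8KB. */
iov_iter_dma_tag(&iter, WRITE, tag, 4096, 8192);

/* There are no pages to walk through this iterator; consumers only
 * see the cookie plus an offset and byte count. */
if (iov_iter_is_dma_tag(&iter))
	submit_premapped_io(iter.dma_tag, iter.iov_offset,
			    iov_iter_count(&iter));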
From patchwork Tue Aug 2 19:36:30 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935007
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 4/7] block: add dma tag bio type
Date: Tue, 2 Aug 2022 12:36:30 -0700
Message-ID: <20220802193633.289796-5-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

Premapped buffers don't require a generic bio_vec since they have
already been dma mapped to the driver-specific data structure. Repurpose
bi_io_vec to carry the driver-specific tag, as the two are mutually
exclusive, and provide all the setup and split helpers needed to support
dma tags.

In order to use this, a driver must implement the .dma_map() blk-mq op.
If the driver provides this callback, it must be aware that any given
bio may be using a dma_tag instead of a bio_vec.

Note: this isn't working with blk_integrity.
Signed-off-by: Keith Busch
---
 block/bio.c               | 25 ++++++++++++++++++++++++-
 block/blk-merge.c         | 19 +++++++++++++++++++
 include/linux/bio.h       | 21 ++++++++++++---------
 include/linux/blk-mq.h    | 11 +++++++++++
 include/linux/blk_types.h |  6 +++++-
 5 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index d6eb90d9b20b..3b7accae8996 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -229,7 +229,8 @@ static void bio_free(struct bio *bio)
 	WARN_ON_ONCE(!bs);

 	bio_uninit(bio);
-	bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+	if (!bio_flagged(bio, BIO_DMA_TAGGED))
+		bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
 	mempool_free(p - bs->front_pad, &bs->bio_pool);
 }
@@ -762,6 +763,8 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 	bio_set_flag(bio, BIO_CLONED);
 	if (bio_flagged(bio_src, BIO_THROTTLED))
 		bio_set_flag(bio, BIO_THROTTLED);
+	if (bio_flagged(bio_src, BIO_DMA_TAGGED))
+		bio_set_flag(bio, BIO_DMA_TAGGED);
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_iter = bio_src->bi_iter;
@@ -1151,6 +1154,21 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 	bio_set_flag(bio, BIO_CLONED);
 }

+static void bio_iov_dma_tag_set(struct bio *bio, struct iov_iter *iter)
+{
+	size_t size = iov_iter_count(iter);
+
+	bio->bi_vcnt = iter->nr_segs;
+	bio->bi_dma_tag = iter->dma_tag;
+	bio->bi_iter.bi_bvec_done = iter->iov_offset;
+	bio->bi_iter.bi_size = size;
+	bio->bi_opf |= REQ_NOMERGE;
+	bio_set_flag(bio, BIO_NO_PAGE_REF);
+	bio_set_flag(bio, BIO_DMA_TAGGED);
+
+	iov_iter_advance(iter, bio->bi_iter.bi_size);
+}
+
 static int bio_iov_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int offset)
 {
@@ -1287,6 +1305,11 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 		return 0;
 	}

+	if (iov_iter_is_dma_tag(iter)) {
+		bio_iov_dma_tag_set(bio, iter);
+		return 0;
+	}
+
 	do {
 		ret = __bio_iov_iter_get_pages(bio, iter);
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ff04e9290715..d024885ad4c4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -274,6 +274,25 @@ static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim,
 	struct bvec_iter iter;
 	unsigned nsegs = 0, bytes = 0;

+	if (bio_flagged(bio, BIO_DMA_TAGGED)) {
+		int offset = offset_in_page(bio->bi_iter.bi_bvec_done);
+
+		nsegs = ALIGN(bio->bi_iter.bi_size + offset, PAGE_SIZE) >>
+							PAGE_SHIFT;
+		if (bio->bi_iter.bi_size > max_bytes) {
+			bytes = max_bytes;
+			nsegs = (bytes + offset) >> PAGE_SHIFT;
+		} else if (nsegs > lim->max_segments) {
+			nsegs = lim->max_segments;
+			bytes = PAGE_SIZE * nsegs - offset;
+		} else {
+			*segs = nsegs;
+			return NULL;
+		}
+
+		goto split;
+	}
+
 	bio_for_each_bvec(bv, bio, iter) {
 		/*
 		 * If the queue doesn't support SG gaps and adding this
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ca22b06700a9..649348bc03c2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -61,11 +61,17 @@ static inline bool bio_has_data(struct bio *bio)
 	return false;
 }

+static inline bool bio_flagged(const struct bio *bio, unsigned int bit)
+{
+	return (bio->bi_flags & (1U << bit)) != 0;
+}
+
 static inline bool bio_no_advance_iter(const struct bio *bio)
 {
 	return bio_op(bio) == REQ_OP_DISCARD ||
 	       bio_op(bio) == REQ_OP_SECURE_ERASE ||
-	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
+	       bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+	       bio_flagged(bio, BIO_DMA_TAGGED);
 }

 static inline void *bio_data(struct bio *bio)
@@ -98,9 +104,11 @@ static inline void bio_advance_iter(const struct bio *bio,
 {
 	iter->bi_sector += bytes >> 9;

-	if (bio_no_advance_iter(bio))
+	if (bio_no_advance_iter(bio)) {
 		iter->bi_size -= bytes;
-	else
+		if (bio_flagged(bio, BIO_DMA_TAGGED))
+			iter->bi_bvec_done += bytes;
+	} else
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
@@ -225,11 +233,6 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
 	atomic_set(&bio->__bi_cnt, count);
 }

-static inline bool bio_flagged(struct bio *bio, unsigned int bit)
-{
-	return (bio->bi_flags & (1U << bit)) != 0;
-}
-
 static inline void bio_set_flag(struct bio *bio, unsigned int bit)
 {
 	bio->bi_flags |= (1U << bit);
@@ -447,7 +450,7 @@ static inline void bio_wouldblock_error(struct bio *bio)
  */
 static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
 {
-	if (iov_iter_is_bvec(iter))
+	if (iov_iter_is_bvec(iter) || iov_iter_is_dma_tag(iter))
 		return 0;
 	return iov_iter_npages(iter, max_segs);
 }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e10aabb36c2c..f5e0aa61bf85 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1141,6 +1141,17 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 void blk_dump_rq_flags(struct request *, char *);

+static inline void *blk_rq_dma_tag(struct request *rq)
+{
+	return rq->bio && bio_flagged(rq->bio, BIO_DMA_TAGGED) ?
+		rq->bio->bi_dma_tag : 0;
+}
+
+static inline size_t blk_rq_dma_offset(struct request *rq)
+{
+	return rq->bio->bi_iter.bi_bvec_done;
+}
+
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1ef99790f6ed..ea6db439acbe 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -299,7 +299,10 @@ struct bio {

 	atomic_t		__bi_cnt;	/* pin count */

-	struct bio_vec		*bi_io_vec;	/* the actual vec list */
+	union {
+		struct bio_vec	*bi_io_vec;	/* the actual vec list */
+		void		*bi_dma_tag;	/* driver specific tag */
+	};

 	struct bio_set		*bi_pool;
@@ -334,6 +337,7 @@ enum {
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_DMA_TAGGED,		/* Using premapped dma buffers */
 	BIO_FLAG_LAST
 };
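[Editorial illustration] To make the driver-side contract concrete, a hypothetical
fragment of a request-mapping path; the mydrv_* helpers and device type are
stand-ins, only blk_rq_dma_tag()/blk_rq_dma_offset() come from this patch:

static blk_status_t mydrv_map_data(struct mydrv_dev *dev, struct request *rq)
{
	void *tag = blk_rq_dma_tag(rq);

	if (tag) {
		/*
		 * The buffer was mapped once up front; this IO only
		 * needs its byte offset into the premapped region.
		 */
		return mydrv_setup_premapped(dev, rq, tag,
					     blk_rq_dma_offset(rq));
	}

	/* Normal path: per-IO scatterlist mapping of the bio_vecs. */
	return mydrv_map_sg(dev, rq);
}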
From patchwork Tue Aug 2 19:36:31 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935004
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 5/7] io_uring: introduce file slot release helper
Date: Tue, 2 Aug 2022 12:36:31 -0700
Message-ID: <20220802193633.289796-6-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

Releasing the pre-registered file follows a repeated pattern. Introduce
a helper to make it easier to add more complexity to this resource in
the future.
Signed-off-by: Keith Busch
---
 io_uring/filetable.c | 33 +++++++++++++++++++++------------
 io_uring/filetable.h |  3 +++
 io_uring/rsrc.c      |  5 +----
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index 7b473259f3f4..1b8db1918678 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -76,19 +76,13 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,

 	file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
 	if (file_slot->file_ptr) {
-		struct file *old_file;
-
 		ret = io_rsrc_node_switch_start(ctx);
 		if (ret)
 			goto err;

-		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
-					    ctx->rsrc_node, old_file);
+		ret = io_file_slot_queue_removal(ctx, file_slot);
 		if (ret)
 			goto err;
-		file_slot->file_ptr = 0;
-		io_file_bitmap_clear(&ctx->file_table, slot_index);
 		needs_switch = true;
 	}
@@ -148,7 +142,6 @@ int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
 int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 {
 	struct io_fixed_file *file_slot;
-	struct file *file;
 	int ret;

@@ -164,13 +157,10 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 	if (!file_slot->file_ptr)
 		return -EBADF;

-	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-	ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+	ret = io_file_slot_queue_removal(ctx, file_slot);
 	if (ret)
 		return ret;

-	file_slot->file_ptr = 0;
-	io_file_bitmap_clear(&ctx->file_table, offset);
 	io_rsrc_node_switch(ctx, ctx->file_data);
 	return 0;
 }
@@ -191,3 +181,22 @@ int io_register_file_alloc_range(struct io_ring_ctx *ctx,
 	io_file_table_set_alloc_range(ctx, range.off, range.len);
 	return 0;
 }
+
+int io_file_slot_queue_removal(struct io_ring_ctx *ctx,
+			       struct io_fixed_file *file_slot)
+{
+	u32 slot_index = file_slot - ctx->file_table.files;
+	struct file *file;
+	int ret;
+
+	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+	ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
+				    ctx->rsrc_node, file);
+	if (ret)
+		return ret;
+
+	file_slot->file_ptr = 0;
+	io_file_bitmap_clear(&ctx->file_table, slot_index);
+
+	return 0;
+}
diff --git a/io_uring/filetable.h b/io_uring/filetable.h
index ff3a712e11bf..e52ecf359199 100644
--- a/io_uring/filetable.h
+++ b/io_uring/filetable.h
@@ -34,6 +34,9 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset);
 int io_register_file_alloc_range(struct io_ring_ctx *ctx,
 				 struct io_uring_file_index_range __user *arg);

+int io_file_slot_queue_removal(struct io_ring_ctx *ctx,
+			       struct io_fixed_file *file_slot);
+
 unsigned int io_file_get_flags(struct file *file);

 static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 59704b9ac537..1f10eecad4d7 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -469,12 +469,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,

 		file_slot = io_fixed_file_slot(&ctx->file_table, i);

 		if (file_slot->file_ptr) {
-			file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-			err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
+			err = io_file_slot_queue_removal(ctx, file_slot);
 			if (err)
 				break;
-			file_slot->file_ptr = 0;
-			io_file_bitmap_clear(&ctx->file_table, i);
 			needs_switch = true;
 		}
 		if (fd != -1) {
From patchwork Tue Aug 2 19:36:32 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935008
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 6/7] io_uring: add support for dma pre-mapping
Date: Tue, 2 Aug 2022 12:36:32 -0700
Message-ID: <20220802193633.289796-7-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

Provide a new register operation that requests pre-mapping a known bvec
through the requested fixed file's driver-specific implementation. If
successful, io_uring will use the returned dma tag for future fixed
buffer requests to the same file.
Signed-off-by: Keith Busch
---
 include/linux/io_uring_types.h |   2 +
 include/uapi/linux/io_uring.h  |  12 +++
 io_uring/filetable.c           |   7 +-
 io_uring/filetable.h           |   7 +-
 io_uring/io_uring.c            | 137 +++++++++++++++++++++++++++++++++
 io_uring/net.c                 |   2 +-
 io_uring/rsrc.c                |  21 +++--
 io_uring/rsrc.h                |  10 ++-
 io_uring/rw.c                  |   2 +-
 9 files changed, 185 insertions(+), 15 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f7fab3758cb9..f62ea17cc480 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -23,6 +23,8 @@ struct io_wq_work {
 	int cancel_seq;
 };

+struct io_mapped_ubuf;
+
 struct io_fixed_file {
 	/* file * with additional FFS_* flags */
 	unsigned long file_ptr;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1463cfecb56b..daacbe899d1d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -485,6 +485,10 @@ enum {
 	IORING_REGISTER_NOTIFIERS		= 26,
 	IORING_UNREGISTER_NOTIFIERS		= 27,

+	/* dma map registered buffers */
+	IORING_REGISTER_MAP_BUFFERS		= 28,
+	IORING_REGISTER_UNMAP_BUFFERS		= 29,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -661,4 +665,12 @@ struct io_uring_recvmsg_out {
 	__u32 flags;
 };

+struct io_uring_map_buffers {
+	__s32	fd;
+	__s32	buf_start;
+	__s32	buf_end;
+	__u32	flags;
+	__u64	rsvd[2];
+};
+
 #endif
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index 1b8db1918678..5ca2f27f317f 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -189,9 +189,10 @@ int io_file_slot_queue_removal(struct io_ring_ctx *ctx,
 	struct file *file;
 	int ret;

-	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-	ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
-				    ctx->rsrc_node, file);
+	file = io_file_from_fixed(file_slot);
+	io_dma_unmap_file(ctx, file_slot);
+	ret = io_queue_rsrc_removal(ctx->file_data, slot_index, ctx->rsrc_node,
+				    file);
 	if (ret)
 		return ret;
diff --git a/io_uring/filetable.h b/io_uring/filetable.h
index e52ecf359199..3b2aae5bff76 100644
--- a/io_uring/filetable.h
+++ b/io_uring/filetable.h
@@ -58,12 +58,17 @@ io_fixed_file_slot(struct io_file_table *table, unsigned i)
 	return &table->files[i];
 }

+static inline struct file *io_file_from_fixed(struct io_fixed_file *f)
+{
+	return (struct file *) (f->file_ptr & FFS_MASK);
+}
+
 static inline struct file *io_file_from_index(struct io_file_table *table,
 					      int index)
 {
 	struct io_fixed_file *slot = io_fixed_file_slot(table, index);

-	return (struct file *) (slot->file_ptr & FFS_MASK);
+	return io_file_from_fixed(slot);
 }

 static inline void io_fixed_file_set(struct io_fixed_file *file_slot,
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index b54218da075c..f5be488eaf21 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3681,6 +3681,131 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	return ret;
 }

+static int get_map_range(struct io_ring_ctx *ctx,
+			 struct io_uring_map_buffers *map, void __user *arg)
+{
+	int ret;
+
+	if (copy_from_user(map, arg, sizeof(*map)))
+		return -EFAULT;
+	if (map->flags || map->rsvd[0] || map->rsvd[1])
+		return -EINVAL;
+	if (map->fd >= ctx->nr_user_files)
+		return -EINVAL;
+	if (map->buf_start < 0)
+		return -EINVAL;
+	if (map->buf_start >= ctx->nr_user_bufs)
+		return -EINVAL;
+	if (map->buf_end > ctx->nr_user_bufs)
+		map->buf_end = ctx->nr_user_bufs;
+
+	ret = map->buf_end - map->buf_start;
+	if (ret <= 0)
+		return -EINVAL;
+
+	return ret;
+}
+
+void io_dma_unmap(struct io_mapped_ubuf *imu)
+{
+	struct file *file;
+
+	if (!imu->dma_tag)
+		return;
+
+	file = io_file_from_fixed(imu->dma_file);
+	file_dma_unmap(file, imu->dma_tag);
+	imu->dma_file = NULL;
+	imu->dma_tag = NULL;
+}
+
+void io_dma_unmap_file(struct io_ring_ctx *ctx, struct io_fixed_file *file_slot)
+{
+	int i;
+
+	for (i = 0; i < ctx->nr_user_bufs; i++) {
+		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
+
+		if (!imu->dma_tag)
+			continue;
+
+		if (imu->dma_file == file_slot)
+			io_dma_unmap(imu);
+	}
+}
+
+static int io_register_unmap_buffers(struct io_ring_ctx *ctx, void __user *arg)
+{
+	struct io_uring_map_buffers map;
+	int i, ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = get_map_range(ctx, &map, arg);
+	if (ret < 0)
+		return ret;
+
+	for (i = map.buf_start; i < map.buf_end; i++) {
+		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
+
+		io_dma_unmap(imu);
+	}
+
+	return 0;
+}
+
+static int io_register_map_buffers(struct io_ring_ctx *ctx, void __user *arg)
+{
+	struct io_uring_map_buffers map;
+	struct io_fixed_file *file_slot;
+	struct file *file;
+	int ret, i;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = get_map_range(ctx, &map, arg);
+	if (ret < 0)
+		return ret;
+
+	file_slot = io_fixed_file_slot(&ctx->file_table,
+			array_index_nospec(map.fd, ctx->nr_user_files));
+	if (!file_slot || !file_slot->file_ptr)
+		return -EBADF;
+
+	file = io_file_from_fixed(file_slot);
+	if (!(file->f_flags & O_DIRECT))
+		return -EOPNOTSUPP;
+
+	for (i = map.buf_start; i < map.buf_end; i++) {
+		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
+		void *tag;
+
+		if (imu->dma_tag) {
+			ret = -EBUSY;
+			goto err;
+		}
+
+		tag = file_dma_map(file, imu->bvec, imu->nr_bvecs);
+		if (IS_ERR(tag)) {
+			ret = PTR_ERR(tag);
+			goto err;
+		}
+
+		imu->dma_tag = tag;
+		imu->dma_file = file_slot;
+	}
+
+	return 0;
+err:
+	while (--i >= map.buf_start) {
+		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
+
+		io_dma_unmap(imu);
+	}
+	return ret;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -3847,6 +3972,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_notif_unregister(ctx);
 		break;
+	case IORING_REGISTER_MAP_BUFFERS:
+		ret = -EINVAL;
+		if (!arg || nr_args != 1)
+			break;
+		ret = io_register_map_buffers(ctx, arg);
+		break;
+	case IORING_REGISTER_UNMAP_BUFFERS:
+		ret = -EINVAL;
+		if (!arg || nr_args != 1)
+			break;
+		ret = io_register_unmap_buffers(ctx, arg);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/io_uring/net.c b/io_uring/net.c
index 32fc3da04e41..2793fd7d99d5 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -977,7 +977,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)

 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
 		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
-					(u64)(uintptr_t)zc->buf, zc->len);
+					(u64)(uintptr_t)zc->buf, zc->len, NULL);
 		if (unlikely(ret))
 			return ret;
 	} else {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 1f10eecad4d7..ee5e5284203d 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -148,6 +148,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 			unpin_user_page(imu->bvec[i].bv_page);
 		if (imu->acct_pages)
 			io_unaccount_mem(ctx, imu->acct_pages);
+		io_dma_unmap(imu);
 		kvfree(imu);
 	}
 	*slot = NULL;
@@ -809,12 +810,16 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	int i;

 	for (i = 0; i < ctx->nr_user_files; i++) {
-		struct file *file = io_file_from_index(&ctx->file_table, i);
+		struct io_fixed_file *f = io_fixed_file_slot(&ctx->file_table, i);
+		struct file *file;

-		if (!file)
+		if (!f)
 			continue;
-		if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
+		if (f->file_ptr & FFS_SCM)
 			continue;
+
+		io_dma_unmap_file(ctx, f);
+		file = io_file_from_fixed(f);
 		io_file_bitmap_clear(&ctx->file_table, i);
 		fput(file);
 	}
@@ -1282,6 +1287,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	imu->ubuf = (unsigned long) iov->iov_base;
 	imu->ubuf_end = imu->ubuf + iov->iov_len;
 	imu->nr_bvecs = nr_pages;
+	imu->dma_tag = NULL;
 	*pimu = imu;
 	ret = 0;
 done:
@@ -1356,9 +1362,8 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }

-int io_import_fixed(int ddir, struct iov_iter *iter,
-			   struct io_mapped_ubuf *imu,
-			   u64 buf_addr, size_t len)
+int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu,
+		    u64 buf_addr, size_t len, struct file *file)
 {
 	u64 buf_end;
 	size_t offset;
@@ -1376,6 +1381,10 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
 	 * and advance us to the beginning.
 	 */
 	offset = buf_addr - imu->ubuf;
+	if (imu->dma_tag && file == io_file_from_fixed(imu->dma_file)) {
+		iov_iter_dma_tag(iter, ddir, imu->dma_tag, offset, len);
+		return 0;
+	}
 	iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len);

 	if (offset) {
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index f3a9a177941f..47a2942aa537 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -50,6 +50,8 @@ struct io_mapped_ubuf {
 	u64		ubuf_end;
 	unsigned int	nr_bvecs;
 	unsigned long	acct_pages;
+	void		*dma_tag;
+	struct io_fixed_file	*dma_file;
 	struct bio_vec	bvec[];
 };
@@ -64,9 +66,11 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
 void io_rsrc_node_switch(struct io_ring_ctx *ctx,
 			 struct io_rsrc_data *data_to_kill);

-int io_import_fixed(int ddir, struct iov_iter *iter,
-			   struct io_mapped_ubuf *imu,
-			   u64 buf_addr, size_t len);
+int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu,
+		    u64 buf_addr, size_t len, struct file *file);
+
+void io_dma_unmap(struct io_mapped_ubuf *imu);
+void io_dma_unmap_file(struct io_ring_ctx *ctx, struct io_fixed_file *file_slot);

 void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
 int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 2b784795103c..9e2164d09adb 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -359,7 +359,7 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
 	ssize_t ret;

 	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
-		ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len);
+		ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len, req->file);
 		if (ret)
 			return ERR_PTR(ret);
 		return NULL;
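[Editorial illustration] From userspace the flow is: register fixed files and fixed
buffers as usual, then issue the new register opcode. A minimal sketch using the
raw syscall; liburing had no wrapper for this at the time, and the opcode and
struct below only exist with this series applied:

#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

#define IORING_REGISTER_MAP_BUFFERS	28	/* from this series' uapi */

struct io_uring_map_buffers {
	int32_t		fd;		/* fixed *file* table index */
	int32_t		buf_start;	/* first fixed buffer to premap */
	int32_t		buf_end;	/* one past the last buffer */
	uint32_t	flags;		/* must be zero */
	uint64_t	rsvd[2];	/* must be zero */
};

/* Premap fixed buffers [0, nbufs) to fixed file index 0. Requires
 * CAP_SYS_ADMIN, and the file must have been opened with O_DIRECT. */
static int map_buffers(int ring_fd, int nbufs)
{
	struct io_uring_map_buffers map = {
		.fd		= 0,
		.buf_start	= 0,
		.buf_end	= nbufs,
	};

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_MAP_BUFFERS, &map, 1);
}

After a successful call, subsequent IORING_OP_READ_FIXED/WRITE_FIXED requests
against that fixed file use the premapped tag automatically, and
IORING_REGISTER_UNMAP_BUFFERS (29) reverses the mapping.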
From patchwork Tue Aug 2 19:36:33 2022
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 12935009
From: Keith Busch
Cc: Alexander Viro, Kernel Team, Keith Busch
Subject: [PATCHv2 7/7] nvme-pci: implement dma_map support
Date: Tue, 2 Aug 2022 12:36:33 -0700
Message-ID: <20220802193633.289796-8-kbusch@fb.com>
In-Reply-To: <20220802193633.289796-1-kbusch@fb.com>
References: <20220802193633.289796-1-kbusch@fb.com>
X-Mailing-List: io-uring@vger.kernel.org

From: Keith Busch

Implement callbacks to convert a registered bio_vec to a prp list, and
use this for each IO that uses the returned tag. This saves repeated IO
conversions and dma mapping/unmapping. In many cases, the driver can
skip per-IO pool allocations entirely, saving potentially significant
CPU cycles.

Signed-off-by: Keith Busch
---
 drivers/nvme/host/pci.c | 302 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 292 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 644664098ae7..2df2b9bde7d7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -110,6 +110,14 @@ struct nvme_queue;
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);

+struct nvme_dma_mapping {
+	int nr_pages;
+	u16 offset;
+	u8  rsvd[2];
+	dma_addr_t prp_dma_addr;
+	__le64 *prps;
+};
+
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
  */
@@ -544,6 +552,34 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 	return true;
 }

+static void nvme_sync_dma(struct nvme_dev *dev, struct request *req,
+		struct nvme_dma_mapping *mapping)
+{
+	int offset, i, j, length, nprps;
+	bool needs_sync;
+
+	offset = blk_rq_dma_offset(req) + mapping->offset;
+	i = offset >> NVME_CTRL_PAGE_SHIFT;
+	needs_sync = rq_data_dir(req) == READ &&
+		 dma_need_sync(dev->dev, le64_to_cpu(mapping->prps[i]));
+
+	if (!needs_sync)
+		return;
+
+	offset = offset & (NVME_CTRL_PAGE_SIZE - 1);
+	length = blk_rq_payload_bytes(req) - (NVME_CTRL_PAGE_SIZE - offset);
+	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
+
+	dma_sync_single_for_cpu(dev->dev,
+				le64_to_cpu(mapping->prps[i++]),
+				NVME_CTRL_PAGE_SIZE - offset, DMA_FROM_DEVICE);
+	for (j = 1; j < nprps; j++) {
+		dma_sync_single_for_cpu(dev->dev,
+					le64_to_cpu(mapping->prps[i++]),
+					NVME_CTRL_PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+}
+
 static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
 {
 	const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
@@ -576,10 +612,24 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
 	}
 }

+static void nvme_free_prp_chain(struct nvme_dev *dev, struct request *req,
+				struct nvme_iod *iod)
+{
+	if (iod->npages == 0)
+		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
+			      iod->first_dma);
+	else if (iod->use_sgl)
+		nvme_free_sgls(dev, req);
+	else
+		nvme_free_prps(dev, req);
+	mempool_free(iod->sg, dev->iod_mempool);
+}
+
 static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

+	WARN_ON_ONCE(!iod->nents);
 	if (is_pci_p2pdma_page(sg_page(iod->sg)))
 		pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
 				    rq_dma_dir(req));
@@ -589,25 +639,24 @@ static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)

 static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 {
+	struct nvme_dma_mapping *mapping = blk_rq_dma_tag(req);
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

+	if (mapping) {
+		nvme_sync_dma(dev, req, mapping);
+		if (iod->npages >= 0)
+			nvme_free_prp_chain(dev, req, iod);
+		return;
+	}
+
 	if (iod->dma_len) {
 		dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
 			       rq_dma_dir(req));
 		return;
 	}

-	WARN_ON_ONCE(!iod->nents);
-
 	nvme_unmap_sg(dev, req);
-	if (iod->npages == 0)
-		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
-			      iod->first_dma);
-	else if (iod->use_sgl)
-		nvme_free_sgls(dev, req);
-	else
-		nvme_free_prps(dev, req);
-	mempool_free(iod->sg, dev->iod_mempool);
+	nvme_free_prp_chain(dev, req, iod);
 }

 static void nvme_print_sgl(struct scatterlist *sgl, int nents)
@@ -835,13 +884,136 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
 	return BLK_STS_OK;
 }

+static blk_status_t nvme_premapped(struct nvme_dev *dev, struct request *req,
+				   struct nvme_rw_command *cmnd,
+				   struct nvme_iod *iod,
+				   struct nvme_dma_mapping *mapping)
+{
+	static const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
+	dma_addr_t prp_list_start, prp_list_end, prp_dma;
+	int i, offset, j, length, nprps, nprps_left;
+	struct dma_pool *pool;
+	__le64 *prp_list;
+	bool needs_sync;
+	void **list;
+
+	offset = blk_rq_dma_offset(req) + mapping->offset;
+	i = offset >> NVME_CTRL_PAGE_SHIFT;
+	offset = offset & (NVME_CTRL_PAGE_SIZE - 1);
+	needs_sync = rq_data_dir(req) == WRITE &&
+		 dma_need_sync(dev->dev, le64_to_cpu(mapping->prps[i]));
+
+	if (needs_sync) {
+		dma_sync_single_for_device(dev->dev,
+			le64_to_cpu(mapping->prps[i]),
+			NVME_CTRL_PAGE_SIZE - offset, DMA_TO_DEVICE);
+	}
+
+	length = blk_rq_payload_bytes(req) - (NVME_CTRL_PAGE_SIZE - offset);
+	cmnd->dptr.prp1 = cpu_to_le64(le64_to_cpu(mapping->prps[i++]) + offset);
+
+	if (length <= 0)
+		return BLK_STS_OK;
+
+	if (length <= NVME_CTRL_PAGE_SIZE) {
+		if (needs_sync)
+			dma_sync_single_for_device(dev->dev,
+				le64_to_cpu(mapping->prps[i]),
+				NVME_CTRL_PAGE_SIZE, DMA_TO_DEVICE);
+		cmnd->dptr.prp2 = mapping->prps[i];
+		return BLK_STS_OK;
+	}
+
+	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
+	nprps_left = nprps;
+	prp_list_start = mapping->prp_dma_addr + 8 * i;
+	prp_list_end = prp_list_start + 8 * nprps;
+
+	/* Optimization when remaining list fits in one nvme page */
+	if ((prp_list_start >> NVME_CTRL_PAGE_SHIFT) ==
+	    (prp_list_end >> NVME_CTRL_PAGE_SHIFT)) {
+		cmnd->dptr.prp2 = cpu_to_le64(prp_list_start);
+		goto sync;
+	}
+
+	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+	if (!iod->sg)
+		return BLK_STS_RESOURCE;
+
+	if (nprps <= (256 / 8)) {
+		pool = dev->prp_small_pool;
+		iod->npages = 0;
+	} else {
+		pool = dev->prp_page_pool;
+		iod->npages = 1;
+	}
+
+	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
+	if (!prp_list) {
+		iod->npages = -1;
+		goto out_free_sg;
+	}
+
+	list = nvme_pci_iod_list(req);
+	list[0] = prp_list;
+	iod->first_dma = prp_dma;
+
+	for (;;) {
+		dma_addr_t next_prp_dma;
+		__le64 *next_prp_list;
+
+		if (nprps_left <= last_prp + 1) {
+			memcpy(prp_list, &mapping->prps[i], nprps_left * 8);
+			break;
+		}
+
+		memcpy(prp_list, &mapping->prps[i],
+		       NVME_CTRL_PAGE_SIZE - 8);
+		nprps_left -= last_prp;
+		i += last_prp;
+
+		next_prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &next_prp_dma);
+		if (!next_prp_list)
+			goto free_prps;
+
+		prp_list[last_prp] = cpu_to_le64(next_prp_dma);
+		prp_list = next_prp_list;
+		prp_dma = next_prp_dma;
+		list[iod->npages++] = prp_list;
+	}
+	cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
+
+sync:
+	if (!needs_sync)
+		return BLK_STS_OK;
+
+	i = offset >> NVME_CTRL_PAGE_SHIFT;
+	for (j = 0; j < nprps; j++)
+		dma_sync_single_for_device(dev->dev,
+					   le64_to_cpu(mapping->prps[i++]),
+					   NVME_CTRL_PAGE_SIZE, DMA_TO_DEVICE);
+	return BLK_STS_OK;
+
+free_prps:
+	nvme_free_prps(dev, req);
+out_free_sg:
+	mempool_free(iod->sg, dev->iod_mempool);
+	return BLK_STS_RESOURCE;
+}
+
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
+	struct nvme_dma_mapping *mapping = blk_rq_dma_tag(req);
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	blk_status_t ret = BLK_STS_RESOURCE;
 	int nr_mapped;

+	if (mapping) {
+		iod->dma_len = 0;
+		iod->use_sgl = false;
+		return nvme_premapped(dev, req, &cmnd->rw, iod, mapping);
+	}
+
 	if (blk_rq_nr_phys_segments(req) == 1) {
 		struct bio_vec bv = req_bvec(req);
@@ -1732,6 +1904,112 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	return result;
 }

+#ifdef CONFIG_HAS_DMA
+/*
+ * Important: bvec must be describing a virtually contiguous buffer.
+ */
+static void *nvme_pci_dma_map(struct request_queue *q,
+			       struct bio_vec *bvec, int nr_vecs)
+{
+	const int nvme_pages = 1 << (PAGE_SHIFT - NVME_CTRL_PAGE_SHIFT);
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
+	struct nvme_dma_mapping *mapping;
+	int i, j, k, size, ppv, ret = -ENOMEM;
+
+	if (!nr_vecs)
+		return ERR_PTR(-EINVAL);
+
+	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+	if (!mapping)
+		return ERR_PTR(-ENOMEM);
+
+	mapping->nr_pages = nr_vecs * nvme_pages;
+	size = sizeof(*mapping->prps) * mapping->nr_pages;
+	mapping->prps = dma_alloc_coherent(dev->dev, size,
+				&mapping->prp_dma_addr, GFP_KERNEL);
+	if (!mapping->prps)
+		goto free_mapping;
+
+	for (i = 0, k = 0; i < nr_vecs; i++) {
+		struct bio_vec *bv = bvec + i;
+		dma_addr_t dma_addr;
+
+		ppv = nvme_pages;
+		if (i == 0) {
+			mapping->offset = bv->bv_offset;
+			ppv -= mapping->offset >> NVME_CTRL_PAGE_SHIFT;
+		} else if (bv->bv_offset) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		if (bv->bv_offset + bv->bv_len != PAGE_SIZE &&
+		    i < nr_vecs - 1) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		dma_addr = dma_map_bvec(dev->dev, bv, 0, 0);
+		if (dma_mapping_error(dev->dev, dma_addr)) {
+			ret = -EIO;
+			goto err;
+		}
+
+		if (i == 0)
+			dma_addr -= mapping->offset;
+
+		for (j = 0; j < ppv; j++)
+			mapping->prps[k++] = cpu_to_le64(dma_addr +
+						j * NVME_CTRL_PAGE_SIZE);
+	}
+
+	get_device(dev->dev);
+	return mapping;
+
+err:
+	for (i = 0; i < k; i += ppv) {
+		__u64 dma_addr = le64_to_cpu(mapping->prps[i]);
+		ppv = nvme_pages;
+
+		if (i == 0)
+			ppv -= mapping->offset >> NVME_CTRL_PAGE_SHIFT;
+		dma_unmap_page(dev->dev, dma_addr,
+			       PAGE_SIZE - offset_in_page(dma_addr), 0);
+	}
+
+	dma_free_coherent(dev->dev, size, (void *)mapping->prps,
+			  mapping->prp_dma_addr);
+free_mapping:
+	kfree(mapping);
+	return ERR_PTR(ret);
+}
+
+static void nvme_pci_dma_unmap(struct request_queue *q, void *dma_tag)
+{
+	const int nvme_pages = 1 << (PAGE_SHIFT - NVME_CTRL_PAGE_SHIFT);
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
+	struct nvme_dma_mapping *mapping = dma_tag;
+	int i, ppv;
+
+	for (i = 0; i < mapping->nr_pages; i += nvme_pages) {
+		__u64 dma_addr = le64_to_cpu(mapping->prps[i]);
+		ppv = nvme_pages;
+
+		if (i == 0)
+			ppv -= mapping->offset >> NVME_CTRL_PAGE_SHIFT;
+		dma_unmap_page(dev->dev, dma_addr,
+			       PAGE_SIZE - offset_in_page(dma_addr), 0);
+	}
+
+	dma_free_coherent(dev->dev, mapping->nr_pages * sizeof(*mapping->prps),
+			  (void *)mapping->prps, mapping->prp_dma_addr);
+	kfree(mapping);
+	put_device(dev->dev);
+}
+#endif
+
 static const struct blk_mq_ops nvme_mq_admin_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.complete	= nvme_pci_complete_rq,
@@ -1750,6 +2028,10 @@ static const struct blk_mq_ops nvme_mq_ops = {
 	.map_queues	= nvme_pci_map_queues,
 	.timeout	= nvme_timeout,
 	.poll		= nvme_poll,
+#ifdef CONFIG_HAS_DMA
+	.dma_map	= nvme_pci_dma_map,
+	.dma_unmap	= nvme_pci_dma_unmap,
+#endif
 };

 static void nvme_dev_remove_admin(struct nvme_dev *dev)
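[Editorial illustration] The index arithmetic in nvme_premapped() is dense; here is
a standalone worked example with concrete numbers, assuming a 4KB
NVME_CTRL_PAGE_SIZE. This is plain C arithmetic mirroring the patch's math, not
kernel code:

#include <stdio.h>

#define NVME_CTRL_PAGE_SHIFT	12
#define NVME_CTRL_PAGE_SIZE	(1 << NVME_CTRL_PAGE_SHIFT)

int main(void)
{
	/* IO starts 6KB into the registered buffer; the buffer itself
	 * began 512 bytes into its first page (mapping->offset). */
	unsigned rq_dma_offset = 6144;	/* blk_rq_dma_offset(req) */
	unsigned mapping_offset = 512;	/* mapping->offset */
	unsigned offset = rq_dma_offset + mapping_offset;

	unsigned i = offset >> NVME_CTRL_PAGE_SHIFT;		/* prps[] index: 1 */
	unsigned in_page = offset & (NVME_CTRL_PAGE_SIZE - 1);	/* 2560 */

	/* prp1 = prps[i] + in_page; the remaining bytes of a 16KB IO
	 * that need prp2 or a PRP list: */
	unsigned length = 16384 - (NVME_CTRL_PAGE_SIZE - in_page);
	unsigned nprps = (length + NVME_CTRL_PAGE_SIZE - 1) / NVME_CTRL_PAGE_SIZE;

	printf("first prp index %u, intra-page offset %u, %u more prps\n",
	       i, in_page, nprps);	/* prints: 1, 2560, 4 */
	return 0;
}

Because the premapped PRP entries are contiguous in the coherent prps[] array, the
common case where the remaining entries all sit inside one controller page lets the
driver point prp2 directly at that array, skipping the per-IO pool allocation the
commit message mentions.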