From patchwork Fri Nov 20 16:35:01 2015
From: Christoph Hellwig
To: keith.busch@intel.com, axboe@fb.com
Cc: linux-nvme@lists.infradead.org, linux-block@vger.kernel.org
Subject: [PATCH 06/47] blk-mq: add a flags parameter to blk_mq_alloc_request
Date: Fri, 20 Nov 2015 17:35:01 +0100
Message-Id: <1448037342-18384-7-git-send-email-hch@lst.de>
In-Reply-To: <1448037342-18384-1-git-send-email-hch@lst.de>
References: <1448037342-18384-1-git-send-email-hch@lst.de>

We already have the reserved flag, and a nowait flag awkwardly encoded as a
gfp_t. Add a real flags argument to make the scheme more extensible and
allow for a nicer calling convention.
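For callers the conversion is mechanical. As a rough illustration (a minimal
sketch based on the call sites converted below, not lifted verbatim from the
diff), a non-blocking allocation changes like this:

	#include <linux/blk-mq.h>

	/*
	 * Sketch of the conversion for a caller that must not sleep;
	 * "q" is any blk-mq request_queue, error handling trimmed.
	 */
	static struct request *alloc_rq_nowait(struct request_queue *q, int rw)
	{
		/* old convention: sleeping behaviour hidden in a gfp_t, reserved as a bool */
		/* return blk_mq_alloc_request(q, rw, GFP_ATOMIC, false); */

		/* new convention: explicit, extensible flags */
		return blk_mq_alloc_request(q, rw, BLK_MQ_REQ_NOWAIT);
	}

A blocking allocation simply passes 0, and reserved tags are requested with
BLK_MQ_REQ_RESERVED instead of the old bool argument.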
Signed-off-by: Christoph Hellwig --- block/blk-core.c | 11 +- block/blk-mq-tag.c | 11 +- block/blk-mq.c | 20 +- block/blk-mq.h | 11 +- block/blk.h | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/nvme/host/core.c | 1172 +++++++++++++++++++++++++++++++++++++ drivers/nvme/host/pci.c | 11 +- include/linux/blk-mq.h | 8 +- 9 files changed, 1210 insertions(+), 38 deletions(-) create mode 100644 drivers/nvme/host/core.c diff --git a/block/blk-core.c b/block/blk-core.c index af9c315..d2100aa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -630,7 +630,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask) } EXPORT_SYMBOL(blk_alloc_queue); -int blk_queue_enter(struct request_queue *q, gfp_t gfp) +int blk_queue_enter(struct request_queue *q, bool nowait) { while (true) { int ret; @@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp) if (percpu_ref_tryget_live(&q->q_usage_counter)) return 0; - if (!gfpflags_allow_blocking(gfp)) + if (nowait) return -EBUSY; ret = wait_event_interruptible(q->mq_freeze_wq, @@ -1284,7 +1284,9 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { if (q->mq_ops) - return blk_mq_alloc_request(q, rw, gfp_mask, false); + return blk_mq_alloc_request(q, rw, + (gfp_mask & __GFP_DIRECT_RECLAIM) ? + 0 : BLK_MQ_REQ_NOWAIT); else return blk_old_get_request(q, rw, gfp_mask); } @@ -2052,8 +2054,7 @@ blk_qc_t generic_make_request(struct bio *bio) do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { - + if (likely(blk_queue_enter(q, false) == 0)) { ret = q->make_request_fn(q, bio); blk_queue_exit(q); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index a07ca34..abdbb47 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data, if (tag != -1) return tag; - if (!gfpflags_allow_blocking(data->gfp)) + if (data->flags & BLK_MQ_REQ_NOWAIT) return -1; bs = bt_wait_ptr(bt, hctx); @@ -303,7 +303,7 @@ static int bt_get(struct blk_mq_alloc_data *data, data->ctx = blk_mq_get_ctx(data->q); data->hctx = data->q->mq_ops->map_queue(data->q, data->ctx->cpu); - if (data->reserved) { + if (data->flags & BLK_MQ_REQ_RESERVED) { bt = &data->hctx->tags->breserved_tags; } else { last_tag = &data->ctx->last_tag; @@ -349,10 +349,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data) unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) { - if (!data->reserved) - return __blk_mq_get_tag(data); - - return __blk_mq_get_reserved_tag(data); + if (data->flags & BLK_MQ_REQ_RESERVED) + return __blk_mq_get_reserved_tag(data); + return __blk_mq_get_tag(data); } static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) diff --git a/block/blk-mq.c b/block/blk-mq.c index c932605..6da03f1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -230,8 +230,8 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) return NULL; } -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, - bool reserved) +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, + unsigned int flags) { struct blk_mq_ctx *ctx; struct blk_mq_hw_ctx *hctx; @@ -239,24 +239,22 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, struct blk_mq_alloc_data alloc_data; int ret; - ret = blk_queue_enter(q, gfp); + ret = blk_queue_enter(q, flags & 
BLK_MQ_REQ_NOWAIT); if (ret) return ERR_PTR(ret); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM, - reserved, ctx, hctx); + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); - if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) { + if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx, - hctx); + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); ctx = alloc_data.ctx; } @@ -1181,8 +1179,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, rw |= REQ_SYNC; trace_block_getrq(q, bio, rw); - blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx, - hctx); + blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); if (unlikely(!rq)) { __blk_mq_run_hw_queue(hctx); @@ -1191,8 +1188,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - blk_mq_set_alloc_data(&alloc_data, q, - __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx); + blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); ctx = alloc_data.ctx; hctx = alloc_data.hctx; diff --git a/block/blk-mq.h b/block/blk-mq.h index 713820b..eaede8e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -96,8 +96,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) struct blk_mq_alloc_data { /* input parameter */ struct request_queue *q; - gfp_t gfp; - bool reserved; + unsigned int flags; /* input & output parameter */ struct blk_mq_ctx *ctx; @@ -105,13 +104,11 @@ struct blk_mq_alloc_data { }; static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, - struct request_queue *q, gfp_t gfp, bool reserved, - struct blk_mq_ctx *ctx, - struct blk_mq_hw_ctx *hctx) + struct request_queue *q, unsigned int flags, + struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx) { data->q = q; - data->gfp = gfp; - data->reserved = reserved; + data->flags = flags; data->ctx = ctx; data->hctx = hctx; } diff --git a/block/blk.h b/block/blk.h index 1d95107..38bf997 100644 --- a/block/blk.h +++ b/block/blk.h @@ -72,7 +72,7 @@ void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); -int blk_queue_enter(struct request_queue *q, gfp_t gfp); +int blk_queue_enter(struct request_queue *q, bool nowait); void blk_queue_exit(struct request_queue *q); void blk_freeze_queue(struct request_queue *q); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a28a562..cf3b51a 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; - rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true); + rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED); return blk_mq_rq_to_pdu(rq); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c new file mode 100644 index 0000000..53cf507 --- /dev/null +++ b/drivers/nvme/host/core.c @@ -0,0 +1,1172 @@ +/* + * NVM Express device driver + * Copyright (c) 
2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvme.h" + +#define NVME_MINORS (1U << MINORBITS) + +static int nvme_major; +module_param(nvme_major, int, 0); + +static int nvme_char_major; +module_param(nvme_char_major, int, 0); + +static LIST_HEAD(nvme_ctrl_list); +DEFINE_SPINLOCK(dev_list_lock); + +static struct class *nvme_class; + +static void nvme_free_ns(struct kref *kref) +{ + struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); + + if (ns->type == NVME_NS_LIGHTNVM) + nvme_nvm_unregister(ns->queue, ns->disk->disk_name); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + nvme_put_ctrl(ns->ctrl); + put_disk(ns->disk); + kfree(ns); +} + +static void nvme_put_ns(struct nvme_ns *ns) +{ + kref_put(&ns->kref, nvme_free_ns); +} + +static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) +{ + struct nvme_ns *ns; + + spin_lock(&dev_list_lock); + ns = disk->private_data; + if (ns && !kref_get_unless_zero(&ns->kref)) + ns = NULL; + spin_unlock(&dev_list_lock); + + return ns; +} + +static struct request *nvme_alloc_request(struct request_queue *q, + struct nvme_command *cmd) +{ + bool write = cmd->common.opcode & 1; + struct request *req; + + req = blk_mq_alloc_request(q, write, 0); + if (IS_ERR(req)) + return req; + + req->cmd_type = REQ_TYPE_DRV_PRIV; + req->cmd_flags |= REQ_FAILFAST_DRIVER; + req->__data_len = 0; + req->__sector = (sector_t) -1; + req->bio = req->biotail = NULL; + + req->cmd = (unsigned char *)cmd; + req->cmd_len = sizeof(struct nvme_command); + req->special = (void *)0; + + return req; +} + +/* + * Returns 0 on success. If the result is negative, it's a Linux error code; + * if the result is positive, it's an NVM Express status code + */ +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, unsigned bufflen, u32 *result, unsigned timeout) +{ + struct request *req; + int ret; + + req = nvme_alloc_request(q, cmd); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + + if (buffer && bufflen) { + ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); + if (ret) + goto out; + } + + blk_execute_rq(req->q, NULL, req, 0); + if (result) + *result = (u32)(uintptr_t)req->special; + ret = req->errors; + out: + blk_mq_free_request(req); + return ret; +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, unsigned bufflen) +{ + return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0); +} + +int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, + void __user *ubuffer, unsigned bufflen, + void __user *meta_buffer, unsigned meta_len, u32 meta_seed, + u32 *result, unsigned timeout) +{ + bool write = cmd->common.opcode & 1; + struct nvme_ns *ns = q->queuedata; + struct gendisk *disk = ns ? 
ns->disk : NULL; + struct request *req; + struct bio *bio = NULL; + void *meta = NULL; + int ret; + + req = nvme_alloc_request(q, cmd); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + + if (ubuffer && bufflen) { + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, + GFP_KERNEL); + if (ret) + goto out; + bio = req->bio; + + if (!disk) + goto submit; + bio->bi_bdev = bdget_disk(disk, 0); + if (!bio->bi_bdev) { + ret = -ENODEV; + goto out_unmap; + } + + if (meta_buffer) { + struct bio_integrity_payload *bip; + + meta = kmalloc(meta_len, GFP_KERNEL); + if (!meta) { + ret = -ENOMEM; + goto out_unmap; + } + + if (write) { + if (copy_from_user(meta, meta_buffer, + meta_len)) { + ret = -EFAULT; + goto out_free_meta; + } + } + + bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); + if (!bip) { + ret = -ENOMEM; + goto out_free_meta; + } + + bip->bip_iter.bi_size = meta_len; + bip->bip_iter.bi_sector = meta_seed; + + ret = bio_integrity_add_page(bio, virt_to_page(meta), + meta_len, offset_in_page(meta)); + if (ret != meta_len) { + ret = -ENOMEM; + goto out_free_meta; + } + } + } + submit: + blk_execute_rq(req->q, disk, req, 0); + ret = req->errors; + if (result) + *result = (u32)(uintptr_t)req->special; + if (meta && !ret && !write) { + if (copy_to_user(meta_buffer, meta, meta_len)) + ret = -EFAULT; + } + out_free_meta: + kfree(meta); + out_unmap: + if (bio) { + if (disk && bio->bi_bdev) + bdput(bio->bi_bdev); + blk_rq_unmap_user(bio); + } + out: + blk_mq_free_request(req); + return ret; +} + +int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, + void __user *ubuffer, unsigned bufflen, u32 *result, + unsigned timeout) +{ + return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, + result, timeout); +} + +int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) +{ + struct nvme_command c = { }; + int error; + + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify; + c.identify.cns = cpu_to_le32(1); + + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ctrl)); + if (error) + kfree(*id); + return error; +} + +static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) +{ + struct nvme_command c = { }; + + c.identify.opcode = nvme_admin_identify; + c.identify.cns = cpu_to_le32(2); + c.identify.nsid = cpu_to_le32(nsid); + return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); +} + +int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, + struct nvme_id_ns **id) +{ + struct nvme_command c = { }; + int error; + + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify, + c.identify.nsid = cpu_to_le32(nsid), + + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ns)); + if (error) + kfree(*id); + return error; +} + +int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.features.opcode = nvme_admin_get_features; + c.features.nsid = cpu_to_le32(nsid); + c.features.prp1 = cpu_to_le64(dma_addr); + c.features.fid = cpu_to_le32(fid); + + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); +} + +int 
nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.features.opcode = nvme_admin_set_features; + c.features.prp1 = cpu_to_le64(dma_addr); + c.features.fid = cpu_to_le32(fid); + c.features.dword11 = cpu_to_le32(dword11); + + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); +} + +int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log) +{ + struct nvme_command c = { }; + int error; + + c.common.opcode = nvme_admin_get_log_page, + c.common.nsid = cpu_to_le32(0xFFFFFFFF), + c.common.cdw10[0] = cpu_to_le32( + (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | + NVME_LOG_SMART), + + *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); + if (!*log) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, + sizeof(struct nvme_smart_log)); + if (error) + kfree(*log); + return error; +} + +static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) +{ + struct nvme_user_io io; + struct nvme_command c; + unsigned length, meta_len; + void __user *metadata; + + if (copy_from_user(&io, uio, sizeof(io))) + return -EFAULT; + + switch (io.opcode) { + case nvme_cmd_write: + case nvme_cmd_read: + case nvme_cmd_compare: + break; + default: + return -EINVAL; + } + + length = (io.nblocks + 1) << ns->lba_shift; + meta_len = (io.nblocks + 1) * ns->ms; + metadata = (void __user *)(uintptr_t)io.metadata; + + if (ns->ext) { + length += meta_len; + meta_len = 0; + } else if (meta_len) { + if ((io.metadata & 3) || !io.metadata) + return -EINVAL; + } + + memset(&c, 0, sizeof(c)); + c.rw.opcode = io.opcode; + c.rw.flags = io.flags; + c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.slba = cpu_to_le64(io.slba); + c.rw.length = cpu_to_le16(io.nblocks); + c.rw.control = cpu_to_le16(io.control); + c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); + c.rw.reftag = cpu_to_le32(io.reftag); + c.rw.apptag = cpu_to_le16(io.apptag); + c.rw.appmask = cpu_to_le16(io.appmask); + + return __nvme_submit_user_cmd(ns->queue, &c, + (void __user *)(uintptr_t)io.addr, length, + metadata, meta_len, io.slba, NULL, 0); +} + +static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd __user *ucmd) +{ + struct nvme_passthru_cmd cmd; + struct nvme_command c; + unsigned timeout = 0; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); + c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); + c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); + c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); + c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); + c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + status = nvme_submit_user_cmd(ns ? 
ns->queue : ctrl->admin_q, &c, + (void __user *)cmd.addr, cmd.data_len, + &cmd.result, timeout); + if (status >= 0) { + if (put_user(cmd.result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + + switch (cmd) { + case NVME_IOCTL_ID: + force_successful_syscall_return(); + return ns->ns_id; + case NVME_IOCTL_ADMIN_CMD: + return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); + case NVME_IOCTL_IO_CMD: + return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); + case NVME_IOCTL_SUBMIT_IO: + return nvme_submit_io(ns, (void __user *)arg); + case SG_GET_VERSION_NUM: + return nvme_sg_get_version_num((void __user *)arg); + case SG_IO: + return nvme_sg_io(ns, (void __user *)arg); + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SG_IO: + return -ENOIOCTLCMD; + } + return nvme_ioctl(bdev, mode, cmd, arg); +} +#else +#define nvme_compat_ioctl NULL +#endif + +static int nvme_open(struct block_device *bdev, fmode_t mode) +{ + return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; +} + +static void nvme_release(struct gendisk *disk, fmode_t mode) +{ + nvme_put_ns(disk->private_data); +} + +static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + /* some standard values */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bdev->bd_disk) >> 11; + return 0; +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvme_init_integrity(struct nvme_ns *ns) +{ + struct blk_integrity integrity; + + switch (ns->pi_type) { + case NVME_NS_DPS_PI_TYPE3: + integrity.profile = &t10_pi_type3_crc; + break; + case NVME_NS_DPS_PI_TYPE1: + case NVME_NS_DPS_PI_TYPE2: + integrity.profile = &t10_pi_type1_crc; + break; + default: + integrity.profile = NULL; + break; + } + integrity.tuple_size = ns->ms; + blk_integrity_register(ns->disk, &integrity); + blk_queue_max_integrity_segments(ns->queue, 1); +} +#else +static void nvme_init_integrity(struct nvme_ns *ns) +{ +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + +static void nvme_config_discard(struct nvme_ns *ns) +{ + u32 logical_block_size = queue_logical_block_size(ns->queue); + ns->queue->limits.discard_zeroes_data = 0; + ns->queue->limits.discard_alignment = logical_block_size; + ns->queue->limits.discard_granularity = logical_block_size; + blk_queue_max_discard_sectors(ns->queue, 0xffffffff); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); +} + +static int nvme_revalidate_disk(struct gendisk *disk) +{ + struct nvme_ns *ns = disk->private_data; + struct nvme_id_ns *id; + u8 lbaf, pi_type; + u16 old_ms; + unsigned short bs; + + if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { + dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", + __func__, ns->ctrl->instance, ns->ns_id); + return -ENODEV; + } + if (id->ncap == 0) { + kfree(id); + return -ENODEV; + } + + if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { + if (nvme_nvm_register(ns->queue, disk->disk_name)) { + dev_warn(ns->ctrl->dev, + "%s: LightNVM init failure\n", __func__); + kfree(id); + return -ENODEV; + } + ns->type = NVME_NS_LIGHTNVM; + } + + old_ms = ns->ms; + lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; + ns->lba_shift = id->lbaf[lbaf].ds; + ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + ns->ext = ns->ms && (id->flbas & 
NVME_NS_FLBAS_META_EXT); + + /* + * If identify namespace failed, use default 512 byte block size so + * block layer can use before failing read/write for 0 capacity. + */ + if (ns->lba_shift == 0) + ns->lba_shift = 9; + bs = 1 << ns->lba_shift; + /* XXX: PI implementation requires metadata equal t10 pi tuple size */ + pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? + id->dps & NVME_NS_DPS_PI_MASK : 0; + + blk_mq_freeze_queue(disk->queue); + if (blk_get_integrity(disk) && (ns->pi_type != pi_type || + ns->ms != old_ms || + bs != queue_logical_block_size(disk->queue) || + (ns->ms && ns->ext))) + blk_integrity_unregister(disk); + + ns->pi_type = pi_type; + blk_queue_logical_block_size(ns->queue, bs); + + if (ns->ms && !blk_get_integrity(disk) && !ns->ext) + nvme_init_integrity(ns); + if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) + set_capacity(disk, 0); + else + set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); + + if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) + nvme_config_discard(ns); + blk_mq_unfreeze_queue(disk->queue); + + kfree(id); + return 0; +} + +static char nvme_pr_type(enum pr_type type) +{ + switch (type) { + case PR_WRITE_EXCLUSIVE: + return 1; + case PR_EXCLUSIVE_ACCESS: + return 2; + case PR_WRITE_EXCLUSIVE_REG_ONLY: + return 3; + case PR_EXCLUSIVE_ACCESS_REG_ONLY: + return 4; + case PR_WRITE_EXCLUSIVE_ALL_REGS: + return 5; + case PR_EXCLUSIVE_ACCESS_ALL_REGS: + return 6; + default: + return 0; + } +}; + +static int nvme_pr_command(struct block_device *bdev, u32 cdw10, + u64 key, u64 sa_key, u8 op) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + struct nvme_command c; + u8 data[16] = { 0, }; + + put_unaligned_le64(key, &data[0]); + put_unaligned_le64(sa_key, &data[8]); + + memset(&c, 0, sizeof(c)); + c.common.opcode = op; + c.common.nsid = cpu_to_le32(ns->ns_id); + c.common.cdw10[0] = cpu_to_le32(cdw10); + + return nvme_submit_sync_cmd(ns->queue, &c, data, 16); +} + +static int nvme_pr_register(struct block_device *bdev, u64 old, + u64 new, unsigned flags) +{ + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + cdw10 = old ? 2 : 0; + cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; + cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); +} + +static int nvme_pr_reserve(struct block_device *bdev, u64 key, + enum pr_type type, unsigned flags) +{ + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + cdw10 = nvme_pr_type(type) << 8; + cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); +} + +static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, + enum pr_type type, bool abort) +{ + u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); +} + +static int nvme_pr_clear(struct block_device *bdev, u64 key) +{ + u32 cdw10 = 1 | key ? 1 << 3 : 0; + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); +} + +static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) +{ + u32 cdw10 = nvme_pr_type(type) << 8 | key ? 
1 << 3 : 0; + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); +} + +static const struct pr_ops nvme_pr_ops = { + .pr_register = nvme_pr_register, + .pr_reserve = nvme_pr_reserve, + .pr_release = nvme_pr_release, + .pr_preempt = nvme_pr_preempt, + .pr_clear = nvme_pr_clear, +}; + +static const struct block_device_operations nvme_fops = { + .owner = THIS_MODULE, + .ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, + .open = nvme_open, + .release = nvme_release, + .getgeo = nvme_getgeo, + .revalidate_disk= nvme_revalidate_disk, + .pr_ops = &nvme_pr_ops, +}; + +/* + * Initialize the cached copies of the Identify data and various controller + * register in our nvme_ctrl structure. This should be called as soon as + * the admin queue is fully up and running. + */ +int nvme_init_identify(struct nvme_ctrl *ctrl) +{ + struct nvme_id_ctrl *id; + u64 cap; + int ret, page_shift; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); + if (ret) { + dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret); + return ret; + } + + ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap); + if (ret) { + dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret); + return ret; + } + page_shift = NVME_CAP_MPSMIN(cap) + 12; + ctrl->page_size = 1 << page_shift; + + if (ctrl->vs >= NVME_VS(1, 1)) + ctrl->subsystem = NVME_CAP_NSSRC(cap); + + ret = nvme_identify_ctrl(ctrl, &id); + if (ret) { + dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret); + return -EIO; + } + + ctrl->oncs = le16_to_cpup(&id->oncs); + atomic_set(&ctrl->abort_limit, id->acl + 1); + ctrl->vwc = id->vwc; + memcpy(ctrl->serial, id->sn, sizeof(id->sn)); + memcpy(ctrl->model, id->mn, sizeof(id->mn)); + memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr)); + if (id->mdts) + ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9); + else + ctrl->max_hw_sectors = UINT_MAX; + + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) { + unsigned int max_hw_sectors; + + ctrl->stripe_size = 1 << (id->vs[3] + page_shift); + max_hw_sectors = ctrl->stripe_size >> (page_shift - 9); + if (ctrl->max_hw_sectors) { + ctrl->max_hw_sectors = min(max_hw_sectors, + ctrl->max_hw_sectors); + } else { + ctrl->max_hw_sectors = max_hw_sectors; + } + } + + kfree(id); + return 0; +} + +static int nvme_dev_open(struct inode *inode, struct file *file) +{ + struct nvme_ctrl *ctrl; + int instance = iminor(inode); + int ret = -ENODEV; + + spin_lock(&dev_list_lock); + list_for_each_entry(ctrl, &nvme_ctrl_list, node) { + if (ctrl->instance != instance) + continue; + + if (!ctrl->admin_q) { + ret = -EWOULDBLOCK; + break; + } + if (!kref_get_unless_zero(&ctrl->kref)) + break; + file->private_data = ctrl; + ret = 0; + break; + } + spin_unlock(&dev_list_lock); + + return ret; +} + +static int nvme_dev_release(struct inode *inode, struct file *file) +{ + nvme_put_ctrl(file->private_data); + return 0; +} + +static long nvme_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct nvme_ctrl *ctrl = file->private_data; + void __user *argp = (void __user *)arg; + struct nvme_ns *ns; + + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_IO_CMD: + if (list_empty(&ctrl->namespaces)) + return -ENOTTY; + ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); + return nvme_user_cmd(ctrl, ns, argp); + case NVME_IOCTL_RESET: + dev_warn(ctrl->dev, "resetting controller\n"); + return ctrl->ops->reset_ctrl(ctrl); + case NVME_IOCTL_SUBSYS_RESET: + return nvme_reset_subsystem(ctrl); + 
default: + return -ENOTTY; + } +} + +static const struct file_operations nvme_dev_fops = { + .owner = THIS_MODULE, + .open = nvme_dev_open, + .release = nvme_dev_release, + .unlocked_ioctl = nvme_dev_ioctl, + .compat_ioctl = nvme_dev_ioctl, +}; + +static ssize_t nvme_sysfs_reset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + int ret; + + ret = ctrl->ops->reset_ctrl(ctrl); + if (ret < 0) + return ret; + return count; +} +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); + +static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); + struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); + + return nsa->ns_id - nsb->ns_id; +} + +static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &ctrl->namespaces, list) { + if (ns->ns_id == nsid) + return ns; + if (ns->ns_id > nsid) + break; + } + return NULL; +} + +static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) +{ + struct nvme_ns *ns; + struct gendisk *disk; + int node = dev_to_node(ctrl->dev); + + ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); + if (!ns) + return; + + ns->queue = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ns->queue)) + goto out_free_ns; + queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); + ns->queue->queuedata = ns; + ns->ctrl = ctrl; + + disk = alloc_disk_node(0, node); + if (!disk) + goto out_free_queue; + + kref_init(&ns->kref); + ns->ns_id = nsid; + ns->disk = disk; + ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ + + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); + if (ctrl->max_hw_sectors) { + blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors); + blk_queue_max_segments(ns->queue, + (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1); + } + if (ctrl->stripe_size) + blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9); + if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) + blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); + blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1); + + disk->major = nvme_major; + disk->first_minor = 0; + disk->fops = &nvme_fops; + disk->private_data = ns; + disk->queue = ns->queue; + disk->driverfs_dev = ctrl->device; + disk->flags = GENHD_FL_EXT_DEVT; + sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid); + + if (nvme_revalidate_disk(ns->disk)) + goto out_free_disk; + + list_add_tail(&ns->list, &ctrl->namespaces); + kref_get(&ctrl->kref); + if (ns->type != NVME_NS_LIGHTNVM) + add_disk(ns->disk); + + return; + out_free_disk: + kfree(disk); + out_free_queue: + blk_cleanup_queue(ns->queue); + out_free_ns: + kfree(ns); +} + +static void nvme_ns_remove(struct nvme_ns *ns) +{ + bool kill = nvme_io_incapable(ns->ctrl) && + !blk_queue_dying(ns->queue); + + if (kill) + blk_set_queue_dying(ns->queue); + if (ns->disk->flags & GENHD_FL_UP) { + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); + del_gendisk(ns->disk); + } + if (kill || !blk_queue_dying(ns->queue)) { + blk_mq_abort_requeue_list(ns->queue); + blk_cleanup_queue(ns->queue); + } + list_del_init(&ns->list); + nvme_put_ns(ns); +} + +static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) +{ + struct nvme_ns *ns; + + ns = nvme_find_ns(ctrl, nsid); + if (ns) { + if (revalidate_disk(ns->disk)) + 
nvme_ns_remove(ns); + } else + nvme_alloc_ns(ctrl, nsid); +} + +static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) +{ + struct nvme_ns *ns; + __le32 *ns_list; + unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); + int ret = 0; + + ns_list = kzalloc(0x1000, GFP_KERNEL); + if (!ns_list) + return -ENOMEM; + + for (i = 0; i < num_lists; i++) { + ret = nvme_identify_ns_list(ctrl, prev, ns_list); + if (ret) + goto out; + + for (j = 0; j < min(nn, 1024U); j++) { + nsid = le32_to_cpu(ns_list[j]); + if (!nsid) + goto out; + + nvme_validate_ns(ctrl, nsid); + + while (++prev < nsid) { + ns = nvme_find_ns(ctrl, prev); + if (ns) + nvme_ns_remove(ns); + } + } + nn -= j; + } + out: + kfree(ns_list); + return ret; +} + +static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn) +{ + struct nvme_ns *ns, *next; + unsigned i; + + for (i = 1; i <= nn; i++) + nvme_validate_ns(ctrl, i); + + list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { + if (ns->ns_id > nn) + nvme_ns_remove(ns); + } +} + +void nvme_scan_namespaces(struct nvme_ctrl *ctrl) +{ + struct nvme_id_ctrl *id; + unsigned nn; + + if (nvme_identify_ctrl(ctrl, &id)) + return; + + nn = le32_to_cpu(id->nn); + if (ctrl->vs >= NVME_VS(1, 1)) { + if (!nvme_scan_ns_list(ctrl, nn)) + goto done; + } + __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn)); + done: + list_sort(NULL, &ctrl->namespaces, ns_cmp); + kfree(id); +} + +void nvme_remove_namespaces(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns, *next; + + list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) + nvme_ns_remove(ns); +} + +static DEFINE_IDA(nvme_instance_ida); + +static int nvme_set_instance(struct nvme_ctrl *ctrl) +{ + int instance, error; + + do { + if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) + return -ENODEV; + + spin_lock(&dev_list_lock); + error = ida_get_new(&nvme_instance_ida, &instance); + spin_unlock(&dev_list_lock); + } while (error == -EAGAIN); + + if (error) + return -ENODEV; + + ctrl->instance = instance; + return 0; +} + +static void nvme_release_instance(struct nvme_ctrl *ctrl) +{ + spin_lock(&dev_list_lock); + ida_remove(&nvme_instance_ida, ctrl->instance); + spin_unlock(&dev_list_lock); +} + +void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) + { + device_remove_file(ctrl->device, &dev_attr_reset_controller); + device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); + + spin_lock(&dev_list_lock); + list_del(&ctrl->node); + spin_unlock(&dev_list_lock); +} + +static void nvme_free_ctrl(struct kref *kref) +{ + struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); + + put_device(ctrl->device); + nvme_release_instance(ctrl); + + ctrl->ops->free_ctrl(ctrl); +} + +void nvme_put_ctrl(struct nvme_ctrl *ctrl) +{ + kref_put(&ctrl->kref, nvme_free_ctrl); +} + +/* + * Initialize a NVMe controller structures. This needs to be called during + * earliest initialization so that we have the initialized structured around + * during probing. 
+ */ +int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, + const struct nvme_ctrl_ops *ops, u16 vendor, + unsigned long quirks) +{ + int ret; + + INIT_LIST_HEAD(&ctrl->namespaces); + kref_init(&ctrl->kref); + ctrl->dev = dev; + ctrl->ops = ops; + ctrl->vendor = vendor; + ctrl->quirks = quirks; + + ret = nvme_set_instance(ctrl); + if (ret) + goto out; + + ctrl->device = device_create(nvme_class, ctrl->dev, + MKDEV(nvme_char_major, ctrl->instance), + dev, "nvme%d", ctrl->instance); + if (IS_ERR(ctrl->device)) { + ret = PTR_ERR(ctrl->device); + goto out_release_instance; + } + get_device(ctrl->device); + dev_set_drvdata(ctrl->device, ctrl); + + ret = device_create_file(ctrl->device, &dev_attr_reset_controller); + if (ret) + goto out_put_device; + + spin_lock(&dev_list_lock); + list_add_tail(&ctrl->node, &nvme_ctrl_list); + spin_unlock(&dev_list_lock); + + return 0; + +out_put_device: + put_device(ctrl->device); + device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); +out_release_instance: + nvme_release_instance(ctrl); +out: + return ret; +} + +int __init nvme_core_init(void) +{ + int result; + + result = register_blkdev(nvme_major, "nvme"); + if (result < 0) + return result; + else if (result > 0) + nvme_major = result; + + result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", + &nvme_dev_fops); + if (result < 0) + goto unregister_blkdev; + else if (result > 0) + nvme_char_major = result; + + nvme_class = class_create(THIS_MODULE, "nvme"); + if (IS_ERR(nvme_class)) { + result = PTR_ERR(nvme_class); + goto unregister_chrdev; + } + + return 0; + + unregister_chrdev: + __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); + unregister_blkdev: + unregister_blkdev(nvme_major, "nvme"); + return result; +} + +void nvme_core_exit(void) +{ + unregister_blkdev(nvme_major, "nvme"); + class_destroy(nvme_class); + __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); +} diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9444884..5c5f455 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1040,7 +1040,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, struct request *req; int ret; - req = blk_mq_alloc_request(q, write, GFP_KERNEL, false); + req = blk_mq_alloc_request(q, write, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -1093,7 +1093,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) struct nvme_cmd_info *cmd_info; struct request *req; - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true); + req = blk_mq_alloc_request(dev->admin_q, WRITE, + BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED); if (IS_ERR(req)) return PTR_ERR(req); @@ -1118,7 +1119,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, struct request *req; struct nvme_cmd_info *cmd_rq; - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); + req = blk_mq_alloc_request(dev->admin_q, WRITE, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -1319,8 +1320,8 @@ static void nvme_abort_req(struct request *req) if (!dev->abort_limit) return; - abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, - false); + abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, + BLK_MQ_REQ_NOWAIT); if (IS_ERR(abort_req)) return; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index daf17d7..7fc9296 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -188,8 +188,14 @@ void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_free_request(struct 
request *rq); void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); + +enum { + BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ + BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ +}; + struct request *blk_mq_alloc_request(struct request_queue *q, int rw, - gfp_t gfp, bool reserved); + unsigned int flags); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
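
The two flags can also be combined, which is what the NVMe async event
submission above relies on: allocate from the reserved pool without sleeping.
A minimal sketch of that pattern (assuming the queue's tag set actually sets
aside reserved tags, and leaving the failure policy to the caller):

	#include <linux/blkdev.h>
	#include <linux/blk-mq.h>
	#include <linux/err.h>

	static struct request *alloc_reserved_rq_nowait(struct request_queue *admin_q)
	{
		struct request *req;

		/* reserved tag, and fail instead of waiting if none is free */
		req = blk_mq_alloc_request(admin_q, WRITE,
				BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
		if (IS_ERR(req))
			return NULL;	/* no reserved tag available right now */
		return req;
	}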