diff mbox

[13/14] nvme: use the changed namespaces list log to clear ns data changed AENs

Message ID 20180526102735.31404-14-hch@lst.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christoph Hellwig May 26, 2018, 10:27 a.m. UTC
Per section 5.2 we need to issue the corresponding log page to clear an
AEN, so for a namespace data changed AEN we need to read the changed
namespace list log.  And once we read that log anyway we might as well
use it to optimize the rescan.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 51 ++++++++++++++++++++++++++++++++++++----
 drivers/nvme/host/nvme.h |  2 ++
 2 files changed, 49 insertions(+), 4 deletions(-)

Comments

Johannes Thumshirn May 28, 2018, 6:59 a.m. UTC | #1
Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Keith Busch June 4, 2018, 7:59 p.m. UTC | #2
On Sat, May 26, 2018 at 12:27:34PM +0200, Christoph Hellwig wrote:
> Per section 5.2 we need to issue the corresponding log page to clear an
> AEN, so for a namespace data changed AEN we need to read the changed
> namespace list log.  And once we read that log anyway we might as well
> use it to optimize the rescan.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

I'm a little concerned about this. Userspace might be reading the same
log page. Since the contents of the page may change each time its read,
it's possible the driver will see only a subset of changed namespaces
at the point it gets to read the log page, missing a chance to
revalidate others.
Christoph Hellwig June 5, 2018, 4:47 a.m. UTC | #3
On Mon, Jun 04, 2018 at 01:59:09PM -0600, Keith Busch wrote:
> > Per section 5.2 we need to issue the corresponding log page to clear an
> > AEN, so for a namespace data changed AEN we need to read the changed
> > namespace list log.  And once we read that log anyway we might as well
> > use it to optimize the rescan.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> I'm a little concerned about this. Userspace might be reading the same
> log page. Since the contents of the page may change each time its read,
> it's possible the driver will see only a subset of changed namespaces
> at the point it gets to read the log page, missing a chance to
> revalidate others.

I agree with the concern, but I don't think it really is uniqueue here.
We allow userspace to send all kind sof harmful commands, even queue
for queue creation and deletion.

But let's assume we don't want to use the list due to this concern:
we'd still have to read the log page, as per the NVMe spec the only
think clearing a pending AEN is reading the associated log page.
We'd then need to still do the full scan using identify.  Is this what
we want?  If you think this is important for reliability we could
just ignore the log page.
Keith Busch June 5, 2018, 2:37 p.m. UTC | #4
On Tue, Jun 05, 2018 at 06:47:33AM +0200, Christoph Hellwig wrote:
> But let's assume we don't want to use the list due to this concern:
> we'd still have to read the log page, as per the NVMe spec the only
> think clearing a pending AEN is reading the associated log page.
> We'd then need to still do the full scan using identify.  Is this what
> we want?  If you think this is important for reliability we could
> just ignore the log page.

That sounds good. Let's have the driver read the log page to re-arm
the event as you've done, but don't rely on the contents for namespace
enumeration.

The rest of your series looks good to me.
diff mbox

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 06cd04dcffbc..1ae77428a1a5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3194,6 +3194,42 @@  static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
 	nvme_remove_invalid_namespaces(ctrl, nn);
 }
 
+static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+{
+	size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
+	__le32 *log;
+	int error, i;
+	bool ret = false;
+
+	log = kzalloc(log_size, GFP_KERNEL);
+	if (!log)
+		return false;
+
+	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+	if (error) {
+		dev_warn(ctrl->device,
+			"reading changed ns log failed: %d\n", error);
+		goto out_free_log;
+	}
+
+	if (log[0] == cpu_to_le32(0xffffffff))
+		goto out_free_log;
+
+	for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
+		u32 nsid = le32_to_cpu(log[i]);
+
+		if (nsid == 0)
+			break;
+		dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
+		nvme_validate_ns(ctrl, nsid);
+	}
+	ret = true;
+
+out_free_log:
+	kfree(log);
+	return ret;
+}
+
 static void nvme_scan_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
@@ -3206,6 +3242,12 @@  static void nvme_scan_work(struct work_struct *work)
 
 	WARN_ON_ONCE(!ctrl->tagset);
 
+	if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) {
+		if (nvme_scan_changed_ns_log(ctrl))
+			goto out_sort_namespaces;
+		dev_info(ctrl->device, "rescanning namespaces.\n");
+	}
+
 	if (nvme_identify_ctrl(ctrl, &id))
 		return;
 
@@ -3213,14 +3255,15 @@  static void nvme_scan_work(struct work_struct *work)
 	if (ctrl->vs >= NVME_VS(1, 1, 0) &&
 	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
 		if (!nvme_scan_ns_list(ctrl, nn))
-			goto done;
+			goto out_free_id;
 	}
 	nvme_scan_ns_sequential(ctrl, nn);
- done:
+out_free_id:
+	kfree(id);
+out_sort_namespaces:
 	down_write(&ctrl->namespaces_rwsem);
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
 	up_write(&ctrl->namespaces_rwsem);
-	kfree(id);
 }
 
 /*
@@ -3340,7 +3383,7 @@  static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 {
 	switch ((result & 0xff00) >> 8) {
 	case NVME_AER_NOTICE_NS_CHANGED:
-		dev_info(ctrl->device, "rescanning\n");
+		set_bit(EVENT_NS_CHANGED, &ctrl->events);
 		nvme_queue_scan(ctrl);
 		break;
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 11681278fdf6..07e8bfe705c6 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -189,6 +189,8 @@  struct nvme_ctrl {
 	struct delayed_work ka_work;
 	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
+#define EVENT_NS_CHANGED		(1 << 0)
+	unsigned long events;
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;