diff mbox series

[10/10] hw/block/nvme: support reset/finish recommended limits

Message ID 20200630100139.1483002-11-its@irrelevant.dk (mailing list archive)
State New, archived
Headers show
Series hw/block/nvme: namespace types and zoned namespaces | expand

Commit Message

Klaus Jensen June 30, 2020, 10:01 a.m. UTC
Add the rrl and frl device parameters. The parameters specify the number
of seconds before the device may perform an internal operation to
"clear" the Reset Zone Recommended and Finish Zone Recommended
attributes respectively.

When the attibutes are set is governed by the rrld and frld parameters
(Reset/Finish Recomended Limit Delay). The Reset Zone Recommended Delay
starts when a zone becomes full. The Finish Zone Recommended Delay
starts when the zone is first activated.  When the limits are reached,
the attributes are cleared again and the process is restarted.

If zone excursions are enabled (they are by default), when the Finish
Recommended Limit is reached, the device will finish the zone.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme-ns.c    | 105 ++++++++++++++++++++++++++++++++++++++++++
 hw/block/nvme-ns.h    |  13 ++++++
 hw/block/nvme.c       |  49 +++++++++++++-------
 hw/block/nvme.h       |   7 +++
 hw/block/trace-events |   3 +-
 5 files changed, 160 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 3b9fa91c7af8..7f9b1d526197 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -25,6 +25,7 @@ 
 #include "hw/qdev-properties.h"
 #include "hw/qdev-core.h"
 
+#include "trace.h"
 #include "nvme.h"
 #include "nvme-ns.h"
 
@@ -48,6 +49,91 @@  const char *nvme_zs_to_str(NvmeZoneState zs)
     return NULL;
 }
 
+static void nvme_ns_process_timer(void *opaque)
+{
+    NvmeNamespace *ns = opaque;
+    BusState *s = qdev_get_parent_bus(&ns->parent_obj);
+    NvmeCtrl *n = NVME(s->parent);
+    NvmeZone *zone;
+
+    trace_pci_nvme_ns_process_timer(ns->params.nsid);
+
+    int64_t next_timer = INT64_MAX, now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    QTAILQ_FOREACH(zone, &ns->zns.resources.lru_open, lru_entry) {
+        int64_t activated_ns = now - zone->stats.activated_ns;
+        if (activated_ns < ns->zns.frld_ns) {
+            next_timer = MIN(next_timer, zone->stats.activated_ns +
+                             ns->zns.frld_ns);
+
+            break;
+        }
+
+        if (activated_ns < ns->zns.frld_ns + ns->zns.frl_ns) {
+            NVME_ZA_SET_FZR(zone->zd.za, 0x1);
+            nvme_zone_changed(n, ns, zone);
+
+            next_timer = MIN(next_timer, now + ns->zns.frl_ns);
+
+            continue;
+        }
+
+        if (zone->wp_staging != le64_to_cpu(zone->zd.wp)) {
+            next_timer = now + 500;
+            continue;
+        }
+
+        nvme_zone_excursion(n, ns, zone, NULL);
+    }
+
+    QTAILQ_FOREACH(zone, &ns->zns.resources.lru_active, lru_entry) {
+        int64_t activated_ns = now - zone->stats.activated_ns;
+        if (activated_ns < ns->zns.frld_ns) {
+            next_timer = MIN(next_timer, zone->stats.activated_ns +
+                             ns->zns.frld_ns);
+
+            break;
+        }
+
+        if (activated_ns < ns->zns.frld_ns + ns->zns.frl_ns) {
+            NVME_ZA_SET_FZR(zone->zd.za, 0x1);
+            nvme_zone_changed(n, ns, zone);
+
+            next_timer = MIN(next_timer, now + ns->zns.frl_ns);
+
+            continue;
+        }
+
+        nvme_zone_excursion(n, ns, zone, NULL);
+    }
+
+    QTAILQ_FOREACH(zone, &ns->zns.lru_finished, lru_entry) {
+        int64_t finished_ns = now - zone->stats.finished_ns;
+        if (finished_ns < ns->zns.rrld_ns) {
+            next_timer = MIN(next_timer, zone->stats.finished_ns +
+                             ns->zns.rrld_ns);
+
+            break;
+        }
+
+        if (finished_ns < ns->zns.rrld_ns + ns->zns.rrl_ns) {
+            NVME_ZA_SET_RZR(zone->zd.za, 0x1);
+            nvme_zone_changed(n, ns, zone);
+
+            next_timer = MIN(next_timer, now + ns->zns.rrl_ns);
+
+            nvme_zone_changed(n, ns, zone);
+            continue;
+        }
+
+        NVME_ZA_SET_RZR(zone->zd.za, 0x0);
+    }
+
+    if (next_timer != INT64_MAX) {
+        timer_mod(ns->zns.timer, next_timer);
+    }
+}
+
 static int nvme_ns_blk_resize(BlockBackend *blk, size_t len, Error **errp)
 {
 	Error *local_err = NULL;
@@ -262,6 +348,21 @@  static void nvme_ns_init_zoned(NvmeNamespace *ns)
 
     id_ns->ncap = ns->zns.info.num_zones * ns->params.zns.zcap;
 
+    id_ns_zns->rrl = ns->params.zns.rrl;
+    id_ns_zns->frl = ns->params.zns.frl;
+
+    if (ns->params.zns.rrl || ns->params.zns.frl) {
+        ns->zns.rrl_ns = ns->params.zns.rrl * NANOSECONDS_PER_SECOND;
+        ns->zns.rrld_ns = ns->params.zns.rrld * NANOSECONDS_PER_SECOND;
+        ns->zns.frl_ns = ns->params.zns.frl * NANOSECONDS_PER_SECOND;
+        ns->zns.frld_ns = ns->params.zns.frld * NANOSECONDS_PER_SECOND;
+
+        ns->zns.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                     nvme_ns_process_timer, ns);
+
+        QTAILQ_INIT(&ns->zns.lru_finished);
+    }
+
     id_ns_zns->mar = cpu_to_le32(ns->params.zns.mar);
     id_ns_zns->mor = cpu_to_le32(ns->params.zns.mor);
 
@@ -515,6 +616,10 @@  static Property nvme_ns_props[] = {
     DEFINE_PROP_UINT16("zns.ozcs", NvmeNamespace, params.zns.ozcs, 0),
     DEFINE_PROP_UINT32("zns.mar", NvmeNamespace, params.zns.mar, 0xffffffff),
     DEFINE_PROP_UINT32("zns.mor", NvmeNamespace, params.zns.mor, 0xffffffff),
+    DEFINE_PROP_UINT32("zns.rrl", NvmeNamespace, params.zns.rrl, 0),
+    DEFINE_PROP_UINT32("zns.frl", NvmeNamespace, params.zns.frl, 0),
+    DEFINE_PROP_UINT32("zns.rrld", NvmeNamespace, params.zns.rrld, 0),
+    DEFINE_PROP_UINT32("zns.frld", NvmeNamespace, params.zns.frld, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 6acda5c2cf3f..f92045f19948 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -31,6 +31,10 @@  typedef struct NvmeNamespaceParams {
         uint16_t ozcs;
         uint32_t mar;
         uint32_t mor;
+        uint32_t rrl;
+        uint32_t frl;
+        uint32_t rrld;
+        uint32_t frld;
     } zns;
 } NvmeNamespaceParams;
 
@@ -40,6 +44,11 @@  typedef struct NvmeZone {
 
     uint64_t wp_staging;
 
+    struct {
+        int64_t activated_ns;
+        int64_t finished_ns;
+    } stats;
+
     QTAILQ_ENTRY(NvmeZone) lru_entry;
 } NvmeZone;
 
@@ -77,6 +86,10 @@  typedef struct NvmeNamespace {
         } resources;
 
         NvmeChangedZoneList changed_list;
+
+        QTAILQ_HEAD(, NvmeZone) lru_finished;
+        QEMUTimer *timer;
+        int64_t rrl_ns, rrld_ns, frl_ns, frld_ns;
     } zns;
 } NvmeNamespace;
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6db6daa62bc5..f28373feb887 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -875,13 +875,13 @@  static void nvme_process_aers(void *opaque)
     }
 }
 
-static void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns,
-                               uint8_t event_type, uint8_t event_info,
-                               uint8_t log_page)
+void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns, uint8_t event_type,
+                        uint8_t event_info, uint8_t log_page)
 {
     NvmeAsyncEvent *event;
 
-    trace_pci_nvme_enqueue_event(event_type, event_info, log_page);
+    trace_pci_nvme_enqueue_event(ns ? ns->params.nsid : -1, event_type,
+                                 event_info, log_page);
 
     if (n->aer_queued == n->params.aer_max_queued) {
         trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
@@ -1194,7 +1194,7 @@  static void nvme_update_zone_descr(NvmeNamespace *ns, NvmeRequest *req,
     nvme_req_add_aio(req, aio);
 }
 
-static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
 {
     uint16_t num_ids = le16_to_cpu(ns->zns.changed_list.num_ids);
 
@@ -1213,12 +1213,8 @@  static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
                        NVME_LOG_CHANGED_ZONE_LIST);
 }
 
-static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
-                                    NvmeZone *zone, NvmeZoneState to,
-                                    NvmeRequest *req);
-
-static void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
-    NvmeRequest *req)
+void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+                         NvmeRequest *req)
 {
     trace_pci_nvme_zone_excursion(ns->params.nsid, nvme_zslba(zone),
                                   nvme_zs_str(zone));
@@ -1226,6 +1222,7 @@  static void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
     assert(nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req) == NVME_SUCCESS);
 
     NVME_ZA_SET_ZFC(zone->zd.za, 0x1);
+    NVME_ZA_SET_FZR(zone->zd.za, 0x0);
 
     nvme_zone_changed(n, ns, zone);
 
@@ -1333,9 +1330,8 @@  out:
  * The function does NOT change the Zone Attribute field; this must be done by
  * the caller.
  */
-static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
-                                    NvmeZone *zone, NvmeZoneState to,
-                                    NvmeRequest *req)
+uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+                             NvmeZoneState to, NvmeRequest *req)
 {
     NvmeZoneState from = nvme_zs(zone);
     uint16_t status;
@@ -1366,7 +1362,7 @@  static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
 
             QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
 
-            goto out;
+            goto activated;
 
         case NVME_ZS_ZSIO:
         case NVME_ZS_ZSEO:
@@ -1389,7 +1385,7 @@  static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
 
             QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
 
-            goto out;
+            goto activated;
 
         default:
             return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
@@ -1512,8 +1508,28 @@  static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
         return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
     }
 
+activated:
+    zone->stats.activated_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    if (ns->params.zns.frld && !timer_pending(ns->zns.timer)) {
+        int64_t next_timer = zone->stats.activated_ns + ns->zns.frld_ns;
+        timer_mod(ns->zns.timer, next_timer);
+    }
+
 out:
     nvme_zs_set(zone, to);
+
+    if (to == NVME_ZS_ZSF && ns->params.zns.rrld) {
+        QTAILQ_INSERT_TAIL(&ns->zns.lru_finished, zone, lru_entry);
+
+        zone->stats.finished_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+        if (!timer_pending(ns->zns.timer)) {
+            int64_t next_timer = zone->stats.finished_ns + ns->zns.rrld_ns;
+            timer_mod(ns->zns.timer, next_timer);
+        }
+    }
+
     return NVME_SUCCESS;
 }
 
@@ -1979,6 +1995,7 @@  static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *n, NvmeRequest *req,
 
     case NVME_ZS_ZSRO:
         assert(!nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSO, req));
+
         nvme_update_zone_info(ns, req, zone);
         return NVME_NO_COMPLETE;
 
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 309fb1b94ecb..e51a38546080 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -318,5 +318,12 @@  static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
 }
 
 int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
+uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+                             NvmeZoneState to, NvmeRequest *req);
+void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns, uint8_t event_type,
+                        uint8_t event_info, uint8_t log_page);
+void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+                         NvmeRequest *req);
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone);
 
 #endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index c4c80644f782..249487ae79fc 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -85,7 +85,7 @@  pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
 pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
 pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
 pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
-pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_enqueue_event(uint32_t nsid, uint8_t typ, uint8_t info, uint8_t log_page) "nsid 0x%"PRIx32" type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
 pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
 pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
@@ -105,6 +105,7 @@  pci_nvme_zone_zrm_release_active(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsi
 pci_nvme_zone_zrm_excursion_not_allowed(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
 pci_nvme_zone_changed(uint32_t nsid, uint64_t zslba) "nsid %"PRIu32" zslba 0x%"PRIx64""
 pci_nvme_zone_excursion(uint32_t nsid, uint64_t zslba, const char *zc) "nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_ns_process_timer(uint32_t nsid) "nsid %"PRIu32""
 pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""