@@ -118,6 +118,20 @@ static int nvme_calc_zone_geometry(NvmeNamespace *ns, Error **errp)
ns->zone_size_log2 = 63 - clz64(ns->zone_size);
}
+ /* Make sure that the values of all ZNS properties are sane */
+ if (ns->params.max_open_zones > nz) {
+ error_setg(errp,
+ "max_open_zones value %u exceeds the number of zones %u",
+ ns->params.max_open_zones, nz);
+ return -1;
+ }
+ if (ns->params.max_active_zones > nz) {
+ error_setg(errp,
+ "max_active_zones value %u exceeds the number of zones %u",
+ ns->params.max_active_zones, nz);
+ return -1;
+ }
+
return 0;
}
@@ -172,8 +186,8 @@ static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
/* MAR/MOR are zeroes-based, 0xffffffff means no limit */
- id_ns_z->mar = 0xffffffff;
- id_ns_z->mor = 0xffffffff;
+ id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
+ id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
id_ns_z->zoc = 0;
id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
@@ -199,6 +213,9 @@ static void nvme_zoned_clear_ns(NvmeNamespace *ns)
uint32_t set_state;
int i;
+ ns->nr_active_zones = 0;
+ ns->nr_open_zones = 0;
+
zone = ns->zone_array;
for (i = 0; i < ns->num_zones; i++, zone++) {
switch (nvme_get_zone_state(zone)) {
@@ -209,6 +226,7 @@ static void nvme_zoned_clear_ns(NvmeNamespace *ns)
QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
break;
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_inc_active(ns);
/* fall through */
default:
continue;
@@ -216,6 +234,9 @@ static void nvme_zoned_clear_ns(NvmeNamespace *ns)
if (zone->d.wp == zone->d.zslba) {
set_state = NVME_ZONE_STATE_EMPTY;
+ } else if (ns->params.max_active_zones == 0 ||
+ ns->nr_active_zones < ns->params.max_active_zones) {
+ set_state = NVME_ZONE_STATE_CLOSED;
} else {
set_state = NVME_ZONE_STATE_CLOSED;
}
@@ -224,6 +245,7 @@ static void nvme_zoned_clear_ns(NvmeNamespace *ns)
case NVME_ZONE_STATE_CLOSED:
trace_pci_nvme_clear_ns_close(nvme_get_zone_state(zone),
zone->d.zslba);
+ nvme_aor_inc_active(ns);
QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry);
break;
case NVME_ZONE_STATE_EMPTY:
@@ -326,6 +348,8 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_SIZE("zone_capacity", NvmeNamespace, params.zone_cap_bs, 0),
DEFINE_PROP_BOOL("cross_zone_read", NvmeNamespace,
params.cross_zone_read, false),
+ DEFINE_PROP_UINT32("max_active", NvmeNamespace, params.max_active_zones, 0),
+ DEFINE_PROP_UINT32("max_open", NvmeNamespace, params.max_open_zones, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -34,6 +34,8 @@ typedef struct NvmeNamespaceParams {
bool cross_zone_read;
uint64_t zone_size_bs;
uint64_t zone_cap_bs;
+ uint32_t max_active_zones;
+ uint32_t max_open_zones;
} NvmeNamespaceParams;
typedef struct NvmeNamespace {
@@ -56,6 +58,8 @@ typedef struct NvmeNamespace {
uint64_t zone_capacity;
uint64_t zone_array_size;
uint32_t zone_size_log2;
+ int32_t nr_open_zones;
+ int32_t nr_active_zones;
NvmeNamespaceParams params;
} NvmeNamespace;
@@ -123,6 +127,43 @@ static inline bool nvme_wp_is_valid(NvmeZone *zone)
st != NVME_ZONE_STATE_OFFLINE;
}
+static inline void nvme_aor_inc_open(NvmeNamespace *ns)
+{
+ assert(ns->nr_open_zones >= 0);
+ if (ns->params.max_open_zones) {
+ ns->nr_open_zones++;
+ assert(ns->nr_open_zones <= ns->params.max_open_zones);
+ }
+}
+
+static inline void nvme_aor_dec_open(NvmeNamespace *ns)
+{
+ if (ns->params.max_open_zones) {
+ assert(ns->nr_open_zones > 0);
+ ns->nr_open_zones--;
+ }
+ assert(ns->nr_open_zones >= 0);
+}
+
+static inline void nvme_aor_inc_active(NvmeNamespace *ns)
+{
+ assert(ns->nr_active_zones >= 0);
+ if (ns->params.max_active_zones) {
+ ns->nr_active_zones++;
+ assert(ns->nr_active_zones <= ns->params.max_active_zones);
+ }
+}
+
+static inline void nvme_aor_dec_active(NvmeNamespace *ns)
+{
+ if (ns->params.max_active_zones) {
+ assert(ns->nr_active_zones > 0);
+ ns->nr_active_zones--;
+ assert(ns->nr_active_zones >= ns->nr_open_zones);
+ }
+ assert(ns->nr_active_zones >= 0);
+}
+
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_flush(NvmeNamespace *ns);
@@ -199,6 +199,26 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
}
}
+/*
+ * Check if we can open a zone without exceeding open/active limits.
+ * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
+ */
+static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+{
+ if (ns->params.max_active_zones != 0 &&
+ ns->nr_active_zones + act > ns->params.max_active_zones) {
+ trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
+ return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
+ }
+ if (ns->params.max_open_zones != 0 &&
+ ns->nr_open_zones + opn > ns->params.max_open_zones) {
+ trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
+ return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
+ }
+
+ return NVME_SUCCESS;
+}
+
static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
hwaddr low = n->ctrl_mem.addr;
@@ -1207,6 +1227,41 @@ static uint16_t nvme_check_zone_read(NvmeNamespace *ns, NvmeZone *zone,
return status;
}
+static void nvme_auto_transition_zone(NvmeNamespace *ns, bool implicit,
+ bool adding_active)
+{
+ NvmeZone *zone;
+
+ if (implicit && ns->params.max_open_zones &&
+ ns->nr_open_zones == ns->params.max_open_zones) {
+ zone = QTAILQ_FIRST(&ns->imp_open_zones);
+ if (zone) {
+ /*
+ * Automatically close this implicitly open zone.
+ */
+ QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
+ nvme_aor_dec_open(ns);
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
+ }
+ }
+}
+
+static uint16_t nvme_auto_open_zone(NvmeNamespace *ns, NvmeZone *zone)
+{
+ uint16_t status = NVME_SUCCESS;
+ uint8_t zs = nvme_get_zone_state(zone);
+
+ if (zs == NVME_ZONE_STATE_EMPTY) {
+ nvme_auto_transition_zone(ns, true, true);
+ status = nvme_aor_check(ns, 1, 1);
+ } else if (zs == NVME_ZONE_STATE_CLOSED) {
+ nvme_auto_transition_zone(ns, true, false);
+ status = nvme_aor_check(ns, 0, 1);
+ }
+
+ return status;
+}
+
static bool nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
bool failed)
{
@@ -1243,7 +1298,11 @@ static bool nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
switch (nvme_get_zone_state(zone)) {
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_EMPTY:
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
/* fall through */
@@ -1272,7 +1331,10 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
zs = nvme_get_zone_state(zone);
switch (zs) {
case NVME_ZONE_STATE_EMPTY:
+ nvme_aor_inc_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_inc_open(ns);
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
}
}
@@ -1378,6 +1440,11 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
+ status = nvme_auto_open_zone(ns, zone);
+ if (status != NVME_SUCCESS) {
+ goto invalid;
+ }
+
req->cqe.result64 = nvme_advance_zone_wp(ns, zone, nlb);
}
@@ -1436,6 +1503,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req, bool append)
slba = zone->w_ptr;
}
+ status = nvme_auto_open_zone(ns, zone);
+ if (status != NVME_SUCCESS) {
+ goto invalid;
+ }
+
req->cqe.result64 = nvme_advance_zone_wp(ns, zone, nlb);
} else {
status = nvme_check_zone_read(ns, zone, slba, nlb, &rfc);
@@ -1537,9 +1609,27 @@ static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
uint8_t state)
{
+ uint16_t status;
+
switch (state) {
case NVME_ZONE_STATE_EMPTY:
+ nvme_auto_transition_zone(ns, false, true);
+ status = nvme_aor_check(ns, 1, 0);
+ if (status != NVME_SUCCESS) {
+ return status;
+ }
+ nvme_aor_inc_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ status = nvme_aor_check(ns, 0, 1);
+ if (status != NVME_SUCCESS) {
+ if (state == NVME_ZONE_STATE_EMPTY) {
+ nvme_aor_dec_active(ns);
+ }
+ return status;
+ }
+ nvme_aor_inc_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
/* fall through */
@@ -1561,6 +1651,7 @@ static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
/* fall through */
case NVME_ZONE_STATE_CLOSED:
@@ -1582,7 +1673,11 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_EMPTY:
zone->w_ptr = nvme_zone_wr_boundary(zone);
zone->d.wp = zone->w_ptr;
@@ -1608,7 +1703,11 @@ static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_FULL:
zone->w_ptr = zone->d.zslba;
zone->d.wp = zone->w_ptr;