@@ -294,6 +294,10 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP,
};
static const uint32_t nvme_cse_iocs_zoned[256] = {
@@ -308,6 +312,10 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
[NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP,
};
static void nvme_process_sq(void *opaque);
@@ -1745,6 +1753,7 @@ static void nvme_aio_err(NvmeRequest *req, int ret)
switch (req->cmd.opcode) {
case NVME_CMD_READ:
+ case NVME_CMD_RESV_REPORT:
status = NVME_UNRECOVERED_READ;
break;
case NVME_CMD_FLUSH:
@@ -1752,6 +1761,9 @@ static void nvme_aio_err(NvmeRequest *req, int ret)
case NVME_CMD_WRITE_ZEROES:
case NVME_CMD_ZONE_APPEND:
case NVME_CMD_COPY:
+ case NVME_CMD_RESV_REGISTER:
+ case NVME_CMD_RESV_ACQUIRE:
+ case NVME_CMD_RESV_RELEASE:
status = NVME_WRITE_FAULT;
break;
default:
@@ -2692,6 +2704,333 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
+/*
+ * Reservation Register: register, unregister or replace a reservation key
+ * through the namespace's block backend.
+ *
+ * CDW10 is decoded as: bits 2:0 action, bit 3 "ignore existing key",
+ * bits 31:30 requested change of the persist-through-power-loss state.
+ * The current and new keys are read from the host buffer as little-endian
+ * 64-bit values.
+ *
+ * Returns NVME_NO_COMPLETE once the asynchronous request has been issued
+ * (it completes through nvme_misc_cb), NVME_INVALID_OPCODE when the
+ * namespace does not support reservations, NVME_INVALID_FIELD for an
+ * unknown action or PTPL selector, or the status reported by nvme_h2c().
+ */
+static uint16_t nvme_resv_register(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeNamespace *ns = req->ns;
+    uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
+    uint8_t action = cdw10 & 0x7;
+    bool ignore_key = ((cdw10 >> 3) & 0x1);
+    uint8_t cptpl = ((cdw10 >> 30) & 0x3);
+    NvmeReservationKeyInfo key_info;
+    uint64_t old_key, new_key;
+    bool aptpl;
+    int status;
+
+    if (!nvme_support_pr(ns)) {
+        return NVME_INVALID_OPCODE;
+    }
+
+    if (cptpl == NVME_RESV_PTPL_NO_CHANGE) {
+        /* Keep whatever the namespace currently advertises. */
+        aptpl = (ns->id_ns.rescap & NVME_PR_CAP_PTPL) != 0;
+    } else if (cptpl == NVME_RESV_PTPL_DISABLE) {
+        aptpl = false;
+    } else if (cptpl == NVME_RESV_PTPL_ENABLE) {
+        aptpl = true;
+    } else {
+        return NVME_INVALID_FIELD;
+    }
+
+    status = nvme_h2c(n, (uint8_t *)&key_info,
+                      sizeof(NvmeReservationKeyInfo), req);
+    if (status) {
+        return status;
+    }
+
+    key_info.cr_key = le64_to_cpu(key_info.cr_key);
+    key_info.nr_key = le64_to_cpu(key_info.nr_key);
+
+    /* Each action only differs in which of the two keys is forwarded. */
+    switch (action) {
+    case NVME_RESV_REGISTER_ACTION_REGISTER:
+        old_key = 0;
+        new_key = key_info.nr_key;
+        break;
+    case NVME_RESV_REGISTER_ACTION_UNREGISTER:
+        old_key = key_info.cr_key;
+        new_key = 0;
+        break;
+    case NVME_RESV_REGISTER_ACTION_REPLACE:
+        old_key = key_info.cr_key;
+        new_key = key_info.nr_key;
+        break;
+    default:
+        return NVME_INVALID_FIELD;
+    }
+
+    req->aiocb = blk_aio_pr_register(ns->blkconf.blk, old_key, new_key,
+                                     0, aptpl, ignore_key,
+                                     nvme_misc_cb, req);
+
+    return NVME_NO_COMPLETE;
+}
+
+/*
+ * Reservation Release: release the held reservation or clear all
+ * reservation state through the namespace's block backend.
+ *
+ * CDW10 is decoded as: bits 2:0 action, bit 3 must be zero, bits 15:8
+ * reservation type.  The current key is read from the host buffer as a
+ * little-endian 64-bit value.
+ *
+ * Returns NVME_NO_COMPLETE once the asynchronous request has been issued
+ * (it completes through nvme_misc_cb), or an NVMe status code when the
+ * command is rejected before reaching the backend.
+ */
+static uint16_t nvme_resv_release(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeNamespace *ns = req->ns;
+    uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
+    uint8_t action = cdw10 & 0x7;
+    NvmeResvType type = ((cdw10 >> 8) & 0xff);
+    uint64_t cr_key;
+    int status;
+
+    /*
+     * Bit 3 of CDW10 is not accepted for this command: when it is set to
+     * '1' the command is failed with NVME_INVALID_FIELD.
+     */
+    if ((cdw10 >> 3) & 1) {
+        return NVME_INVALID_FIELD;
+    }
+
+    if (!nvme_support_pr(ns)) {
+        return NVME_INVALID_OPCODE;
+    }
+
+    status = nvme_h2c(n, (uint8_t *)&cr_key, sizeof(cr_key), req);
+    if (status) {
+        return status;
+    }
+
+    cr_key = le64_to_cpu(cr_key);
+
+    if (action == NVME_RESV_RELEASE_ACTION_RELEASE) {
+        req->aiocb = blk_aio_pr_release(ns->blkconf.blk, cr_key,
+                                        nvme_pr_type_to_block(type),
+                                        nvme_misc_cb, req);
+    } else if (action == NVME_RESV_RELEASE_ACTION_CLEAR) {
+        /* Clearing does not take a reservation type. */
+        req->aiocb = blk_aio_pr_clear(ns->blkconf.blk, cr_key,
+                                      nvme_misc_cb, req);
+    } else {
+        return NVME_INVALID_FIELD;
+    }
+
+    return NVME_NO_COMPLETE;
+}
+
+/*
+ * Reservation Acquire: acquire a reservation or preempt an existing one
+ * through the namespace's block backend.
+ *
+ * CDW10 is decoded as: bits 2:0 action, bit 3 must be zero, bits 15:8
+ * reservation type.  The current and preempt keys are read from the host
+ * buffer as little-endian 64-bit values.
+ *
+ * The NVMe reservation type is always translated with
+ * nvme_pr_type_to_block() before it is handed to the block layer; this
+ * includes the preempt-and-abort action.
+ *
+ * Returns NVME_NO_COMPLETE once the asynchronous request has been issued
+ * (it completes through nvme_misc_cb), or an NVMe status code when the
+ * command is rejected before reaching the backend.
+ */
+static uint16_t nvme_resv_acquire(NvmeCtrl *n, NvmeRequest *req)
+{
+    int ret;
+    NvmeReservationKeyInfo key_info;
+    NvmeNamespace *ns = req->ns;
+    uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
+    uint8_t action = cdw10 & 0x7;
+    NvmeResvType type = ((cdw10 >> 8) & 0xff);
+
+    /*
+     * Bit 3 of CDW10 is not accepted for this command: when it is set to
+     * '1' the command is failed with NVME_INVALID_FIELD.
+     */
+    if ((cdw10 >> 3) & 1) {
+        return NVME_INVALID_FIELD;
+    }
+
+    if (!nvme_support_pr(ns)) {
+        return NVME_INVALID_OPCODE;
+    }
+
+    ret = nvme_h2c(n, (uint8_t *)&key_info,
+                   sizeof(NvmeReservationKeyInfo), req);
+    if (ret) {
+        return ret;
+    }
+
+    key_info.cr_key = le64_to_cpu(key_info.cr_key);
+    key_info.pr_key = le64_to_cpu(key_info.pr_key);
+
+    switch (action) {
+    case NVME_RESV_ACQUIRE_ACTION_ACQUIRE:
+        req->aiocb = blk_aio_pr_reserve(ns->blkconf.blk, key_info.cr_key,
+                                        nvme_pr_type_to_block(type),
+                                        nvme_misc_cb, req);
+        break;
+    case NVME_RESV_ACQUIRE_ACTION_PREEMPT:
+    case NVME_RESV_ACQUIRE_ACTION_PREEMPT_AND_ABORT:
+        /* Both preempt flavours differ only in the "abort" flag. */
+        req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk,
+                         key_info.cr_key, key_info.pr_key,
+                         nvme_pr_type_to_block(type),
+                         action == NVME_RESV_ACQUIRE_ACTION_PREEMPT_AND_ABORT,
+                         nvme_misc_cb, req);
+        break;
+    default:
+        return NVME_INVALID_FIELD;
+    }
+
+    return NVME_NO_COMPLETE;
+}
+
+/*
+ * State for the first stage of a Reservation Report: the list of
+ * registered keys read from the block backend.
+ */
+typedef struct NvmeResvKeys {
+    /* Filled in by blk_aio_pr_read_keys() through a pointer. */
+    uint32_t generation;
+    /*
+     * Capacity of 'keys' when the request is issued; clamped to the
+     * number of keys actually returned once the read completes.
+     */
+    uint32_t num_keys;
+    /* Heap array of registered keys, freed by the completion callbacks. */
+    uint64_t *keys;
+    /* The Reservation Report request this state belongs to. */
+    NvmeRequest *req;
+} NvmeResvKeys;
+
+/*
+ * State for the second stage of a Reservation Report: the current
+ * reservation, combined with the previously read key list.
+ */
+typedef struct NvmeReadReservation {
+    /* Output of blk_aio_pr_read_reservation(): generation counter. */
+    uint32_t generation;
+    /* Output of blk_aio_pr_read_reservation(): key of the holder. */
+    uint64_t key;
+    /* Output of blk_aio_pr_read_reservation(): block-layer type. */
+    BlockPrType type;
+    /* The Reservation Report request this state belongs to. */
+    NvmeRequest *req;
+    /* Key list gathered by the first stage; owned until completion. */
+    NvmeResvKeys *keys_info;
+} NvmeReadReservation;
+
+/*
+ * Build the standard-format Reservation Status data structure from the
+ * collected keys and reservation state and copy it to the host buffer.
+ *
+ * One registered-controller entry is emitted per key.  Every entry
+ * carries this controller's id; an entry is flagged as the reservation
+ * holder (rcsts = 1) when its key equals the reservation key.
+ *
+ * Returns the status reported by nvme_c2h().
+ */
+static int nvme_read_reservation_cb(NvmeReadReservation *reservation)
+{
+    int rc;
+    NvmeReservationStatus *nvme_status;
+    NvmeRequest *req = reservation->req;
+    NvmeCtrl *n = req->sq->ctrl;
+    NvmeResvKeys *keys_info = reservation->keys_info;
+    uint16_t cntlid = nvme_ctrl(req)->cntlid;
+    size_t len = sizeof(NvmeReservationStatusHeader) +
+                 sizeof(NvmeRegisteredCtrl) * keys_info->num_keys;
+
+    nvme_status = g_malloc0(len);
+    nvme_status->header.gen = cpu_to_le32(reservation->generation);
+    nvme_status->header.rtype = block_pr_type_to_nvme(reservation->type);
+    /* regstrnt is a 16-bit field, so it needs the 16-bit conversion. */
+    nvme_status->header.regstrnt = cpu_to_le16(keys_info->num_keys);
+
+    for (uint32_t i = 0; i < keys_info->num_keys; i++) {
+        nvme_status->regctl_ds[i].cntlid = cpu_to_le16(cntlid);
+        nvme_status->regctl_ds[i].rkey = cpu_to_le64(keys_info->keys[i]);
+        nvme_status->regctl_ds[i].rcsts =
+            keys_info->keys[i] == reservation->key ? 1 : 0;
+    }
+
+    rc = nvme_c2h(n, (uint8_t *)nvme_status, len, req);
+    g_free(nvme_status);
+    return rc;
+}
+
+/*
+ * Build the extended-format Reservation Status data structure from the
+ * collected keys and reservation state and copy it to the host buffer.
+ *
+ * One extended registered-controller entry is emitted per key.  Every
+ * entry carries this controller's id; an entry is flagged as the
+ * reservation holder (rcsts = 1) when its key equals the reservation key.
+ *
+ * Returns the status reported by nvme_c2h().
+ */
+static int nvme_read_reservation_ext_cb(NvmeReadReservation *reservation)
+{
+    int rc;
+    NvmeReservationStatusExt *nvme_status_ext;
+    NvmeRequest *req = reservation->req;
+    NvmeCtrl *n = req->sq->ctrl;
+    NvmeResvKeys *keys_info = reservation->keys_info;
+    uint16_t cntlid = nvme_ctrl(req)->cntlid;
+    size_t len = sizeof(NvmeReservationStatusExt) +
+                 sizeof(NvmeRegisteredCtrlExt) * keys_info->num_keys;
+
+    nvme_status_ext = g_malloc0(len);
+    nvme_status_ext->header.gen = cpu_to_le32(reservation->generation);
+    nvme_status_ext->header.rtype = block_pr_type_to_nvme(reservation->type);
+    /* regstrnt is a 16-bit field, so it needs the 16-bit conversion. */
+    nvme_status_ext->header.regstrnt = cpu_to_le16(keys_info->num_keys);
+
+    for (uint32_t i = 0; i < keys_info->num_keys; i++) {
+        nvme_status_ext->regctl_eds[i].cntlid = cpu_to_le16(cntlid);
+        nvme_status_ext->regctl_eds[i].rkey =
+            cpu_to_le64(keys_info->keys[i]);
+        nvme_status_ext->regctl_eds[i].rcsts =
+            keys_info->keys[i] == reservation->key ? 1 : 0;
+    }
+
+    rc = nvme_c2h(n, (uint8_t *)nvme_status_ext, len, req);
+    g_free(nvme_status_ext);
+    return rc;
+}
+
+/*
+ * Completion callback for blk_aio_pr_read_reservation(), the second and
+ * final stage of a Reservation Report.
+ *
+ * On success the report is built in the format selected by bit 0 of
+ * CDW11 (extended when set) and copied to the host.  In every case the
+ * per-request state is released and the request is finished through
+ * nvme_misc_cb() with the resulting code.
+ */
+static void nvme_resv_read_reservation_cb(void *opaque, int ret)
+{
+    NvmeReadReservation *resv = opaque;
+    NvmeResvKeys *keys = resv->keys_info;
+    NvmeRequest *req = resv->req;
+    bool extended = le32_to_cpu(req->cmd.cdw11) & 0x1;
+
+    if (ret >= 0) {
+        ret = extended ? nvme_read_reservation_ext_cb(resv)
+                       : nvme_read_reservation_cb(resv);
+    }
+
+    g_free(keys->keys);
+    g_free(keys);
+    g_free(resv);
+    nvme_misc_cb(req, ret);
+}
+
+/*
+ * Completion callback for blk_aio_pr_read_keys(), the first stage of a
+ * Reservation Report.
+ *
+ * A negative 'ret' ends the request immediately.  Otherwise 'ret' is
+ * used as the number of keys returned: the key count is clamped to it
+ * and the second stage (reading the current reservation) is started.
+ */
+static void nvme_resv_read_keys_cb(void *opaque, int ret)
+{
+    NvmeResvKeys *keys = opaque;
+    NvmeRequest *req = keys->req;
+    NvmeNamespace *ns = req->ns;
+    NvmeReadReservation *resv;
+
+    if (ret < 0) {
+        /* Nothing further will run: drop the state and complete. */
+        g_free(keys->keys);
+        g_free(keys);
+        nvme_misc_cb(req, ret);
+        return;
+    }
+
+    keys->num_keys = MIN(ret, keys->num_keys);
+
+    resv = g_new0(NvmeReadReservation, 1);
+    resv->req = req;
+    resv->keys_info = keys;
+
+    req->aiocb = blk_aio_pr_read_reservation(ns->blkconf.blk,
+                                             &resv->generation,
+                                             &resv->key,
+                                             &resv->type,
+                                             nvme_resv_read_reservation_cb,
+                                             resv);
+}
+
+
+/*
+ * Reservation Report: start the two-stage asynchronous read of the
+ * registered keys and the current reservation.
+ *
+ * CDW10 holds the host buffer size in dwords, minus one; bit 0 of CDW11
+ * selects the extended data structure.  The buffer must be able to hold
+ * at least the header of the selected format, and the remaining space
+ * determines how many registered-controller entries may be returned.
+ *
+ * The number of keys requested from the backend is additionally capped
+ * at UINT16_MAX: the report header stores the entry count in a 16-bit
+ * field, and the cap keeps the guest-chosen buffer size from driving an
+ * arbitrarily large host allocation.
+ *
+ * Returns NVME_NO_COMPLETE once the first stage has been issued,
+ * NVME_INVALID_OPCODE when the namespace does not support reservations,
+ * or NVME_INVALID_FIELD when the buffer is too small for the header.
+ */
+static uint16_t nvme_resv_report(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeNamespace *ns = req->ns;
+    uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
+    uint32_t cdw11 = le32_to_cpu(req->cmd.cdw11);
+    bool eds = cdw11 & 0x1;
+    /* Widen before adding one so that cdw10 == 0xffffffff cannot wrap. */
+    uint64_t buflen = ((uint64_t)cdw10 + 1) * sizeof(uint32_t);
+    size_t hdr_len = eds ? sizeof(NvmeReservationStatusExt)
+                         : sizeof(NvmeReservationStatusHeader);
+    size_t entry_len = eds ? sizeof(NvmeRegisteredCtrlExt)
+                           : sizeof(NvmeRegisteredCtrl);
+    uint64_t max_keys;
+    NvmeResvKeys *keys_info;
+
+    if (!nvme_support_pr(ns)) {
+        return NVME_INVALID_OPCODE;
+    }
+
+    if (buflen < hdr_len) {
+        return NVME_INVALID_FIELD;
+    }
+
+    /* max_keys is the maximum number of keys that can be transmitted */
+    max_keys = (buflen - hdr_len) / entry_len;
+    if (max_keys > UINT16_MAX) {
+        max_keys = UINT16_MAX;
+    }
+
+    keys_info = g_new0(NvmeResvKeys, 1);
+    keys_info->num_keys = max_keys;
+    keys_info->keys = g_new(uint64_t, keys_info->num_keys);
+    keys_info->req = req;
+
+    req->aiocb = blk_aio_pr_read_keys(ns->blkconf.blk, &keys_info->generation,
+                                      keys_info->num_keys, keys_info->keys,
+                                      nvme_resv_read_keys_cb, keys_info);
+
+    return NVME_NO_COMPLETE;
+}
+
typedef struct NvmeCopyAIOCB {
BlockAIOCB common;
BlockAIOCB *aiocb;
@@ -4469,6 +4808,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_dsm(n, req);
case NVME_CMD_VERIFY:
return nvme_verify(n, req);
+ case NVME_CMD_RESV_REGISTER:
+ return nvme_resv_register(n, req);
+ case NVME_CMD_RESV_REPORT:
+ return nvme_resv_report(n, req);
+ case NVME_CMD_RESV_ACQUIRE:
+ return nvme_resv_acquire(n, req);
+ case NVME_CMD_RESV_RELEASE:
+ return nvme_resv_release(n, req);
case NVME_CMD_COPY:
return nvme_copy(n, req);
case NVME_CMD_ZONE_MGMT_SEND:
@@ -60,6 +60,12 @@ void nvme_ns_init_format(NvmeNamespace *ns)
blk_pr_cap = blk_bs(ns->blkconf.blk)->bl.pr_cap;
id_ns->rescap = block_pr_cap_to_nvme(blk_pr_cap);
+ if (id_ns->rescap != NVME_PR_CAP_ALL &&
+ id_ns->rescap != NVME_PR_CAP_RW) {
+
+ /* Rescap either supports all or none of them */
+ id_ns->rescap = 0;
+ }
}
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
@@ -470,6 +470,10 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
+ case NVME_CMD_RESV_REGISTER: return "NVME_CMD_RESV_REGISTER";
+ case NVME_CMD_RESV_REPORT: return "NVME_CMD_RESV_REPORT";
+ case NVME_CMD_RESV_ACQUIRE: return "NVME_CMD_RESV_ACQUIRE";
+ case NVME_CMD_RESV_RELEASE: return "NVME_CMD_RESV_RELEASE";
default: return "NVME_NVM_CMD_UNKNOWN";
}
}
@@ -558,6 +562,11 @@ static inline uint8_t block_pr_cap_to_nvme(uint8_t block_pr_cap)
return res;
}
+/*
+ * Report whether the namespace can service reservation commands: true
+ * only when every reservation type in NVME_PR_CAP_RW is advertised in
+ * the namespace's rescap field.
+ */
+static inline bool nvme_support_pr(NvmeNamespace *ns)
+{
+    const int advertised = ns->id_ns.rescap & NVME_PR_CAP_RW;
+
+    return advertised == NVME_PR_CAP_RW;
+}
+
typedef struct NvmeSQueue {
struct NvmeCtrl *ctrl;
uint16_t sqid;
@@ -692,6 +692,13 @@ typedef enum NVMEPrCap {
NVME_PR_CAP_WR_EX_AR = 1 << 5,
/* Exclusive Access All Registrants reservation type */
NVME_PR_CAP_EX_AC_AR = 1 << 6,
+ /* Write and Read reservation type */
+ NVME_PR_CAP_RW = (NVME_PR_CAP_WR_EX |
+ NVME_PR_CAP_EX_AC |
+ NVME_PR_CAP_WR_EX_RO |
+ NVME_PR_CAP_EX_AC_RO |
+ NVME_PR_CAP_WR_EX_AR |
+ NVME_PR_CAP_EX_AC_AR),
NVME_PR_CAP_ALL = (NVME_PR_CAP_PTPL |
NVME_PR_CAP_WR_EX |
@@ -702,6 +709,51 @@ typedef enum NVMEPrCap {
NVME_PR_CAP_EX_AC_AR),
} NvmePrCap;
+/*
+ * 16-byte host-to-controller payload carrying reservation keys.
+ * Both keys are little-endian on the wire.
+ */
+typedef struct QEMU_PACKED NvmeReservationKeyInfo {
+    /* Current reservation key. */
+    uint64_t cr_key;
+    /* The second key is interpreted according to the command. */
+    union {
+        uint64_t nr_key;    /* new key (Reservation Register) */
+        uint64_t pr_key;    /* key to preempt (Reservation Acquire) */
+    };
+} NvmeReservationKeyInfo;
+
+/*
+ * 24-byte registered-controller entry of the standard Reservation
+ * Report format.
+ */
+typedef struct QEMU_PACKED NvmeRegisteredCtrl {
+    uint16_t cntlid;        /* controller id, little-endian */
+    uint8_t  rcsts;         /* 1 when this entry holds the reservation */
+    uint8_t  rsvd3[5];
+    uint8_t  hostid[8];     /* 64-bit host identifier */
+    uint64_t rkey;          /* registered key, little-endian */
+} NvmeRegisteredCtrl;
+
+/*
+ * 64-byte registered-controller entry of the extended Reservation
+ * Report format.
+ */
+typedef struct QEMU_PACKED NvmeRegisteredCtrlExt {
+    uint16_t cntlid;        /* controller id, little-endian */
+    uint8_t  rcsts;         /* 1 when this entry holds the reservation */
+    uint8_t  rsvd3[5];
+    uint64_t rkey;          /* registered key, little-endian */
+    uint8_t  hostid[16];    /* 128-bit host identifier */
+    uint8_t  rsvd32[32];
+} NvmeRegisteredCtrlExt;
+
+/*
+ * 24-byte header shared by the standard and extended Reservation
+ * Report data structures.  Multi-byte fields are little-endian.
+ */
+typedef struct QEMU_PACKED NvmeReservationStatusHeader {
+    uint32_t gen;           /* reservation generation counter */
+    uint8_t  rtype;         /* NVMe reservation type currently held */
+    uint16_t regstrnt;      /* number of entries following the header */
+    uint16_t resv5;
+    uint8_t  ptpls;         /* NOTE(review): presumably PTPL state */
+    uint8_t  resv10[14];
+} NvmeReservationStatusHeader;
+
+/*
+ * Standard Reservation Report: the header immediately followed by
+ * header.regstrnt registered-controller entries.
+ */
+typedef struct QEMU_PACKED NvmeReservationStatus {
+    NvmeReservationStatusHeader header;
+    NvmeRegisteredCtrl          regctl_ds[];
+} NvmeReservationStatus;
+
+/*
+ * Extended Reservation Report: the header padded to 64 bytes, followed
+ * by header.regstrnt extended registered-controller entries.
+ */
+typedef struct QEMU_PACKED NvmeReservationStatusExt {
+    NvmeReservationStatusHeader header;
+    uint8_t                     rsvd24[40];
+    NvmeRegisteredCtrlExt       regctl_eds[];
+} NvmeReservationStatusExt;
+
typedef struct QEMU_PACKED NvmeDeleteQ {
uint8_t opcode;
uint8_t flags;
@@ -1926,5 +1978,11 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeEndGrpLog) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeDirectiveIdentify) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeReservationKeyInfo) != 16);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeRegisteredCtrl) != 24);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeRegisteredCtrlExt) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeReservationStatusHeader) != 24);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeReservationStatus) != 24);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeReservationStatusExt) != 64);
}
#endif