diff mbox series

[v6,31/42] nvme: add check for prinfo

Message ID 20200316142928.153431-32-its@irrelevant.dk (mailing list archive)
State New, archived
Headers show
Series nvme: support NVMe v1.3d, SGLs and multiple namespaces | expand

Commit Message

Klaus Jensen March 16, 2020, 2:29 p.m. UTC
From: Klaus Jensen <k.jensen@samsung.com>

Check the validity of the PRINFO field.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c       | 50 ++++++++++++++++++++++++++++++++++++-------
 hw/block/trace-events |  1 +
 include/block/nvme.h  |  1 +
 3 files changed, 44 insertions(+), 8 deletions(-)

Comments

Maxim Levitsky March 25, 2020, 10:57 a.m. UTC | #1
On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Check the validity of the PRINFO field.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c       | 50 ++++++++++++++++++++++++++++++++++++-------
>  hw/block/trace-events |  1 +
>  include/block/nvme.h  |  1 +
>  3 files changed, 44 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 7d5340c272c6..0d2b5b45b0c5 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -505,6 +505,17 @@ static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len,
>      return NVME_SUCCESS;
>  }
>  
> +static inline uint16_t nvme_check_prinfo(NvmeCtrl *n, NvmeNamespace *ns,
> +                                         uint16_t ctrl, NvmeRequest *req)
> +{
> +    if ((ctrl & NVME_RW_PRINFO_PRACT) && !(ns->id_ns.dps & DPS_TYPE_MASK)) {
> +        trace_nvme_dev_err_prinfo(nvme_cid(req), ctrl);
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }

I refreshed my (still very limited) knowelege on the metadata
and the protection info, and this is what I found:

I think that this is very far from complete, because we also have:

1. PRCHECK. According to the spec it is independent of PRACT
   And when some of it is set, 
   together with enabled protection (the DPS field in namespace),
   Then the 8 bytes of the protection info is checked (optionally using the
   the EILBRT and ELBAT/ELBATM fields in the command and CRC of the data for the guard field)

   So this field should also be checked to be zero when protection is disabled
   (I don't see an explicit requirement for that in the spec, but neither I see
   such requirement for PRACT)

2. The protection values to be written / checked ((E)ILBRT/(E)LBATM/(E)LBAT)
   Same here, but also these should not be set when PRCHECK is not set for reads,
   plus some are protection type specific.


The spec does mention the 'Invalid Protection Information' error code which
refers to invalid values in the PRINFO field.
So this error code I think should be returned instead of the 'Invalid field'

Another thing to optionaly check is that the metadata pointer for separate metadata.
 Is zero as long as we don't support metadata
(again I don't see an explicit requirement for this in the spec, but it mentions:

"This field is valid only if the command has metadata that is not interleaved with
the logical block data, as specified in the Format NVM command"

)


> +
> +    return NVME_SUCCESS;
> +}
> +
>  static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
>                                           uint64_t slba, uint32_t nlb,
>                                           NvmeRequest *req)
> @@ -564,11 +575,22 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
>      uint64_t offset = slba << data_shift;
>      uint32_t count = nlb << data_shift;
> +    uint16_t ctrl = le16_to_cpu(rw->control);
>      uint16_t status;
>  
> +    status = nvme_check_prinfo(n, ns, ctrl, req);
> +    if (status) {
> +        goto invalid;
> +    }
> +
> +    if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
> +        status = NVME_INVALID_PROT_INFO | NVME_DNR;
> +        goto invalid;
> +    }
> +
>      status = nvme_check_bounds(n, ns, slba, nlb, req);
>      if (status) {
> -        return status;
> +        goto invalid;
>      }
>  
>      block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
> @@ -576,6 +598,10 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
>                                          BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
>      return NVME_NO_COMPLETE;
> +
> +invalid:
> +    block_acct_invalid(blk_get_stats(n->conf.blk), BLOCK_ACCT_WRITE);
> +    return status;
>  }
>  
>  static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
> @@ -584,6 +610,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
>      uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
>      uint64_t slba = le64_to_cpu(rw->slba);
> +    uint16_t ctrl = le16_to_cpu(rw->control);
>  
>      uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>      uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
> @@ -597,19 +624,22 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>  
>      status = nvme_check_mdts(n, data_size, req);
>      if (status) {
> -        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
> -        return status;
> +        goto invalid;
> +    }
> +
> +    status = nvme_check_prinfo(n, ns, ctrl, req);
> +    if (status) {
> +        goto invalid;
>      }
>  
>      status = nvme_check_bounds(n, ns, slba, nlb, req);
>      if (status) {
> -        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
> -        return status;
> +        goto invalid;
>      }
>  
> -    if (nvme_map(n, cmd, &req->qsg, &req->iov, data_size, req)) {
> -        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
> -        return NVME_INVALID_FIELD | NVME_DNR;
> +    status = nvme_map(n, cmd, &req->qsg, &req->iov, data_size, req);
> +    if (status) {
> +        goto invalid;
>      }
>  
>      if (req->qsg.nsg > 0) {
> @@ -633,6 +663,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      }
>  
>      return NVME_NO_COMPLETE;
> +
> +invalid:
> +    block_acct_invalid(blk_get_stats(n->conf.blk), acct);
> +    return status;
>  }
>  
>  static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 2df6aa38df1b..2aceb0537e05 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -80,6 +80,7 @@ nvme_dev_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "cqid %"PRIu16" new_
>  
>  # nvme traces for error conditions
>  nvme_dev_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" mdts %"PRIu64" len %"PRIu64""
> +nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl %"PRIu16""
>  nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
>  nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
>  nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index ecc02fbe8bb8..293d68553538 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -394,6 +394,7 @@ enum {
>      NVME_RW_PRINFO_PRCHK_GUARD  = 1 << 12,
>      NVME_RW_PRINFO_PRCHK_APP    = 1 << 11,
>      NVME_RW_PRINFO_PRCHK_REF    = 1 << 10,
> +    NVME_RW_PRINFO_PRCHK_MASK   = 7 << 10,
>  };
>  
>  typedef struct NvmeDsmCmd {


Best regards,
	Maxim Levitsky
Klaus Jensen March 31, 2020, 5:45 a.m. UTC | #2
On Mar 25 12:57, Maxim Levitsky wrote:
> On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Check the validity of the PRINFO field.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> >  hw/block/nvme.c       | 50 ++++++++++++++++++++++++++++++++++++-------
> >  hw/block/trace-events |  1 +
> >  include/block/nvme.h  |  1 +
> >  3 files changed, 44 insertions(+), 8 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 7d5340c272c6..0d2b5b45b0c5 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -505,6 +505,17 @@ static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len,
> >      return NVME_SUCCESS;
> >  }
> >  
> > +static inline uint16_t nvme_check_prinfo(NvmeCtrl *n, NvmeNamespace *ns,
> > +                                         uint16_t ctrl, NvmeRequest *req)
> > +{
> > +    if ((ctrl & NVME_RW_PRINFO_PRACT) && !(ns->id_ns.dps & DPS_TYPE_MASK)) {
> > +        trace_nvme_dev_err_prinfo(nvme_cid(req), ctrl);
> > +        return NVME_INVALID_FIELD | NVME_DNR;
> > +    }
> 
> I refreshed my (still very limited) knowelege on the metadata
> and the protection info, and this is what I found:
> 
> I think that this is very far from complete, because we also have:
> 
> 1. PRCHECK. According to the spec it is independent of PRACT
>    And when some of it is set, 
>    together with enabled protection (the DPS field in namespace),
>    Then the 8 bytes of the protection info is checked (optionally using the
>    the EILBRT and ELBAT/ELBATM fields in the command and CRC of the data for the guard field)
> 
>    So this field should also be checked to be zero when protection is disabled
>    (I don't see an explicit requirement for that in the spec, but neither I see
>    such requirement for PRACT)
> 
> 2. The protection values to be written / checked ((E)ILBRT/(E)LBATM/(E)LBAT)
>    Same here, but also these should not be set when PRCHECK is not set for reads,
>    plus some are protection type specific.
> 
> 
> The spec does mention the 'Invalid Protection Information' error code which
> refers to invalid values in the PRINFO field.
> So this error code I think should be returned instead of the 'Invalid field'
> 
> Another thing to optionaly check is that the metadata pointer for separate metadata.
>  Is zero as long as we don't support metadata
> (again I don't see an explicit requirement for this in the spec, but it mentions:
> 
> "This field is valid only if the command has metadata that is not interleaved with
> the logical block data, as specified in the Format NVM command"
> 
> )
> 

I'm kinda inclined to just drop this patch. The spec actually says that
the PRACT and PRCHK fields are used only if the namespace is formatted
to use end-to-end protection information. Since we do not support that,
I don't think we even need to check it.

Any opinion on this?
Maxim Levitsky March 31, 2020, 9:17 a.m. UTC | #3
On Tue, 2020-03-31 at 07:45 +0200, Klaus Birkelund Jensen wrote:
> On Mar 25 12:57, Maxim Levitsky wrote:
> > On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> > > From: Klaus Jensen <k.jensen@samsung.com>
> > > 
> > > Check the validity of the PRINFO field.
> > > 
> > > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > > ---
> > >  hw/block/nvme.c       | 50 ++++++++++++++++++++++++++++++++++++-------
> > >  hw/block/trace-events |  1 +
> > >  include/block/nvme.h  |  1 +
> > >  3 files changed, 44 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 7d5340c272c6..0d2b5b45b0c5 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -505,6 +505,17 @@ static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len,
> > >      return NVME_SUCCESS;
> > >  }
> > >  
> > > +static inline uint16_t nvme_check_prinfo(NvmeCtrl *n, NvmeNamespace *ns,
> > > +                                         uint16_t ctrl, NvmeRequest *req)
> > > +{
> > > +    if ((ctrl & NVME_RW_PRINFO_PRACT) && !(ns->id_ns.dps & DPS_TYPE_MASK)) {
> > > +        trace_nvme_dev_err_prinfo(nvme_cid(req), ctrl);
> > > +        return NVME_INVALID_FIELD | NVME_DNR;
> > > +    }
> > 
> > I refreshed my (still very limited) knowelege on the metadata
> > and the protection info, and this is what I found:
> > 
> > I think that this is very far from complete, because we also have:
> > 
> > 1. PRCHECK. According to the spec it is independent of PRACT
> >    And when some of it is set, 
> >    together with enabled protection (the DPS field in namespace),
> >    Then the 8 bytes of the protection info is checked (optionally using the
> >    the EILBRT and ELBAT/ELBATM fields in the command and CRC of the data for the guard field)
> > 
> >    So this field should also be checked to be zero when protection is disabled
> >    (I don't see an explicit requirement for that in the spec, but neither I see
> >    such requirement for PRACT)
> > 
> > 2. The protection values to be written / checked ((E)ILBRT/(E)LBATM/(E)LBAT)
> >    Same here, but also these should not be set when PRCHECK is not set for reads,
> >    plus some are protection type specific.
> > 
> > 
> > The spec does mention the 'Invalid Protection Information' error code which
> > refers to invalid values in the PRINFO field.
> > So this error code I think should be returned instead of the 'Invalid field'
> > 
> > Another thing to optionaly check is that the metadata pointer for separate metadata.
> >  Is zero as long as we don't support metadata
> > (again I don't see an explicit requirement for this in the spec, but it mentions:
> > 
> > "This field is valid only if the command has metadata that is not interleaved with
> > the logical block data, as specified in the Format NVM command"
> > 
> > )
> > 
> 
> I'm kinda inclined to just drop this patch. The spec actually says that
> the PRACT and PRCHK fields are used only if the namespace is formatted
> to use end-to-end protection information. Since we do not support that,
> I don't think we even need to check it.
> 
> Any opinion on this?
Yep. I also think so.

I did add as much as possible checks on all the reserved fields in nvme-mdev,
checks for all all the unused fields like that, to make it as defensive as possible and to reduce
the attack surface to the minimum.
This can be done later when all the dust settles, its not a high priority for sure.


Best regard,
	Maxim Levitsky


>
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 7d5340c272c6..0d2b5b45b0c5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -505,6 +505,17 @@  static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len,
     return NVME_SUCCESS;
 }
 
+static inline uint16_t nvme_check_prinfo(NvmeCtrl *n, NvmeNamespace *ns,
+                                         uint16_t ctrl, NvmeRequest *req)
+{
+    if ((ctrl & NVME_RW_PRINFO_PRACT) && !(ns->id_ns.dps & DPS_TYPE_MASK)) {
+        trace_nvme_dev_err_prinfo(nvme_cid(req), ctrl);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    return NVME_SUCCESS;
+}
+
 static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
                                          uint64_t slba, uint32_t nlb,
                                          NvmeRequest *req)
@@ -564,11 +575,22 @@  static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
     uint64_t offset = slba << data_shift;
     uint32_t count = nlb << data_shift;
+    uint16_t ctrl = le16_to_cpu(rw->control);
     uint16_t status;
 
+    status = nvme_check_prinfo(n, ns, ctrl, req);
+    if (status) {
+        goto invalid;
+    }
+
+    if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
+        status = NVME_INVALID_PROT_INFO | NVME_DNR;
+        goto invalid;
+    }
+
     status = nvme_check_bounds(n, ns, slba, nlb, req);
     if (status) {
-        return status;
+        goto invalid;
     }
 
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
@@ -576,6 +598,10 @@  static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
                                         BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
     return NVME_NO_COMPLETE;
+
+invalid:
+    block_acct_invalid(blk_get_stats(n->conf.blk), BLOCK_ACCT_WRITE);
+    return status;
 }
 
 static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
@@ -584,6 +610,7 @@  static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
     uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
     uint64_t slba = le64_to_cpu(rw->slba);
+    uint16_t ctrl = le16_to_cpu(rw->control);
 
     uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
@@ -597,19 +624,22 @@  static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
 
     status = nvme_check_mdts(n, data_size, req);
     if (status) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
-        return status;
+        goto invalid;
+    }
+
+    status = nvme_check_prinfo(n, ns, ctrl, req);
+    if (status) {
+        goto invalid;
     }
 
     status = nvme_check_bounds(n, ns, slba, nlb, req);
     if (status) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
-        return status;
+        goto invalid;
     }
 
-    if (nvme_map(n, cmd, &req->qsg, &req->iov, data_size, req)) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
-        return NVME_INVALID_FIELD | NVME_DNR;
+    status = nvme_map(n, cmd, &req->qsg, &req->iov, data_size, req);
+    if (status) {
+        goto invalid;
     }
 
     if (req->qsg.nsg > 0) {
@@ -633,6 +663,10 @@  static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     }
 
     return NVME_NO_COMPLETE;
+
+invalid:
+    block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+    return status;
 }
 
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 2df6aa38df1b..2aceb0537e05 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -80,6 +80,7 @@  nvme_dev_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "cqid %"PRIu16" new_
 
 # nvme traces for error conditions
 nvme_dev_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" mdts %"PRIu64" len %"PRIu64""
+nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl %"PRIu16""
 nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
 nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
 nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index ecc02fbe8bb8..293d68553538 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -394,6 +394,7 @@  enum {
     NVME_RW_PRINFO_PRCHK_GUARD  = 1 << 12,
     NVME_RW_PRINFO_PRCHK_APP    = 1 << 11,
     NVME_RW_PRINFO_PRCHK_REF    = 1 << 10,
+    NVME_RW_PRINFO_PRCHK_MASK   = 7 << 10,
 };
 
 typedef struct NvmeDsmCmd {