Message ID | 97104495f5c945d25315aff1bd618e1a7bacf34c.1561589072.git.alistair.francis@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v1,1/1] hw/scsi: Report errors and sense to guests through scsi-block | expand |
On 27/06/19 00:46, Alistair Francis wrote: > From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> > > When host block devices are bridged to a guest system through > virtio-scsi-pci and scsi-block driver, scsi_handle_rw_error() in > hw/scsi/scsi-disk.c checks the error number to judge which error to > report to the guests. EIO and EINVAL are not reported and ignored. Once > EIO or EINVAL happen, eternal wait of guest system happens. This problem > was observed with zoned block devices on the host system attached to the > guest via virtio-scsi-pci. To avoid the eternal wait, add EIO and EINVAL > to the list of error numbers to report to the guest. This is certainly correct for EINVAL, I am not sure about EIO. Did you run the VM with "-drive ...,rerror=report,werror=report"? The update_sense part is okay too. Paolo > On top of this, it is required to report SCSI sense data to the guest > so that the guest can handle the error correctly. However, > scsi_handle_rw_error() does not passthrough sense data that host > scsi-block device reported. Instead, it newly generates fixed sense > data only for certain error numbers. This is inflexible to support new > error codes to report to guest. To avoid this inflexiblity, pass the SCSI > sense data that the host scsi-block device reported as is. To be more > precise, set valid sense_len in the SCSIDiskReq referring sb_len_wr that > host SCSI device SG_IO ioctl reported. Add update_sense callback to > SCSIDiskClass to refer the SG_IO ioctl result only when scsi-block device > is targeted. > > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> > Signed-off-by: Alistair Francis <alistair.francis@wdc.com> > --- > hw/scsi/scsi-disk.c | 15 ++++++++++++++- > 1 file changed, 14 insertions(+), 1 deletion(-) > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c > index ed7295bfd7..6801e3a0d0 100644 > --- a/hw/scsi/scsi-disk.c > +++ b/hw/scsi/scsi-disk.c > @@ -62,6 +62,7 @@ typedef struct SCSIDiskClass { > DMAIOFunc *dma_readv; > DMAIOFunc *dma_writev; > bool (*need_fua_emulation)(SCSICommand *cmd); > + void (*update_sense)(SCSIRequest *r); > } SCSIDiskClass; > > typedef struct SCSIDiskReq { > @@ -438,6 +439,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) > { > bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); > SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); > BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, > is_read, error); > > @@ -452,9 +454,12 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) > * pause the host. > */ > assert(r->status && *r->status); > + if (sdc->update_sense) { > + sdc->update_sense(&r->req); > + } > error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); > if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || > - error == 0) { > + error == EIO || error == EINVAL || error == 0) { > /* These errors are handled by guest. */ > scsi_req_complete(&r->req, *r->status); > return true; > @@ -2894,6 +2899,13 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd, > } > } > > +static void scsi_block_update_sense(SCSIRequest *req) > +{ > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r); > + r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense)); > +} > + > #endif > > static > @@ -3059,6 +3071,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data) > sc->parse_cdb = scsi_block_parse_cdb; > sdc->dma_readv = scsi_block_dma_readv; > sdc->dma_writev = scsi_block_dma_writev; > + sdc->update_sense = scsi_block_update_sense; > sdc->need_fua_emulation = scsi_block_no_fua; > dc->desc = "SCSI block device passthrough"; > dc->props = scsi_block_properties; >
On Thu, Jun 27, 2019 at 2:01 AM Paolo Bonzini <pbonzini@redhat.com> wrote: > > On 27/06/19 00:46, Alistair Francis wrote: > > From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> > > > > When host block devices are bridged to a guest system through > > virtio-scsi-pci and scsi-block driver, scsi_handle_rw_error() in > > hw/scsi/scsi-disk.c checks the error number to judge which error to > > report to the guests. EIO and EINVAL are not reported and ignored. Once > > EIO or EINVAL happen, eternal wait of guest system happens. This problem > > was observed with zoned block devices on the host system attached to the > > guest via virtio-scsi-pci. To avoid the eternal wait, add EIO and EINVAL > > to the list of error numbers to report to the guest. > > This is certainly correct for EINVAL, I am not sure about EIO. Did you > run the VM with "-drive ...,rerror=report,werror=report"? This is from Shin'ichiro Kawasaki: I tried to run my VM with option "-drive ...,rerror=report,werror=report" as he noted, but the eternal loop symptom still happens when host block-scsi device returns EIO. Then I believe EIO should be added to the report target error list. > > The update_sense part is okay too. Cool! Alistair > > Paolo > > > On top of this, it is required to report SCSI sense data to the guest > > so that the guest can handle the error correctly. However, > > scsi_handle_rw_error() does not passthrough sense data that host > > scsi-block device reported. Instead, it newly generates fixed sense > > data only for certain error numbers. This is inflexible to support new > > error codes to report to guest. To avoid this inflexiblity, pass the SCSI > > sense data that the host scsi-block device reported as is. To be more > > precise, set valid sense_len in the SCSIDiskReq referring sb_len_wr that > > host SCSI device SG_IO ioctl reported. Add update_sense callback to > > SCSIDiskClass to refer the SG_IO ioctl result only when scsi-block device > > is targeted. > > > > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> > > Signed-off-by: Alistair Francis <alistair.francis@wdc.com> > > --- > > hw/scsi/scsi-disk.c | 15 ++++++++++++++- > > 1 file changed, 14 insertions(+), 1 deletion(-) > > > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c > > index ed7295bfd7..6801e3a0d0 100644 > > --- a/hw/scsi/scsi-disk.c > > +++ b/hw/scsi/scsi-disk.c > > @@ -62,6 +62,7 @@ typedef struct SCSIDiskClass { > > DMAIOFunc *dma_readv; > > DMAIOFunc *dma_writev; > > bool (*need_fua_emulation)(SCSICommand *cmd); > > + void (*update_sense)(SCSIRequest *r); > > } SCSIDiskClass; > > > > typedef struct SCSIDiskReq { > > @@ -438,6 +439,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) > > { > > bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); > > SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > > + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); > > BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, > > is_read, error); > > > > @@ -452,9 +454,12 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) > > * pause the host. > > */ > > assert(r->status && *r->status); > > + if (sdc->update_sense) { > > + sdc->update_sense(&r->req); > > + } > > error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); > > if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || > > - error == 0) { > > + error == EIO || error == EINVAL || error == 0) { > > /* These errors are handled by guest. */ > > scsi_req_complete(&r->req, *r->status); > > return true; > > @@ -2894,6 +2899,13 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd, > > } > > } > > > > +static void scsi_block_update_sense(SCSIRequest *req) > > +{ > > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > > + SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r); > > + r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense)); > > +} > > + > > #endif > > > > static > > @@ -3059,6 +3071,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data) > > sc->parse_cdb = scsi_block_parse_cdb; > > sdc->dma_readv = scsi_block_dma_readv; > > sdc->dma_writev = scsi_block_dma_writev; > > + sdc->update_sense = scsi_block_update_sense; > > sdc->need_fua_emulation = scsi_block_no_fua; > > dc->desc = "SCSI block device passthrough"; > > dc->props = scsi_block_properties; > > >
On 28/06/19 23:57, Alistair Francis wrote: > > I tried to run my VM with option "-drive > ...,rerror=report,werror=report" as he noted, but the eternal loop > symptom still happens when host block-scsi device returns EIO. Then I > believe EIO should be added to the report target error list. What is the sense data he's seeing? EIO is a catch-all return value for scsi_sense_buf_to_errno so I'd rather be more specific. Thanks, Paolo
On Fri, Jun 28, 2019 at 3:14 PM Paolo Bonzini <pbonzini@redhat.com> wrote: > > On 28/06/19 23:57, Alistair Francis wrote: > > > > I tried to run my VM with option "-drive > > ...,rerror=report,werror=report" as he noted, but the eternal loop > > symptom still happens when host block-scsi device returns EIO. Then I > > believe EIO should be added to the report target error list. > > What is the sense data he's seeing? EIO is a catch-all return value > for scsi_sense_buf_to_errno so I'd rather be more specific. I'm not sure, he is CCed to this email so he should respond with more information. Alistair > > Thanks, > > Paolo
On 6/29/19 7:21 AM, Alistair Francis wrote: > On Fri, Jun 28, 2019 at 3:14 PM Paolo Bonzini <pbonzini@redhat.com> wrote: >> >> On 28/06/19 23:57, Alistair Francis wrote: >>> >>> I tried to run my VM with option "-drive >>> ...,rerror=report,werror=report" as he noted, but the eternal loop >>> symptom still happens when host block-scsi device returns EIO. Then I >>> believe EIO should be added to the report target error list. >> >> What is the sense data he's seeing? EIO is a catch-all return value >> for scsi_sense_buf_to_errno so I'd rather be more specific. > > I'm not sure, he is CCed to this email so he should respond with more > information. Hi Paolo, thank you very much for your review and comments. (Alistair, thank you for your care for the patch post) The sense data I observe are related to 'zoned storage'. Now I'm trying to make zoned storage on host visible to qemu guests through scsi-block driver, to utilize qemu guest environment for zoned storage system development. 'zonedstroage.io' is the good reference for details of the zoned storage. http://zonedstorage.io/introduction/zoned-storage/ The zoned storage introduced "zone" and "write pointer" concepts, and SCSI spec documents were updated to define additional commands for zoned storage as well as Additional Sense Codes. The latest SPC-5 defines a number of ASCs that zoned SCSI storage devices return. I observe four of them listed below in sense data, when I ran basic operations to the zoned storage from the guest via scsi-block. 21h 04h: UNALIGNED WRITE COMMAND 21h 05h: WRITE BOUNDARY VIOLATION 21h 06h: ATTEMPT TO READ INVALID DATA 55h 0Eh: INSUFFICIENT ZONE RESOURCES These ASCs can be reported for write or read commands due to unexpected zone status or write pointer status. Reporting these ASCs to the guest, the user applications can handle them to manage zone/write pointer status, or help the user application developers to understand the failure reason and fix bugs. I took a look in scsi_sense_to_errno() and learned that ASCs are grouped in errnos. To report the ASCs above to the guest, is it good to add them in EINVAL group defined in scsi_sense_to_errno()? The ASCs are reported with sense key ILLEGAL_REQUEST or DATA_PROTECT, then I think it fits in the function.
On 01/07/19 12:14, Shinichiro Kawasaki wrote: > I observe four of them listed below in sense data, > when I ran basic operations to the zoned storage from the guest via scsi-block. > > 21h 04h: UNALIGNED WRITE COMMAND > 21h 05h: WRITE BOUNDARY VIOLATION > 21h 06h: ATTEMPT TO READ INVALID DATA > 55h 0Eh: INSUFFICIENT ZONE RESOURCES > > These ASCs can be reported for write or read commands due to unexpected zone > status or write pointer status. Reporting these ASCs to the guest, the user > applications can handle them to manage zone/write pointer status, or help the > user application developers to understand the failure reason and fix bugs. > > I took a look in scsi_sense_to_errno() and learned that ASCs are grouped in > errnos. To report the ASCs above to the guest, is it good to add them in EINVAL > group defined in scsi_sense_to_errno()? The ASCs are reported with sense key > ILLEGAL_REQUEST or DATA_PROTECT, then I think it fits in the function. The grouping by errno is historical and pretty much broken. It should be possible to change it to return just a bool. Paolo
On 7/1/19 8:56 PM, Paolo Bonzini wrote: > On 01/07/19 12:14, Shinichiro Kawasaki wrote: >> I observe four of them listed below in sense data, >> when I ran basic operations to the zoned storage from the guest via scsi-block. >> >> 21h 04h: UNALIGNED WRITE COMMAND >> 21h 05h: WRITE BOUNDARY VIOLATION >> 21h 06h: ATTEMPT TO READ INVALID DATA >> 55h 0Eh: INSUFFICIENT ZONE RESOURCES >> >> These ASCs can be reported for write or read commands due to unexpected zone >> status or write pointer status. Reporting these ASCs to the guest, the user >> applications can handle them to manage zone/write pointer status, or help the >> user application developers to understand the failure reason and fix bugs. >> >> I took a look in scsi_sense_to_errno() and learned that ASCs are grouped in >> errnos. To report the ASCs above to the guest, is it good to add them in EINVAL >> group defined in scsi_sense_to_errno()? The ASCs are reported with sense key >> ILLEGAL_REQUEST or DATA_PROTECT, then I think it fits in the function. > > The grouping by errno is historical and pretty much broken. It should > be possible to change it to return just a bool. The errno grouping of scsi_sense_to_errno() is used not only by scsi-disk but also by block/iscsi for error reporting. Can we avoid errno grouping for iscsi also? If the errno grouping is not valuable for iscsi, single error code (EIO?) can be reported for all iscsi failures. If the errno grouping is useful for iscsi, I can create a patch to avoid errno grouping only for scsi-disk, leaving scsi_sense_to_errno() for iscsi.
On 02/07/19 08:44, Shinichiro Kawasaki wrote: > On 7/1/19 8:56 PM, Paolo Bonzini wrote: >> On 01/07/19 12:14, Shinichiro Kawasaki wrote: >>> I observe four of them listed below in sense data, >>> when I ran basic operations to the zoned storage from the guest via scsi-block. >>> >>> 21h 04h: UNALIGNED WRITE COMMAND >>> 21h 05h: WRITE BOUNDARY VIOLATION >>> 21h 06h: ATTEMPT TO READ INVALID DATA >>> 55h 0Eh: INSUFFICIENT ZONE RESOURCES >>> >>> These ASCs can be reported for write or read commands due to unexpected zone >>> status or write pointer status. Reporting these ASCs to the guest, the user >>> applications can handle them to manage zone/write pointer status, or help the >>> user application developers to understand the failure reason and fix bugs. >>> >>> I took a look in scsi_sense_to_errno() and learned that ASCs are grouped in >>> errnos. To report the ASCs above to the guest, is it good to add them in EINVAL >>> group defined in scsi_sense_to_errno()? The ASCs are reported with sense key >>> ILLEGAL_REQUEST or DATA_PROTECT, then I think it fits in the function. >> >> The grouping by errno is historical and pretty much broken. It should >> be possible to change it to return just a bool. > > The errno grouping of scsi_sense_to_errno() is used not only by scsi-disk but > also by block/iscsi for error reporting. Can we avoid errno grouping for iscsi also? No, but we can do something like if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { /* These errors are handled by guest. */ sdc->update_sense(&r->req); scsi_req_complete(&r->req, *r->status); return true; } error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); This way there's generally no need to shoehorn ASC codes into errno. I still have to test my changes, but I hope to send something within a couple of days. Paolo
On 7/2/19 7:23 PM, Paolo Bonzini wrote: > On 02/07/19 08:44, Shinichiro Kawasaki wrote: >> On 7/1/19 8:56 PM, Paolo Bonzini wrote: >>> On 01/07/19 12:14, Shinichiro Kawasaki wrote: >>>> I observe four of them listed below in sense data, >>>> when I ran basic operations to the zoned storage from the guest via scsi-block. >>>> >>>> 21h 04h: UNALIGNED WRITE COMMAND >>>> 21h 05h: WRITE BOUNDARY VIOLATION >>>> 21h 06h: ATTEMPT TO READ INVALID DATA >>>> 55h 0Eh: INSUFFICIENT ZONE RESOURCES >>>> >>>> These ASCs can be reported for write or read commands due to unexpected zone >>>> status or write pointer status. Reporting these ASCs to the guest, the user >>>> applications can handle them to manage zone/write pointer status, or help the >>>> user application developers to understand the failure reason and fix bugs. >>>> >>>> I took a look in scsi_sense_to_errno() and learned that ASCs are grouped in >>>> errnos. To report the ASCs above to the guest, is it good to add them in EINVAL >>>> group defined in scsi_sense_to_errno()? The ASCs are reported with sense key >>>> ILLEGAL_REQUEST or DATA_PROTECT, then I think it fits in the function. >>> >>> The grouping by errno is historical and pretty much broken. It should >>> be possible to change it to return just a bool. >> >> The errno grouping of scsi_sense_to_errno() is used not only by scsi-disk but >> also by block/iscsi for error reporting. Can we avoid errno grouping for iscsi also? > > No, but we can do something like > > if (scsi_sense_buf_is_guest_recoverable(r->req.sense, > sizeof(r->req.sense))) { > /* These errors are handled by guest. */ > sdc->update_sense(&r->req); > scsi_req_complete(&r->req, *r->status); > return true; > } > error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); > > This way there's generally no need to shoehorn ASC codes into errno. I > still have to test my changes, but I hope to send something within a > couple of days. Thanks for sharing your thoughts. Now I understand your idea. I'm awaiting your patch. In case you want me to create the patch based on your idea, please let me know. I can afford some time next week to work on it.
On 05/07/19 12:31, Shinichiro Kawasaki wrote: >> This way there's generally no need to shoehorn ASC codes into errno. I >> still have to test my changes, but I hope to send something within a >> couple of days. > > Thanks for sharing your thoughts. Now I understand your idea. > > I'm awaiting your patch. In case you want me to create the patch based on your > idea, please let me know. I can afford some time next week to work on it. I didn't have time to finish, but since I will be out for part of next week, here is what I currently have (untested, somewhat uncompiled). It's a bugfix so it can be included after hard freeze. Thanks! Paolo
On 7/6/19 2:08 AM, Paolo Bonzini wrote: > On 05/07/19 12:31, Shinichiro Kawasaki wrote: >>> This way there's generally no need to shoehorn ASC codes into errno. I >>> still have to test my changes, but I hope to send something within a >>> couple of days. >> >> Thanks for sharing your thoughts. Now I understand your idea. >> >> I'm awaiting your patch. In case you want me to create the patch based on your >> idea, please let me know. I can afford some time next week to work on it. > > I didn't have time to finish, but since I will be out for part of next > week, here is what I currently have (untested, somewhat uncompiled). Paolo, thanks for the patch! With a few simple compile error fixes, the patch series worked with my system as expected. Zoned storage devices are visible on the guest through scsi-block driver. Good. Here I list the compile error fixes for your reference: 1) changed return type of scsi_sense_buf_is_guest_recoverable() from int to bool in include/scsi/utils.h 2) replaced scsi_sense_to_errno() call in scsi_sense_buf_is_guest_recoverable() with scsi_sense_is_guest_recoverable() 3) removed unused "out:" label in block/iscsi.c
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index ed7295bfd7..6801e3a0d0 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -62,6 +62,7 @@ typedef struct SCSIDiskClass { DMAIOFunc *dma_readv; DMAIOFunc *dma_writev; bool (*need_fua_emulation)(SCSICommand *cmd); + void (*update_sense)(SCSIRequest *r); } SCSIDiskClass; typedef struct SCSIDiskReq { @@ -438,6 +439,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) { bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, is_read, error); @@ -452,9 +454,12 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) * pause the host. */ assert(r->status && *r->status); + if (sdc->update_sense) { + sdc->update_sense(&r->req); + } error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || - error == 0) { + error == EIO || error == EINVAL || error == 0) { /* These errors are handled by guest. */ scsi_req_complete(&r->req, *r->status); return true; @@ -2894,6 +2899,13 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd, } } +static void scsi_block_update_sense(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r); + r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense)); +} + #endif static @@ -3059,6 +3071,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data) sc->parse_cdb = scsi_block_parse_cdb; sdc->dma_readv = scsi_block_dma_readv; sdc->dma_writev = scsi_block_dma_writev; + sdc->update_sense = scsi_block_update_sense; sdc->need_fua_emulation = scsi_block_no_fua; dc->desc = "SCSI block device passthrough"; dc->props = scsi_block_properties;