Message ID | 20190503134912.39756-8-farman@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | s390: vfio-ccw fixes | expand |
On 03/05/2019 15:49, Eric Farman wrote: > If the CCW being processed is a No-Operation, then by definition no > data is being transferred. Let's fold those checks into the normal > CCW processors, rather than skipping out early. > > Likewise, if the CCW being processed is a "test" (an invented > definition to simply mean it ends in a zero), let's permit that to go > through to the hardware. There's nothing inherently unique about > those command codes versus one that ends in an eight [1], or any other > otherwise valid command codes that are undefined for the device type > in question. > > [1] POPS states that a x08 is a TIC CCW, and that having any high-order > bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the > high-order bits are ignored. > > Signed-off-by: Eric Farman <farman@linux.ibm.com> > --- > drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ > 1 file changed, 5 insertions(+), 6 deletions(-) > > diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c > index 36d76b821209..c0a52025bf06 100644 > --- a/drivers/s390/cio/vfio_ccw_cp.c > +++ b/drivers/s390/cio/vfio_ccw_cp.c > @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp, > #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) > #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) > > -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) > - > #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) > > #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) > @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw) > if (ccw->count == 0) > return 0; > > + /* If the command is a NOP, then no data will be transferred */ > + if (ccw_is_noop(ccw)) > + return 0; > + > /* If the skip flag is off, then data will be transferred */ > if (!ccw_is_skip(ccw)) > return 1; > @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx) > { > struct ccw1 *ccw = chain->ch_ccw + idx; > > - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) > + if (ccw_is_tic(ccw)) AFAIR, we introduced this code to protect against noop and test with a non zero CDA. This could go away only if there is somewhere the guaranty that noop have always a null CDA (same for test). > return; > > kfree((void *)(u64)ccw->cda); > @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, > { > struct ccw1 *ccw = chain->ch_ccw + idx; > > - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) > - return 0; > - > if (ccw_is_tic(ccw)) > return ccwchain_fetch_tic(chain, idx, cp); > >
On Fri, 3 May 2019 15:49:12 +0200 Eric Farman <farman@linux.ibm.com> wrote: > If the CCW being processed is a No-Operation, then by definition no > data is being transferred. Let's fold those checks into the normal > CCW processors, rather than skipping out early. > > Likewise, if the CCW being processed is a "test" (an invented > definition to simply mean it ends in a zero), The "Common I/O Device Commands" document actually defines this :) > let's permit that to go > through to the hardware. There's nothing inherently unique about > those command codes versus one that ends in an eight [1], or any other > otherwise valid command codes that are undefined for the device type > in question. But I agree that everything possible should be sent to the hardware. > > [1] POPS states that a x08 is a TIC CCW, and that having any high-order > bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the > high-order bits are ignored. > > Signed-off-by: Eric Farman <farman@linux.ibm.com> > --- > drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ > 1 file changed, 5 insertions(+), 6 deletions(-) > > diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c > index 36d76b821209..c0a52025bf06 100644 > --- a/drivers/s390/cio/vfio_ccw_cp.c > +++ b/drivers/s390/cio/vfio_ccw_cp.c > @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp, > #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) > #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) > > -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) > - > #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) > > #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) > @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw) > if (ccw->count == 0) > return 0; > > + /* If the command is a NOP, then no data will be transferred */ > + if (ccw_is_noop(ccw)) > + return 0; > + Don't you need to return 0 here for any test command as well? (If I read the doc correctly, we'll just get a unit check in any case, as there are no parallel I/O interfaces on modern s390 boxes. Even if we had a parallel I/O interface, we'd just collect the status, and not get any data transfer. FWIW, the QEMU ccw interpreter for emulated devices rejects test ccws with a channel program check, which looks wrong; should be a command reject instead.) > /* If the skip flag is off, then data will be transferred */ > if (!ccw_is_skip(ccw)) > return 1; > @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx) > { > struct ccw1 *ccw = chain->ch_ccw + idx; > > - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) > + if (ccw_is_tic(ccw)) > return; > > kfree((void *)(u64)ccw->cda); > @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, > { > struct ccw1 *ccw = chain->ch_ccw + idx; > > - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) > - return 0; > - > if (ccw_is_tic(ccw)) > return ccwchain_fetch_tic(chain, idx, cp); >
On 5/6/19 8:56 AM, Pierre Morel wrote: > On 03/05/2019 15:49, Eric Farman wrote: >> If the CCW being processed is a No-Operation, then by definition no >> data is being transferred. Let's fold those checks into the normal >> CCW processors, rather than skipping out early. >> >> Likewise, if the CCW being processed is a "test" (an invented >> definition to simply mean it ends in a zero), let's permit that to go >> through to the hardware. There's nothing inherently unique about >> those command codes versus one that ends in an eight [1], or any other >> otherwise valid command codes that are undefined for the device type >> in question. >> >> [1] POPS states that a x08 is a TIC CCW, and that having any high-order >> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >> high-order bits are ignored. >> >> Signed-off-by: Eric Farman <farman@linux.ibm.com> >> --- >> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >> 1 file changed, 5 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/s390/cio/vfio_ccw_cp.c >> b/drivers/s390/cio/vfio_ccw_cp.c >> index 36d76b821209..c0a52025bf06 100644 >> --- a/drivers/s390/cio/vfio_ccw_cp.c >> +++ b/drivers/s390/cio/vfio_ccw_cp.c >> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct >> channel_program *cp, >> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) >> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == >> CCW_CMD_BASIC_SENSE) >> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >> - >> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct >> ccw1 *ccw) >> if (ccw->count == 0) >> return 0; >> + /* If the command is a NOP, then no data will be transferred */ >> + if (ccw_is_noop(ccw)) >> + return 0; >> + >> /* If the skip flag is off, then data will be transferred */ >> if (!ccw_is_skip(ccw)) >> return 1; >> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain >> *chain, int idx) >> { >> struct ccw1 *ccw = chain->ch_ccw + idx; >> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >> + if (ccw_is_tic(ccw)) > > > AFAIR, we introduced this code to protect against noop and test with a > non zero CDA. > This could go away only if there is somewhere the guaranty that noop > have always a null CDA (same for test). What was generating either the null or "test" command codes? I can provide plenty of examples for both these command codes and how they look coming out of vfio-ccw now. The noop check is moved up into the "does data transfer" routine, to determine whether the pages should be pinned or not. Regardless of whether or not the input CDA is null, we'll end up with a CCW pointing to a valid IDAL of invalid addresses. The "test" command codes always struck me as funky, because x18 and xF8 and everything in between that ends in x8 is architecturally invalid too, but we don't check for them like we do for things that end in x0. And there's a TON of other opcodes that are invalid for today's ECKD devices, or perhaps were valid for older DASD but have since been deprecated, or are only valid for non-DASD device types. We have no logic to permit them, either. If those CCWs had a non-zero CDA, we either pin it successfully and let the targeted device sort it out or an error occurs and we fail at that point. (QEMU will see a "wirte" region error of -EINVAL because of vfio_pin_pages()) > > > >> return; >> kfree((void *)(u64)ccw->cda); >> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, >> { >> struct ccw1 *ccw = chain->ch_ccw + idx; >> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) >> - return 0; >> - >> if (ccw_is_tic(ccw)) >> return ccwchain_fetch_tic(chain, idx, cp); >> > >
On 5/6/19 11:37 AM, Cornelia Huck wrote: > On Fri, 3 May 2019 15:49:12 +0200 > Eric Farman <farman@linux.ibm.com> wrote: > >> If the CCW being processed is a No-Operation, then by definition no >> data is being transferred. Let's fold those checks into the normal >> CCW processors, rather than skipping out early. >> >> Likewise, if the CCW being processed is a "test" (an invented >> definition to simply mean it ends in a zero), > > The "Common I/O Device Commands" document actually defines this :) Blech, okay so I didn't look early enough in that document. Section 1.5 it is. :) > >> let's permit that to go >> through to the hardware. There's nothing inherently unique about >> those command codes versus one that ends in an eight [1], or any other >> otherwise valid command codes that are undefined for the device type >> in question. > > But I agree that everything possible should be sent to the hardware. > >> >> [1] POPS states that a x08 is a TIC CCW, and that having any high-order >> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >> high-order bits are ignored. >> >> Signed-off-by: Eric Farman <farman@linux.ibm.com> >> --- >> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >> 1 file changed, 5 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c >> index 36d76b821209..c0a52025bf06 100644 >> --- a/drivers/s390/cio/vfio_ccw_cp.c >> +++ b/drivers/s390/cio/vfio_ccw_cp.c >> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp, >> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) >> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) >> >> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >> - >> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >> >> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw) >> if (ccw->count == 0) >> return 0; >> >> + /* If the command is a NOP, then no data will be transferred */ >> + if (ccw_is_noop(ccw)) >> + return 0; >> + > > Don't you need to return 0 here for any test command as well? > > (If I read the doc correctly, we'll just get a unit check in any case, > as there are no parallel I/O interfaces on modern s390 boxes. Even if > we had a parallel I/O interface, we'd just collect the status, and not > get any data transfer. FWIW, the QEMU ccw interpreter for emulated > devices rejects test ccws with a channel program check, which looks > wrong; should be a command reject instead.) I will go back and look. I thought when I sent a test command with an address that wasn't translated I got an unhappy result, which is why I ripped this check out. I was trying to use test CCWs as a safety valve for Halil's Status Modifier concern, so maybe I had something else wrong on that pile. (The careful observer would note that that code was not included here. :) > >> /* If the skip flag is off, then data will be transferred */ >> if (!ccw_is_skip(ccw)) >> return 1; >> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx) >> { >> struct ccw1 *ccw = chain->ch_ccw + idx; >> >> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >> + if (ccw_is_tic(ccw)) >> return; >> >> kfree((void *)(u64)ccw->cda); >> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, >> { >> struct ccw1 *ccw = chain->ch_ccw + idx; >> >> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) >> - return 0; >> - >> if (ccw_is_tic(ccw)) >> return ccwchain_fetch_tic(chain, idx, cp); >> >
On Mon, 6 May 2019 11:46:59 -0400 Eric Farman <farman@linux.ibm.com> wrote: > On 5/6/19 11:37 AM, Cornelia Huck wrote: > > On Fri, 3 May 2019 15:49:12 +0200 > > Eric Farman <farman@linux.ibm.com> wrote: > > > >> If the CCW being processed is a No-Operation, then by definition no > >> data is being transferred. Let's fold those checks into the normal > >> CCW processors, rather than skipping out early. > >> > >> Likewise, if the CCW being processed is a "test" (an invented > >> definition to simply mean it ends in a zero), > > > > The "Common I/O Device Commands" document actually defines this :) > > Blech, okay so I didn't look early enough in that document. Section 1.5 > it is. :) > > > > >> let's permit that to go > >> through to the hardware. There's nothing inherently unique about > >> those command codes versus one that ends in an eight [1], or any other > >> otherwise valid command codes that are undefined for the device type > >> in question. > > > > But I agree that everything possible should be sent to the hardware. > > > >> > >> [1] POPS states that a x08 is a TIC CCW, and that having any high-order > >> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the > >> high-order bits are ignored. > >> > >> Signed-off-by: Eric Farman <farman@linux.ibm.com> > >> --- > >> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ > >> 1 file changed, 5 insertions(+), 6 deletions(-) > >> > >> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c > >> index 36d76b821209..c0a52025bf06 100644 > >> --- a/drivers/s390/cio/vfio_ccw_cp.c > >> +++ b/drivers/s390/cio/vfio_ccw_cp.c > >> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp, > >> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) > >> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) > >> > >> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) > >> - > >> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) > >> > >> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) > >> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw) > >> if (ccw->count == 0) > >> return 0; > >> > >> + /* If the command is a NOP, then no data will be transferred */ > >> + if (ccw_is_noop(ccw)) > >> + return 0; > >> + > > > > Don't you need to return 0 here for any test command as well? > > > > (If I read the doc correctly, we'll just get a unit check in any case, > > as there are no parallel I/O interfaces on modern s390 boxes. Even if > > we had a parallel I/O interface, we'd just collect the status, and not > > get any data transfer. FWIW, the QEMU ccw interpreter for emulated > > devices rejects test ccws with a channel program check, which looks > > wrong; should be a command reject instead.) > > I will go back and look. I thought when I sent a test command with an > address that wasn't translated I got an unhappy result, which is why I > ripped this check out. Ugh, I just looked at the current PoP and that specifies ccws[1] of test type as 'invalid' (generating a channel program check). So, the current PoP and the (old) I/O device commands seem to disagree :/ Do you know if there's any update to the latter? I think I'll just leave QEMU as it is, as that at least agrees with the current PoP... > > I was trying to use test CCWs as a safety valve for Halil's Status > Modifier concern, so maybe I had something else wrong on that pile. > (The careful observer would note that that code was not included here. :) :) > > > > >> /* If the skip flag is off, then data will be transferred */ > >> if (!ccw_is_skip(ccw)) > >> return 1; > >> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx) > >> { > >> struct ccw1 *ccw = chain->ch_ccw + idx; > >> > >> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) > >> + if (ccw_is_tic(ccw)) > >> return; > >> > >> kfree((void *)(u64)ccw->cda); > >> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, > >> { > >> struct ccw1 *ccw = chain->ch_ccw + idx; > >> > >> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) > >> - return 0; > >> - > >> if (ccw_is_tic(ccw)) > >> return ccwchain_fetch_tic(chain, idx, cp); > >> > > [1] tcws are a bit different; but we don't support them anyway.
On 5/6/19 12:18 PM, Cornelia Huck wrote: > On Mon, 6 May 2019 11:46:59 -0400 > Eric Farman <farman@linux.ibm.com> wrote: > >> On 5/6/19 11:37 AM, Cornelia Huck wrote: >>> On Fri, 3 May 2019 15:49:12 +0200 >>> Eric Farman <farman@linux.ibm.com> wrote: >>> >>>> If the CCW being processed is a No-Operation, then by definition no >>>> data is being transferred. Let's fold those checks into the normal >>>> CCW processors, rather than skipping out early. >>>> >>>> Likewise, if the CCW being processed is a "test" (an invented >>>> definition to simply mean it ends in a zero), >>> >>> The "Common I/O Device Commands" document actually defines this :) >> >> Blech, okay so I didn't look early enough in that document. Section 1.5 >> it is. :) >> >>> >>>> let's permit that to go >>>> through to the hardware. There's nothing inherently unique about >>>> those command codes versus one that ends in an eight [1], or any other >>>> otherwise valid command codes that are undefined for the device type >>>> in question. >>> >>> But I agree that everything possible should be sent to the hardware. >>> >>>> >>>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order >>>> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >>>> high-order bits are ignored. >>>> >>>> Signed-off-by: Eric Farman <farman@linux.ibm.com> >>>> --- >>>> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >>>> 1 file changed, 5 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c >>>> index 36d76b821209..c0a52025bf06 100644 >>>> --- a/drivers/s390/cio/vfio_ccw_cp.c >>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c >>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp, >>>> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) >>>> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) >>>> >>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >>>> - >>>> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >>>> >>>> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >>>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw) >>>> if (ccw->count == 0) >>>> return 0; >>>> >>>> + /* If the command is a NOP, then no data will be transferred */ >>>> + if (ccw_is_noop(ccw)) >>>> + return 0; >>>> + >>> >>> Don't you need to return 0 here for any test command as well? >>> >>> (If I read the doc correctly, we'll just get a unit check in any case, >>> as there are no parallel I/O interfaces on modern s390 boxes. Even if >>> we had a parallel I/O interface, we'd just collect the status, and not >>> get any data transfer. FWIW, the QEMU ccw interpreter for emulated >>> devices rejects test ccws with a channel program check, which looks >>> wrong; should be a command reject instead.) >> >> I will go back and look. I thought when I sent a test command with an >> address that wasn't translated I got an unhappy result, which is why I >> ripped this check out. > > Ugh, I just looked at the current PoP and that specifies ccws[1] of test > type as 'invalid' (generating a channel program check). So, the current > PoP and the (old) I/O device commands seem to disagree :/ Do you know > if there's any update to the latter? I think I'll just leave QEMU as it > is, as that at least agrees with the current PoP... Double ugh. I am not aware, but I'll poke around on our side. Probably better to focus on PoP, unless it specifically points to ye olde document. > >> >> I was trying to use test CCWs as a safety valve for Halil's Status >> Modifier concern, so maybe I had something else wrong on that pile. >> (The careful observer would note that that code was not included here. :) > > :) > >> >>> >>>> /* If the skip flag is off, then data will be transferred */ >>>> if (!ccw_is_skip(ccw)) >>>> return 1; >>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx) >>>> { >>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>> >>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >>>> + if (ccw_is_tic(ccw)) >>>> return; >>>> >>>> kfree((void *)(u64)ccw->cda); >>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, >>>> { >>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>> >>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) >>>> - return 0; >>>> - >>>> if (ccw_is_tic(ccw)) >>>> return ccwchain_fetch_tic(chain, idx, cp); >>>> >>> > > [1] tcws are a bit different; but we don't support them anyway. > Yeah... I'd like to get the code cleaned up to a point where if we want to support TCWs, we could just up front say "if command go here, if transport go there." Not that that'll be anytime soon, but it would prevent us from having to dis-entangle everything at that future point.
On Mon, 6 May 2019 12:25:01 -0400 Eric Farman <farman@linux.ibm.com> wrote: > On 5/6/19 12:18 PM, Cornelia Huck wrote: > > [1] tcws are a bit different; but we don't support them anyway. > > > > Yeah... I'd like to get the code cleaned up to a point where if we want > to support TCWs, we could just up front say "if command go here, if > transport go there." Not that that'll be anytime soon, but it would > prevent us from having to dis-entangle everything at that future point. > Let's see how much work that is :)
On 5/6/19 11:39 AM, Eric Farman wrote: > > > On 5/6/19 8:56 AM, Pierre Morel wrote: >> On 03/05/2019 15:49, Eric Farman wrote: >>> If the CCW being processed is a No-Operation, then by definition no >>> data is being transferred. Let's fold those checks into the normal >>> CCW processors, rather than skipping out early. >>> >>> Likewise, if the CCW being processed is a "test" (an invented >>> definition to simply mean it ends in a zero), let's permit that to go >>> through to the hardware. There's nothing inherently unique about >>> those command codes versus one that ends in an eight [1], or any other >>> otherwise valid command codes that are undefined for the device type >>> in question. >>> >>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order >>> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >>> high-order bits are ignored. >>> >>> Signed-off-by: Eric Farman <farman@linux.ibm.com> >>> --- >>> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >>> 1 file changed, 5 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c >>> b/drivers/s390/cio/vfio_ccw_cp.c >>> index 36d76b821209..c0a52025bf06 100644 >>> --- a/drivers/s390/cio/vfio_ccw_cp.c >>> +++ b/drivers/s390/cio/vfio_ccw_cp.c >>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct >>> channel_program *cp, >>> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) >>> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == >>> CCW_CMD_BASIC_SENSE) >>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >>> - >>> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >>> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct >>> ccw1 *ccw) >>> if (ccw->count == 0) >>> return 0; >>> + /* If the command is a NOP, then no data will be transferred */ >>> + if (ccw_is_noop(ccw)) >>> + return 0; >>> + >>> /* If the skip flag is off, then data will be transferred */ >>> if (!ccw_is_skip(ccw)) >>> return 1; >>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain >>> *chain, int idx) >>> { >>> struct ccw1 *ccw = chain->ch_ccw + idx; >>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >>> + if (ccw_is_tic(ccw)) >> >> >> AFAIR, we introduced this code to protect against noop and test with a >> non zero CDA. >> This could go away only if there is somewhere the guaranty that noop >> have always a null CDA (same for test). > > What was generating either the null or "test" command codes? I can > provide plenty of examples for both these command codes and how they > look coming out of vfio-ccw now. I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to hardware without this patch. I don't see anything particuarly surpising, so I'm not sure what the original code was attempting to protect. Maybe, since you question this in ccwchain_cda_free(), you're referring to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to free no-op, test and tic cda."), which fixed up our attempt to clean things up that weren't allocated on the transmit side? With this series, that is reverted, but the cda is indeed set to something that needs to be free'd (see below). So maybe I should at least mention that commit here. Regardless, while the I/Os work/fail as I expect, the cda addresses themselves are wrong in much the same way I describe in patch 4. Yes, without this patch we don't convert them to an IDAL so certain program checks aren't applicable. But the addresses that we end up sending to the hardware are nonsensical, though potentially valid, locations. > > The noop check is moved up into the "does data transfer" routine, to > determine whether the pages should be pinned or not. Regardless of > whether or not the input CDA is null, we'll end up with a CCW pointing > to a valid IDAL of invalid addresses. > > The "test" command codes always struck me as funky, because x18 and xF8 > and everything in between that ends in x8 is architecturally invalid > too, but we don't check for them like we do for things that end in x0. > And there's a TON of other opcodes that are invalid for today's ECKD > devices, or perhaps were valid for older DASD but have since been > deprecated, or are only valid for non-DASD device types. We have no > logic to permit them, either. If those CCWs had a non-zero CDA, we > either pin it successfully and let the targeted device sort it out or an > error occurs and we fail at that point. (QEMU will see a "wirte" region > error of -EINVAL because of vfio_pin_pages()) > >> >> >> >>> return; >>> kfree((void *)(u64)ccw->cda); >>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain >>> *chain, >>> { >>> struct ccw1 *ccw = chain->ch_ccw + idx; >>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) >>> - return 0; >>> - >>> if (ccw_is_tic(ccw)) >>> return ccwchain_fetch_tic(chain, idx, cp); >>> >> >>
On 06/05/2019 22:47, Eric Farman wrote: > > > On 5/6/19 11:39 AM, Eric Farman wrote: >> >> >> On 5/6/19 8:56 AM, Pierre Morel wrote: >>> On 03/05/2019 15:49, Eric Farman wrote: >>>> If the CCW being processed is a No-Operation, then by definition no >>>> data is being transferred. Let's fold those checks into the normal >>>> CCW processors, rather than skipping out early. >>>> >>>> Likewise, if the CCW being processed is a "test" (an invented >>>> definition to simply mean it ends in a zero), let's permit that to go >>>> through to the hardware. There's nothing inherently unique about >>>> those command codes versus one that ends in an eight [1], or any other >>>> otherwise valid command codes that are undefined for the device type >>>> in question. >>>> >>>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order >>>> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >>>> high-order bits are ignored. >>>> >>>> Signed-off-by: Eric Farman <farman@linux.ibm.com> >>>> --- >>>> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >>>> 1 file changed, 5 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c >>>> b/drivers/s390/cio/vfio_ccw_cp.c >>>> index 36d76b821209..c0a52025bf06 100644 >>>> --- a/drivers/s390/cio/vfio_ccw_cp.c >>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c >>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct >>>> channel_program *cp, >>>> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == >>>> 0x0C) >>>> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == >>>> CCW_CMD_BASIC_SENSE) >>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >>>> - >>>> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >>>> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >>>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct >>>> ccw1 *ccw) >>>> if (ccw->count == 0) >>>> return 0; >>>> + /* If the command is a NOP, then no data will be transferred */ >>>> + if (ccw_is_noop(ccw)) >>>> + return 0; >>>> + >>>> /* If the skip flag is off, then data will be transferred */ >>>> if (!ccw_is_skip(ccw)) >>>> return 1; >>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain >>>> *chain, int idx) >>>> { >>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >>>> + if (ccw_is_tic(ccw)) >>> >>> >>> AFAIR, we introduced this code to protect against noop and test with >>> a non zero CDA. >>> This could go away only if there is somewhere the guaranty that noop >>> have always a null CDA (same for test). >> >> What was generating either the null or "test" command codes? I can >> provide plenty of examples for both these command codes and how they >> look coming out of vfio-ccw now. > > I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to > hardware without this patch. I don't see anything particuarly > surpising, so I'm not sure what the original code was attempting to > protect. > > Maybe, since you question this in ccwchain_cda_free(), you're referring > to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to free no-op, > test and tic cda."), which fixed up our attempt to clean things up that > weren't allocated on the transmit side? With this series, that is > reverted, but the cda is indeed set to something that needs to be free'd > (see below). So maybe I should at least mention that commit here. > > Regardless, while the I/Os work/fail as I expect, the cda addresses > themselves are wrong in much the same way I describe in patch 4. Yes, > without this patch we don't convert them to an IDAL so certain program > checks aren't applicable. But the addresses that we end up sending to > the hardware are nonsensical, though potentially valid, locations. > I am not comfortable with this. with NOOP no data transfer take place and the role of VFIO is to take care about data transfer. So in my logic better do nothing and send the original CCW to the hardware. >> >> The noop check is moved up into the "does data transfer" routine, to >> determine whether the pages should be pinned or not. Regardless of >> whether or not the input CDA is null, we'll end up with a CCW pointing >> to a valid IDAL of invalid addresses. >> >> The "test" command codes always struck me as funky, because x18 and >> xF8 and everything in between that ends in x8 is architecturally >> invalid too, but we don't check for them like we do for things that >> end in x0. And there's a TON of other opcodes that are invalid for >> today's ECKD devices, or perhaps were valid for older DASD but have >> since been deprecated, or are only valid for non-DASD device types. >> We have no logic to permit them, either. If those CCWs had a non-zero >> CDA, we either pin it successfully and let the targeted device sort it >> out or an error occurs and we fail at that point. (QEMU will see a >> "wirte" region error of -EINVAL because of vfio_pin_pages()) The test command is AFAIU even more sensible that the NOOP command and in my opinion should never be modified since it is highly device dependent and do not induce data transfer anyway. We even do not know how the CDA field may be used by the device. May be I am a little dramatic with this. Just to say that I would feel more comfortable if the test command reach the device unchanged. >> >>> >>> >>> >>>> return; >>>> kfree((void *)(u64)ccw->cda); >>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain >>>> *chain, >>>> { >>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) >>>> - return 0; >>>> - >>>> if (ccw_is_tic(ccw)) >>>> return ccwchain_fetch_tic(chain, idx, cp); >>>> >>> >>>
On 5/7/19 4:52 AM, Pierre Morel wrote: > On 06/05/2019 22:47, Eric Farman wrote: >> >> >> On 5/6/19 11:39 AM, Eric Farman wrote: >>> >>> >>> On 5/6/19 8:56 AM, Pierre Morel wrote: >>>> On 03/05/2019 15:49, Eric Farman wrote: >>>>> If the CCW being processed is a No-Operation, then by definition no >>>>> data is being transferred. Let's fold those checks into the normal >>>>> CCW processors, rather than skipping out early. >>>>> >>>>> Likewise, if the CCW being processed is a "test" (an invented >>>>> definition to simply mean it ends in a zero), let's permit that to go >>>>> through to the hardware. There's nothing inherently unique about >>>>> those command codes versus one that ends in an eight [1], or any other >>>>> otherwise valid command codes that are undefined for the device type >>>>> in question. >>>>> >>>>> [1] POPS states that a x08 is a TIC CCW, and that having any >>>>> high-order >>>>> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >>>>> high-order bits are ignored. >>>>> >>>>> Signed-off-by: Eric Farman <farman@linux.ibm.com> >>>>> --- >>>>> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >>>>> 1 file changed, 5 insertions(+), 6 deletions(-) >>>>> >>>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c >>>>> b/drivers/s390/cio/vfio_ccw_cp.c >>>>> index 36d76b821209..c0a52025bf06 100644 >>>>> --- a/drivers/s390/cio/vfio_ccw_cp.c >>>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c >>>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct >>>>> channel_program *cp, >>>>> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == >>>>> 0x0C) >>>>> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == >>>>> CCW_CMD_BASIC_SENSE) >>>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >>>>> - >>>>> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >>>>> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >>>>> @@ -314,6 +312,10 @@ static inline int >>>>> ccw_does_data_transfer(struct ccw1 *ccw) >>>>> if (ccw->count == 0) >>>>> return 0; >>>>> + /* If the command is a NOP, then no data will be transferred */ >>>>> + if (ccw_is_noop(ccw)) >>>>> + return 0; >>>>> + >>>>> /* If the skip flag is off, then data will be transferred */ >>>>> if (!ccw_is_skip(ccw)) >>>>> return 1; >>>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain >>>>> *chain, int idx) >>>>> { >>>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >>>>> + if (ccw_is_tic(ccw)) >>>> >>>> >>>> AFAIR, we introduced this code to protect against noop and test with >>>> a non zero CDA. >>>> This could go away only if there is somewhere the guaranty that noop >>>> have always a null CDA (same for test). >>> >>> What was generating either the null or "test" command codes? I can >>> provide plenty of examples for both these command codes and how they >>> look coming out of vfio-ccw now. >> >> I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to >> hardware without this patch. I don't see anything particuarly >> surpising, so I'm not sure what the original code was attempting to >> protect. >> >> Maybe, since you question this in ccwchain_cda_free(), you're >> referring to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to >> free no-op, test and tic cda."), which fixed up our attempt to clean >> things up that weren't allocated on the transmit side? With this >> series, that is reverted, but the cda is indeed set to something that >> needs to be free'd (see below). So maybe I should at least mention >> that commit here. >> >> Regardless, while the I/Os work/fail as I expect, the cda addresses >> themselves are wrong in much the same way I describe in patch 4. Yes, >> without this patch we don't convert them to an IDAL so certain program >> checks aren't applicable. But the addresses that we end up sending to >> the hardware are nonsensical, though potentially valid, locations. >> > > I am not comfortable with this. > with NOOP no data transfer take place and the role of VFIO is to take > care about data transfer. > So in my logic better do nothing and send the original CCW to the hardware. > >>> >>> The noop check is moved up into the "does data transfer" routine, to >>> determine whether the pages should be pinned or not. Regardless of >>> whether or not the input CDA is null, we'll end up with a CCW >>> pointing to a valid IDAL of invalid addresses. >>> >>> The "test" command codes always struck me as funky, because x18 and >>> xF8 and everything in between that ends in x8 is architecturally >>> invalid too, but we don't check for them like we do for things that >>> end in x0. And there's a TON of other opcodes that are invalid for >>> today's ECKD devices, or perhaps were valid for older DASD but have >>> since been deprecated, or are only valid for non-DASD device types. >>> We have no logic to permit them, either. If those CCWs had a >>> non-zero CDA, we either pin it successfully and let the targeted >>> device sort it out or an error occurs and we fail at that point. >>> (QEMU will see a "wirte" region error of -EINVAL because of >>> vfio_pin_pages()) > > The test command is AFAIU even more sensible that the NOOP command and > in my opinion should never be modified since it is highly device > dependent and do not induce data transfer anyway. > > We even do not know how the CDA field may be used by the device. Exactly, which is why I think sending an unpinned, non-translated, guest address to the hardware (which is what happens today) is a Bad Idea. If the associated command code WERE going to cause the channel to modify any memory, the provided address from the guest would (best case) cause a program check if the address were not available, or some data corruption if it were. > May be I am a little dramatic with this. > Just to say that I would feel more comfortable if the test command reach > the device unchanged. > As I say above, I disagree. I'd rather that the command (test or otherwise) hit the channel (and the device if applicable) with a valid host address in ccw.cda, so that if any data transfer occurs we're not exposed. If there's an application that wants to send a test CCW with an invalid CDA (and thus would fail the pin, as I have seen with NOP), then I guess I can add ccw_is_test() to ccw_does_data_transfer(), but since I still don't see the use case for test CCWs I'm not as thrilled about it. Do you recall what caused them to be added originally? >>> >>>> >>>> >>>> >>>>> return; >>>>> kfree((void *)(u64)ccw->cda); >>>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain >>>>> *chain, >>>>> { >>>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) >>>>> - return 0; >>>>> - >>>>> if (ccw_is_tic(ccw)) >>>>> return ccwchain_fetch_tic(chain, idx, cp); >>>>> >>>> >>>> > >
On 07/05/2019 18:43, Eric Farman wrote: > > > On 5/7/19 4:52 AM, Pierre Morel wrote: >> On 06/05/2019 22:47, Eric Farman wrote: >>> >>> >>> On 5/6/19 11:39 AM, Eric Farman wrote: >>>> >>>> >>>> On 5/6/19 8:56 AM, Pierre Morel wrote: >>>>> On 03/05/2019 15:49, Eric Farman wrote: >>>>>> If the CCW being processed is a No-Operation, then by definition no >>>>>> data is being transferred. Let's fold those checks into the normal >>>>>> CCW processors, rather than skipping out early. >>>>>> >>>>>> Likewise, if the CCW being processed is a "test" (an invented >>>>>> definition to simply mean it ends in a zero), let's permit that to go >>>>>> through to the hardware. There's nothing inherently unique about >>>>>> those command codes versus one that ends in an eight [1], or any >>>>>> other >>>>>> otherwise valid command codes that are undefined for the device type >>>>>> in question. >>>>>> >>>>>> [1] POPS states that a x08 is a TIC CCW, and that having any >>>>>> high-order >>>>>> bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the >>>>>> high-order bits are ignored. >>>>>> >>>>>> Signed-off-by: Eric Farman <farman@linux.ibm.com> >>>>>> --- >>>>>> drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ >>>>>> 1 file changed, 5 insertions(+), 6 deletions(-) >>>>>> >>>>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c >>>>>> b/drivers/s390/cio/vfio_ccw_cp.c >>>>>> index 36d76b821209..c0a52025bf06 100644 >>>>>> --- a/drivers/s390/cio/vfio_ccw_cp.c >>>>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c >>>>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct >>>>>> channel_program *cp, >>>>>> #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == >>>>>> 0x0C) >>>>>> #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == >>>>>> CCW_CMD_BASIC_SENSE) >>>>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) >>>>>> - >>>>>> #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) >>>>>> #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) >>>>>> @@ -314,6 +312,10 @@ static inline int >>>>>> ccw_does_data_transfer(struct ccw1 *ccw) >>>>>> if (ccw->count == 0) >>>>>> return 0; >>>>>> + /* If the command is a NOP, then no data will be transferred */ >>>>>> + if (ccw_is_noop(ccw)) >>>>>> + return 0; >>>>>> + >>>>>> /* If the skip flag is off, then data will be transferred */ >>>>>> if (!ccw_is_skip(ccw)) >>>>>> return 1; >>>>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain >>>>>> *chain, int idx) >>>>>> { >>>>>> struct ccw1 *ccw = chain->ch_ccw + idx; >>>>>> - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) >>>>>> + if (ccw_is_tic(ccw)) >>>>> >>>>> >>>>> AFAIR, we introduced this code to protect against noop and test >>>>> with a non zero CDA. >>>>> This could go away only if there is somewhere the guaranty that >>>>> noop have always a null CDA (same for test). >>>> >>>> What was generating either the null or "test" command codes? I can >>>> provide plenty of examples for both these command codes and how they >>>> look coming out of vfio-ccw now. >>> >>> I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to >>> hardware without this patch. I don't see anything particuarly >>> surpising, so I'm not sure what the original code was attempting to >>> protect. >>> >>> Maybe, since you question this in ccwchain_cda_free(), you're >>> referring to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to >>> free no-op, test and tic cda."), which fixed up our attempt to clean >>> things up that weren't allocated on the transmit side? With this >>> series, that is reverted, but the cda is indeed set to something that >>> needs to be free'd (see below). So maybe I should at least mention >>> that commit here. >>> >>> Regardless, while the I/Os work/fail as I expect, the cda addresses >>> themselves are wrong in much the same way I describe in patch 4. >>> Yes, without this patch we don't convert them to an IDAL so certain >>> program checks aren't applicable. But the addresses that we end up >>> sending to the hardware are nonsensical, though potentially valid, >>> locations. >>> >> >> I am not comfortable with this. >> with NOOP no data transfer take place and the role of VFIO is to take >> care about data transfer. >> So in my logic better do nothing and send the original CCW to the >> hardware. >> >>>> >>>> The noop check is moved up into the "does data transfer" routine, to >>>> determine whether the pages should be pinned or not. Regardless of >>>> whether or not the input CDA is null, we'll end up with a CCW >>>> pointing to a valid IDAL of invalid addresses. >>>> >>>> The "test" command codes always struck me as funky, because x18 and >>>> xF8 and everything in between that ends in x8 is architecturally >>>> invalid too, but we don't check for them like we do for things that >>>> end in x0. And there's a TON of other opcodes that are invalid for >>>> today's ECKD devices, or perhaps were valid for older DASD but have >>>> since been deprecated, or are only valid for non-DASD device types. >>>> We have no logic to permit them, either. If those CCWs had a >>>> non-zero CDA, we either pin it successfully and let the targeted >>>> device sort it out or an error occurs and we fail at that point. >>>> (QEMU will see a "wirte" region error of -EINVAL because of >>>> vfio_pin_pages()) >> >> The test command is AFAIU even more sensible that the NOOP command and >> in my opinion should never be modified since it is highly device >> dependent and do not induce data transfer anyway. >> >> We even do not know how the CDA field may be used by the device. > > Exactly, which is why I think sending an unpinned, non-translated, guest > address to the hardware (which is what happens today) is a Bad Idea. If > the associated command code WERE going to cause the channel to modify > any memory, the provided address from the guest would (best case) cause > a program check if the address were not available, or some data > corruption if it were. > >> May be I am a little dramatic with this. >> Just to say that I would feel more comfortable if the test command >> reach the device unchanged. >> > > As I say above, I disagree. I'd rather that the command (test or > otherwise) hit the channel (and the device if applicable) with a valid > host address in ccw.cda, so that if any data transfer occurs we're not > exposed. > > If there's an application that wants to send a test CCW with an invalid > CDA (and thus would fail the pin, as I have seen with NOP), then I guess > I can add ccw_is_test() to ccw_does_data_transfer(), but since I still > don't see the use case for test CCWs I'm not as thrilled about it. > > Do you recall what caused them to be added originally? AFAIK they have bin added because they are standard CCW commands. For the NOOP its clearly stated that it does not start a data transfer. If we pin the CDA, it could then eventually be the cause of errors if the address indicated by the CDA is not accessible. The NOOP is a particular CONTROL operation for which no data is transfered. Other CONTROL operation may start a data transfer. The TEST command is used to retrieve the status of the I/O-device __path__ and do not go up to the device. I did not find clearly that it does not start a data transfer but I really do not think it does. May be we should ask people from hardware. I only found that test I/O (a specific test command) do not initiate an operation. Regards, Pierre
On Wed, 8 May 2019 11:22:07 +0200 Pierre Morel <pmorel@linux.ibm.com> wrote: > The TEST command is used to retrieve the status of the I/O-device > __path__ and do not go up to the device. > I did not find clearly that it does not start a data transfer but I > really do not think it does. > May be we should ask people from hardware. > I only found that test I/O (a specific test command) do not initiate an > operation. FWIW, I'm not sure about what we should do with the test command in any case. Currently, I see it defined as a proper command in the rather ancient "Common I/O Device Commands" (I don't know of any newer public version), which states that it retrieves the status on the parallel interface _only_ (and generates a command reject on the serial interface). IIRC, the parallel interface has been phased out quite some time ago. The current PoP, in contrast, defines this as an _invalid_ command (generating a channel program check). So, while the test command originally was designed to never initiate a data transfer, we now have an invalid command in its place, and we don't know if something else might change in the future (for transfer mode, a test-like command is already defined in the PoP). So, the safest course would probably be to handle the ->cda portion and send the command down. We'll get a check condition on current hardware, but it should be safe if something changes in the future. Of course, asking some hardware folks is not a bad idea, either :)
On 5/8/19 6:06 AM, Cornelia Huck wrote: > On Wed, 8 May 2019 11:22:07 +0200 > Pierre Morel <pmorel@linux.ibm.com> wrote: > >> The TEST command is used to retrieve the status of the I/O-device >> __path__ and do not go up to the device. >> I did not find clearly that it does not start a data transfer but I >> really do not think it does. >> May be we should ask people from hardware. >> I only found that test I/O (a specific test command) do not initiate an >> operation. > > FWIW, I'm not sure about what we should do with the test command in any > case. > > Currently, I see it defined as a proper command in the rather ancient > "Common I/O Device Commands" (I don't know of any newer public > version), Nor I. I had to rummage around a few dumpsters to find a copy of this one, even. > which states that it retrieves the status on the parallel > interface _only_ (and generates a command reject on the serial > interface). IIRC, the parallel interface has been phased out quite some > time ago. The current POPs, towards the bottom left side of page 13-3, has this statement: --- The term “serial-I/O interface” is used to refer the ESCON I/O interface, the FICON I/O interface, and the FICON-converted I/O interface. The term “parallel-I/O interface” is used to refer to the IBM System/360 and System/370 I/O interface. --- So, yes it was phased out some time ago. :) > > The current PoP, in contrast, defines this as an _invalid_ command > (generating a channel program check). Ditto the ESA/390 POPs (SA22-7201-08). > > So, while the test command originally was designed to never initiate a > data transfer, we now have an invalid command in its place, and we > don't know if something else might change in the future (for transfer > mode, a test-like command is already defined in the PoP). Indeed, the ccw_is_test() check would need to be reworked if we ever want to support transport mode anyway. :shudder: > > So, the safest course would probably be to handle the ->cda portion and > send the command down. We'll get a check condition on current hardware, > but it should be safe if something changes in the future. > > Of course, asking some hardware folks is not a bad idea, either :) > I'll shoot a quick note (and cc Pierre) just for the sake of sanity, but I'm still convinced this patch is fine as-is. :)
On Wed, 8 May 2019 11:22:07 +0200 Pierre Morel <pmorel@linux.ibm.com> wrote: > For the NOOP its clearly stated that it does not start a data transfer. > If we pin the CDA, it could then eventually be the cause of errors if > the address indicated by the CDA is not accessible. > > The NOOP is a particular CONTROL operation for which no data is transfered. > Other CONTROL operation may start a data transfer. I've just looked at the documentation again. The Olde Common I/O Device Commands document indicates that a NOOP simply causes channel end/device end. The PoP seems to indicate that the cda is always checked (i.e. does it point to a valid memory area?), but I'm not sure whether the area that is pointed to is checked for accessibility etc. as well, even if the command does not transfer any data. Has somebody tried to find out what happens on Real Hardware(tm) if you send a command that is not supposed to transfer any data where the cda points to a valid, but not accessible area? In general, I think doing the translation (and probably already hitting errors there) is better than sending down a guest address.
On 5/10/19 7:47 AM, Cornelia Huck wrote: > On Wed, 8 May 2019 11:22:07 +0200 > Pierre Morel <pmorel@linux.ibm.com> wrote: > >> For the NOOP its clearly stated that it does not start a data transfer. >> If we pin the CDA, it could then eventually be the cause of errors if >> the address indicated by the CDA is not accessible. >> >> The NOOP is a particular CONTROL operation for which no data is transfered. >> Other CONTROL operation may start a data transfer. > > I've just looked at the documentation again. > > The Olde Common I/O Device Commands document indicates that a NOOP > simply causes channel end/device end. > > The PoP seems to indicate that the cda is always checked (i.e. does it > point to a valid memory area?), but I'm not sure whether the area that > is pointed to is checked for accessibility etc. as well, even if the > command does not transfer any data. > > Has somebody tried to find out what happens on Real Hardware(tm) if you > send a command that is not supposed to transfer any data where the cda > points to a valid, but not accessible area? Hrm... The CDA itself? I don't think so. Since every CCW is converted to an IDAL in vfio-ccw, we guarantee that it's pointing to something valid at that point. So, I hacked ccwchain_fetch_direct() to NOT set the IDAL flag in a NOP CCW, and to leave the CDA alone. This means it will still contain the guest address, which is risky but hey it's a test system. :) (I offline'd a bunch of host memory too, to make sure I had some unavailable addresses.) In my traces, the non-IDA NOP CCWs were issued to the host with and without the skip flag, with zero and non-zero counts, and with zero and non-zero CDAs. All of them work just fine, including the ones who's addresses fall into the offline space. Even the combination of no skip, non-zero count, and zero cda. I modified that hack to do the same for a known invalid control opcode, and it seemed to be okay too. We got an (expected) invalid command before we noticed any problem with the provided address. > > In general, I think doing the translation (and probably already hitting > errors there) is better than sending down a guest address. > I mostly agree, but I have one test program that generates invalid GUEST addresses with its NOP CCWs, since it doesn't seem to care about whether they're valid or not. So any attempt to pin them will end badly, which is why I call that opcode out in ccw_does_data_transfer(), and just send invalid IDAWs with it.
On Fri, 10 May 2019 10:24:31 -0400 Eric Farman <farman@linux.ibm.com> wrote: > On 5/10/19 7:47 AM, Cornelia Huck wrote: > > On Wed, 8 May 2019 11:22:07 +0200 > > Pierre Morel <pmorel@linux.ibm.com> wrote: > > > >> For the NOOP its clearly stated that it does not start a data transfer. > >> If we pin the CDA, it could then eventually be the cause of errors if > >> the address indicated by the CDA is not accessible. > >> > >> The NOOP is a particular CONTROL operation for which no data is transfered. > >> Other CONTROL operation may start a data transfer. > > > > I've just looked at the documentation again. > > > > The Olde Common I/O Device Commands document indicates that a NOOP > > simply causes channel end/device end. > > > > The PoP seems to indicate that the cda is always checked (i.e. does it > > point to a valid memory area?), but I'm not sure whether the area that > > is pointed to is checked for accessibility etc. as well, even if the > > command does not transfer any data. > > > > Has somebody tried to find out what happens on Real Hardware(tm) if you > > send a command that is not supposed to transfer any data where the cda > > points to a valid, but not accessible area? > > Hrm... The CDA itself? I don't think so. Since every CCW is converted > to an IDAL in vfio-ccw, we guarantee that it's pointing to something > valid at that point. > > So, I hacked ccwchain_fetch_direct() to NOT set the IDAL flag in a NOP > CCW, and to leave the CDA alone. This means it will still contain the > guest address, which is risky but hey it's a test system. :) (I > offline'd a bunch of host memory too, to make sure I had some > unavailable addresses.) > > In my traces, the non-IDA NOP CCWs were issued to the host with and > without the skip flag, with zero and non-zero counts, and with zero and > non-zero CDAs. All of them work just fine, including the ones who's > addresses fall into the offline space. Even the combination of no skip, > non-zero count, and zero cda. > > I modified that hack to do the same for a known invalid control opcode, > and it seemed to be okay too. We got an (expected) invalid command > before we noticed any problem with the provided address. That's interesting; I would not have arrived at this by interpreting the PoP... > > > > In general, I think doing the translation (and probably already hitting > > errors there) is better than sending down a guest address. > > > > I mostly agree, but I have one test program that generates invalid GUEST > addresses with its NOP CCWs, since it doesn't seem to care about whether > they're valid or not. So any attempt to pin them will end badly, which > is why I call that opcode out in ccw_does_data_transfer(), and just send > invalid IDAWs with it. So, without the attempt to pin they do not fail? Maybe the right approach would be to rewrite the cda before sending the ccws?
On 5/14/19 10:29 AM, Cornelia Huck wrote: > On Fri, 10 May 2019 10:24:31 -0400 > Eric Farman <farman@linux.ibm.com> wrote: > >> On 5/10/19 7:47 AM, Cornelia Huck wrote: >>> On Wed, 8 May 2019 11:22:07 +0200 >>> Pierre Morel <pmorel@linux.ibm.com> wrote: >>> >>>> For the NOOP its clearly stated that it does not start a data transfer. >>>> If we pin the CDA, it could then eventually be the cause of errors if >>>> the address indicated by the CDA is not accessible. >>>> >>>> The NOOP is a particular CONTROL operation for which no data is transfered. >>>> Other CONTROL operation may start a data transfer. >>> >>> I've just looked at the documentation again. >>> >>> The Olde Common I/O Device Commands document indicates that a NOOP >>> simply causes channel end/device end. >>> >>> The PoP seems to indicate that the cda is always checked (i.e. does it >>> point to a valid memory area?), but I'm not sure whether the area that >>> is pointed to is checked for accessibility etc. as well, even if the >>> command does not transfer any data. >>> >>> Has somebody tried to find out what happens on Real Hardware(tm) if you >>> send a command that is not supposed to transfer any data where the cda >>> points to a valid, but not accessible area? >> >> Hrm... The CDA itself? I don't think so. Since every CCW is converted >> to an IDAL in vfio-ccw, we guarantee that it's pointing to something >> valid at that point. >> >> So, I hacked ccwchain_fetch_direct() to NOT set the IDAL flag in a NOP >> CCW, and to leave the CDA alone. This means it will still contain the >> guest address, which is risky but hey it's a test system. :) (I >> offline'd a bunch of host memory too, to make sure I had some >> unavailable addresses.) >> >> In my traces, the non-IDA NOP CCWs were issued to the host with and >> without the skip flag, with zero and non-zero counts, and with zero and >> non-zero CDAs. All of them work just fine, including the ones who's >> addresses fall into the offline space. Even the combination of no skip, >> non-zero count, and zero cda. >> >> I modified that hack to do the same for a known invalid control opcode, >> and it seemed to be okay too. We got an (expected) invalid command >> before we noticed any problem with the provided address. > > That's interesting; I would not have arrived at this by interpreting > the PoP... > >>> >>> In general, I think doing the translation (and probably already hitting >>> errors there) is better than sending down a guest address. >>> >> >> I mostly agree, but I have one test program that generates invalid GUEST >> addresses with its NOP CCWs, since it doesn't seem to care about whether >> they're valid or not. So any attempt to pin them will end badly, which >> is why I call that opcode out in ccw_does_data_transfer(), and just send >> invalid IDAWs with it. > > So, without the attempt to pin they do not fail? Correct. > Maybe the right > approach would be to rewrite the cda before sending the ccws? > That would be my vote. (And it's what this series does. :)
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 36d76b821209..c0a52025bf06 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp, #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) - #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw) if (ccw->count == 0) return 0; + /* If the command is a NOP, then no data will be transferred */ + if (ccw_is_noop(ccw)) + return 0; + /* If the skip flag is off, then data will be transferred */ if (!ccw_is_skip(ccw)) return 1; @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx) { struct ccw1 *ccw = chain->ch_ccw + idx; - if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) + if (ccw_is_tic(ccw)) return; kfree((void *)(u64)ccw->cda); @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain, { struct ccw1 *ccw = chain->ch_ccw + idx; - if (ccw_is_test(ccw) || ccw_is_noop(ccw)) - return 0; - if (ccw_is_tic(ccw)) return ccwchain_fetch_tic(chain, idx, cp);
If the CCW being processed is a No-Operation, then by definition no data is being transferred. Let's fold those checks into the normal CCW processors, rather than skipping out early. Likewise, if the CCW being processed is a "test" (an invented definition to simply mean it ends in a zero), let's permit that to go through to the hardware. There's nothing inherently unique about those command codes versus one that ends in an eight [1], or any other otherwise valid command codes that are undefined for the device type in question. [1] POPS states that a x08 is a TIC CCW, and that having any high-order bits enabled is invalid for format-1 CCWs. For format-0 CCWs, the high-order bits are ignored. Signed-off-by: Eric Farman <farman@linux.ibm.com> --- drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-)