diff mbox series

[7/7] s390/cio: Remove vfio-ccw checks of command codes

Message ID 20190503134912.39756-8-farman@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390: vfio-ccw fixes | expand

Commit Message

Eric Farman May 3, 2019, 1:49 p.m. UTC
If the CCW being processed is a No-Operation, then by definition no
data is being transferred.  Let's fold those checks into the normal
CCW processors, rather than skipping out early.

Likewise, if the CCW being processed is a "test" (an invented
definition to simply mean it ends in a zero), let's permit that to go
through to the hardware.  There's nothing inherently unique about
those command codes versus one that ends in an eight [1], or any other
otherwise valid command codes that are undefined for the device type
in question.

[1] POPS states that a x08 is a TIC CCW, and that having any high-order
bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
high-order bits are ignored.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
---
 drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

Comments

Pierre Morel May 6, 2019, 12:56 p.m. UTC | #1
On 03/05/2019 15:49, Eric Farman wrote:
> If the CCW being processed is a No-Operation, then by definition no
> data is being transferred.  Let's fold those checks into the normal
> CCW processors, rather than skipping out early.
> 
> Likewise, if the CCW being processed is a "test" (an invented
> definition to simply mean it ends in a zero), let's permit that to go
> through to the hardware.  There's nothing inherently unique about
> those command codes versus one that ends in an eight [1], or any other
> otherwise valid command codes that are undefined for the device type
> in question.
> 
> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
> high-order bits are ignored.
> 
> Signed-off-by: Eric Farman <farman@linux.ibm.com>
> ---
>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>   1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> index 36d76b821209..c0a52025bf06 100644
> --- a/drivers/s390/cio/vfio_ccw_cp.c
> +++ b/drivers/s390/cio/vfio_ccw_cp.c
> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp,
>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
>   
> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
> -
>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>   
>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw)
>   	if (ccw->count == 0)
>   		return 0;
>   
> +	/* If the command is a NOP, then no data will be transferred */
> +	if (ccw_is_noop(ccw))
> +		return 0;
> +
>   	/* If the skip flag is off, then data will be transferred */
>   	if (!ccw_is_skip(ccw))
>   		return 1;
> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
>   {
>   	struct ccw1 *ccw = chain->ch_ccw + idx;
>   
> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
> +	if (ccw_is_tic(ccw))


AFAIR, we introduced this code to protect against noop and test with a 
non zero CDA.
This could go away only if there is somewhere the guaranty that noop 
have always a null CDA (same for test).



>   		return;
>   
>   	kfree((void *)(u64)ccw->cda);
> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
>   {
>   	struct ccw1 *ccw = chain->ch_ccw + idx;
>   
> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
> -		return 0;
> -
>   	if (ccw_is_tic(ccw))
>   		return ccwchain_fetch_tic(chain, idx, cp);
>   
>
Cornelia Huck May 6, 2019, 3:37 p.m. UTC | #2
On Fri,  3 May 2019 15:49:12 +0200
Eric Farman <farman@linux.ibm.com> wrote:

> If the CCW being processed is a No-Operation, then by definition no
> data is being transferred.  Let's fold those checks into the normal
> CCW processors, rather than skipping out early.
> 
> Likewise, if the CCW being processed is a "test" (an invented
> definition to simply mean it ends in a zero), 

The "Common I/O Device Commands" document actually defines this :)

> let's permit that to go
> through to the hardware.  There's nothing inherently unique about
> those command codes versus one that ends in an eight [1], or any other
> otherwise valid command codes that are undefined for the device type
> in question.

But I agree that everything possible should be sent to the hardware.

> 
> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
> high-order bits are ignored.
> 
> Signed-off-by: Eric Farman <farman@linux.ibm.com>
> ---
>  drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> index 36d76b821209..c0a52025bf06 100644
> --- a/drivers/s390/cio/vfio_ccw_cp.c
> +++ b/drivers/s390/cio/vfio_ccw_cp.c
> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp,
>  #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
>  #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
>  
> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
> -
>  #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>  
>  #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw)
>  	if (ccw->count == 0)
>  		return 0;
>  
> +	/* If the command is a NOP, then no data will be transferred */
> +	if (ccw_is_noop(ccw))
> +		return 0;
> +

Don't you need to return 0 here for any test command as well?

(If I read the doc correctly, we'll just get a unit check in any case,
as there are no parallel I/O interfaces on modern s390 boxes. Even if
we had a parallel I/O interface, we'd just collect the status, and not
get any data transfer. FWIW, the QEMU ccw interpreter for emulated
devices rejects test ccws with a channel program check, which looks
wrong; should be a command reject instead.)

>  	/* If the skip flag is off, then data will be transferred */
>  	if (!ccw_is_skip(ccw))
>  		return 1;
> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
>  {
>  	struct ccw1 *ccw = chain->ch_ccw + idx;
>  
> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
> +	if (ccw_is_tic(ccw))
>  		return;
>  
>  	kfree((void *)(u64)ccw->cda);
> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
>  {
>  	struct ccw1 *ccw = chain->ch_ccw + idx;
>  
> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
> -		return 0;
> -
>  	if (ccw_is_tic(ccw))
>  		return ccwchain_fetch_tic(chain, idx, cp);
>
Eric Farman May 6, 2019, 3:39 p.m. UTC | #3
On 5/6/19 8:56 AM, Pierre Morel wrote:
> On 03/05/2019 15:49, Eric Farman wrote:
>> If the CCW being processed is a No-Operation, then by definition no
>> data is being transferred.  Let's fold those checks into the normal
>> CCW processors, rather than skipping out early.
>>
>> Likewise, if the CCW being processed is a "test" (an invented
>> definition to simply mean it ends in a zero), let's permit that to go
>> through to the hardware.  There's nothing inherently unique about
>> those command codes versus one that ends in an eight [1], or any other
>> otherwise valid command codes that are undefined for the device type
>> in question.
>>
>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>> high-order bits are ignored.
>>
>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>> ---
>>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c 
>> b/drivers/s390/cio/vfio_ccw_cp.c
>> index 36d76b821209..c0a52025bf06 100644
>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct 
>> channel_program *cp,
>>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
>>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>> CCW_CMD_BASIC_SENSE)
>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>> -
>>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct 
>> ccw1 *ccw)
>>       if (ccw->count == 0)
>>           return 0;
>> +    /* If the command is a NOP, then no data will be transferred */
>> +    if (ccw_is_noop(ccw))
>> +        return 0;
>> +
>>       /* If the skip flag is off, then data will be transferred */
>>       if (!ccw_is_skip(ccw))
>>           return 1;
>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain 
>> *chain, int idx)
>>   {
>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>> +    if (ccw_is_tic(ccw))
> 
> 
> AFAIR, we introduced this code to protect against noop and test with a 
> non zero CDA.
> This could go away only if there is somewhere the guaranty that noop 
> have always a null CDA (same for test).

What was generating either the null or "test" command codes?  I can 
provide plenty of examples for both these command codes and how they 
look coming out of vfio-ccw now.

The noop check is moved up into the "does data transfer" routine, to 
determine whether the pages should be pinned or not.  Regardless of 
whether or not the input CDA is null, we'll end up with a CCW pointing 
to a valid IDAL of invalid addresses.

The "test" command codes always struck me as funky, because x18 and xF8 
and everything in between that ends in x8 is architecturally invalid 
too, but we don't check for them like we do for things that end in x0. 
And there's a TON of other opcodes that are invalid for today's ECKD 
devices, or perhaps were valid for older DASD but have since been 
deprecated, or are only valid for non-DASD device types.  We have no 
logic to permit them, either.  If those CCWs had a non-zero CDA, we 
either pin it successfully and let the targeted device sort it out or an 
error occurs and we fail at that point.  (QEMU will see a "wirte" region 
error of -EINVAL because of vfio_pin_pages())

> 
> 
> 
>>           return;
>>       kfree((void *)(u64)ccw->cda);
>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
>>   {
>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw))
>> -        return 0;
>> -
>>       if (ccw_is_tic(ccw))
>>           return ccwchain_fetch_tic(chain, idx, cp);
>>
> 
>
Eric Farman May 6, 2019, 3:46 p.m. UTC | #4
On 5/6/19 11:37 AM, Cornelia Huck wrote:
> On Fri,  3 May 2019 15:49:12 +0200
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> If the CCW being processed is a No-Operation, then by definition no
>> data is being transferred.  Let's fold those checks into the normal
>> CCW processors, rather than skipping out early.
>>
>> Likewise, if the CCW being processed is a "test" (an invented
>> definition to simply mean it ends in a zero),
> 
> The "Common I/O Device Commands" document actually defines this :)

Blech, okay so I didn't look early enough in that document.  Section 1.5 
it is.  :)

> 
>> let's permit that to go
>> through to the hardware.  There's nothing inherently unique about
>> those command codes versus one that ends in an eight [1], or any other
>> otherwise valid command codes that are undefined for the device type
>> in question.
> 
> But I agree that everything possible should be sent to the hardware.
> 
>>
>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>> high-order bits are ignored.
>>
>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>> ---
>>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
>> index 36d76b821209..c0a52025bf06 100644
>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp,
>>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
>>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
>>   
>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>> -
>>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>   
>>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw)
>>   	if (ccw->count == 0)
>>   		return 0;
>>   
>> +	/* If the command is a NOP, then no data will be transferred */
>> +	if (ccw_is_noop(ccw))
>> +		return 0;
>> +
> 
> Don't you need to return 0 here for any test command as well?
> 
> (If I read the doc correctly, we'll just get a unit check in any case,
> as there are no parallel I/O interfaces on modern s390 boxes. Even if
> we had a parallel I/O interface, we'd just collect the status, and not
> get any data transfer. FWIW, the QEMU ccw interpreter for emulated
> devices rejects test ccws with a channel program check, which looks
> wrong; should be a command reject instead.)

I will go back and look.  I thought when I sent a test command with an 
address that wasn't translated I got an unhappy result, which is why I 
ripped this check out.

I was trying to use test CCWs as a safety valve for Halil's Status 
Modifier concern, so maybe I had something else wrong on that pile. 
(The careful observer would note that that code was not included here.  :)

> 
>>   	/* If the skip flag is off, then data will be transferred */
>>   	if (!ccw_is_skip(ccw))
>>   		return 1;
>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
>>   {
>>   	struct ccw1 *ccw = chain->ch_ccw + idx;
>>   
>> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>> +	if (ccw_is_tic(ccw))
>>   		return;
>>   
>>   	kfree((void *)(u64)ccw->cda);
>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
>>   {
>>   	struct ccw1 *ccw = chain->ch_ccw + idx;
>>   
>> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
>> -		return 0;
>> -
>>   	if (ccw_is_tic(ccw))
>>   		return ccwchain_fetch_tic(chain, idx, cp);
>>   
>
Cornelia Huck May 6, 2019, 4:18 p.m. UTC | #5
On Mon, 6 May 2019 11:46:59 -0400
Eric Farman <farman@linux.ibm.com> wrote:

> On 5/6/19 11:37 AM, Cornelia Huck wrote:
> > On Fri,  3 May 2019 15:49:12 +0200
> > Eric Farman <farman@linux.ibm.com> wrote:
> >   
> >> If the CCW being processed is a No-Operation, then by definition no
> >> data is being transferred.  Let's fold those checks into the normal
> >> CCW processors, rather than skipping out early.
> >>
> >> Likewise, if the CCW being processed is a "test" (an invented
> >> definition to simply mean it ends in a zero),  
> > 
> > The "Common I/O Device Commands" document actually defines this :)  
> 
> Blech, okay so I didn't look early enough in that document.  Section 1.5 
> it is.  :)
> 
> >   
> >> let's permit that to go
> >> through to the hardware.  There's nothing inherently unique about
> >> those command codes versus one that ends in an eight [1], or any other
> >> otherwise valid command codes that are undefined for the device type
> >> in question.  
> > 
> > But I agree that everything possible should be sent to the hardware.
> >   
> >>
> >> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
> >> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
> >> high-order bits are ignored.
> >>
> >> Signed-off-by: Eric Farman <farman@linux.ibm.com>
> >> ---
> >>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
> >>   1 file changed, 5 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> >> index 36d76b821209..c0a52025bf06 100644
> >> --- a/drivers/s390/cio/vfio_ccw_cp.c
> >> +++ b/drivers/s390/cio/vfio_ccw_cp.c
> >> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp,
> >>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
> >>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
> >>   
> >> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
> >> -
> >>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
> >>   
> >>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
> >> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw)
> >>   	if (ccw->count == 0)
> >>   		return 0;
> >>   
> >> +	/* If the command is a NOP, then no data will be transferred */
> >> +	if (ccw_is_noop(ccw))
> >> +		return 0;
> >> +  
> > 
> > Don't you need to return 0 here for any test command as well?
> > 
> > (If I read the doc correctly, we'll just get a unit check in any case,
> > as there are no parallel I/O interfaces on modern s390 boxes. Even if
> > we had a parallel I/O interface, we'd just collect the status, and not
> > get any data transfer. FWIW, the QEMU ccw interpreter for emulated
> > devices rejects test ccws with a channel program check, which looks
> > wrong; should be a command reject instead.)  
> 
> I will go back and look.  I thought when I sent a test command with an 
> address that wasn't translated I got an unhappy result, which is why I 
> ripped this check out.

Ugh, I just looked at the current PoP and that specifies ccws[1] of test
type as 'invalid' (generating a channel program check). So, the current
PoP and the (old) I/O device commands seem to disagree :/ Do you know
if there's any update to the latter? I think I'll just leave QEMU as it
is, as that at least agrees with the current PoP...

> 
> I was trying to use test CCWs as a safety valve for Halil's Status 
> Modifier concern, so maybe I had something else wrong on that pile. 
> (The careful observer would note that that code was not included here.  :)

:)

> 
> >   
> >>   	/* If the skip flag is off, then data will be transferred */
> >>   	if (!ccw_is_skip(ccw))
> >>   		return 1;
> >> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
> >>   {
> >>   	struct ccw1 *ccw = chain->ch_ccw + idx;
> >>   
> >> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
> >> +	if (ccw_is_tic(ccw))
> >>   		return;
> >>   
> >>   	kfree((void *)(u64)ccw->cda);
> >> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
> >>   {
> >>   	struct ccw1 *ccw = chain->ch_ccw + idx;
> >>   
> >> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
> >> -		return 0;
> >> -
> >>   	if (ccw_is_tic(ccw))
> >>   		return ccwchain_fetch_tic(chain, idx, cp);
> >>     
> >   

[1] tcws are a bit different; but we don't support them anyway.
Eric Farman May 6, 2019, 4:25 p.m. UTC | #6
On 5/6/19 12:18 PM, Cornelia Huck wrote:
> On Mon, 6 May 2019 11:46:59 -0400
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> On 5/6/19 11:37 AM, Cornelia Huck wrote:
>>> On Fri,  3 May 2019 15:49:12 +0200
>>> Eric Farman <farman@linux.ibm.com> wrote:
>>>    
>>>> If the CCW being processed is a No-Operation, then by definition no
>>>> data is being transferred.  Let's fold those checks into the normal
>>>> CCW processors, rather than skipping out early.
>>>>
>>>> Likewise, if the CCW being processed is a "test" (an invented
>>>> definition to simply mean it ends in a zero),
>>>
>>> The "Common I/O Device Commands" document actually defines this :)
>>
>> Blech, okay so I didn't look early enough in that document.  Section 1.5
>> it is.  :)
>>
>>>    
>>>> let's permit that to go
>>>> through to the hardware.  There's nothing inherently unique about
>>>> those command codes versus one that ends in an eight [1], or any other
>>>> otherwise valid command codes that are undefined for the device type
>>>> in question.
>>>
>>> But I agree that everything possible should be sent to the hardware.
>>>    
>>>>
>>>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
>>>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>>>> high-order bits are ignored.
>>>>
>>>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>>>> ---
>>>>    drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>>>    1 file changed, 5 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
>>>> index 36d76b821209..c0a52025bf06 100644
>>>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct channel_program *cp,
>>>>    #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
>>>>    #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
>>>>    
>>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>>>> -
>>>>    #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>>>    
>>>>    #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>>>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct ccw1 *ccw)
>>>>    	if (ccw->count == 0)
>>>>    		return 0;
>>>>    
>>>> +	/* If the command is a NOP, then no data will be transferred */
>>>> +	if (ccw_is_noop(ccw))
>>>> +		return 0;
>>>> +
>>>
>>> Don't you need to return 0 here for any test command as well?
>>>
>>> (If I read the doc correctly, we'll just get a unit check in any case,
>>> as there are no parallel I/O interfaces on modern s390 boxes. Even if
>>> we had a parallel I/O interface, we'd just collect the status, and not
>>> get any data transfer. FWIW, the QEMU ccw interpreter for emulated
>>> devices rejects test ccws with a channel program check, which looks
>>> wrong; should be a command reject instead.)
>>
>> I will go back and look.  I thought when I sent a test command with an
>> address that wasn't translated I got an unhappy result, which is why I
>> ripped this check out.
> 
> Ugh, I just looked at the current PoP and that specifies ccws[1] of test
> type as 'invalid' (generating a channel program check). So, the current
> PoP and the (old) I/O device commands seem to disagree :/ Do you know
> if there's any update to the latter? I think I'll just leave QEMU as it
> is, as that at least agrees with the current PoP...

Double ugh.  I am not aware, but I'll poke around on our side.  Probably 
better to focus on PoP, unless it specifically points to ye olde document.

> 
>>
>> I was trying to use test CCWs as a safety valve for Halil's Status
>> Modifier concern, so maybe I had something else wrong on that pile.
>> (The careful observer would note that that code was not included here.  :)
> 
> :)
> 
>>
>>>    
>>>>    	/* If the skip flag is off, then data will be transferred */
>>>>    	if (!ccw_is_skip(ccw))
>>>>    		return 1;
>>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
>>>>    {
>>>>    	struct ccw1 *ccw = chain->ch_ccw + idx;
>>>>    
>>>> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>>>> +	if (ccw_is_tic(ccw))
>>>>    		return;
>>>>    
>>>>    	kfree((void *)(u64)ccw->cda);
>>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
>>>>    {
>>>>    	struct ccw1 *ccw = chain->ch_ccw + idx;
>>>>    
>>>> -	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
>>>> -		return 0;
>>>> -
>>>>    	if (ccw_is_tic(ccw))
>>>>    		return ccwchain_fetch_tic(chain, idx, cp);
>>>>      
>>>    
> 
> [1] tcws are a bit different; but we don't support them anyway.
> 

Yeah...  I'd like to get the code cleaned up to a point where if we want 
to support TCWs, we could just up front say "if command go here, if 
transport go there."  Not that that'll be anytime soon, but it would 
prevent us from having to dis-entangle everything at that future point.
Cornelia Huck May 6, 2019, 4:31 p.m. UTC | #7
On Mon, 6 May 2019 12:25:01 -0400
Eric Farman <farman@linux.ibm.com> wrote:

> On 5/6/19 12:18 PM, Cornelia Huck wrote:

> > [1] tcws are a bit different; but we don't support them anyway.
> >   
> 
> Yeah...  I'd like to get the code cleaned up to a point where if we want 
> to support TCWs, we could just up front say "if command go here, if 
> transport go there."  Not that that'll be anytime soon, but it would 
> prevent us from having to dis-entangle everything at that future point.
> 

Let's see how much work that is :)
Eric Farman May 6, 2019, 8:47 p.m. UTC | #8
On 5/6/19 11:39 AM, Eric Farman wrote:
> 
> 
> On 5/6/19 8:56 AM, Pierre Morel wrote:
>> On 03/05/2019 15:49, Eric Farman wrote:
>>> If the CCW being processed is a No-Operation, then by definition no
>>> data is being transferred.  Let's fold those checks into the normal
>>> CCW processors, rather than skipping out early.
>>>
>>> Likewise, if the CCW being processed is a "test" (an invented
>>> definition to simply mean it ends in a zero), let's permit that to go
>>> through to the hardware.  There's nothing inherently unique about
>>> those command codes versus one that ends in an eight [1], or any other
>>> otherwise valid command codes that are undefined for the device type
>>> in question.
>>>
>>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
>>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>>> high-order bits are ignored.
>>>
>>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>>> ---
>>>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c 
>>> b/drivers/s390/cio/vfio_ccw_cp.c
>>> index 36d76b821209..c0a52025bf06 100644
>>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct 
>>> channel_program *cp,
>>>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
>>>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>> CCW_CMD_BASIC_SENSE)
>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>>> -
>>>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct 
>>> ccw1 *ccw)
>>>       if (ccw->count == 0)
>>>           return 0;
>>> +    /* If the command is a NOP, then no data will be transferred */
>>> +    if (ccw_is_noop(ccw))
>>> +        return 0;
>>> +
>>>       /* If the skip flag is off, then data will be transferred */
>>>       if (!ccw_is_skip(ccw))
>>>           return 1;
>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain 
>>> *chain, int idx)
>>>   {
>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>>> +    if (ccw_is_tic(ccw))
>>
>>
>> AFAIR, we introduced this code to protect against noop and test with a 
>> non zero CDA.
>> This could go away only if there is somewhere the guaranty that noop 
>> have always a null CDA (same for test).
> 
> What was generating either the null or "test" command codes?  I can 
> provide plenty of examples for both these command codes and how they 
> look coming out of vfio-ccw now.

I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to 
hardware without this patch.  I don't see anything particuarly 
surpising, so I'm not sure what the original code was attempting to protect.

Maybe, since you question this in ccwchain_cda_free(), you're referring 
to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to free no-op, 
test and tic cda."), which fixed up our attempt to clean things up that 
weren't allocated on the transmit side?  With this series, that is 
reverted, but the cda is indeed set to something that needs to be free'd 
(see below).  So maybe I should at least mention that commit here.

Regardless, while the I/Os work/fail as I expect, the cda addresses 
themselves are wrong in much the same way I describe in patch 4.  Yes, 
without this patch we don't convert them to an IDAL so certain program 
checks aren't applicable.  But the addresses that we end up sending to 
the hardware are nonsensical, though potentially valid, locations.

> 
> The noop check is moved up into the "does data transfer" routine, to 
> determine whether the pages should be pinned or not.  Regardless of 
> whether or not the input CDA is null, we'll end up with a CCW pointing 
> to a valid IDAL of invalid addresses.
> 
> The "test" command codes always struck me as funky, because x18 and xF8 
> and everything in between that ends in x8 is architecturally invalid 
> too, but we don't check for them like we do for things that end in x0. 
> And there's a TON of other opcodes that are invalid for today's ECKD 
> devices, or perhaps were valid for older DASD but have since been 
> deprecated, or are only valid for non-DASD device types.  We have no 
> logic to permit them, either.  If those CCWs had a non-zero CDA, we 
> either pin it successfully and let the targeted device sort it out or an 
> error occurs and we fail at that point.  (QEMU will see a "wirte" region 
> error of -EINVAL because of vfio_pin_pages())
> 
>>
>>
>>
>>>           return;
>>>       kfree((void *)(u64)ccw->cda);
>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain 
>>> *chain,
>>>   {
>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw))
>>> -        return 0;
>>> -
>>>       if (ccw_is_tic(ccw))
>>>           return ccwchain_fetch_tic(chain, idx, cp);
>>>
>>
>>
Pierre Morel May 7, 2019, 8:52 a.m. UTC | #9
On 06/05/2019 22:47, Eric Farman wrote:
> 
> 
> On 5/6/19 11:39 AM, Eric Farman wrote:
>>
>>
>> On 5/6/19 8:56 AM, Pierre Morel wrote:
>>> On 03/05/2019 15:49, Eric Farman wrote:
>>>> If the CCW being processed is a No-Operation, then by definition no
>>>> data is being transferred.  Let's fold those checks into the normal
>>>> CCW processors, rather than skipping out early.
>>>>
>>>> Likewise, if the CCW being processed is a "test" (an invented
>>>> definition to simply mean it ends in a zero), let's permit that to go
>>>> through to the hardware.  There's nothing inherently unique about
>>>> those command codes versus one that ends in an eight [1], or any other
>>>> otherwise valid command codes that are undefined for the device type
>>>> in question.
>>>>
>>>> [1] POPS states that a x08 is a TIC CCW, and that having any high-order
>>>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>>>> high-order bits are ignored.
>>>>
>>>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>>>> ---
>>>>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c 
>>>> b/drivers/s390/cio/vfio_ccw_cp.c
>>>> index 36d76b821209..c0a52025bf06 100644
>>>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct 
>>>> channel_program *cp,
>>>>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>>> 0x0C)
>>>>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>>> CCW_CMD_BASIC_SENSE)
>>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>>>> -
>>>>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>>>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>>>> @@ -314,6 +312,10 @@ static inline int ccw_does_data_transfer(struct 
>>>> ccw1 *ccw)
>>>>       if (ccw->count == 0)
>>>>           return 0;
>>>> +    /* If the command is a NOP, then no data will be transferred */
>>>> +    if (ccw_is_noop(ccw))
>>>> +        return 0;
>>>> +
>>>>       /* If the skip flag is off, then data will be transferred */
>>>>       if (!ccw_is_skip(ccw))
>>>>           return 1;
>>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain 
>>>> *chain, int idx)
>>>>   {
>>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>>>> +    if (ccw_is_tic(ccw))
>>>
>>>
>>> AFAIR, we introduced this code to protect against noop and test with 
>>> a non zero CDA.
>>> This could go away only if there is somewhere the guaranty that noop 
>>> have always a null CDA (same for test).
>>
>> What was generating either the null or "test" command codes?  I can 
>> provide plenty of examples for both these command codes and how they 
>> look coming out of vfio-ccw now.
> 
> I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to 
> hardware without this patch.  I don't see anything particuarly 
> surpising, so I'm not sure what the original code was attempting to 
> protect.
> 
> Maybe, since you question this in ccwchain_cda_free(), you're referring 
> to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to free no-op, 
> test and tic cda."), which fixed up our attempt to clean things up that 
> weren't allocated on the transmit side?  With this series, that is 
> reverted, but the cda is indeed set to something that needs to be free'd 
> (see below).  So maybe I should at least mention that commit here.
> 
> Regardless, while the I/Os work/fail as I expect, the cda addresses 
> themselves are wrong in much the same way I describe in patch 4.  Yes, 
> without this patch we don't convert them to an IDAL so certain program 
> checks aren't applicable.  But the addresses that we end up sending to 
> the hardware are nonsensical, though potentially valid, locations.
> 

I am not comfortable with this.
with NOOP no data transfer take place and the role of VFIO is to take 
care about data transfer.
So in my logic better do nothing and send the original CCW to the hardware.

>>
>> The noop check is moved up into the "does data transfer" routine, to 
>> determine whether the pages should be pinned or not.  Regardless of 
>> whether or not the input CDA is null, we'll end up with a CCW pointing 
>> to a valid IDAL of invalid addresses.
>>
>> The "test" command codes always struck me as funky, because x18 and 
>> xF8 and everything in between that ends in x8 is architecturally 
>> invalid too, but we don't check for them like we do for things that 
>> end in x0. And there's a TON of other opcodes that are invalid for 
>> today's ECKD devices, or perhaps were valid for older DASD but have 
>> since been deprecated, or are only valid for non-DASD device types.  
>> We have no logic to permit them, either.  If those CCWs had a non-zero 
>> CDA, we either pin it successfully and let the targeted device sort it 
>> out or an error occurs and we fail at that point.  (QEMU will see a 
>> "wirte" region error of -EINVAL because of vfio_pin_pages())

The test command is AFAIU even more sensible that the NOOP command and 
in my opinion should never be modified since it is highly device 
dependent and do not induce data transfer anyway.

We even do not know how the CDA field may be used by the device.
May be I am a little dramatic with this.
Just to say that I would feel more comfortable if the test command reach 
the device unchanged.

>>
>>>
>>>
>>>
>>>>           return;
>>>>       kfree((void *)(u64)ccw->cda);
>>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain 
>>>> *chain,
>>>>   {
>>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw))
>>>> -        return 0;
>>>> -
>>>>       if (ccw_is_tic(ccw))
>>>>           return ccwchain_fetch_tic(chain, idx, cp);
>>>>
>>>
>>>
Eric Farman May 7, 2019, 4:43 p.m. UTC | #10
On 5/7/19 4:52 AM, Pierre Morel wrote:
> On 06/05/2019 22:47, Eric Farman wrote:
>>
>>
>> On 5/6/19 11:39 AM, Eric Farman wrote:
>>>
>>>
>>> On 5/6/19 8:56 AM, Pierre Morel wrote:
>>>> On 03/05/2019 15:49, Eric Farman wrote:
>>>>> If the CCW being processed is a No-Operation, then by definition no
>>>>> data is being transferred.  Let's fold those checks into the normal
>>>>> CCW processors, rather than skipping out early.
>>>>>
>>>>> Likewise, if the CCW being processed is a "test" (an invented
>>>>> definition to simply mean it ends in a zero), let's permit that to go
>>>>> through to the hardware.  There's nothing inherently unique about
>>>>> those command codes versus one that ends in an eight [1], or any other
>>>>> otherwise valid command codes that are undefined for the device type
>>>>> in question.
>>>>>
>>>>> [1] POPS states that a x08 is a TIC CCW, and that having any 
>>>>> high-order
>>>>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>>>>> high-order bits are ignored.
>>>>>
>>>>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>>>>> ---
>>>>>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>>>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c 
>>>>> b/drivers/s390/cio/vfio_ccw_cp.c
>>>>> index 36d76b821209..c0a52025bf06 100644
>>>>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>>>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>>>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct 
>>>>> channel_program *cp,
>>>>>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>>>> 0x0C)
>>>>>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>>>> CCW_CMD_BASIC_SENSE)
>>>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>>>>> -
>>>>>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>>>>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>>>>> @@ -314,6 +312,10 @@ static inline int 
>>>>> ccw_does_data_transfer(struct ccw1 *ccw)
>>>>>       if (ccw->count == 0)
>>>>>           return 0;
>>>>> +    /* If the command is a NOP, then no data will be transferred */
>>>>> +    if (ccw_is_noop(ccw))
>>>>> +        return 0;
>>>>> +
>>>>>       /* If the skip flag is off, then data will be transferred */
>>>>>       if (!ccw_is_skip(ccw))
>>>>>           return 1;
>>>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain 
>>>>> *chain, int idx)
>>>>>   {
>>>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>>>>> +    if (ccw_is_tic(ccw))
>>>>
>>>>
>>>> AFAIR, we introduced this code to protect against noop and test with 
>>>> a non zero CDA.
>>>> This could go away only if there is somewhere the guaranty that noop 
>>>> have always a null CDA (same for test).
>>>
>>> What was generating either the null or "test" command codes?  I can 
>>> provide plenty of examples for both these command codes and how they 
>>> look coming out of vfio-ccw now.
>>
>> I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to 
>> hardware without this patch.  I don't see anything particuarly 
>> surpising, so I'm not sure what the original code was attempting to 
>> protect.
>>
>> Maybe, since you question this in ccwchain_cda_free(), you're 
>> referring to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to 
>> free no-op, test and tic cda."), which fixed up our attempt to clean 
>> things up that weren't allocated on the transmit side?  With this 
>> series, that is reverted, but the cda is indeed set to something that 
>> needs to be free'd (see below).  So maybe I should at least mention 
>> that commit here.
>>
>> Regardless, while the I/Os work/fail as I expect, the cda addresses 
>> themselves are wrong in much the same way I describe in patch 4.  Yes, 
>> without this patch we don't convert them to an IDAL so certain program 
>> checks aren't applicable.  But the addresses that we end up sending to 
>> the hardware are nonsensical, though potentially valid, locations.
>>
> 
> I am not comfortable with this.
> with NOOP no data transfer take place and the role of VFIO is to take 
> care about data transfer.
> So in my logic better do nothing and send the original CCW to the hardware.
> 
>>>
>>> The noop check is moved up into the "does data transfer" routine, to 
>>> determine whether the pages should be pinned or not.  Regardless of 
>>> whether or not the input CDA is null, we'll end up with a CCW 
>>> pointing to a valid IDAL of invalid addresses.
>>>
>>> The "test" command codes always struck me as funky, because x18 and 
>>> xF8 and everything in between that ends in x8 is architecturally 
>>> invalid too, but we don't check for them like we do for things that 
>>> end in x0. And there's a TON of other opcodes that are invalid for 
>>> today's ECKD devices, or perhaps were valid for older DASD but have 
>>> since been deprecated, or are only valid for non-DASD device types. 
>>> We have no logic to permit them, either.  If those CCWs had a 
>>> non-zero CDA, we either pin it successfully and let the targeted 
>>> device sort it out or an error occurs and we fail at that point.  
>>> (QEMU will see a "wirte" region error of -EINVAL because of 
>>> vfio_pin_pages())
> 
> The test command is AFAIU even more sensible that the NOOP command and 
> in my opinion should never be modified since it is highly device 
> dependent and do not induce data transfer anyway.
> 
> We even do not know how the CDA field may be used by the device.

Exactly, which is why I think sending an unpinned, non-translated, guest 
address to the hardware (which is what happens today) is a Bad Idea.  If 
the associated command code WERE going to cause the channel to modify 
any memory, the provided address from the guest would (best case) cause 
a program check if the address were not available, or some data 
corruption if it were.

> May be I am a little dramatic with this.
> Just to say that I would feel more comfortable if the test command reach 
> the device unchanged.
> 

As I say above, I disagree.  I'd rather that the command (test or 
otherwise) hit the channel (and the device if applicable) with a valid 
host address in ccw.cda, so that if any data transfer occurs we're not 
exposed.

If there's an application that wants to send a test CCW with an invalid 
CDA (and thus would fail the pin, as I have seen with NOP), then I guess 
I can add ccw_is_test() to ccw_does_data_transfer(), but since I still 
don't see the use case for test CCWs I'm not as thrilled about it.

Do you recall what caused them to be added originally?

>>>
>>>>
>>>>
>>>>
>>>>>           return;
>>>>>       kfree((void *)(u64)ccw->cda);
>>>>> @@ -723,9 +725,6 @@ static int ccwchain_fetch_one(struct ccwchain 
>>>>> *chain,
>>>>>   {
>>>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw))
>>>>> -        return 0;
>>>>> -
>>>>>       if (ccw_is_tic(ccw))
>>>>>           return ccwchain_fetch_tic(chain, idx, cp);
>>>>>
>>>>
>>>>
> 
>
Pierre Morel May 8, 2019, 9:22 a.m. UTC | #11
On 07/05/2019 18:43, Eric Farman wrote:
> 
> 
> On 5/7/19 4:52 AM, Pierre Morel wrote:
>> On 06/05/2019 22:47, Eric Farman wrote:
>>>
>>>
>>> On 5/6/19 11:39 AM, Eric Farman wrote:
>>>>
>>>>
>>>> On 5/6/19 8:56 AM, Pierre Morel wrote:
>>>>> On 03/05/2019 15:49, Eric Farman wrote:
>>>>>> If the CCW being processed is a No-Operation, then by definition no
>>>>>> data is being transferred.  Let's fold those checks into the normal
>>>>>> CCW processors, rather than skipping out early.
>>>>>>
>>>>>> Likewise, if the CCW being processed is a "test" (an invented
>>>>>> definition to simply mean it ends in a zero), let's permit that to go
>>>>>> through to the hardware.  There's nothing inherently unique about
>>>>>> those command codes versus one that ends in an eight [1], or any 
>>>>>> other
>>>>>> otherwise valid command codes that are undefined for the device type
>>>>>> in question.
>>>>>>
>>>>>> [1] POPS states that a x08 is a TIC CCW, and that having any 
>>>>>> high-order
>>>>>> bits enabled is invalid for format-1 CCWs.  For format-0 CCWs, the
>>>>>> high-order bits are ignored.
>>>>>>
>>>>>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>>>>>> ---
>>>>>>   drivers/s390/cio/vfio_ccw_cp.c | 11 +++++------
>>>>>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/s390/cio/vfio_ccw_cp.c 
>>>>>> b/drivers/s390/cio/vfio_ccw_cp.c
>>>>>> index 36d76b821209..c0a52025bf06 100644
>>>>>> --- a/drivers/s390/cio/vfio_ccw_cp.c
>>>>>> +++ b/drivers/s390/cio/vfio_ccw_cp.c
>>>>>> @@ -289,8 +289,6 @@ static long copy_ccw_from_iova(struct 
>>>>>> channel_program *cp,
>>>>>>   #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>>>>> 0x0C)
>>>>>>   #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == 
>>>>>> CCW_CMD_BASIC_SENSE)
>>>>>> -#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
>>>>>> -
>>>>>>   #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
>>>>>>   #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
>>>>>> @@ -314,6 +312,10 @@ static inline int 
>>>>>> ccw_does_data_transfer(struct ccw1 *ccw)
>>>>>>       if (ccw->count == 0)
>>>>>>           return 0;
>>>>>> +    /* If the command is a NOP, then no data will be transferred */
>>>>>> +    if (ccw_is_noop(ccw))
>>>>>> +        return 0;
>>>>>> +
>>>>>>       /* If the skip flag is off, then data will be transferred */
>>>>>>       if (!ccw_is_skip(ccw))
>>>>>>           return 1;
>>>>>> @@ -398,7 +400,7 @@ static void ccwchain_cda_free(struct ccwchain 
>>>>>> *chain, int idx)
>>>>>>   {
>>>>>>       struct ccw1 *ccw = chain->ch_ccw + idx;
>>>>>> -    if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
>>>>>> +    if (ccw_is_tic(ccw))
>>>>>
>>>>>
>>>>> AFAIR, we introduced this code to protect against noop and test 
>>>>> with a non zero CDA.
>>>>> This could go away only if there is somewhere the guaranty that 
>>>>> noop have always a null CDA (same for test).
>>>>
>>>> What was generating either the null or "test" command codes?  I can 
>>>> provide plenty of examples for both these command codes and how they 
>>>> look coming out of vfio-ccw now.
>>>
>>> I've sent both x00 and x03 (NOP) CCWs with zero and non-zero CDAs to 
>>> hardware without this patch.  I don't see anything particuarly 
>>> surpising, so I'm not sure what the original code was attempting to 
>>> protect.
>>>
>>> Maybe, since you question this in ccwchain_cda_free(), you're 
>>> referring to commit 408358b50dea ("s390: vfio-ccw: Do not attempt to 
>>> free no-op, test and tic cda."), which fixed up our attempt to clean 
>>> things up that weren't allocated on the transmit side?  With this 
>>> series, that is reverted, but the cda is indeed set to something that 
>>> needs to be free'd (see below).  So maybe I should at least mention 
>>> that commit here.
>>>
>>> Regardless, while the I/Os work/fail as I expect, the cda addresses 
>>> themselves are wrong in much the same way I describe in patch 4.  
>>> Yes, without this patch we don't convert them to an IDAL so certain 
>>> program checks aren't applicable.  But the addresses that we end up 
>>> sending to the hardware are nonsensical, though potentially valid, 
>>> locations.
>>>
>>
>> I am not comfortable with this.
>> with NOOP no data transfer take place and the role of VFIO is to take 
>> care about data transfer.
>> So in my logic better do nothing and send the original CCW to the 
>> hardware.
>>
>>>>
>>>> The noop check is moved up into the "does data transfer" routine, to 
>>>> determine whether the pages should be pinned or not.  Regardless of 
>>>> whether or not the input CDA is null, we'll end up with a CCW 
>>>> pointing to a valid IDAL of invalid addresses.
>>>>
>>>> The "test" command codes always struck me as funky, because x18 and 
>>>> xF8 and everything in between that ends in x8 is architecturally 
>>>> invalid too, but we don't check for them like we do for things that 
>>>> end in x0. And there's a TON of other opcodes that are invalid for 
>>>> today's ECKD devices, or perhaps were valid for older DASD but have 
>>>> since been deprecated, or are only valid for non-DASD device types. 
>>>> We have no logic to permit them, either.  If those CCWs had a 
>>>> non-zero CDA, we either pin it successfully and let the targeted 
>>>> device sort it out or an error occurs and we fail at that point. 
>>>> (QEMU will see a "wirte" region error of -EINVAL because of 
>>>> vfio_pin_pages())
>>
>> The test command is AFAIU even more sensible that the NOOP command and 
>> in my opinion should never be modified since it is highly device 
>> dependent and do not induce data transfer anyway.
>>
>> We even do not know how the CDA field may be used by the device.
> 
> Exactly, which is why I think sending an unpinned, non-translated, guest 
> address to the hardware (which is what happens today) is a Bad Idea.  If 
> the associated command code WERE going to cause the channel to modify 
> any memory, the provided address from the guest would (best case) cause 
> a program check if the address were not available, or some data 
> corruption if it were.
> 
>> May be I am a little dramatic with this.
>> Just to say that I would feel more comfortable if the test command 
>> reach the device unchanged.
>>
> 
> As I say above, I disagree.  I'd rather that the command (test or 
> otherwise) hit the channel (and the device if applicable) with a valid 
> host address in ccw.cda, so that if any data transfer occurs we're not 
> exposed.
> 
> If there's an application that wants to send a test CCW with an invalid 
> CDA (and thus would fail the pin, as I have seen with NOP), then I guess 
> I can add ccw_is_test() to ccw_does_data_transfer(), but since I still 
> don't see the use case for test CCWs I'm not as thrilled about it.
> 
> Do you recall what caused them to be added originally?



AFAIK they have bin added because they are standard CCW commands.


For the NOOP its clearly stated that it does not start a data transfer.
If we pin the CDA, it could then eventually be the cause of errors if 
the address indicated by the CDA is not accessible.

The NOOP is a particular CONTROL operation for which no data is transfered.
Other CONTROL operation may start a data transfer.

The TEST command is used to retrieve the status of the I/O-device 
__path__ and do not go up to the device.
I did not find clearly that it does not start a data transfer but I 
really do not think it does.
May be we should ask people from hardware.
I only found that test I/O (a specific test command) do not initiate an 
operation.

Regards,
Pierre
Cornelia Huck May 8, 2019, 10:06 a.m. UTC | #12
On Wed, 8 May 2019 11:22:07 +0200
Pierre Morel <pmorel@linux.ibm.com> wrote:

> The TEST command is used to retrieve the status of the I/O-device 
> __path__ and do not go up to the device.
> I did not find clearly that it does not start a data transfer but I 
> really do not think it does.
> May be we should ask people from hardware.
> I only found that test I/O (a specific test command) do not initiate an 
> operation.

FWIW, I'm not sure about what we should do with the test command in any
case.

Currently, I see it defined as a proper command in the rather ancient
"Common I/O Device Commands" (I don't know of any newer public
version), which states that it retrieves the status on the parallel
interface _only_ (and generates a command reject on the serial
interface). IIRC, the parallel interface has been phased out quite some
time ago.

The current PoP, in contrast, defines this as an _invalid_ command
(generating a channel program check).

So, while the test command originally was designed to never initiate a
data transfer, we now have an invalid command in its place, and we
don't know if something else might change in the future (for transfer
mode, a test-like command is already defined in the PoP).

So, the safest course would probably be to handle the ->cda portion and
send the command down. We'll get a check condition on current hardware,
but it should be safe if something changes in the future.

Of course, asking some hardware folks is not a bad idea, either :)
Eric Farman May 8, 2019, 7:38 p.m. UTC | #13
On 5/8/19 6:06 AM, Cornelia Huck wrote:
> On Wed, 8 May 2019 11:22:07 +0200
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>> The TEST command is used to retrieve the status of the I/O-device
>> __path__ and do not go up to the device.
>> I did not find clearly that it does not start a data transfer but I
>> really do not think it does.
>> May be we should ask people from hardware.
>> I only found that test I/O (a specific test command) do not initiate an
>> operation.
> 
> FWIW, I'm not sure about what we should do with the test command in any
> case.
> 
> Currently, I see it defined as a proper command in the rather ancient
> "Common I/O Device Commands" (I don't know of any newer public
> version), 

Nor I.  I had to rummage around a few dumpsters to find a copy of this 
one, even.

> which states that it retrieves the status on the parallel
> interface _only_ (and generates a command reject on the serial
> interface). IIRC, the parallel interface has been phased out quite some
> time ago.

The current POPs, towards the bottom left side of page 13-3, has this 
statement:

---
The term “serial-I/O interface” is used to refer the ESCON I/O 
interface, the FICON I/O interface, and the FICON-converted I/O 
interface. The term “parallel-I/O interface” is used to refer to the IBM 
System/360 and System/370 I/O interface.
---

So, yes it was phased out some time ago.  :)

> 
> The current PoP, in contrast, defines this as an _invalid_ command
> (generating a channel program check).

Ditto the ESA/390 POPs (SA22-7201-08).

> 
> So, while the test command originally was designed to never initiate a
> data transfer, we now have an invalid command in its place, and we
> don't know if something else might change in the future (for transfer
> mode, a test-like command is already defined in the PoP).

Indeed, the ccw_is_test() check would need to be reworked if we ever 
want to support transport mode anyway.  :shudder:

> 
> So, the safest course would probably be to handle the ->cda portion and
> send the command down. We'll get a check condition on current hardware,
> but it should be safe if something changes in the future.
> 
> Of course, asking some hardware folks is not a bad idea, either :)
> 

I'll shoot a quick note (and cc Pierre) just for the sake of sanity, but 
I'm still convinced this patch is fine as-is.  :)
Cornelia Huck May 10, 2019, 11:47 a.m. UTC | #14
On Wed, 8 May 2019 11:22:07 +0200
Pierre Morel <pmorel@linux.ibm.com> wrote:

> For the NOOP its clearly stated that it does not start a data transfer.
> If we pin the CDA, it could then eventually be the cause of errors if 
> the address indicated by the CDA is not accessible.
> 
> The NOOP is a particular CONTROL operation for which no data is transfered.
> Other CONTROL operation may start a data transfer.

I've just looked at the documentation again.

The Olde Common I/O Device Commands document indicates that a NOOP
simply causes channel end/device end.

The PoP seems to indicate that the cda is always checked (i.e. does it
point to a valid memory area?), but I'm not sure whether the area that
is pointed to is checked for accessibility etc. as well, even if the
command does not transfer any data.

Has somebody tried to find out what happens on Real Hardware(tm) if you
send a command that is not supposed to transfer any data where the cda
points to a valid, but not accessible area?

In general, I think doing the translation (and probably already hitting
errors there) is better than sending down a guest address.
Eric Farman May 10, 2019, 2:24 p.m. UTC | #15
On 5/10/19 7:47 AM, Cornelia Huck wrote:
> On Wed, 8 May 2019 11:22:07 +0200
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>> For the NOOP its clearly stated that it does not start a data transfer.
>> If we pin the CDA, it could then eventually be the cause of errors if
>> the address indicated by the CDA is not accessible.
>>
>> The NOOP is a particular CONTROL operation for which no data is transfered.
>> Other CONTROL operation may start a data transfer.
> 
> I've just looked at the documentation again.
> 
> The Olde Common I/O Device Commands document indicates that a NOOP
> simply causes channel end/device end.
> 
> The PoP seems to indicate that the cda is always checked (i.e. does it
> point to a valid memory area?), but I'm not sure whether the area that
> is pointed to is checked for accessibility etc. as well, even if the
> command does not transfer any data.
> 
> Has somebody tried to find out what happens on Real Hardware(tm) if you
> send a command that is not supposed to transfer any data where the cda
> points to a valid, but not accessible area?

Hrm...  The CDA itself?  I don't think so.  Since every CCW is converted 
to an IDAL in vfio-ccw, we guarantee that it's pointing to something 
valid at that point.

So, I hacked ccwchain_fetch_direct() to NOT set the IDAL flag in a NOP 
CCW, and to leave the CDA alone.  This means it will still contain the 
guest address, which is risky but hey it's a test system.  :)  (I 
offline'd a bunch of host memory too, to make sure I had some 
unavailable addresses.)

In my traces, the non-IDA NOP CCWs were issued to the host with and 
without the skip flag, with zero and non-zero counts, and with zero and 
non-zero CDAs.  All of them work just fine, including the ones who's 
addresses fall into the offline space.  Even the combination of no skip, 
non-zero count, and zero cda.

I modified that hack to do the same for a known invalid control opcode, 
and it seemed to be okay too.  We got an (expected) invalid command 
before we noticed any problem with the provided address.


> 
> In general, I think doing the translation (and probably already hitting
> errors there) is better than sending down a guest address.
> 

I mostly agree, but I have one test program that generates invalid GUEST 
addresses with its NOP CCWs, since it doesn't seem to care about whether 
they're valid or not.  So any attempt to pin them will end badly, which 
is why I call that opcode out in ccw_does_data_transfer(), and just send 
invalid IDAWs with it.
Cornelia Huck May 14, 2019, 2:29 p.m. UTC | #16
On Fri, 10 May 2019 10:24:31 -0400
Eric Farman <farman@linux.ibm.com> wrote:

> On 5/10/19 7:47 AM, Cornelia Huck wrote:
> > On Wed, 8 May 2019 11:22:07 +0200
> > Pierre Morel <pmorel@linux.ibm.com> wrote:
> >   
> >> For the NOOP its clearly stated that it does not start a data transfer.
> >> If we pin the CDA, it could then eventually be the cause of errors if
> >> the address indicated by the CDA is not accessible.
> >>
> >> The NOOP is a particular CONTROL operation for which no data is transfered.
> >> Other CONTROL operation may start a data transfer.  
> > 
> > I've just looked at the documentation again.
> > 
> > The Olde Common I/O Device Commands document indicates that a NOOP
> > simply causes channel end/device end.
> > 
> > The PoP seems to indicate that the cda is always checked (i.e. does it
> > point to a valid memory area?), but I'm not sure whether the area that
> > is pointed to is checked for accessibility etc. as well, even if the
> > command does not transfer any data.
> > 
> > Has somebody tried to find out what happens on Real Hardware(tm) if you
> > send a command that is not supposed to transfer any data where the cda
> > points to a valid, but not accessible area?  
> 
> Hrm...  The CDA itself?  I don't think so.  Since every CCW is converted 
> to an IDAL in vfio-ccw, we guarantee that it's pointing to something 
> valid at that point.
> 
> So, I hacked ccwchain_fetch_direct() to NOT set the IDAL flag in a NOP 
> CCW, and to leave the CDA alone.  This means it will still contain the 
> guest address, which is risky but hey it's a test system.  :)  (I 
> offline'd a bunch of host memory too, to make sure I had some 
> unavailable addresses.)
> 
> In my traces, the non-IDA NOP CCWs were issued to the host with and 
> without the skip flag, with zero and non-zero counts, and with zero and 
> non-zero CDAs.  All of them work just fine, including the ones who's 
> addresses fall into the offline space.  Even the combination of no skip, 
> non-zero count, and zero cda.
> 
> I modified that hack to do the same for a known invalid control opcode, 
> and it seemed to be okay too.  We got an (expected) invalid command 
> before we noticed any problem with the provided address.

That's interesting; I would not have arrived at this by interpreting
the PoP...

> > 
> > In general, I think doing the translation (and probably already hitting
> > errors there) is better than sending down a guest address.
> >   
> 
> I mostly agree, but I have one test program that generates invalid GUEST 
> addresses with its NOP CCWs, since it doesn't seem to care about whether 
> they're valid or not.  So any attempt to pin them will end badly, which 
> is why I call that opcode out in ccw_does_data_transfer(), and just send 
> invalid IDAWs with it.

So, without the attempt to pin they do not fail? Maybe the right
approach would be to rewrite the cda before sending the ccws?
Eric Farman May 14, 2019, 6:29 p.m. UTC | #17
On 5/14/19 10:29 AM, Cornelia Huck wrote:
> On Fri, 10 May 2019 10:24:31 -0400
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> On 5/10/19 7:47 AM, Cornelia Huck wrote:
>>> On Wed, 8 May 2019 11:22:07 +0200
>>> Pierre Morel <pmorel@linux.ibm.com> wrote:
>>>    
>>>> For the NOOP its clearly stated that it does not start a data transfer.
>>>> If we pin the CDA, it could then eventually be the cause of errors if
>>>> the address indicated by the CDA is not accessible.
>>>>
>>>> The NOOP is a particular CONTROL operation for which no data is transfered.
>>>> Other CONTROL operation may start a data transfer.
>>>
>>> I've just looked at the documentation again.
>>>
>>> The Olde Common I/O Device Commands document indicates that a NOOP
>>> simply causes channel end/device end.
>>>
>>> The PoP seems to indicate that the cda is always checked (i.e. does it
>>> point to a valid memory area?), but I'm not sure whether the area that
>>> is pointed to is checked for accessibility etc. as well, even if the
>>> command does not transfer any data.
>>>
>>> Has somebody tried to find out what happens on Real Hardware(tm) if you
>>> send a command that is not supposed to transfer any data where the cda
>>> points to a valid, but not accessible area?
>>
>> Hrm...  The CDA itself?  I don't think so.  Since every CCW is converted
>> to an IDAL in vfio-ccw, we guarantee that it's pointing to something
>> valid at that point.
>>
>> So, I hacked ccwchain_fetch_direct() to NOT set the IDAL flag in a NOP
>> CCW, and to leave the CDA alone.  This means it will still contain the
>> guest address, which is risky but hey it's a test system.  :)  (I
>> offline'd a bunch of host memory too, to make sure I had some
>> unavailable addresses.)
>>
>> In my traces, the non-IDA NOP CCWs were issued to the host with and
>> without the skip flag, with zero and non-zero counts, and with zero and
>> non-zero CDAs.  All of them work just fine, including the ones who's
>> addresses fall into the offline space.  Even the combination of no skip,
>> non-zero count, and zero cda.
>>
>> I modified that hack to do the same for a known invalid control opcode,
>> and it seemed to be okay too.  We got an (expected) invalid command
>> before we noticed any problem with the provided address.
> 
> That's interesting; I would not have arrived at this by interpreting
> the PoP...
> 
>>>
>>> In general, I think doing the translation (and probably already hitting
>>> errors there) is better than sending down a guest address.
>>>    
>>
>> I mostly agree, but I have one test program that generates invalid GUEST
>> addresses with its NOP CCWs, since it doesn't seem to care about whether
>> they're valid or not.  So any attempt to pin them will end badly, which
>> is why I call that opcode out in ccw_does_data_transfer(), and just send
>> invalid IDAWs with it.
> 
> So, without the attempt to pin they do not fail? 

Correct.

> Maybe the right
> approach would be to rewrite the cda before sending the ccws?
> 

That would be my vote.  (And it's what this series does.  :)
diff mbox series

Patch

diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 36d76b821209..c0a52025bf06 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -289,8 +289,6 @@  static long copy_ccw_from_iova(struct channel_program *cp,
 #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
 #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
 
-#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
-
 #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
 
 #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
@@ -314,6 +312,10 @@  static inline int ccw_does_data_transfer(struct ccw1 *ccw)
 	if (ccw->count == 0)
 		return 0;
 
+	/* If the command is a NOP, then no data will be transferred */
+	if (ccw_is_noop(ccw))
+		return 0;
+
 	/* If the skip flag is off, then data will be transferred */
 	if (!ccw_is_skip(ccw))
 		return 1;
@@ -398,7 +400,7 @@  static void ccwchain_cda_free(struct ccwchain *chain, int idx)
 {
 	struct ccw1 *ccw = chain->ch_ccw + idx;
 
-	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
+	if (ccw_is_tic(ccw))
 		return;
 
 	kfree((void *)(u64)ccw->cda);
@@ -723,9 +725,6 @@  static int ccwchain_fetch_one(struct ccwchain *chain,
 {
 	struct ccw1 *ccw = chain->ch_ccw + idx;
 
-	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
-		return 0;
-
 	if (ccw_is_tic(ccw))
 		return ccwchain_fetch_tic(chain, idx, cp);