diff mbox series

[1/2] target/s390x: Fix determination of overflow condition code after addition

Message ID 20220323162621.139313-2-thuth@redhat.com (mailing list archive)
State New, archived
Headers show
Series target/s390x: Fix determination of overflow condition code | expand

Commit Message

Thomas Huth March 23, 2022, 4:26 p.m. UTC
This program currently prints different results when run with TCG instead
of running on real s390x hardware:

 #include <stdio.h>

 int overflow_32 (int x, int y)
 {
   int sum;
   return ! __builtin_add_overflow (x, y, &sum);
 }

 int overflow_64 (long long x, long long y)
 {
   long sum;
   return ! __builtin_add_overflow (x, y, &sum);
 }

 int a1 = -2147483648;
 int b1 = -2147483648;
 long long a2 = -9223372036854775808L;
 long long b2 = -9223372036854775808L;

 int main ()
 {
   {
     int a = a1;
     int b = b1;
     printf ("a = 0x%x, b = 0x%x\n", a, b);
     printf ("no_overflow = %d\n", overflow_32 (a, b));
   }
   {
     long long a = a2;
     long long b = b2;
     printf ("a = 0x%llx, b = 0x%llx\n", a, b);
     printf ("no_overflow = %d\n", overflow_64 (a, b));
   }
 }

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
Suggested-by: Bruno Haible <bruno@clisp.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 target/s390x/tcg/cc_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Comments

David Hildenbrand March 30, 2022, 8:52 a.m. UTC | #1
On 23.03.22 17:26, Thomas Huth wrote:
> This program currently prints different results when run with TCG instead
> of running on real s390x hardware:
> 
>  #include <stdio.h>
> 
>  int overflow_32 (int x, int y)
>  {
>    int sum;
>    return ! __builtin_add_overflow (x, y, &sum);
>  }
> 
>  int overflow_64 (long long x, long long y)
>  {
>    long sum;
>    return ! __builtin_add_overflow (x, y, &sum);
>  }
> 
>  int a1 = -2147483648;
>  int b1 = -2147483648;
>  long long a2 = -9223372036854775808L;
>  long long b2 = -9223372036854775808L;
> 
>  int main ()
>  {
>    {
>      int a = a1;
>      int b = b1;
>      printf ("a = 0x%x, b = 0x%x\n", a, b);
>      printf ("no_overflow = %d\n", overflow_32 (a, b));
>    }
>    {
>      long long a = a2;
>      long long b = b2;
>      printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>      printf ("no_overflow = %d\n", overflow_64 (a, b));
>    }
>  }
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
> Suggested-by: Bruno Haible <bruno@clisp.org>
> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---
>  target/s390x/tcg/cc_helper.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
> index 8d04097f78..e11cdb745d 100644
> --- a/target/s390x/tcg/cc_helper.c
> +++ b/target/s390x/tcg/cc_helper.c
> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
>  
>  static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>  {
> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {


Intuitively, I'd have checked for any overflow/underflow by comparing
with one of the input variables:

a) Both numbers are positive

Adding to positive numbers has to result in something that's bigger than
the input parameters.

"a1 > 0 && a2 > 0 && ar < a1"

b) Both numbers are negative

Adding to negative numbers has to result in something that's smaller
than the input parameters.

"a1 < 0 && a2 < 0 && ar > a1"
Thomas Huth March 30, 2022, 9:29 a.m. UTC | #2
On 30/03/2022 10.52, David Hildenbrand wrote:
> On 23.03.22 17:26, Thomas Huth wrote:
>> This program currently prints different results when run with TCG instead
>> of running on real s390x hardware:
>>
>>   #include <stdio.h>
>>
>>   int overflow_32 (int x, int y)
>>   {
>>     int sum;
>>     return ! __builtin_add_overflow (x, y, &sum);
>>   }
>>
>>   int overflow_64 (long long x, long long y)
>>   {
>>     long sum;
>>     return ! __builtin_add_overflow (x, y, &sum);
>>   }
>>
>>   int a1 = -2147483648;
>>   int b1 = -2147483648;
>>   long long a2 = -9223372036854775808L;
>>   long long b2 = -9223372036854775808L;
>>
>>   int main ()
>>   {
>>     {
>>       int a = a1;
>>       int b = b1;
>>       printf ("a = 0x%x, b = 0x%x\n", a, b);
>>       printf ("no_overflow = %d\n", overflow_32 (a, b));
>>     }
>>     {
>>       long long a = a2;
>>       long long b = b2;
>>       printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>>       printf ("no_overflow = %d\n", overflow_64 (a, b));
>>     }
>>   }
>>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
>> Suggested-by: Bruno Haible <bruno@clisp.org>
>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>> ---
>>   target/s390x/tcg/cc_helper.c | 4 ++--
>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
>> index 8d04097f78..e11cdb745d 100644
>> --- a/target/s390x/tcg/cc_helper.c
>> +++ b/target/s390x/tcg/cc_helper.c
>> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
>>   
>>   static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>   {
>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
> 
> 
> Intuitively, I'd have checked for any overflow/underflow by comparing
> with one of the input variables:
> 
> a) Both numbers are positive
> 
> Adding to positive numbers has to result in something that's bigger than
> the input parameters.
> 
> "a1 > 0 && a2 > 0 && ar < a1"

I think it doesn't really matter whether we compare ar with a1 or 0 here. If 
an overflow happens, what's the biggest number that we can get? AFAICT it's 
with a1 = 0x7fffffffffffffff and a2 = 0x7fffffffffffffff. You then get:

  0x7fffffffffffffff + 0x7fffffffffffffff = 0xFFFFFFFFFFFFFFFE

and that's still < 0 if treated as a signed value. I don't see a way where 
ar could be in the range between 0 and a1.

(OTOH, checking for ar < a1 instead of ar < 0 wouldn't hurt either, I guess).

> b) Both numbers are negative
> 
> Adding to negative numbers has to result in something that's smaller
> than the input parameters.
> 
> "a1 < 0 && a2 < 0 && ar > a1"

What about if the uppermost bit gets lost in 64-bit mode:

  0x8000000000000000 + 0x8000000000000000 = 0x0000000000000000

ar > a1 does not work here anymore, does it?

  Thomas
David Hildenbrand March 30, 2022, 9:34 a.m. UTC | #3
On 30.03.22 11:29, Thomas Huth wrote:
> On 30/03/2022 10.52, David Hildenbrand wrote:
>> On 23.03.22 17:26, Thomas Huth wrote:
>>> This program currently prints different results when run with TCG instead
>>> of running on real s390x hardware:
>>>
>>>   #include <stdio.h>
>>>
>>>   int overflow_32 (int x, int y)
>>>   {
>>>     int sum;
>>>     return ! __builtin_add_overflow (x, y, &sum);
>>>   }
>>>
>>>   int overflow_64 (long long x, long long y)
>>>   {
>>>     long sum;
>>>     return ! __builtin_add_overflow (x, y, &sum);
>>>   }
>>>
>>>   int a1 = -2147483648;
>>>   int b1 = -2147483648;
>>>   long long a2 = -9223372036854775808L;
>>>   long long b2 = -9223372036854775808L;
>>>
>>>   int main ()
>>>   {
>>>     {
>>>       int a = a1;
>>>       int b = b1;
>>>       printf ("a = 0x%x, b = 0x%x\n", a, b);
>>>       printf ("no_overflow = %d\n", overflow_32 (a, b));
>>>     }
>>>     {
>>>       long long a = a2;
>>>       long long b = b2;
>>>       printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>>>       printf ("no_overflow = %d\n", overflow_64 (a, b));
>>>     }
>>>   }
>>>
>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
>>> Suggested-by: Bruno Haible <bruno@clisp.org>
>>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>>> ---
>>>   target/s390x/tcg/cc_helper.c | 4 ++--
>>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
>>> index 8d04097f78..e11cdb745d 100644
>>> --- a/target/s390x/tcg/cc_helper.c
>>> +++ b/target/s390x/tcg/cc_helper.c
>>> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
>>>   
>>>   static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>>   {
>>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
>>
>>
>> Intuitively, I'd have checked for any overflow/underflow by comparing
>> with one of the input variables:
>>
>> a) Both numbers are positive
>>
>> Adding to positive numbers has to result in something that's bigger than
>> the input parameters.
>>
>> "a1 > 0 && a2 > 0 && ar < a1"
> 
> I think it doesn't really matter whether we compare ar with a1 or 0 here. If 
> an overflow happens, what's the biggest number that we can get? AFAICT it's 
> with a1 = 0x7fffffffffffffff and a2 = 0x7fffffffffffffff. You then get:
> 
>   0x7fffffffffffffff + 0x7fffffffffffffff = 0xFFFFFFFFFFFFFFFE
> 
> and that's still < 0 if treated as a signed value. I don't see a way where 
> ar could be in the range between 0 and a1.
> 
> (OTOH, checking for ar < a1 instead of ar < 0 wouldn't hurt either, I guess).
> 
>> b) Both numbers are negative
>>
>> Adding to negative numbers has to result in something that's smaller
>> than the input parameters.
>>
>> "a1 < 0 && a2 < 0 && ar > a1"
> 
> What about if the uppermost bit gets lost in 64-bit mode:
> 
>   0x8000000000000000 + 0x8000000000000000 = 0x0000000000000000
> 
> ar > a1 does not work here anymore, does it?


0 > -9223372036854775808, no?
Thomas Huth March 30, 2022, 9:42 a.m. UTC | #4
On 30/03/2022 11.34, David Hildenbrand wrote:
> On 30.03.22 11:29, Thomas Huth wrote:
>> On 30/03/2022 10.52, David Hildenbrand wrote:
>>> On 23.03.22 17:26, Thomas Huth wrote:
>>>> This program currently prints different results when run with TCG instead
>>>> of running on real s390x hardware:
>>>>
>>>>    #include <stdio.h>
>>>>
>>>>    int overflow_32 (int x, int y)
>>>>    {
>>>>      int sum;
>>>>      return ! __builtin_add_overflow (x, y, &sum);
>>>>    }
>>>>
>>>>    int overflow_64 (long long x, long long y)
>>>>    {
>>>>      long sum;
>>>>      return ! __builtin_add_overflow (x, y, &sum);
>>>>    }
>>>>
>>>>    int a1 = -2147483648;
>>>>    int b1 = -2147483648;
>>>>    long long a2 = -9223372036854775808L;
>>>>    long long b2 = -9223372036854775808L;
>>>>
>>>>    int main ()
>>>>    {
>>>>      {
>>>>        int a = a1;
>>>>        int b = b1;
>>>>        printf ("a = 0x%x, b = 0x%x\n", a, b);
>>>>        printf ("no_overflow = %d\n", overflow_32 (a, b));
>>>>      }
>>>>      {
>>>>        long long a = a2;
>>>>        long long b = b2;
>>>>        printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>>>>        printf ("no_overflow = %d\n", overflow_64 (a, b));
>>>>      }
>>>>    }
>>>>
>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
>>>> Suggested-by: Bruno Haible <bruno@clisp.org>
>>>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>>>> ---
>>>>    target/s390x/tcg/cc_helper.c | 4 ++--
>>>>    1 file changed, 2 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
>>>> index 8d04097f78..e11cdb745d 100644
>>>> --- a/target/s390x/tcg/cc_helper.c
>>>> +++ b/target/s390x/tcg/cc_helper.c
>>>> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
>>>>    
>>>>    static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>>>    {
>>>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>>>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
>>>
>>>
>>> Intuitively, I'd have checked for any overflow/underflow by comparing
>>> with one of the input variables:
>>>
>>> a) Both numbers are positive
>>>
>>> Adding to positive numbers has to result in something that's bigger than
>>> the input parameters.
>>>
>>> "a1 > 0 && a2 > 0 && ar < a1"
>>
>> I think it doesn't really matter whether we compare ar with a1 or 0 here. If
>> an overflow happens, what's the biggest number that we can get? AFAICT it's
>> with a1 = 0x7fffffffffffffff and a2 = 0x7fffffffffffffff. You then get:
>>
>>    0x7fffffffffffffff + 0x7fffffffffffffff = 0xFFFFFFFFFFFFFFFE
>>
>> and that's still < 0 if treated as a signed value. I don't see a way where
>> ar could be in the range between 0 and a1.
>>
>> (OTOH, checking for ar < a1 instead of ar < 0 wouldn't hurt either, I guess).
>>
>>> b) Both numbers are negative
>>>
>>> Adding to negative numbers has to result in something that's smaller
>>> than the input parameters.
>>>
>>> "a1 < 0 && a2 < 0 && ar > a1"
>>
>> What about if the uppermost bit gets lost in 64-bit mode:
>>
>>    0x8000000000000000 + 0x8000000000000000 = 0x0000000000000000
>>
>> ar > a1 does not work here anymore, does it?
> 
> 
> 0 > -9223372036854775808, no?

current coffe level < correct coffee level

... sorry, never mind, you're right of course.

Anyway, 0 is the lowest number we can get for an underflow, so comparing 
with >= 0 should be fine (but comparing with a1 wouldn't hurt either).

  Thomas
David Hildenbrand March 30, 2022, 9:47 a.m. UTC | #5
On 30.03.22 11:42, Thomas Huth wrote:
> On 30/03/2022 11.34, David Hildenbrand wrote:
>> On 30.03.22 11:29, Thomas Huth wrote:
>>> On 30/03/2022 10.52, David Hildenbrand wrote:
>>>> On 23.03.22 17:26, Thomas Huth wrote:
>>>>> This program currently prints different results when run with TCG instead
>>>>> of running on real s390x hardware:
>>>>>
>>>>>    #include <stdio.h>
>>>>>
>>>>>    int overflow_32 (int x, int y)
>>>>>    {
>>>>>      int sum;
>>>>>      return ! __builtin_add_overflow (x, y, &sum);
>>>>>    }
>>>>>
>>>>>    int overflow_64 (long long x, long long y)
>>>>>    {
>>>>>      long sum;
>>>>>      return ! __builtin_add_overflow (x, y, &sum);
>>>>>    }
>>>>>
>>>>>    int a1 = -2147483648;
>>>>>    int b1 = -2147483648;
>>>>>    long long a2 = -9223372036854775808L;
>>>>>    long long b2 = -9223372036854775808L;
>>>>>
>>>>>    int main ()
>>>>>    {
>>>>>      {
>>>>>        int a = a1;
>>>>>        int b = b1;
>>>>>        printf ("a = 0x%x, b = 0x%x\n", a, b);
>>>>>        printf ("no_overflow = %d\n", overflow_32 (a, b));
>>>>>      }
>>>>>      {
>>>>>        long long a = a2;
>>>>>        long long b = b2;
>>>>>        printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>>>>>        printf ("no_overflow = %d\n", overflow_64 (a, b));
>>>>>      }
>>>>>    }
>>>>>
>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
>>>>> Suggested-by: Bruno Haible <bruno@clisp.org>
>>>>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>>>>> ---
>>>>>    target/s390x/tcg/cc_helper.c | 4 ++--
>>>>>    1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
>>>>> index 8d04097f78..e11cdb745d 100644
>>>>> --- a/target/s390x/tcg/cc_helper.c
>>>>> +++ b/target/s390x/tcg/cc_helper.c
>>>>> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
>>>>>    
>>>>>    static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>>>>    {
>>>>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>>>>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
>>>>
>>>>
>>>> Intuitively, I'd have checked for any overflow/underflow by comparing
>>>> with one of the input variables:
>>>>
>>>> a) Both numbers are positive
>>>>
>>>> Adding to positive numbers has to result in something that's bigger than
>>>> the input parameters.
>>>>
>>>> "a1 > 0 && a2 > 0 && ar < a1"
>>>
>>> I think it doesn't really matter whether we compare ar with a1 or 0 here. If
>>> an overflow happens, what's the biggest number that we can get? AFAICT it's
>>> with a1 = 0x7fffffffffffffff and a2 = 0x7fffffffffffffff. You then get:
>>>
>>>    0x7fffffffffffffff + 0x7fffffffffffffff = 0xFFFFFFFFFFFFFFFE
>>>
>>> and that's still < 0 if treated as a signed value. I don't see a way where
>>> ar could be in the range between 0 and a1.
>>>
>>> (OTOH, checking for ar < a1 instead of ar < 0 wouldn't hurt either, I guess).
>>>
>>>> b) Both numbers are negative
>>>>
>>>> Adding to negative numbers has to result in something that's smaller
>>>> than the input parameters.
>>>>
>>>> "a1 < 0 && a2 < 0 && ar > a1"
>>>
>>> What about if the uppermost bit gets lost in 64-bit mode:
>>>
>>>    0x8000000000000000 + 0x8000000000000000 = 0x0000000000000000
>>>
>>> ar > a1 does not work here anymore, does it?
>>
>>
>> 0 > -9223372036854775808, no?
> 
> current coffe level < correct coffee level
> 
> ... sorry, never mind, you're right of course.
> 
> Anyway, 0 is the lowest number we can get for an underflow, so comparing 
> with >= 0 should be fine (but comparing with a1 wouldn't hurt either).

At least for me it takes more brainpower to understand that than
comparing against one of both parameters as we usually do, e.g., for
unsigned values in

include/qemu/range.h:range_init()

if (lob + size < lob) {
	return -ERANGE;
}


Having that said, I haven't checked all corner cases in your example but
I assume it's fine.
Thomas Huth March 30, 2022, 10:12 a.m. UTC | #6
On 30/03/2022 11.47, David Hildenbrand wrote:
> On 30.03.22 11:42, Thomas Huth wrote:
>> On 30/03/2022 11.34, David Hildenbrand wrote:
>>> On 30.03.22 11:29, Thomas Huth wrote:
>>>> On 30/03/2022 10.52, David Hildenbrand wrote:
>>>>> On 23.03.22 17:26, Thomas Huth wrote:
>>>>>> This program currently prints different results when run with TCG instead
>>>>>> of running on real s390x hardware:
>>>>>>
>>>>>>     #include <stdio.h>
>>>>>>
>>>>>>     int overflow_32 (int x, int y)
>>>>>>     {
>>>>>>       int sum;
>>>>>>       return ! __builtin_add_overflow (x, y, &sum);
>>>>>>     }
>>>>>>
>>>>>>     int overflow_64 (long long x, long long y)
>>>>>>     {
>>>>>>       long sum;
>>>>>>       return ! __builtin_add_overflow (x, y, &sum);
>>>>>>     }
>>>>>>
>>>>>>     int a1 = -2147483648;
>>>>>>     int b1 = -2147483648;
>>>>>>     long long a2 = -9223372036854775808L;
>>>>>>     long long b2 = -9223372036854775808L;
>>>>>>
>>>>>>     int main ()
>>>>>>     {
>>>>>>       {
>>>>>>         int a = a1;
>>>>>>         int b = b1;
>>>>>>         printf ("a = 0x%x, b = 0x%x\n", a, b);
>>>>>>         printf ("no_overflow = %d\n", overflow_32 (a, b));
>>>>>>       }
>>>>>>       {
>>>>>>         long long a = a2;
>>>>>>         long long b = b2;
>>>>>>         printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>>>>>>         printf ("no_overflow = %d\n", overflow_64 (a, b));
>>>>>>       }
>>>>>>     }
>>>>>>
>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
>>>>>> Suggested-by: Bruno Haible <bruno@clisp.org>
>>>>>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>>>>>> ---
>>>>>>     target/s390x/tcg/cc_helper.c | 4 ++--
>>>>>>     1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
>>>>>> index 8d04097f78..e11cdb745d 100644
>>>>>> --- a/target/s390x/tcg/cc_helper.c
>>>>>> +++ b/target/s390x/tcg/cc_helper.c
>>>>>> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
>>>>>>     
>>>>>>     static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>>>>>     {
>>>>>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>>>>>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
>>>>>
>>>>>
>>>>> Intuitively, I'd have checked for any overflow/underflow by comparing
>>>>> with one of the input variables:
>>>>>
>>>>> a) Both numbers are positive
>>>>>
>>>>> Adding to positive numbers has to result in something that's bigger than
>>>>> the input parameters.
>>>>>
>>>>> "a1 > 0 && a2 > 0 && ar < a1"
>>>>
>>>> I think it doesn't really matter whether we compare ar with a1 or 0 here. If
>>>> an overflow happens, what's the biggest number that we can get? AFAICT it's
>>>> with a1 = 0x7fffffffffffffff and a2 = 0x7fffffffffffffff. You then get:
>>>>
>>>>     0x7fffffffffffffff + 0x7fffffffffffffff = 0xFFFFFFFFFFFFFFFE
>>>>
>>>> and that's still < 0 if treated as a signed value. I don't see a way where
>>>> ar could be in the range between 0 and a1.
>>>>
>>>> (OTOH, checking for ar < a1 instead of ar < 0 wouldn't hurt either, I guess).
>>>>
>>>>> b) Both numbers are negative
>>>>>
>>>>> Adding to negative numbers has to result in something that's smaller
>>>>> than the input parameters.
>>>>>
>>>>> "a1 < 0 && a2 < 0 && ar > a1"
>>>>
>>>> What about if the uppermost bit gets lost in 64-bit mode:
>>>>
>>>>     0x8000000000000000 + 0x8000000000000000 = 0x0000000000000000
>>>>
>>>> ar > a1 does not work here anymore, does it?
>>>
>>>
>>> 0 > -9223372036854775808, no?
>>
>> current coffe level < correct coffee level
>>
>> ... sorry, never mind, you're right of course.
>>
>> Anyway, 0 is the lowest number we can get for an underflow, so comparing
>> with >= 0 should be fine (but comparing with a1 wouldn't hurt either).
> 
> At least for me it takes more brainpower to understand that than
> comparing against one of both parameters as we usually do, e.g., for
> unsigned values
Maybe we should simply switch the code to use __builtin_add_overflow and 
__builtin_sub_overflow and let the compiler figure out the details...

  Thomas
Thomas Huth March 30, 2022, 10:15 a.m. UTC | #7
On 30/03/2022 12.12, Thomas Huth wrote:
> On 30/03/2022 11.47, David Hildenbrand wrote:
>> On 30.03.22 11:42, Thomas Huth wrote:
>>> On 30/03/2022 11.34, David Hildenbrand wrote:
>>>> On 30.03.22 11:29, Thomas Huth wrote:
>>>>> On 30/03/2022 10.52, David Hildenbrand wrote:
>>>>>> On 23.03.22 17:26, Thomas Huth wrote:
>>>>>>> This program currently prints different results when run with TCG 
>>>>>>> instead
>>>>>>> of running on real s390x hardware:
>>>>>>>
>>>>>>>     #include <stdio.h>
>>>>>>>
>>>>>>>     int overflow_32 (int x, int y)
>>>>>>>     {
>>>>>>>       int sum;
>>>>>>>       return ! __builtin_add_overflow (x, y, &sum);
>>>>>>>     }
>>>>>>>
>>>>>>>     int overflow_64 (long long x, long long y)
>>>>>>>     {
>>>>>>>       long sum;
>>>>>>>       return ! __builtin_add_overflow (x, y, &sum);
>>>>>>>     }
>>>>>>>
>>>>>>>     int a1 = -2147483648;
>>>>>>>     int b1 = -2147483648;
>>>>>>>     long long a2 = -9223372036854775808L;
>>>>>>>     long long b2 = -9223372036854775808L;
>>>>>>>
>>>>>>>     int main ()
>>>>>>>     {
>>>>>>>       {
>>>>>>>         int a = a1;
>>>>>>>         int b = b1;
>>>>>>>         printf ("a = 0x%x, b = 0x%x\n", a, b);
>>>>>>>         printf ("no_overflow = %d\n", overflow_32 (a, b));
>>>>>>>       }
>>>>>>>       {
>>>>>>>         long long a = a2;
>>>>>>>         long long b = b2;
>>>>>>>         printf ("a = 0x%llx, b = 0x%llx\n", a, b);
>>>>>>>         printf ("no_overflow = %d\n", overflow_64 (a, b));
>>>>>>>       }
>>>>>>>     }
>>>>>>>
>>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/616
>>>>>>> Suggested-by: Bruno Haible <bruno@clisp.org>
>>>>>>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>>>>>>> ---
>>>>>>>     target/s390x/tcg/cc_helper.c | 4 ++--
>>>>>>>     1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
>>>>>>> index 8d04097f78..e11cdb745d 100644
>>>>>>> --- a/target/s390x/tcg/cc_helper.c
>>>>>>> +++ b/target/s390x/tcg/cc_helper.c
>>>>>>> @@ -136,7 +136,7 @@ static uint32_t cc_calc_subu(uint64_t borrow_out, 
>>>>>>> uint64_t result)
>>>>>>>     static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>>>>>>     {
>>>>>>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>>>>>>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 
>>>>>>> 0)) {
>>>>>>
>>>>>>
>>>>>> Intuitively, I'd have checked for any overflow/underflow by comparing
>>>>>> with one of the input variables:
>>>>>>
>>>>>> a) Both numbers are positive
>>>>>>
>>>>>> Adding to positive numbers has to result in something that's bigger than
>>>>>> the input parameters.
>>>>>>
>>>>>> "a1 > 0 && a2 > 0 && ar < a1"
>>>>>
>>>>> I think it doesn't really matter whether we compare ar with a1 or 0 
>>>>> here. If
>>>>> an overflow happens, what's the biggest number that we can get? AFAICT 
>>>>> it's
>>>>> with a1 = 0x7fffffffffffffff and a2 = 0x7fffffffffffffff. You then get:
>>>>>
>>>>>     0x7fffffffffffffff + 0x7fffffffffffffff = 0xFFFFFFFFFFFFFFFE
>>>>>
>>>>> and that's still < 0 if treated as a signed value. I don't see a way where
>>>>> ar could be in the range between 0 and a1.
>>>>>
>>>>> (OTOH, checking for ar < a1 instead of ar < 0 wouldn't hurt either, I 
>>>>> guess).
>>>>>
>>>>>> b) Both numbers are negative
>>>>>>
>>>>>> Adding to negative numbers has to result in something that's smaller
>>>>>> than the input parameters.
>>>>>>
>>>>>> "a1 < 0 && a2 < 0 && ar > a1"
>>>>>
>>>>> What about if the uppermost bit gets lost in 64-bit mode:
>>>>>
>>>>>     0x8000000000000000 + 0x8000000000000000 = 0x0000000000000000
>>>>>
>>>>> ar > a1 does not work here anymore, does it?
>>>>
>>>>
>>>> 0 > -9223372036854775808, no?
>>>
>>> current coffe level < correct coffee level
>>>
>>> ... sorry, never mind, you're right of course.
>>>
>>> Anyway, 0 is the lowest number we can get for an underflow, so comparing
>>> with >= 0 should be fine (but comparing with a1 wouldn't hurt either).
>>
>> At least for me it takes more brainpower to understand that than
>> comparing against one of both parameters as we usually do, e.g., for
>> unsigned values
> Maybe we should simply switch the code to use __builtin_add_overflow and 
> __builtin_sub_overflow and let the compiler figure out the details...

Never mind again, that doesn't work in this context either ...
/me finally goes fetching another coffee...

  Thomas
Richard Henderson March 30, 2022, 2:03 p.m. UTC | #8
On 3/30/22 02:52, David Hildenbrand wrote:
>>   static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>   {
>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
> 
> 
> Intuitively, I'd have checked for any overflow/underflow by comparing
> with one of the input variables:
> 
> a) Both numbers are positive
> 
> Adding to positive numbers has to result in something that's bigger than
> the input parameters.
> 
> "a1 > 0 && a2 > 0 && ar < a1"
> 
> b) Both numbers are negative
> 
> Adding to negative numbers has to result in something that's smaller
> than the input parameters.
> 
> "a1 < 0 && a2 < 0 && ar > a1"

If we're not going to just fix the >= typo,
I'd suggest using the much more compact bitwise method:

     ((ar ^ a1) & ~(a1 ^ a2)) < 0

See sadd64_overflow in qemu/host-utils.h.


r~
Thomas Huth March 31, 2022, 2:52 p.m. UTC | #9
On 30/03/2022 16.03, Richard Henderson wrote:
> On 3/30/22 02:52, David Hildenbrand wrote:
>>>   static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
>>>   {
>>> -    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
>>> +    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
>>
>>
>> Intuitively, I'd have checked for any overflow/underflow by comparing
>> with one of the input variables:
>>
>> a) Both numbers are positive
>>
>> Adding to positive numbers has to result in something that's bigger than
>> the input parameters.
>>
>> "a1 > 0 && a2 > 0 && ar < a1"
>>
>> b) Both numbers are negative
>>
>> Adding to negative numbers has to result in something that's smaller
>> than the input parameters.
>>
>> "a1 < 0 && a2 < 0 && ar > a1"
> 
> If we're not going to just fix the >= typo,
> I'd suggest using the much more compact bitwise method:
> 
>      ((ar ^ a1) & ~(a1 ^ a2)) < 0
> 
> See sadd64_overflow in qemu/host-utils.h.

Thanks, sounds like a good idea. Anyway, I'd like to go with Bruno's patch 
for 7.0 and do the optimization in the 7.1 cycle instead.

  Thomas
diff mbox series

Patch

diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c
index 8d04097f78..e11cdb745d 100644
--- a/target/s390x/tcg/cc_helper.c
+++ b/target/s390x/tcg/cc_helper.c
@@ -136,7 +136,7 @@  static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
 
 static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
 {
-    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
+    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
         return 3; /* overflow */
     } else {
         if (ar < 0) {
@@ -196,7 +196,7 @@  static uint32_t cc_calc_comp_64(int64_t dst)
 
 static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar)
 {
-    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
+    if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar >= 0)) {
         return 3; /* overflow */
     } else {
         if (ar < 0) {