diff mbox series

[v3] date: detect underflow/overflow when parsing dates with timezone offset

Message ID pull.1726.v3.git.git.1717719428510.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series [v3] date: detect underflow/overflow when parsing dates with timezone offset | expand

Commit Message

darcy June 7, 2024, 12:17 a.m. UTC
From: darcy <acednes@gmail.com>

Overriding the date of a commit to be close to "1970-01-01 00:00:00"
with a large enough positive timezone for the equivelant GMT time to be
before the epoch is considered valid by `parse_date_basic`. Similar
behaviour occurs when using a date close to "2099-12-31 23:59:59" (the
maximum date allowed by `tm_to_time_t`) with a large enough negative
timezone offset.

This leads to an integer underflow or underflow respectively in the
commit timestamp, which is not caught by `git-commit`, but will cause
other services to fail, such as `git-fsck`, which, for the first case,
reports "badDateOverflow: invalid author/committer line - date causes
integer overflow".

Instead check the timezone offset and fail if the resulting time comes
before the epoch "1970-01-01T00:00:00Z" or after the maximum date
"2099-12-31T23:59:59Z".

Signed-off-by: Darcy Burke <acednes@gmail.com>
---
    fix: prevent date underflow when using positive timezone offset
    
    cc: Patrick Steinhardt ps@pks.im cc: Phillip Wood
    phillip.wood123@gmail.com cc: darcy acednes@gmail.com cc: Jeff King
    peff@peff.net

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1726%2Fdxrcy%2Fmaster-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1726/dxrcy/master-v3
Pull-Request: https://github.com/git/git/pull/1726

Range-diff vs v2:

 1:  db508b2f533 ! 1:  f0de3a2c543 date: detect underflow when parsing dates with positive timezone offset
     @@ Metadata
      Author: darcy <acednes@gmail.com>
      
       ## Commit message ##
     -    date: detect underflow when parsing dates with positive timezone offset
     +    date: detect underflow/overflow when parsing dates with timezone offset
      
          Overriding the date of a commit to be close to "1970-01-01 00:00:00"
     -    with a large enough timezone for the equivelant GMT time to be before
     -    the epoch is considered valid by `parse_date_basic`.
     +    with a large enough positive timezone for the equivelant GMT time to be
     +    before the epoch is considered valid by `parse_date_basic`. Similar
     +    behaviour occurs when using a date close to "2099-12-31 23:59:59" (the
     +    maximum date allowed by `tm_to_time_t`) with a large enough negative
     +    timezone offset.
      
     -    This leads to an integer underflow in the commit timestamp, which is not
     -    caught by `git-commit`, but will cause other services to fail, such as
     -    `git-fsck`, which reports "badDateOverflow: invalid author/committer
     -    line - date causes integer overflow".
     +    This leads to an integer underflow or underflow respectively in the
     +    commit timestamp, which is not caught by `git-commit`, but will cause
     +    other services to fail, such as `git-fsck`, which, for the first case,
     +    reports "badDateOverflow: invalid author/committer line - date causes
     +    integer overflow".
      
          Instead check the timezone offset and fail if the resulting time comes
     -    before the epoch, "1970-01-01T00:00:00Z", when parsing.
     +    before the epoch "1970-01-01T00:00:00Z" or after the maximum date
     +    "2099-12-31T23:59:59Z".
      
          Signed-off-by: Darcy Burke <acednes@gmail.com>
      
       ## date.c ##
     +@@ date.c: static int match_object_header_date(const char *date, timestamp_t *timestamp, in
     + 	return 0;
     + }
     + 
     ++
     ++/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
     ++static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
     ++
     + /* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
     +    (i.e. English) day/month names, and it doesn't work correctly with %z. */
     + int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset)
      @@ date.c: int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset)
       		}
       	}
     @@ date.c: int parse_date_basic(const char *date, timestamp_t *timestamp, int *offs
      -	if (!tm_gmt)
      +	if (!tm_gmt) {
      +		if (*offset > 0 && *offset * 60 > *timestamp)
     ++			return -1;
     ++		if (*offset < 0 && -*offset * 60 > timestamp_max - *timestamp)
      +			return -1;
       		*timestamp -= *offset * 60;
      +	}
     @@ date.c: int parse_date_basic(const char *date, timestamp_t *timestamp, int *offs
       
      
       ## t/t0006-date.sh ##
     -@@ t/t0006-date.sh: check_parse '2008-02-14 20:30:45 -:30' '2008-02-14 20:30:45 +0000'
     - check_parse '2008-02-14 20:30:45 -05:00' '2008-02-14 20:30:45 -0500'
     +@@ t/t0006-date.sh: check_parse '2008-02-14 20:30:45 -05:00' '2008-02-14 20:30:45 -0500'
       check_parse '2008-02-14 20:30:45' '2008-02-14 20:30:45 -0500' EST5
       check_parse 'Thu, 7 Apr 2005 15:14:13 -0700' '2005-04-07 15:14:13 -0700'
     + 
      +check_parse '1970-01-01 00:00:00' '1970-01-01 00:00:00 +0000'
      +check_parse '1970-01-01 00:00:00 +00' '1970-01-01 00:00:00 +0000'
      +check_parse '1970-01-01 00:00:00 Z' '1970-01-01 00:00:00 +0000'
     @@ t/t0006-date.sh: check_parse '2008-02-14 20:30:45 -:30' '2008-02-14 20:30:45 +00
      +check_parse '1969-12-31 23:59:59 Z' bad
      +check_parse '1969-12-31 23:59:59 +11' bad
      +check_parse '1969-12-31 23:59:59 -11' bad
     - 
     ++
     ++check_parse '2099-12-31 23:59:59' '2099-12-31 23:59:59 +0000'
     ++check_parse '2099-12-31 23:59:59 +00' '2099-12-31 23:59:59 +0000'
     ++check_parse '2099-12-31 23:59:59 Z' '2099-12-31 23:59:59 +0000'
     ++check_parse '2099-12-31 23:59:59 +01' '2099-12-31 23:59:59 +0100'
     ++check_parse '2099-12-31 23:59:59 -01' bad
     ++check_parse '2099-12-31 23:59:59 -11' bad
     ++check_parse '2099-12-31 23:00:00 -01' bad
     ++check_parse '2099-12-31 22:59:59 -01' '2099-12-31 22:59:59 -0100'
     ++check_parse '2100-00-00 00:00:00' bad
     ++check_parse '2099-12-30 00:00:00 -11' '2099-12-30 00:00:00 -1100'
     ++check_parse '2100-00-00 00:00:00 +00' bad
     ++check_parse '2100-00-00 00:00:00 Z' bad
     ++check_parse '2100-00-00 00:00:00 -11' bad
     ++check_parse '2100-00-00 00:00:00 +11' bad
     ++
       check_approxidate() {
       	echo "$1 -> $2 +0000" >expect
     + 	test_expect_${3:-success} "parse approxidate ($1)" "


 date.c          | 12 +++++++++++-
 t/t0006-date.sh | 31 +++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)


base-commit: cd77e87115eab5e80e6b161e7b84a79ba1a12cdc

Comments

Junio C Hamano June 7, 2024, 5:40 p.m. UTC | #1
"darcy via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: darcy <acednes@gmail.com>

This ident should match what is used on "Signed-off-by:" line.

> Overriding the date of a commit to be close to "1970-01-01 00:00:00"
> with a large enough positive timezone for the equivelant GMT time to be
> before the epoch is considered valid by `parse_date_basic`. Similar
> behaviour occurs when using a date close to "2099-12-31 23:59:59" (the
> maximum date allowed by `tm_to_time_t`) with a large enough negative
> timezone offset.
>
> This leads to an integer underflow or underflow respectively in the

"underflow or underflow respectively"?

> commit timestamp, which is not caught by `git-commit`, but will cause
> other services to fail, such as `git-fsck`, which, for the first case,
> reports "badDateOverflow: invalid author/committer line - date causes
> integer overflow".
>
> Instead check the timezone offset and fail if the resulting time comes
> before the epoch "1970-01-01T00:00:00Z" or after the maximum date
> "2099-12-31T23:59:59Z".

Nicely described otherwise.

> +
> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;

I wonder if this should be of timestamp_t type instead, as the check
is done against *timestamp in parse_date_basic() where *timestamp is
of type timestamp_t to match?

>  int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset)
> @@ -937,8 +941,14 @@ int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset)
>  		}
>  	}
>  
> -	if (!tm_gmt)
> +	if (!tm_gmt) {
> +		if (*offset > 0 && *offset * 60 > *timestamp)
> +			return -1;
> +		if (*offset < 0 && -*offset * 60 > timestamp_max - *timestamp)
> +			return -1;
>  		*timestamp -= *offset * 60;
> +	}
> +
>  	return 0; /* success */
>  }

Thanks.
Junio C Hamano June 8, 2024, 6:58 p.m. UTC | #2
Junio C Hamano <gitster@pobox.com> writes:

>> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
>> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
>
> I wonder if this should be of timestamp_t type instead, as the check
> is done against *timestamp in parse_date_basic() where *timestamp is
> of type timestamp_t to match?

The CI build on Windows tells me that my worry was warranted.

  https://github.com/git/git/actions/runs/9424299208/job/25964281907#step:4:643

(GitHub seems to show the breakage details only to those who are
logged in, so you'd need to be logged in to visit that link)


  Error: date.c:873:75: integer overflow in expression of type 'long int' results in '-192522496' [-Werror=overflow]
    873 | static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
        |                                                                           ^
  Error: date.c:873:1: overflow in constant expression [-Werror=overflow]
Junio C Hamano June 11, 2024, 11:30 p.m. UTC | #3
Junio C Hamano <gitster@pobox.com> writes:

> I wonder if this should be of timestamp_t type instead, as the check
> is done against *timestamp in parse_date_basic() where *timestamp is
> of type timestamp_t to match?

Also, as you can see at one of the GitHub CI jobs, e.g.,

  https://github.com/git/git/actions/runs/9455916669/job/26046731619#step:6:1915

you'd need to either exclude some "these are too large timestamps
for the system" tests from 32-bit systems or expect output on them
to be different from 64-bit systems.

As you are actively detecting the condition and giving an error
message "too large for _this_ system", I think it is a good idea
to actually do the latter, i.e. on 64-bit system make sure parsing
is done correctly, and on 32-bit system make sure you get that "too
large for this system" error.

Thanks.
Randall S. Becker June 11, 2024, 11:49 p.m. UTC | #4
On Tuesday, June 11, 2024 7:30 PM, Junio C Hamano wrote:
>Junio C Hamano <gitster@pobox.com> writes:
>
>> I wonder if this should be of timestamp_t type instead, as the check
>> is done against *timestamp in parse_date_basic() where *timestamp is
>> of type timestamp_t to match?
>
>Also, as you can see at one of the GitHub CI jobs, e.g.,
>
>
>https://github.com/git/git/actions/runs/9455916669/job/26046731619#step:6:
>1915
>
>you'd need to either exclude some "these are too large timestamps for the
system"
>tests from 32-bit systems or expect output on them to be different from
64-bit
>systems.
>
>As you are actively detecting the condition and giving an error message
"too large
>for _this_ system", I think it is a good idea to actually do the latter,
i.e. on 64-bit
>system make sure parsing is done correctly, and on 32-bit system make sure
you get
>that "too large for this system" error.

Does this imply that timestamp tests will fail on 32-bit systems? I am
unsure how to interpret this. Can you please clarify?

Thanks,
Randall
Junio C Hamano June 11, 2024, 11:52 p.m. UTC | #5
<rsbecker@nexbridge.com> writes:

>>Also, as you can see at one of the GitHub CI jobs, e.g.,
>>
>>
>> https://github.com/git/git/actions/runs/9455916669/job/26046731619#step:6:1915
>>
> Does this imply that timestamp tests will fail on 32-bit systems? I am
> unsure how to interpret this. Can you please clarify?

Sorry, but I have nothing more to clarify than what appears in the
quoted CI log.
Phillip Wood June 12, 2024, 9:07 a.m. UTC | #6
On 07/06/2024 18:40, Junio C Hamano wrote:
> "darcy via GitGitGadget" <gitgitgadget@gmail.com> writes:
 >
>> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
>> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
> 
> I wonder if this should be of timestamp_t type instead, as the check
> is done against *timestamp in parse_date_basic() where *timestamp is
> of type timestamp_t to match?

I think that would make sense. We'd want to cast one of the numbers to 
timestamp_t to ensure that sum uses a wide enough integer as well. Using 
2100L is probably OK but an explicit cast would be clearer I think.

Best Wishes

Phillip
Karthik Nayak June 12, 2024, 9:49 a.m. UTC | #7
"darcy via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: darcy <acednes@gmail.com>
>
> Overriding the date of a commit to be close to "1970-01-01 00:00:00"
> with a large enough positive timezone for the equivelant GMT time to be

s/equivelant/equivalent

[snip]

> diff --git a/date.c b/date.c
> index 7365a4ad24f..95776c8a92f 100644
> --- a/date.c
> +++ b/date.c
> @@ -868,6 +868,10 @@ static int match_object_header_date(const char *date, timestamp_t *timestamp, in
>  	return 0;
>  }
>
> +
> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
>

Nit: but since we're calculating the number of years here (2100L -
1970), shouldn't we also be calculating the number of leap days instead
of hardcoding it?
Phillip Wood June 13, 2024, 1:31 p.m. UTC | #8
Hi Karthik

On 12/06/2024 10:49, Karthik Nayak wrote:
> "darcy via GitGitGadget" <gitgitgadget@gmail.com> writes:
>> +
>> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
>> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
>>
> 
> Nit: but since we're calculating the number of years here (2100L -
> 1970), shouldn't we also be calculating the number of leap days instead
> of hardcoding it?

I'm happy with a hard coded constant for the number of leap days - I 
think it is probably easier to check that (which I have done) than it 
would be to check the calculation as I'm not sure off the top of my head 
if is it safe to do (2100-1970)/4 or whether we need something more 
complicated.

Best Wishes

Phillip
Junio C Hamano June 13, 2024, 4:16 p.m. UTC | #9
Phillip Wood <phillip.wood123@gmail.com> writes:

>>> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
>>> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
>>>
>> Nit: but since we're calculating the number of years here (2100L -
>> 1970), shouldn't we also be calculating the number of leap days instead
>> of hardcoding it?
>
> I'm happy with a hard coded constant for the number of leap days - I
> think it is probably easier to check that (which I have done) than it
> would be to check the calculation as I'm not sure off the top of my
> head if is it safe to do (2100-1970)/4 or whether we need something
> more complicated.

It's even OK to use a hard coded constant for the number of days
since the epoch to the git-end-of-time ;-)

The timestamp of the git-end-of-time would not fit in time_t on
32-bit systems, I would presume?  If our tests are trying to see if
timestamps around the beginning of year 2100 are handled
"correctly", the definition of the correctness needs to be
consitional on the platform.

On systems with TIME_T_IS_64BIT, we'd want to see such a timestamp
to be represented fine.  On systems without, we'd want to see the
"Timestamp too large for this system" error when we feed such a
timestamp to be parsed.

Thanks.
Junio C Hamano June 14, 2024, 1:20 a.m. UTC | #10
Junio C Hamano <gitster@pobox.com> writes:

> The CI build on Windows tells me that my worry was warranted.
>
>   https://github.com/git/git/actions/runs/9424299208/job/25964281907#step:4:643
>
> (GitHub seems to show the breakage details only to those who are
> logged in, so you'd need to be logged in to visit that link)

So here is what I accumulated in SQUASH??? patches on top of your
topic while waiting for an updated version to unbreak the CI.

 * The "end of git time" timestamp does not fit in time_t on 32-bit
   systems, so I updated it to use timestamp_t at least for now.

 * t0006 has two tests that use TIME_IS_64BIT,TIME_T_IS_64BIT
   prerequisites; I introduced HAVE_64BIT_TIME to simplify them.

 * nobody passes $4 to check_parse to tell it to expect a failure,
   so I removed it.  It always expects success.

 * check_parse now honors a global variable REQUIRE_64BIT_TIME that
   is used as the prerequisite to run its test_expect_success; the
   "near the end of git time" tests you added use the mechanism to
   pass HAVE_64BIT_TIME prerequisite.

The last one is a bit questionable, as it only "punts" on 32-bit
systems, instead of making sure we get the expected error messages.
I think it is OK to punt here and have a separate test that checks
timestamp around year 2040 for that condition.

 date.c          |  2 +-
 t/t0006-date.sh | 20 ++++++++++++++------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/date.c b/date.c
index 95776c8a92..bee9fe8f10 100644
--- a/date.c
+++ b/date.c
@@ -870,7 +870,7 @@ static int match_object_header_date(const char *date, timestamp_t *timestamp, in
 
 
 /* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
-static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
+static const timestamp_t timestamp_max = (((timestamp_t)2100 - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
 
 /* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
    (i.e. English) day/month names, and it doesn't work correctly with %z. */
diff --git a/t/t0006-date.sh b/t/t0006-date.sh
index e8fdf361ad..fd373e1b39 100755
--- a/t/t0006-date.sh
+++ b/t/t0006-date.sh
@@ -8,6 +8,11 @@ TEST_PASSES_SANITIZE_LEAK=true
 # arbitrary reference time: 2009-08-30 19:20:00
 GIT_TEST_DATE_NOW=1251660000; export GIT_TEST_DATE_NOW
 
+if test_have_prereq TIME_IS_64BIT,TIME_T_IS_64BIT
+then
+	test_set_prereq HAVE_64BIT_TIME
+fi
+
 check_relative() {
 	t=$(($GIT_TEST_DATE_NOW - $1))
 	echo "$t -> $2" >expect
@@ -80,14 +85,15 @@ check_show raw "$TIME" '1466000000 -0200'
 
 # arbitrary time absurdly far in the future
 FUTURE="5758122296 -0400"
-check_show iso       "$FUTURE" "2152-06-19 18:24:56 -0400" TIME_IS_64BIT,TIME_T_IS_64BIT
-check_show iso-local "$FUTURE" "2152-06-19 22:24:56 +0000" TIME_IS_64BIT,TIME_T_IS_64BIT
+check_show iso       "$FUTURE" "2152-06-19 18:24:56 -0400" HAVE_64BIT_TIME
+check_show iso-local "$FUTURE" "2152-06-19 22:24:56 +0000" HAVE_64BIT_TIME
 
-check_parse() {
+REQUIRE_64BIT_TIME=
+check_parse () {
 	echo "$1 -> $2" >expect
-	test_expect_${4:-success} "parse date ($1${3:+ TZ=$3})" "
-	TZ=${3:-$TZ} test-tool date parse '$1' >actual &&
-	test_cmp expect actual
+	test_expect_success $REQUIRE_64BIT_TIME "parse date ($1${3:+ TZ=$3}) -> $2" "
+		TZ=${3:-$TZ} test-tool date parse '$1' >actual &&
+		test_cmp expect actual
 	"
 }
 
@@ -133,6 +139,7 @@ check_parse '1969-12-31 23:59:59 Z' bad
 check_parse '1969-12-31 23:59:59 +11' bad
 check_parse '1969-12-31 23:59:59 -11' bad
 
+REQUIRE_64BIT_TIME=HAVE_64BIT_TIME
 check_parse '2099-12-31 23:59:59' '2099-12-31 23:59:59 +0000'
 check_parse '2099-12-31 23:59:59 +00' '2099-12-31 23:59:59 +0000'
 check_parse '2099-12-31 23:59:59 Z' '2099-12-31 23:59:59 +0000'
@@ -147,6 +154,7 @@ check_parse '2100-00-00 00:00:00 +00' bad
 check_parse '2100-00-00 00:00:00 Z' bad
 check_parse '2100-00-00 00:00:00 -11' bad
 check_parse '2100-00-00 00:00:00 +11' bad
+REQUIRE_64BIT_TIME=
 
 check_approxidate() {
 	echo "$1 -> $2 +0000" >expect
Karthik Nayak June 14, 2024, 8:09 p.m. UTC | #11
Junio C Hamano <gitster@pobox.com> writes:

> Phillip Wood <phillip.wood123@gmail.com> writes:
>
>>>> +/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
>>>> +static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
>>>>
>>> Nit: but since we're calculating the number of years here (2100L -
>>> 1970), shouldn't we also be calculating the number of leap days instead
>>> of hardcoding it?
>>
>> I'm happy with a hard coded constant for the number of leap days - I
>> think it is probably easier to check that (which I have done) than it
>> would be to check the calculation as I'm not sure off the top of my
>> head if is it safe to do (2100-1970)/4 or whether we need something
>> more complicated.
>
> It's even OK to use a hard coded constant for the number of days
> since the epoch to the git-end-of-time ;-)

That's why I noted it as a _Nit_, mostly because it wasn't anything big.
But I found that part of it being dynamic and part of it being static
was inconsistent.

> The timestamp of the git-end-of-time would not fit in time_t on
> 32-bit systems, I would presume?  If our tests are trying to see if
> timestamps around the beginning of year 2100 are handled
> "correctly", the definition of the correctness needs to be
> consitional on the platform.
>
> On systems with TIME_T_IS_64BIT, we'd want to see such a timestamp
> to be represented fine.  On systems without, we'd want to see the
> "Timestamp too large for this system" error when we feed such a
> timestamp to be parsed.
>
> Thanks.
Junio C Hamano June 14, 2024, 9:02 p.m. UTC | #12
Karthik Nayak <karthik.188@gmail.com> writes:

>> It's even OK to use a hard coded constant for the number of days
>> since the epoch to the git-end-of-time ;-)
>
> That's why I noted it as a _Nit_, mostly because it wasn't anything big.
> But I found that part of it being dynamic and part of it being static
> was inconsistent.

Sure, but it is so tiny thing, we shouldn't waste more time than we
spend getting the tests right even on 32-bit systems.  We seem to be
doing the opposite by talking about this part even more, which is a
bit sad.  Any comments on the actual patch I sent as a follow-up?

>> The timestamp of the git-end-of-time would not fit in time_t on
>> 32-bit systems, I would presume?  If our tests are trying to see if
>> timestamps around the beginning of year 2100 are handled
>> "correctly", the definition of the correctness needs to be
>> consitional on the platform.
>>
>> On systems with TIME_T_IS_64BIT, we'd want to see such a timestamp
>> to be represented fine.  On systems without, we'd want to see the
>> "Timestamp too large for this system" error when we feed such a
>> timestamp to be parsed.
>>
>> Thanks.
Karthik Nayak June 15, 2024, 11:47 a.m. UTC | #13
Junio C Hamano <gitster@pobox.com> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>> The CI build on Windows tells me that my worry was warranted.
>>
>>   https://github.com/git/git/actions/runs/9424299208/job/25964281907#step:4:643
>>
>> (GitHub seems to show the breakage details only to those who are
>> logged in, so you'd need to be logged in to visit that link)
>
> So here is what I accumulated in SQUASH??? patches on top of your
> topic while waiting for an updated version to unbreak the CI.
>
>  * The "end of git time" timestamp does not fit in time_t on 32-bit
>    systems, so I updated it to use timestamp_t at least for now.
>
>  * t0006 has two tests that use TIME_IS_64BIT,TIME_T_IS_64BIT
>    prerequisites; I introduced HAVE_64BIT_TIME to simplify them.
>
>  * nobody passes $4 to check_parse to tell it to expect a failure,
>    so I removed it.  It always expects success.
>
>  * check_parse now honors a global variable REQUIRE_64BIT_TIME that
>    is used as the prerequisite to run its test_expect_success; the
>    "near the end of git time" tests you added use the mechanism to
>    pass HAVE_64BIT_TIME prerequisite.
>
> The last one is a bit questionable, as it only "punts" on 32-bit
> systems, instead of making sure we get the expected error messages.
> I think it is OK to punt here and have a separate test that checks
> timestamp around year 2040 for that condition.
>
>  date.c          |  2 +-
>  t/t0006-date.sh | 20 ++++++++++++++------
>  2 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/date.c b/date.c
> index 95776c8a92..bee9fe8f10 100644
> --- a/date.c
> +++ b/date.c
> @@ -870,7 +870,7 @@ static int match_object_header_date(const char *date, timestamp_t *timestamp, in
>
>
>  /* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
> -static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
> +static const timestamp_t timestamp_max = (((timestamp_t)2100 - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
>
>  /* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
>     (i.e. English) day/month names, and it doesn't work correctly with %z. */
> diff --git a/t/t0006-date.sh b/t/t0006-date.sh
> index e8fdf361ad..fd373e1b39 100755
> --- a/t/t0006-date.sh
> +++ b/t/t0006-date.sh
> @@ -8,6 +8,11 @@ TEST_PASSES_SANITIZE_LEAK=true
>  # arbitrary reference time: 2009-08-30 19:20:00
>  GIT_TEST_DATE_NOW=1251660000; export GIT_TEST_DATE_NOW
>
> +if test_have_prereq TIME_IS_64BIT,TIME_T_IS_64BIT
> +then
> +	test_set_prereq HAVE_64BIT_TIME
> +fi
> +

This does make sense, I did check and noticed that the two are always
used together everywhere, so outside of these patches, it perhaps would
also make sense to combine them altogether.

>  check_relative() {
>  	t=$(($GIT_TEST_DATE_NOW - $1))
>  	echo "$t -> $2" >expect
> @@ -80,14 +85,15 @@ check_show raw "$TIME" '1466000000 -0200'
>
>  # arbitrary time absurdly far in the future
>  FUTURE="5758122296 -0400"
> -check_show iso       "$FUTURE" "2152-06-19 18:24:56 -0400" TIME_IS_64BIT,TIME_T_IS_64BIT
> -check_show iso-local "$FUTURE" "2152-06-19 22:24:56 +0000" TIME_IS_64BIT,TIME_T_IS_64BIT
> +check_show iso       "$FUTURE" "2152-06-19 18:24:56 -0400" HAVE_64BIT_TIME
> +check_show iso-local "$FUTURE" "2152-06-19 22:24:56 +0000" HAVE_64BIT_TIME
>
> -check_parse() {
> +REQUIRE_64BIT_TIME=
> +check_parse () {
>  	echo "$1 -> $2" >expect
> -	test_expect_${4:-success} "parse date ($1${3:+ TZ=$3})" "
> -	TZ=${3:-$TZ} test-tool date parse '$1' >actual &&
> -	test_cmp expect actual
> +	test_expect_success $REQUIRE_64BIT_TIME "parse date ($1${3:+ TZ=$3}) -> $2" "
> +		TZ=${3:-$TZ} test-tool date parse '$1' >actual &&
> +		test_cmp expect actual
>  	"
>  }
>
> @@ -133,6 +139,7 @@ check_parse '1969-12-31 23:59:59 Z' bad
>  check_parse '1969-12-31 23:59:59 +11' bad
>  check_parse '1969-12-31 23:59:59 -11' bad
>
> +REQUIRE_64BIT_TIME=HAVE_64BIT_TIME
>  check_parse '2099-12-31 23:59:59' '2099-12-31 23:59:59 +0000'
>  check_parse '2099-12-31 23:59:59 +00' '2099-12-31 23:59:59 +0000'
>  check_parse '2099-12-31 23:59:59 Z' '2099-12-31 23:59:59 +0000'
> @@ -147,6 +154,7 @@ check_parse '2100-00-00 00:00:00 +00' bad
>  check_parse '2100-00-00 00:00:00 Z' bad
>  check_parse '2100-00-00 00:00:00 -11' bad
>  check_parse '2100-00-00 00:00:00 +11' bad
> +REQUIRE_64BIT_TIME=
>
>  check_approxidate() {
>  	echo "$1 -> $2 +0000" >expect

I think this patch looks good. I also think we should probably look into
fixing the 2099 limit overall. I think Jeff already posted a patch to do
the same already:

https://lore.kernel.org/r/20240604093345.GA1279521@coredump.intra.peff.net
Karthik Nayak June 15, 2024, 11:49 a.m. UTC | #14
Junio C Hamano <gitster@pobox.com> writes:

> Karthik Nayak <karthik.188@gmail.com> writes:
>
>>> It's even OK to use a hard coded constant for the number of days
>>> since the epoch to the git-end-of-time ;-)
>>
>> That's why I noted it as a _Nit_, mostly because it wasn't anything big.
>> But I found that part of it being dynamic and part of it being static
>> was inconsistent.
>
> Sure, but it is so tiny thing, we shouldn't waste more time than we
> spend getting the tests right even on 32-bit systems.  We seem to be
> doing the opposite by talking about this part even more, which is a
> bit sad.  Any comments on the actual patch I sent as a follow-up?
>

Agreed! I looked at your patch and it looks good to me, thanks Junio.
diff mbox series

Patch

diff --git a/date.c b/date.c
index 7365a4ad24f..95776c8a92f 100644
--- a/date.c
+++ b/date.c
@@ -868,6 +868,10 @@  static int match_object_header_date(const char *date, timestamp_t *timestamp, in
 	return 0;
 }
 
+
+/* timestamp of 2099-12-31T23:59:59Z, including 32 leap days */
+static const time_t timestamp_max = ((2100L - 1970) * 365 + 32) * 24 * 60 * 60 - 1;
+
 /* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
    (i.e. English) day/month names, and it doesn't work correctly with %z. */
 int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset)
@@ -937,8 +941,14 @@  int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset)
 		}
 	}
 
-	if (!tm_gmt)
+	if (!tm_gmt) {
+		if (*offset > 0 && *offset * 60 > *timestamp)
+			return -1;
+		if (*offset < 0 && -*offset * 60 > timestamp_max - *timestamp)
+			return -1;
 		*timestamp -= *offset * 60;
+	}
+
 	return 0; /* success */
 }
 
diff --git a/t/t0006-date.sh b/t/t0006-date.sh
index 3031256d143..e8fdf361ad4 100755
--- a/t/t0006-date.sh
+++ b/t/t0006-date.sh
@@ -117,6 +117,37 @@  check_parse '2008-02-14 20:30:45 -05:00' '2008-02-14 20:30:45 -0500'
 check_parse '2008-02-14 20:30:45' '2008-02-14 20:30:45 -0500' EST5
 check_parse 'Thu, 7 Apr 2005 15:14:13 -0700' '2005-04-07 15:14:13 -0700'
 
+check_parse '1970-01-01 00:00:00' '1970-01-01 00:00:00 +0000'
+check_parse '1970-01-01 00:00:00 +00' '1970-01-01 00:00:00 +0000'
+check_parse '1970-01-01 00:00:00 Z' '1970-01-01 00:00:00 +0000'
+check_parse '1970-01-01 00:00:00 -01' '1970-01-01 00:00:00 -0100'
+check_parse '1970-01-01 00:00:00 +01' bad
+check_parse '1970-01-01 00:00:00 +11' bad
+check_parse '1970-01-01 00:59:59 +01' bad
+check_parse '1970-01-01 01:00:00 +01' '1970-01-01 01:00:00 +0100'
+check_parse '1970-01-01 01:00:00 +11' bad
+check_parse '1970-01-02 00:00:00 +11' '1970-01-02 00:00:00 +1100'
+check_parse '1969-12-31 23:59:59' bad
+check_parse '1969-12-31 23:59:59 +00' bad
+check_parse '1969-12-31 23:59:59 Z' bad
+check_parse '1969-12-31 23:59:59 +11' bad
+check_parse '1969-12-31 23:59:59 -11' bad
+
+check_parse '2099-12-31 23:59:59' '2099-12-31 23:59:59 +0000'
+check_parse '2099-12-31 23:59:59 +00' '2099-12-31 23:59:59 +0000'
+check_parse '2099-12-31 23:59:59 Z' '2099-12-31 23:59:59 +0000'
+check_parse '2099-12-31 23:59:59 +01' '2099-12-31 23:59:59 +0100'
+check_parse '2099-12-31 23:59:59 -01' bad
+check_parse '2099-12-31 23:59:59 -11' bad
+check_parse '2099-12-31 23:00:00 -01' bad
+check_parse '2099-12-31 22:59:59 -01' '2099-12-31 22:59:59 -0100'
+check_parse '2100-00-00 00:00:00' bad
+check_parse '2099-12-30 00:00:00 -11' '2099-12-30 00:00:00 -1100'
+check_parse '2100-00-00 00:00:00 +00' bad
+check_parse '2100-00-00 00:00:00 Z' bad
+check_parse '2100-00-00 00:00:00 -11' bad
+check_parse '2100-00-00 00:00:00 +11' bad
+
 check_approxidate() {
 	echo "$1 -> $2 +0000" >expect
 	test_expect_${3:-success} "parse approxidate ($1)" "