Message ID | d92338467d66fcfedd57f209c97a798e9920d1e5.1629207607.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3,01/25] hash.h: provide constants for the hash IDs | expand |
On 17/08/2021 14:39, Han-Wen Nienhuys via GitGitGadget wrote: > From: Han-Wen Nienhuys <hanwen@google.com> > > This will be needed for reading reflog blocks in reftable. How large might the reftable become? In particular will it exceed the 32bit Long limit on Windows? I ask as the Zlib library is one of (among many) the constraints on beating the 4GB [backward compatibility] size limit from 32 bit Windows. > > Helped-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> > Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> > --- > Makefile | 7 +++ > ci/lib.sh | 1 + > compat/.gitattributes | 1 + > compat/zlib-uncompress2.c | 92 +++++++++++++++++++++++++++++++++++++++ > config.mak.uname | 1 + > configure.ac | 13 ++++++ > 6 files changed, 115 insertions(+) > create mode 100644 compat/.gitattributes > create mode 100644 compat/zlib-uncompress2.c > > diff --git a/Makefile b/Makefile > index e98d8ed17cf..16c883978d4 100644 > --- a/Makefile > +++ b/Makefile > @@ -256,6 +256,8 @@ all:: > # > # Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. > # > +# Define NO_UNCOMPRESS2 if your zlib does not have uncompress2. 
> +# > # Define NO_NORETURN if using buggy versions of gcc 4.6+ and profile feedback, > # as the compiler can crash (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49299) > # > @@ -1738,6 +1740,11 @@ ifdef NO_DEFLATE_BOUND > BASIC_CFLAGS += -DNO_DEFLATE_BOUND > endif > > +ifdef NO_UNCOMPRESS2 > + BASIC_CFLAGS += -DNO_UNCOMPRESS2 > + REFTABLE_OBJS += compat/zlib-uncompress2.o > +endif > + > ifdef NO_POSIX_GOODIES > BASIC_CFLAGS += -DNO_POSIX_GOODIES > endif > diff --git a/ci/lib.sh b/ci/lib.sh > index 476c3f369f5..5711c63979d 100755 > --- a/ci/lib.sh > +++ b/ci/lib.sh > @@ -224,6 +224,7 @@ linux-gcc-default) > ;; > Linux32) > CC=gcc > + MAKEFLAGS="$MAKEFLAGS NO_UNCOMPRESS2=1" > ;; > linux-musl) > CC=gcc > diff --git a/compat/.gitattributes b/compat/.gitattributes > new file mode 100644 > index 00000000000..40dbfb170da > --- /dev/null > +++ b/compat/.gitattributes > @@ -0,0 +1 @@ > +/zlib-uncompress2.c whitespace=-indent-with-non-tab,-trailing-space > diff --git a/compat/zlib-uncompress2.c b/compat/zlib-uncompress2.c > new file mode 100644 > index 00000000000..6893bb469ce > --- /dev/null > +++ b/compat/zlib-uncompress2.c > @@ -0,0 +1,92 @@ > +/* taken from zlib's uncompr.c > + > + commit cacf7f1d4e3d44d871b605da3b647f07d718623f > + Author: Mark Adler <madler@alumni.caltech.edu> > + Date: Sun Jan 15 09:18:46 2017 -0800 > + > + zlib 1.2.11 > + > +*/ > + > +/* > + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler > + * For conditions of distribution and use, see copyright notice in zlib.h > + */ > + > +#include <zlib.h> > + > +/* clang-format off */ > + > +/* =========================================================================== > + Decompresses the source buffer into the destination buffer. *sourceLen is > + the byte length of the source buffer. Upon entry, *destLen is the total size > + of the destination buffer, which must be large enough to hold the entire > + uncompressed data. 
(The size of the uncompressed data must have been saved > + previously by the compressor and transmitted to the decompressor by some > + mechanism outside the scope of this compression library.) Upon exit, > + *destLen is the size of the decompressed data and *sourceLen is the number > + of source bytes consumed. Upon return, source + *sourceLen points to the > + first unused input byte. > + > + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough > + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or > + Z_DATA_ERROR if the input data was corrupted, including if the input data is > + an incomplete zlib stream. > +*/ > +int ZEXPORT uncompress2 ( > + Bytef *dest, > + uLongf *destLen, > + const Bytef *source, > + uLong *sourceLen) { Length is limited to 4GB on Windows (the pointer itself will be implicitly size_t, but ...) > + z_stream stream; > + int err; > + const uInt max = (uInt)-1; > + uLong len, left; > + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ > + > + len = *sourceLen; > + if (*destLen) { > + left = *destLen; > + *destLen = 0; > + } > + else { > + left = 1; > + dest = buf; > + } > + > + stream.next_in = (z_const Bytef *)source; > + stream.avail_in = 0; > + stream.zalloc = (alloc_func)0; > + stream.zfree = (free_func)0; > + stream.opaque = (voidpf)0; > + > + err = inflateInit(&stream); > + if (err != Z_OK) return err; > + > + stream.next_out = dest; > + stream.avail_out = 0; > + > + do { > + if (stream.avail_out == 0) { > + stream.avail_out = left > (uLong)max ? max : (uInt)left; > + left -= stream.avail_out; > + } > + if (stream.avail_in == 0) { > + stream.avail_in = len > (uLong)max ? 
max : (uInt)len; > + len -= stream.avail_in; > + } > + err = inflate(&stream, Z_NO_FLUSH); > + } while (err == Z_OK); > + > + *sourceLen -= len + stream.avail_in; > + if (dest != buf) > + *destLen = stream.total_out; > + else if (stream.total_out && err == Z_BUF_ERROR) > + left = 1; > + > + inflateEnd(&stream); > + return err == Z_STREAM_END ? Z_OK : > + err == Z_NEED_DICT ? Z_DATA_ERROR : > + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : > + err; > +} > diff --git a/config.mak.uname b/config.mak.uname > index 69413fb3dc0..61e11550b1f 100644 > --- a/config.mak.uname > +++ b/config.mak.uname > @@ -256,6 +256,7 @@ ifeq ($(uname_S),FreeBSD) > FILENO_IS_A_MACRO = UnfortunatelyYes > endif > ifeq ($(uname_S),OpenBSD) > + NO_UNCOMPRESS2 = YesPlease > NO_STRCASESTR = YesPlease > NO_MEMMEM = YesPlease > USE_ST_TIMESPEC = YesPlease > diff --git a/configure.ac b/configure.ac > index 031e8d3fee8..c3a913103d0 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -672,9 +672,22 @@ AC_LINK_IFELSE([ZLIBTEST_SRC], > NO_DEFLATE_BOUND=yes]) > LIBS="$old_LIBS" > > +AC_DEFUN([ZLIBTEST_UNCOMPRESS2_SRC], [ > +AC_LANG_PROGRAM([#include <zlib.h>], > + [uncompress2(NULL,NULL,NULL,NULL);])]) > +AC_MSG_CHECKING([for uncompress2 in -lz]) > +old_LIBS="$LIBS" > +LIBS="$LIBS -lz" > +AC_LINK_IFELSE([ZLIBTEST_UNCOMPRESS2_SRC], > + [AC_MSG_RESULT([yes])], > + [AC_MSG_RESULT([no]) > + NO_UNCOMPRESS2=yes]) > +LIBS="$old_LIBS" > + > GIT_UNSTASH_FLAGS($ZLIB_PATH) > > GIT_CONF_SUBST([NO_DEFLATE_BOUND]) > +GIT_CONF_SUBST([NO_UNCOMPRESS2]) > > # > # Define NEEDS_SOCKET if linking with libc is not enough (SunOS, -- Philip
On Wed, Aug 18, 2021 at 12:14 PM Philip Oakley <philipoakley@iee.email> wrote:
>
> On 17/08/2021 14:39, Han-Wen Nienhuys via GitGitGadget wrote:
> > From: Han-Wen Nienhuys <hanwen@google.com>
> >
> > This will be needed for reading reflog blocks in reftable.
>
> How large might the reftable become? In particular will it exceed the
> 32bit Long limit on Windows?

A reftable file is organized as a sequence of blocks. Each block has
max size 2^24, and the zlib compression is done per-block. So zlib
should not introduce a new 4G limit. The intra-file offsets for the
blocks themselves are encoded either as varint (index values) or as
uint64 (file footer), so reftable files beyond 4G should be no
problem.
On 18/08/2021 11:39, Han-Wen Nienhuys wrote:
> On Wed, Aug 18, 2021 at 12:14 PM Philip Oakley <philipoakley@iee.email> wrote:
>> On 17/08/2021 14:39, Han-Wen Nienhuys via GitGitGadget wrote:
>>> From: Han-Wen Nienhuys <hanwen@google.com>
>>>
>>> This will be needed for reading reflog blocks in reftable.
>> How large might the reftable become? In particular will it exceed the
>> 32bit Long limit on Windows?
> A reftable file is organized as a sequence of blocks. Each block has
> max size 2^24, and the zlib compression is done per-block. So zlib
> should not introduce a new 4G limit. The intra-file offsets for the
> blocks themselves are encoded either as varint (index values) or as
> uint64 (file footer), so reftable files beyond 4G should be no
> problem.
>
Thanks, that's good to know for planning any Git upgrade to cope with 4GB file sizes on Windows.

Philip
diff --git a/Makefile b/Makefile index e98d8ed17cf..16c883978d4 100644 --- a/Makefile +++ b/Makefile @@ -256,6 +256,8 @@ all:: # # Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. # +# Define NO_UNCOMPRESS2 if your zlib does not have uncompress2. +# # Define NO_NORETURN if using buggy versions of gcc 4.6+ and profile feedback, # as the compiler can crash (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49299) # @@ -1738,6 +1740,11 @@ ifdef NO_DEFLATE_BOUND BASIC_CFLAGS += -DNO_DEFLATE_BOUND endif +ifdef NO_UNCOMPRESS2 + BASIC_CFLAGS += -DNO_UNCOMPRESS2 + REFTABLE_OBJS += compat/zlib-uncompress2.o +endif + ifdef NO_POSIX_GOODIES BASIC_CFLAGS += -DNO_POSIX_GOODIES endif diff --git a/ci/lib.sh b/ci/lib.sh index 476c3f369f5..5711c63979d 100755 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -224,6 +224,7 @@ linux-gcc-default) ;; Linux32) CC=gcc + MAKEFLAGS="$MAKEFLAGS NO_UNCOMPRESS2=1" ;; linux-musl) CC=gcc diff --git a/compat/.gitattributes b/compat/.gitattributes new file mode 100644 index 00000000000..40dbfb170da --- /dev/null +++ b/compat/.gitattributes @@ -0,0 +1 @@ +/zlib-uncompress2.c whitespace=-indent-with-non-tab,-trailing-space diff --git a/compat/zlib-uncompress2.c b/compat/zlib-uncompress2.c new file mode 100644 index 00000000000..6893bb469ce --- /dev/null +++ b/compat/zlib-uncompress2.c @@ -0,0 +1,92 @@ +/* taken from zlib's uncompr.c + + commit cacf7f1d4e3d44d871b605da3b647f07d718623f + Author: Mark Adler <madler@alumni.caltech.edu> + Date: Sun Jan 15 09:18:46 2017 -0800 + + zlib 1.2.11 + +*/ + +/* + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include <zlib.h> + +/* clang-format off */ + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. *sourceLen is + the byte length of the source buffer. 
Upon entry, *destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, + *destLen is the size of the decompressed data and *sourceLen is the number + of source bytes consumed. Upon return, source + *sourceLen points to the + first unused input byte. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. +*/ +int ZEXPORT uncompress2 ( + Bytef *dest, + uLongf *destLen, + const Bytef *source, + uLong *sourceLen) { + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? 
Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : + err; +} diff --git a/config.mak.uname b/config.mak.uname index 69413fb3dc0..61e11550b1f 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -256,6 +256,7 @@ ifeq ($(uname_S),FreeBSD) FILENO_IS_A_MACRO = UnfortunatelyYes endif ifeq ($(uname_S),OpenBSD) + NO_UNCOMPRESS2 = YesPlease NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease USE_ST_TIMESPEC = YesPlease diff --git a/configure.ac b/configure.ac index 031e8d3fee8..c3a913103d0 100644 --- a/configure.ac +++ b/configure.ac @@ -672,9 +672,22 @@ AC_LINK_IFELSE([ZLIBTEST_SRC], NO_DEFLATE_BOUND=yes]) LIBS="$old_LIBS" +AC_DEFUN([ZLIBTEST_UNCOMPRESS2_SRC], [ +AC_LANG_PROGRAM([#include <zlib.h>], + [uncompress2(NULL,NULL,NULL,NULL);])]) +AC_MSG_CHECKING([for uncompress2 in -lz]) +old_LIBS="$LIBS" +LIBS="$LIBS -lz" +AC_LINK_IFELSE([ZLIBTEST_UNCOMPRESS2_SRC], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + NO_UNCOMPRESS2=yes]) +LIBS="$old_LIBS" + GIT_UNSTASH_FLAGS($ZLIB_PATH) GIT_CONF_SUBST([NO_DEFLATE_BOUND]) +GIT_CONF_SUBST([NO_UNCOMPRESS2]) # # Define NEEDS_SOCKET if linking with libc is not enough (SunOS,