Message ID | 9839aca00a10b16d96c47db631ac025281ffc864.1576008027.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Directory traversal bugs | expand |
Hi Elijah, I have not had time to dive deeply into this, but I know that it _does_ cause a ton of segmentation faults in the `shears/pu` branch (where all of Git for Windows' patches are rebased on top of `pu`): On Tue, 10 Dec 2019, Elijah Newren via GitGitGadget wrote: > diff --git a/dir.c b/dir.c > index 645b44ea64..9c71a9ac21 100644 > --- a/dir.c > +++ b/dir.c > @@ -2102,37 +2102,69 @@ static int treat_leading_path(struct dir_struct *dir, > const struct pathspec *pathspec) > { > struct strbuf sb = STRBUF_INIT; > - int baselen, rc = 0; > + int prevlen, baselen; > const char *cp; > + struct cached_dir cdir; > + struct dirent de; > + enum path_treatment state = path_none; > + > + /* > + * For each directory component of path, we are going to check whether > + * that path is relevant given the pathspec. For example, if path is > + * foo/bar/baz/ > + * then we will ask treat_path() whether we should go into foo, then > + * whether we should go into bar, then whether baz is relevant. > + * Checking each is important because e.g. if path is > + * .git/info/ > + * then we need to check .git to know we shouldn't traverse it. > + * If the return from treat_path() is: > + * * path_none, for any path, we return false. > + * * path_recurse, for all path components, we return true > + * * <anything else> for some intermediate component, we make sure > + * to add that path to the relevant list but return false > + * signifying that we shouldn't recurse into it. > + */ > > while (len && path[len - 1] == '/') > len--; > if (!len) > return 1; > + > + memset(&cdir, 0, sizeof(cdir)); > + memset(&de, 0, sizeof(de)); > + cdir.de = &de; > + de.d_type = DT_DIR; So here, `de` is zeroed out, and therefore `de.d_name` is `NULL`. > baselen = 0; > + prevlen = 0; > while (1) { > - cp = path + baselen + !!baselen; > + prevlen = baselen + !!baselen; > + cp = path + prevlen; > cp = memchr(cp, '/', path + len - cp); > if (!cp) > baselen = len; > else > baselen = cp - path; > - strbuf_setlen(&sb, 0); > + strbuf_reset(&sb); > strbuf_add(&sb, path, baselen); > if (!is_directory(sb.buf)) > break; > - if (simplify_away(sb.buf, sb.len, pathspec)) > - break; > - if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec, > - DT_DIR, NULL) == path_none) > + strbuf_reset(&sb); > + strbuf_add(&sb, path, prevlen); > + memcpy(de.d_name, path+prevlen, baselen-prevlen); But here we try to copy a path into that `de.d_name`, which is still `NULL`? That can't be right, can it? Thanks for your help, Dscho > + de.d_name[baselen-prevlen] = '\0'; > + state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, > + pathspec); > + if (state != path_recurse) > break; /* do not recurse into it */ > - if (len <= baselen) { > - rc = 1; > + if (len <= baselen) > break; /* finished checking */ > - } > } > + add_path_to_appropriate_result_list(dir, NULL, &cdir, istate, > + &sb, baselen, pathspec, > + state); > + > strbuf_release(&sb); > - return rc; > + return state == path_recurse; > } > > static const char *get_ident_string(void) > diff --git a/t/t3011-common-prefixes-and-directory-traversal.sh b/t/t3011-common-prefixes-and-directory-traversal.sh > index d6e161ddd8..098fddc75b 100755 > --- a/t/t3011-common-prefixes-and-directory-traversal.sh > +++ b/t/t3011-common-prefixes-and-directory-traversal.sh > @@ -74,7 +74,7 @@ test_expect_success 'git ls-files -o --directory untracked_dir does not recurse' > test_cmp expect actual > ' > > -test_expect_failure 'git ls-files -o --directory untracked_dir/ does not recurse' ' > +test_expect_success 'git ls-files -o --directory untracked_dir/ does not recurse' ' > echo untracked_dir/ >expect && > git ls-files -o --directory untracked_dir/ >actual && > test_cmp expect actual > @@ -86,7 +86,7 @@ test_expect_success 'git ls-files -o untracked_repo does not recurse' ' > test_cmp expect actual > ' > > -test_expect_failure 'git ls-files -o untracked_repo/ does not recurse' ' > +test_expect_success 'git ls-files -o untracked_repo/ does not recurse' ' > echo untracked_repo/ >expect && > git ls-files -o untracked_repo/ >actual && > test_cmp expect actual > @@ -133,7 +133,7 @@ test_expect_success 'git ls-files -o .git shows nothing' ' > test_must_be_empty actual > ' > > -test_expect_failure 'git ls-files -o .git/ shows nothing' ' > +test_expect_success 'git ls-files -o .git/ shows nothing' ' > git ls-files -o .git/ >actual && > test_must_be_empty actual > ' > -- > gitgitgadget > > >
On Sun, Dec 15, 2019 at 2:29 AM Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote: > > Hi Elijah, > > I have not had time to dive deeply into this, but I know that it _does_ > cause a ton of segmentation faults in the `shears/pu` branch (where all of > Git for Windows' patches are rebased on top of `pu`): Weird. If it's going to cause segmentation faults at all, it would certainly do it all over the place, but I tested the patches on the major platforms using your Azure Pipelines setup on git.git so it should be good on all the platforms. Did your shears/pu branch make some other changes to the setup? > On Tue, 10 Dec 2019, Elijah Newren via GitGitGadget wrote: > > > diff --git a/dir.c b/dir.c > > index 645b44ea64..9c71a9ac21 100644 > > --- a/dir.c > > +++ b/dir.c > > @@ -2102,37 +2102,69 @@ static int treat_leading_path(struct dir_struct *dir, > > const struct pathspec *pathspec) > > { > > struct strbuf sb = STRBUF_INIT; > > - int baselen, rc = 0; > > + int prevlen, baselen; > > const char *cp; > > + struct cached_dir cdir; > > + struct dirent de; > > + enum path_treatment state = path_none; > > + > > + /* > > + * For each directory component of path, we are going to check whether > > + * that path is relevant given the pathspec. For example, if path is > > + * foo/bar/baz/ > > + * then we will ask treat_path() whether we should go into foo, then > > + * whether we should go into bar, then whether baz is relevant. > > + * Checking each is important because e.g. if path is > > + * .git/info/ > > + * then we need to check .git to know we shouldn't traverse it. > > + * If the return from treat_path() is: > > + * * path_none, for any path, we return false. > > + * * path_recurse, for all path components, we return true > > + * * <anything else> for some intermediate component, we make sure > > + * to add that path to the relevant list but return false > > + * signifying that we shouldn't recurse into it. > > + */ > > > > while (len && path[len - 1] == '/') > > len--; > > if (!len) > > return 1; > > + > > + memset(&cdir, 0, sizeof(cdir)); > > + memset(&de, 0, sizeof(de)); > > + cdir.de = &de; > > + de.d_type = DT_DIR; > > So here, `de` is zeroed out, and therefore `de.d_name` is `NULL`. Um, yeah...didn't I have an allocation of de.d_name here? It will always have a subset of path copied into it, so an allocation of len+1 is plenty long enough. > > baselen = 0; > > + prevlen = 0; > > while (1) { > > - cp = path + baselen + !!baselen; > > + prevlen = baselen + !!baselen; > > + cp = path + prevlen; > > cp = memchr(cp, '/', path + len - cp); > > if (!cp) > > baselen = len; > > else > > baselen = cp - path; > > - strbuf_setlen(&sb, 0); > > + strbuf_reset(&sb); > > strbuf_add(&sb, path, baselen); > > if (!is_directory(sb.buf)) > > break; > > - if (simplify_away(sb.buf, sb.len, pathspec)) > > - break; > > - if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec, > > - DT_DIR, NULL) == path_none) > > + strbuf_reset(&sb); > > + strbuf_add(&sb, path, prevlen); > > + memcpy(de.d_name, path+prevlen, baselen-prevlen); > > But here we try to copy a path into that `de.d_name`, which is still > `NULL`? > > That can't be right, can it? Yes, it can't be right. How did this possibly pass on any platform let alone all of them? (https://dev.azure.com/git/git/_build/results?buildId=1462&view=results). This is absolutely an important codepath that is hit; otherwise it couldn't fix the three tests from failure to success. Further, the subsequent patch added code within this if-block after this memcpy and fixed a few tests from failures to success. So it had to hit this code path as well. How could it not have segfaulted? I'm very confused...
On Mon, Dec 16, 2019 at 5:51 AM Elijah Newren <newren@gmail.com> wrote: > > On Sun, Dec 15, 2019 at 2:29 AM Johannes Schindelin > <Johannes.Schindelin@gmx.de> wrote: > > > > Hi Elijah, > > > > I have not had time to dive deeply into this, but I know that it _does_ > > cause a ton of segmentation faults in the `shears/pu` branch (where all of > > Git for Windows' patches are rebased on top of `pu`): > > Weird. If it's going to cause segmentation faults at all, it would > certainly do it all over the place, but I tested the patches on the > major platforms using your Azure Pipelines setup on git.git so it > should be good on all the platforms. Did your shears/pu branch make > some other changes to the setup? > > > On Tue, 10 Dec 2019, Elijah Newren via GitGitGadget wrote: > > > > > diff --git a/dir.c b/dir.c > > > index 645b44ea64..9c71a9ac21 100644 > > > --- a/dir.c > > > +++ b/dir.c > > > @@ -2102,37 +2102,69 @@ static int treat_leading_path(struct dir_struct *dir, > > > const struct pathspec *pathspec) > > > { > > > struct strbuf sb = STRBUF_INIT; > > > - int baselen, rc = 0; > > > + int prevlen, baselen; > > > const char *cp; > > > + struct cached_dir cdir; > > > + struct dirent de; > > > + enum path_treatment state = path_none; > > > + > > > + /* > > > + * For each directory component of path, we are going to check whether > > > + * that path is relevant given the pathspec. For example, if path is > > > + * foo/bar/baz/ > > > + * then we will ask treat_path() whether we should go into foo, then > > > + * whether we should go into bar, then whether baz is relevant. > > > + * Checking each is important because e.g. if path is > > > + * .git/info/ > > > + * then we need to check .git to know we shouldn't traverse it. > > > + * If the return from treat_path() is: > > > + * * path_none, for any path, we return false. > > > + * * path_recurse, for all path components, we return true > > > + * * <anything else> for some intermediate component, we make sure > > > + * to add that path to the relevant list but return false > > > + * signifying that we shouldn't recurse into it. > > > + */ > > > > > > while (len && path[len - 1] == '/') > > > len--; > > > if (!len) > > > return 1; > > > + > > > + memset(&cdir, 0, sizeof(cdir)); > > > + memset(&de, 0, sizeof(de)); > > > + cdir.de = &de; > > > + de.d_type = DT_DIR; > > > > So here, `de` is zeroed out, and therefore `de.d_name` is `NULL`. > > Um, yeah...didn't I have an allocation of de.d_name here? It will > always have a subset of path copied into it, so an allocation of len+1 > is plenty long enough. Actually, it looks like I looked up the definition of dirent previously and forgot by the time you emailed. On linux, from /usr/include/bits/dirent.h: struct dirent { .... unsigned char d_type; char d_name[256]; /* We must not include limits.h! */ }; and from compat/win32/dirent.h defines it as: struct dirent { unsigned char d_type; /* file type to prevent lstat after readdir */ char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ }; and 'man dirent' on Mac OS X says it's defined as: struct dirent { ... _uint8_t d_type; _unit8_t d_namlen; /* length of string in d_name */ char d_name[255+1]; /* name must be no longer than this */ } so, allocating it would be incorrect and my memset would just fill d_name with nul characters. But the raises the question...what kind of segfaults are you getting? Can you link to any builds or post any stack traces? Can I duplicate with some copy of git-for-windows on linux?
Elijah Newren <newren@gmail.com> writes: >> > > + memset(&cdir, 0, sizeof(cdir)); >> > > + memset(&de, 0, sizeof(de)); >> > > + cdir.de = &de; >> > > + de.d_type = DT_DIR; >> > >> > So here, `de` is zeroed out, and therefore `de.d_name` is `NULL`. >> >> Um, yeah...didn't I have an allocation of de.d_name here? It will >> always have a subset of path copied into it, so an allocation of len+1 >> is plenty long enough. > > Actually, it looks like I looked up the definition of dirent > previously and forgot by the time you emailed. On linux, from > /usr/include/bits/dirent.h: > > struct dirent > { > .... > unsigned char d_type; > char d_name[256]; /* We must not include limits.h! */ > }; > > ... Uh, oh. The size of "struct dirent" is unspecified and it is asking for trouble to allocate one yourself (iow, treat it pretty much as something you can only get a pointer to an instance from readdir()). For example, a dirent that comes back readdir() may have a lot longer name than the sizeof(.d_name[]) above may imply. Do you really need to manufacture a dirent yourself, or can you use a more concrete type you invent yourself?
On Mon, Dec 16, 2019 at 10:13 AM Junio C Hamano <gitster@pobox.com> wrote: > > Elijah Newren <newren@gmail.com> writes: > > >> > > + memset(&cdir, 0, sizeof(cdir)); > >> > > + memset(&de, 0, sizeof(de)); > >> > > + cdir.de = &de; > >> > > + de.d_type = DT_DIR; > >> > > >> > So here, `de` is zeroed out, and therefore `de.d_name` is `NULL`. > >> > >> Um, yeah...didn't I have an allocation of de.d_name here? It will > >> always have a subset of path copied into it, so an allocation of len+1 > >> is plenty long enough. > > > > Actually, it looks like I looked up the definition of dirent > > previously and forgot by the time you emailed. On linux, from > > /usr/include/bits/dirent.h: > > > > struct dirent > > { > > .... > > unsigned char d_type; > > char d_name[256]; /* We must not include limits.h! */ > > }; > > > > ... > > Uh, oh. The size of "struct dirent" is unspecified and it is asking > for trouble to allocate one yourself (iow, treat it pretty much as > something you can only get a pointer to an instance from readdir()). > For example, a dirent that comes back readdir() may have a lot > longer name than the sizeof(.d_name[]) above may imply. > > Do you really need to manufacture a dirent yourself, or can you use > a more concrete type you invent yourself? I need to manufacture a dirent myself; short of that, the most likely alternative is to drop patches 2 & 5-8 of this series and throw my hands in the air and give up. That probably deserves an explanation... Years ago someone noticed that if a user ran "git ls-files -o foo/bar/one foo/bar/two", that we could try to optimize by noticing that we won't be interested in anything until we get to foo/bar/. So, they tried to short-circuit the read_directory_recursive() and readdir() calls, but couldn't reuse the same treat_path() logic to check that we should even go into foo/bar/ at all. So there was some copy & paste from treat_path() into a new treat_leading_path()...and that both missed some important parts and the logic further diverged over time. This patch was about categorizing the suite of bugs that arose from not using treat_path() for checks from both codepaths, and tried to correct those problems. treat_path() takes a dirent, and several of the functions it calls all take a dirent. It'd be an awful lot of work to rip it out. So I manufactured a dirent myself so that we could use the same codepaths and not only fix all these bugs but prevent future ones as well. If we can't manufacture a dirent, then unless someone else has some bright ideas about something clever we can do, then I think this problem blows up in complexity to a level where I don't think it's worth addressing. I almost ripped the optimization out altogether (just how much do we really save by not looking into the leading two directories?), except that unpack_trees() calls into the same code with a leading path and I didn't want to mess with that. Any bright ideas about what to do here?
Elijah Newren <newren@gmail.com> writes:
> Any bright ideas about what to do here?
Restructuring the code so that we do not use "struct dirent" in the
first place, even in the original code that used only those obtained
from readdir(), perhaps? Then the codepath that would take the _thing_
that describes the diretory entry would expect to see the data in
the struct you define (not "struct dirent" from the system), and you
can safely manufacture ones out of thin air.
On Mon, Dec 16, 2019 at 1:25 PM Junio C Hamano <gitster@pobox.com> wrote: > > Elijah Newren <newren@gmail.com> writes: > > > Any bright ideas about what to do here? > > Restructuring the code so that we do not use "struct dirent" in the > first place, even in the original code that used only those obtained > from readdir(), perhaps? Then the codepath that would take the _thing_ > that describes the diretory entry would expect to see the data in > the struct you define (not "struct dirent" from the system), and you > can safely manufacture ones out of thin air. Okay, I'll submit a new series dropping most the patches, but note this thread in the commit message of the new testcases in case someone (maybe my future self?) wants to dig further.
Hi Elijah, On Mon, 16 Dec 2019, Elijah Newren wrote: > On Mon, Dec 16, 2019 at 5:51 AM Elijah Newren <newren@gmail.com> wrote: > > > > On Sun, Dec 15, 2019 at 2:29 AM Johannes Schindelin > > <Johannes.Schindelin@gmx.de> wrote: > > > > > > Hi Elijah, > > > > > > I have not had time to dive deeply into this, but I know that it _does_ > > > cause a ton of segmentation faults in the `shears/pu` branch (where all of > > > Git for Windows' patches are rebased on top of `pu`): > > > > Weird. If it's going to cause segmentation faults at all, it would > > certainly do it all over the place, but I tested the patches on the > > major platforms using your Azure Pipelines setup on git.git so it > > should be good on all the platforms. Did your shears/pu branch make > > some other changes to the setup? Not really. > > > On Tue, 10 Dec 2019, Elijah Newren via GitGitGadget wrote: > > > > > > > diff --git a/dir.c b/dir.c > > > > index 645b44ea64..9c71a9ac21 100644 > > > > --- a/dir.c > > > > +++ b/dir.c > > > > @@ -2102,37 +2102,69 @@ static int treat_leading_path(struct dir_struct *dir, > > > > const struct pathspec *pathspec) > > > > { > > > > struct strbuf sb = STRBUF_INIT; > > > > - int baselen, rc = 0; > > > > + int prevlen, baselen; > > > > const char *cp; > > > > + struct cached_dir cdir; > > > > + struct dirent de; > > > > + enum path_treatment state = path_none; > > > > + > > > > + /* > > > > + * For each directory component of path, we are going to check whether > > > > + * that path is relevant given the pathspec. For example, if path is > > > > + * foo/bar/baz/ > > > > + * then we will ask treat_path() whether we should go into foo, then > > > > + * whether we should go into bar, then whether baz is relevant. > > > > + * Checking each is important because e.g. if path is > > > > + * .git/info/ > > > > + * then we need to check .git to know we shouldn't traverse it. > > > > + * If the return from treat_path() is: > > > > + * * path_none, for any path, we return false. > > > > + * * path_recurse, for all path components, we return true > > > > + * * <anything else> for some intermediate component, we make sure > > > > + * to add that path to the relevant list but return false > > > > + * signifying that we shouldn't recurse into it. > > > > + */ > > > > > > > > while (len && path[len - 1] == '/') > > > > len--; > > > > if (!len) > > > > return 1; > > > > + > > > > + memset(&cdir, 0, sizeof(cdir)); > > > > + memset(&de, 0, sizeof(de)); > > > > + cdir.de = &de; > > > > + de.d_type = DT_DIR; > > > > > > So here, `de` is zeroed out, and therefore `de.d_name` is `NULL`. > > > > Um, yeah...didn't I have an allocation of de.d_name here? It will > > always have a subset of path copied into it, so an allocation of len+1 > > is plenty long enough. > > Actually, it looks like I looked up the definition of dirent > previously and forgot by the time you emailed. On linux, from > /usr/include/bits/dirent.h: > > struct dirent > { > .... > unsigned char d_type; > char d_name[256]; /* We must not include limits.h! */ > }; > > and from compat/win32/dirent.h defines it as: > > struct dirent { > unsigned char d_type; /* file type to prevent lstat after > readdir */ > char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ > }; > > and 'man dirent' on Mac OS X says it's defined as: > > struct dirent { > ... > _uint8_t d_type; > _unit8_t d_namlen; /* length of string in d_name */ > char d_name[255+1]; /* name must be no longer than this */ > } > > so, allocating it would be incorrect and my memset would just fill > d_name with nul characters. > > > But the raises the question...what kind of segfaults are you getting? > Can you link to any builds or post any stack traces? Can I duplicate > with some copy of git-for-windows on linux? If you care to look at our very own `compat/win32/dirent.h`, you will see this: struct dirent { unsigned char d_type; /* file type to prevent lstat after readdir */ char *d_name; /* file name */ }; And looking at https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html, I do not see any guarantee of that `[256]` at all: The <dirent.h> header shall [...] define the structure dirent which shall include the following members: [XSI][Option Start] ino_t d_ino File serial number. [Option End] char d_name[] Filename string of entry. You will notice that not even `d_type` is guaranteed. Ciao, Dscho
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: > If you care to look at our very own `compat/win32/dirent.h`, you will see > this: > > struct dirent { > unsigned char d_type; /* file type to prevent lstat after readdir */ > char *d_name; /* file name */ > }; > > And looking at > https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html, I > do not see any guarantee of that `[256]` at all: > > The <dirent.h> header shall [...] define the structure dirent which shall > include the following members: > > [XSI][Option Start] > ino_t d_ino File serial number. > [Option End] > char d_name[] Filename string of entry. > > You will notice that not even `d_type` is guaranteed. I am reasonably sure that the code (without Elijah's patches anyway) takes the possibility of missing d_type into account already. Doesn't the above mean d_name[] has to be an in-place array of some size (i.e. even a flex-array is OK)? It does not look to me that it allows for it to be a pointer pointing at elsewhere (possibly on heap), which may be asking for trouble.
Hi Dscho, On Mon, Dec 16, 2019 at 4:04 PM Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote: > On Mon, 16 Dec 2019, Elijah Newren wrote: > > On Mon, Dec 16, 2019 at 5:51 AM Elijah Newren <newren@gmail.com> wrote: > > > > > > On Sun, Dec 15, 2019 at 2:29 AM Johannes Schindelin > > > <Johannes.Schindelin@gmx.de> wrote: > > > > > > > > Hi Elijah, > > > > > > > > I have not had time to dive deeply into this, but I know that it _does_ > > > > cause a ton of segmentation faults in the `shears/pu` branch (where all of > > > > Git for Windows' patches are rebased on top of `pu`): > > > > > > Weird. If it's going to cause segmentation faults at all, it would > > > certainly do it all over the place, but I tested the patches on the > > > major platforms using your Azure Pipelines setup on git.git so it > > > should be good on all the platforms. Did your shears/pu branch make > > > some other changes to the setup? > > Not really. > > > > > Actually, it looks like I looked up the definition of dirent > > previously and forgot by the time you emailed. On linux, from > > /usr/include/bits/dirent.h: ... > > and from compat/win32/dirent.h defines it as: > > > > struct dirent { > > unsigned char d_type; /* file type to prevent lstat after > > readdir */ > > char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ > > }; ... > > If you care to look at our very own `compat/win32/dirent.h`, you will see > this: Interesting, we both brought up compat/win32/dirent.h and quoted from it in our emails... > struct dirent { > unsigned char d_type; /* file type to prevent lstat after readdir */ > char *d_name; /* file name */ > }; ...but the contents were different? Looks like git-for-windows forked compat/win32/dirent.h, possibly in a way that violates POSIX as pointed out by Junio. Any reason those changes weren't sent back upstream, by chance? Feels odd having a compat/win32/ directory that our downstream windows users aren't actually using. It also means the testing I'm getting from gitgitgadget and your Azure setup (which all is really, really nice by the way), is far less reassuring and helpful than I hoped. > And looking at > https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html, I > do not see any guarantee of that `[256]` at all: > > The <dirent.h> header shall [...] define the structure dirent which shall > include the following members: > > [XSI][Option Start] > ino_t d_ino File serial number. > [Option End] > char d_name[] Filename string of entry. > > You will notice that not even `d_type` is guaranteed. Doh, yeah, I messed that up too. Anyway, as I mentioned to Junio, I'll resubmit after gutting the series. I'll still include a fix for the issue that a real world user reported, but all the other ancillary bugs I found that have been around for over a decade aren't important enough to merit a major refactor, IMO. Elijah
Hi Junio, On Mon, 16 Dec 2019, Junio C Hamano wrote: > Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: > > > If you care to look at our very own `compat/win32/dirent.h`, you will see > > this: > > > > struct dirent { > > unsigned char d_type; /* file type to prevent lstat after readdir */ > > char *d_name; /* file name */ > > }; > > > > And looking at > > https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html, I > > do not see any guarantee of that `[256]` at all: > > > > The <dirent.h> header shall [...] define the structure dirent which shall > > include the following members: > > > > [XSI][Option Start] > > ino_t d_ino File serial number. > > [Option End] > > char d_name[] Filename string of entry. > > > > You will notice that not even `d_type` is guaranteed. > > I am reasonably sure that the code (without Elijah's patches anyway) > takes the possibility of missing d_type into account already. > > Doesn't the above mean d_name[] has to be an in-place array of some > size (i.e. even a flex-array is OK)? It does not look to me that it > allows for it to be a pointer pointing at elsewhere (possibly on > heap), which may be asking for trouble. You are right, of course. I also was not _quite_ spot on, as I had looked at Git for Windows' `shears/pu` branch, not at the `pu` branch. Alas, we have patches in Git for Windows that allow for switching to a faster, caching way to access the stat() and readdir() data (it is called the "FSCache" and it is responsible for some rather dramatic speed-ups). And these patches change `struct dirent` to the form that is quoted above, to allow switching back and forth between two _different_ backends, storing the actual `d_name` not in `struct dirent` but in `DIR`. Is this compliant with POSIX? I guess not. Does it work? Yes, it does. I can't know for sure that it makes a dent, but FSCache is designed for speed, and it actually does not even store the `d_name` in the `DIR`, but directly in the cache structure, avoiding copying at all. In short: if we can allow FSCache to keep operating that way (i.e. keep `d_name` as a pointer), then that would be helpful to keep the performance on Windows somewhat within acceptable boundaries. Ciao, Dscho
Hi Elijah, On Mon, 16 Dec 2019, Elijah Newren wrote: > On Mon, Dec 16, 2019 at 4:04 PM Johannes Schindelin > <Johannes.Schindelin@gmx.de> wrote: > > On Mon, 16 Dec 2019, Elijah Newren wrote: > > > On Mon, Dec 16, 2019 at 5:51 AM Elijah Newren <newren@gmail.com> wrote: > > > > > > > > On Sun, Dec 15, 2019 at 2:29 AM Johannes Schindelin > > > > <Johannes.Schindelin@gmx.de> wrote: > > > > > > > > > > Hi Elijah, > > > > > > > > > > I have not had time to dive deeply into this, but I know that it _does_ > > > > > cause a ton of segmentation faults in the `shears/pu` branch (where all of > > > > > Git for Windows' patches are rebased on top of `pu`): > > > > > > > > Weird. If it's going to cause segmentation faults at all, it would > > > > certainly do it all over the place, but I tested the patches on the > > > > major platforms using your Azure Pipelines setup on git.git so it > > > > should be good on all the platforms. Did your shears/pu branch make > > > > some other changes to the setup? > > > > Not really. > > > > > > > > Actually, it looks like I looked up the definition of dirent > > > previously and forgot by the time you emailed. On linux, from > > > /usr/include/bits/dirent.h: > ... > > > and from compat/win32/dirent.h defines it as: > > > > > > struct dirent { > > > unsigned char d_type; /* file type to prevent lstat after > > > readdir */ > > > char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ > > > }; > ... > > > > If you care to look at our very own `compat/win32/dirent.h`, you will see > > this: > > Interesting, we both brought up compat/win32/dirent.h and quoted from > it in our emails... > > > struct dirent { > > unsigned char d_type; /* file type to prevent lstat after readdir */ > > char *d_name; /* file name */ > > }; > > ...but the contents were different? Looks like git-for-windows forked > compat/win32/dirent.h, possibly in a way that violates POSIX as > pointed out by Junio. Yep, I messed that up, sorry. > Any reason those changes weren't sent back upstream, by chance? Feels > odd having a compat/win32/ directory that our downstream windows users > aren't actually using. It also means the testing I'm getting from > gitgitgadget and your Azure setup (which all is really, really nice by > the way), is far less reassuring and helpful than I hoped. Yes. I was ready to submit the FSCache feature to the Git mailing list for review some 2.5 years ago when along came Ben Peart, finding ways to speed up FSCache even further. That is the reason why I held off, and I still have to condense the patches (which currently form a topology of 17 patch series!!!) into a nice small patch series that does not reflect the meandering history of the FSCache history, but instead presents one neat story. > > And looking at > > https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html, I > > do not see any guarantee of that `[256]` at all: > > > > The <dirent.h> header shall [...] define the structure dirent which shall > > include the following members: > > > > [XSI][Option Start] > > ino_t d_ino File serial number. > > [Option End] > > char d_name[] Filename string of entry. > > > > You will notice that not even `d_type` is guaranteed. > > Doh, yeah, I messed that up too. > > Anyway, as I mentioned to Junio, I'll resubmit after gutting the > series. I'll still include a fix for the issue that a real world user > reported, but all the other ancillary bugs I found that have been > around for over a decade aren't important enough to merit a major > refactor, IMO. Hmm. I am really sorry that I nudged you to go down this route. Quite honestly, I'd rather add an ugly work-around that is Windows-only just so that you can fix those ancillary bugs. I might even go so far as to adjust the FSCache's internal data structure to _store_ `struct dirent` items, then the fast `readdir()` implementation could be even faster by just pointing at those items. What do you think? Can we strike a deal to keep those bug fixes? Ciao, Dscho
Hi Dscho, On Tue, Dec 17, 2019 at 3:16 AM Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote: > > Hi Elijah, > > On Mon, 16 Dec 2019, Elijah Newren wrote: > > > On Mon, Dec 16, 2019 at 4:04 PM Johannes Schindelin > > <Johannes.Schindelin@gmx.de> wrote: > > > On Mon, 16 Dec 2019, Elijah Newren wrote: > > > > On Mon, Dec 16, 2019 at 5:51 AM Elijah Newren <newren@gmail.com> wrote: > > > > > > > > > > On Sun, Dec 15, 2019 at 2:29 AM Johannes Schindelin > > > > > <Johannes.Schindelin@gmx.de> wrote: > > > > > > > > > > > > Hi Elijah, > > > > > > > > > > > > I have not had time to dive deeply into this, but I know that it _does_ > > > > > > cause a ton of segmentation faults in the `shears/pu` branch (where all of > > > > > > Git for Windows' patches are rebased on top of `pu`): > > > > > > > > > > Weird. If it's going to cause segmentation faults at all, it would > > > > > certainly do it all over the place, but I tested the patches on the > > > > > major platforms using your Azure Pipelines setup on git.git so it > > > > > should be good on all the platforms. Did your shears/pu branch make > > > > > some other changes to the setup? > > > > > > Not really. > > > > > > > > > > > Actually, it looks like I looked up the definition of dirent > > > > previously and forgot by the time you emailed. On linux, from > > > > /usr/include/bits/dirent.h: > > ... > > > > and from compat/win32/dirent.h defines it as: > > > > > > > > struct dirent { > > > > unsigned char d_type; /* file type to prevent lstat after > > > > readdir */ > > > > char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ > > > > }; > > ... > > > > > > If you care to look at our very own `compat/win32/dirent.h`, you will see > > > this: > > > > Interesting, we both brought up compat/win32/dirent.h and quoted from > > it in our emails... > > > > > struct dirent { > > > unsigned char d_type; /* file type to prevent lstat after readdir */ > > > char *d_name; /* file name */ > > > }; > > > > ...but the contents were different? Looks like git-for-windows forked > > compat/win32/dirent.h, possibly in a way that violates POSIX as > > pointed out by Junio. > > Yep, I messed that up, sorry. > > > Any reason those changes weren't sent back upstream, by chance? Feels > > odd having a compat/win32/ directory that our downstream windows users > > aren't actually using. It also means the testing I'm getting from > > gitgitgadget and your Azure setup (which all is really, really nice by > > the way), is far less reassuring and helpful than I hoped. > > Yes. I was ready to submit the FSCache feature to the Git mailing list for > review some 2.5 years ago when along came Ben Peart, finding ways to speed > up FSCache even further. That is the reason why I held off, and I still > have to condense the patches (which currently form a topology of 17 patch > series!!!) into a nice small patch series that does not reflect the > meandering history of the FSCache history, but instead presents one neat > story. > > > > And looking at > > > https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html, I > > > do not see any guarantee of that `[256]` at all: > > > > > > The <dirent.h> header shall [...] define the structure dirent which shall > > > include the following members: > > > > > > [XSI][Option Start] > > > ino_t d_ino File serial number. > > > [Option End] > > > char d_name[] Filename string of entry. > > > > > > You will notice that not even `d_type` is guaranteed. > > > > Doh, yeah, I messed that up too. > > > > Anyway, as I mentioned to Junio, I'll resubmit after gutting the > > series. I'll still include a fix for the issue that a real world user > > reported, but all the other ancillary bugs I found that have been > > around for over a decade aren't important enough to merit a major > > refactor, IMO. > > Hmm. I am really sorry that I nudged you to go down this route. Quite > honestly, I'd rather add an ugly work-around that is Windows-only just so > that you can fix those ancillary bugs. You brought up issues; that's what you're supposed to do. You shouldn't feel bad about that. Besides, the d_type one is real, and means the patches at least need a #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) surrounding my explicit setting of d_type. The problem wasn't what you brought up or how you brought it up, it's massive fatigue on my end from dir.c, from before even submitting this series[*]. I'm not giving up on these changes or trying to discourage anyone else from picking them up and extending them, I just don't want to touch them right now and would rather put them on the shelf for a while. Elijah [*] If you're really curious...I got involved in dir.c because of a simple bug report nearly two years ago[1], and found myself working on a foundation that was error-prone by design[2], with ambiguous or even wrong documentation[3] about not just what the code does but the intent. Further, it was a place where not only is the correct fix unclear, and not only is the "right" behavior unclear, but the cases in question affect so few people that pinging the list periodically over more than a year can't generate enough interest for anyone else to hazard a guess as to what "correct" behavior is[4]. Stack on that the fact that every time I touch this area, I think I'm really close to having a fix, only to find I never, ever am. There's always one-more-thing before I can finally get back to something I really wanted to work on instead. Speaking of which, I've only managed to work on my new merge strategy like once every 3-6 months for a small amount of time each time. Yes, part of that's my fault with git-filter-repo (another case of perpetually thinking I'm close to done), rebase changes, and whatnot. But this series arose right when I had my calendar nearly cleared so that I could work on the merge strategy again (and of course the rebase bug report came in about the same time too). But at least git-filter-repo and rebase are generally useful; dir.c at most generates "meh, this seems annoying" reports. And I've already fixed all of those, the remaining fixes are stuff that it appears I'm the only one to have reported, and I only reported it because I was digging into the other "meh, seems annoying" reports. I'm usually happy when I have a patch series ready to submit to git; it means I think I'll make things better for others. I didn't feel that way with this series; I kind of wanted to just drop it entirely and not even turn it in. But I figured I should to at least document my findings, so I pushed myself to submit and hoped no one would respond. Then this issue arose and when I mentioned in my possibilities of fixing it that ripping the usage of dirent out would be a lot of work and would probably cause me to give up and asked for ideas, Junio responded that we should rip out dirent. I think he's right, and it's important the he defend code quality and point out the right way to do things, it's just that I want out of this rabbit hole right now. [1] https://lore.kernel.org/git/20180405173446.32372-1-newren@gmail.com/ [2] https://lore.kernel.org/git/xmqqefjp6sko.fsf@gitster-ct.c.googlers.com/ [3] e.g. https://lore.kernel.org/git/20190905154735.29784-10-newren@gmail.com/ [4] https://lore.kernel.org/git/20190905154735.29784-1-newren@gmail.com/ and links referenced therein
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: >> > [XSI][Option Start] >> > ino_t d_ino File serial number. >> > [Option End] >> > char d_name[] Filename string of entry. >> > >> > You will notice that not even `d_type` is guaranteed. >> >> I am reasonably sure that the code (without Elijah's patches anyway) >> takes the possibility of missing d_type into account already. >> >> Doesn't the above mean d_name[] has to be an in-place array of some >> size (i.e. even a flex-array is OK)? It does not look to me that it >> allows for it to be a pointer pointing at elsewhere (possibly on >> heap), which may be asking for trouble. > > You are right, of course. > > ... > > Is this compliant with POSIX? I guess not. Does it work? Yes, it does. I actually would not throw it into "it works" category. The obvious implication is that a program like this: static struct dirent *fabricate(const char *name) { /* over-allocate as we do not know how long the d_name[] is */ struct dirent *ent = calloc(1, sizeof(*ent) + strlen(name) + 1); strcpy(ent->d_name, name); return ent; } static void show_name(const struct dirent *ent) { printf("%s\n", ent->d_name); } int main(int ac, char **av) { struct dirent *mine = fabricate("mine"); show_name(mine); free(mine); return 0; } would be broken if you do not have d_name as an array. I would not be surprised if the segfaults you saw with Elijah's series all were caused by your d_name not being an array, and if that is the case, I'd rather see it fixed on your end than fixes withdrawn. Thanks.
Hi Junio, On Tue, 17 Dec 2019, Junio C Hamano wrote: > Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: > > >> > [XSI][Option Start] > >> > ino_t d_ino File serial number. > >> > [Option End] > >> > char d_name[] Filename string of entry. > >> > > >> > You will notice that not even `d_type` is guaranteed. > >> > >> I am reasonably sure that the code (without Elijah's patches anyway) > >> takes the possibility of missing d_type into account already. > >> > >> Doesn't the above mean d_name[] has to be an in-place array of some > >> size (i.e. even a flex-array is OK)? It does not look to me that it > >> allows for it to be a pointer pointing at elsewhere (possibly on > >> heap), which may be asking for trouble. > > > > You are right, of course. > > > > ... > > > > Is this compliant with POSIX? I guess not. Does it work? Yes, it does. > > I actually would not throw it into "it works" category. The obvious > implication is that a program like this: > > static struct dirent *fabricate(const char *name) > { > /* over-allocate as we do not know how long the d_name[] is */ > struct dirent *ent = calloc(1, sizeof(*ent) + strlen(name) + 1); > strcpy(ent->d_name, name); > return ent; > } > > static void show_name(const struct dirent *ent) > { > printf("%s\n", ent->d_name); > } > > int main(int ac, char **av) > { > struct dirent *mine = fabricate("mine"); > show_name(mine); > free(mine); > return 0; > } > > would be broken if you do not have d_name as an array. > > I would not be surprised if the segfaults you saw with Elijah's > series all were caused by your d_name not being an array, and if > that is the case, I'd rather see it fixed on your end than fixes > withdrawn. I agree with this reasoning. Ciao, Dscho
diff --git a/dir.c b/dir.c index 645b44ea64..9c71a9ac21 100644 --- a/dir.c +++ b/dir.c @@ -2102,37 +2102,69 @@ static int treat_leading_path(struct dir_struct *dir, const struct pathspec *pathspec) { struct strbuf sb = STRBUF_INIT; - int baselen, rc = 0; + int prevlen, baselen; const char *cp; + struct cached_dir cdir; + struct dirent de; + enum path_treatment state = path_none; + + /* + * For each directory component of path, we are going to check whether + * that path is relevant given the pathspec. For example, if path is + * foo/bar/baz/ + * then we will ask treat_path() whether we should go into foo, then + * whether we should go into bar, then whether baz is relevant. + * Checking each is important because e.g. if path is + * .git/info/ + * then we need to check .git to know we shouldn't traverse it. + * If the return from treat_path() is: + * * path_none, for any path, we return false. + * * path_recurse, for all path components, we return true + * * <anything else> for some intermediate component, we make sure + * to add that path to the relevant list but return false + * signifying that we shouldn't recurse into it. + */ while (len && path[len - 1] == '/') len--; if (!len) return 1; + + memset(&cdir, 0, sizeof(cdir)); + memset(&de, 0, sizeof(de)); + cdir.de = &de; + de.d_type = DT_DIR; baselen = 0; + prevlen = 0; while (1) { - cp = path + baselen + !!baselen; + prevlen = baselen + !!baselen; + cp = path + prevlen; cp = memchr(cp, '/', path + len - cp); if (!cp) baselen = len; else baselen = cp - path; - strbuf_setlen(&sb, 0); + strbuf_reset(&sb); strbuf_add(&sb, path, baselen); if (!is_directory(sb.buf)) break; - if (simplify_away(sb.buf, sb.len, pathspec)) - break; - if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec, - DT_DIR, NULL) == path_none) + strbuf_reset(&sb); + strbuf_add(&sb, path, prevlen); + memcpy(de.d_name, path+prevlen, baselen-prevlen); + de.d_name[baselen-prevlen] = '\0'; + state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, + pathspec); + if (state != path_recurse) break; /* do not recurse into it */ - if (len <= baselen) { - rc = 1; + if (len <= baselen) break; /* finished checking */ - } } + add_path_to_appropriate_result_list(dir, NULL, &cdir, istate, + &sb, baselen, pathspec, + state); + strbuf_release(&sb); - return rc; + return state == path_recurse; } static const char *get_ident_string(void) diff --git a/t/t3011-common-prefixes-and-directory-traversal.sh b/t/t3011-common-prefixes-and-directory-traversal.sh index d6e161ddd8..098fddc75b 100755 --- a/t/t3011-common-prefixes-and-directory-traversal.sh +++ b/t/t3011-common-prefixes-and-directory-traversal.sh @@ -74,7 +74,7 @@ test_expect_success 'git ls-files -o --directory untracked_dir does not recurse' test_cmp expect actual ' -test_expect_failure 'git ls-files -o --directory untracked_dir/ does not recurse' ' +test_expect_success 'git ls-files -o --directory untracked_dir/ does not recurse' ' echo untracked_dir/ >expect && git ls-files -o --directory untracked_dir/ >actual && test_cmp expect actual @@ -86,7 +86,7 @@ test_expect_success 'git ls-files -o untracked_repo does not recurse' ' test_cmp expect actual ' -test_expect_failure 'git ls-files -o untracked_repo/ does not recurse' ' +test_expect_success 'git ls-files -o untracked_repo/ does not recurse' ' echo untracked_repo/ >expect && git ls-files -o untracked_repo/ >actual && test_cmp expect actual @@ -133,7 +133,7 @@ test_expect_success 'git ls-files -o .git shows nothing' ' test_must_be_empty actual ' -test_expect_failure 'git ls-files -o .git/ shows nothing' ' +test_expect_success 'git ls-files -o .git/ shows nothing' ' git ls-files -o .git/ >actual && test_must_be_empty actual '