Message ID | 20190520170403.16672-1-marcandre.lureau@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] userdiff: add built-in pattern for rust | expand |
Am 20.05.19 um 19:04 schrieb marcandre.lureau@redhat.com: > From: Marc-André Lureau <mlureau@redhat.com> > > This adds xfuncname and word_regex patterns for Rust, a quite > popular programming language. It also includes test cases for the > xfuncname regex (t4018) and updated documentation. > > The word_regex pattern finds identifiers, integers, floats and > operators, according to the Rust Reference Book. This looks very good. I have a few questions regarding the hunk header regex. > diff --git a/userdiff.c b/userdiff.c > index 3a78fbf504..e45b5920c6 100644 > --- a/userdiff.c > +++ b/userdiff.c > @@ -130,6 +130,12 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", > "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" > "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." > "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), > +PATTERNS("rust", > + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", This pattern matches only if there is no semicolon behind the signal words on the line. Is that important? Can you show a (test) case where a line with a semicolon would be picked incorrectly if '[^;]*' were simplified to '.*'? You permit whitespace at the beginning of an anchor line. I guess that is to catch nested definitions. Or is it common style to write indented code? Can you show a test case where this makes sense? Would it be sufficient to simplify (struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+ to (struct|enum|union|mod|trait|fn|impl)[< \t]+ as it is only important to exclude identifiers that start with these keywords. > + /* -- */ > + "[a-zA-Z_][a-zA-Z0-9_]*" > + "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" > + "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), > PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", > "[={}\"]|[^={}\" \t]+"), > PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", > > base-commit: aa25c82427ae70aebf3b8f970f2afd54e9a2a8c6 -- Hannes
Hi On Mon, May 20, 2019 at 9:52 PM Johannes Sixt <j6t@kdbg.org> wrote: > > Am 20.05.19 um 19:04 schrieb marcandre.lureau@redhat.com: > > From: Marc-André Lureau <mlureau@redhat.com> > > > > This adds xfuncname and word_regex patterns for Rust, a quite > > popular programming language. It also includes test cases for the > > xfuncname regex (t4018) and updated documentation. > > > > The word_regex pattern finds identifiers, integers, floats and > > operators, according to the Rust Reference Book. > > This looks very good. I have a few questions regarding the hunk header > regex. > > > diff --git a/userdiff.c b/userdiff.c > > index 3a78fbf504..e45b5920c6 100644 > > --- a/userdiff.c > > +++ b/userdiff.c > > @@ -130,6 +130,12 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", > > "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" > > "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." > > "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), > > +PATTERNS("rust", > > + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", > > This pattern matches only if there is no semicolon behind the signal > words on the line. Is that important? Can you show a (test) case where a > line with a semicolon would be picked incorrectly if '[^;]*' were > simplified to '.*'? Ok, I am adding: trait RIGHT { fn new(name: &'static str) -> Self; fn ChangeMe(&self) { // should skip "new", and return trait name } } > You permit whitespace at the beginning of an anchor line. I guess that > is to catch nested definitions. Or is it common style to write indented > code? Can you show a test case where this makes sense? > sure, I thought it was already covered. fn foo() { fn RIGHT() { // must catch nested function ChangeMe; } } (a simpler example would be a method implementation) > Would it be sufficient to simplify > > (struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+ > to > (struct|enum|union|mod|trait|fn|impl)[< \t]+ > > as it is only important to exclude identifiers that start with these > keywords. I think that would be fine, ok I am changing it > > > + /* -- */ > > + "[a-zA-Z_][a-zA-Z0-9_]*" > > + "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" > > + "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), > > PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", > > "[={}\"]|[^={}\" \t]+"), > > PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", > > > > base-commit: aa25c82427ae70aebf3b8f970f2afd54e9a2a8c6 > > -- Hannes
Marc-André Lureau <marcandre.lureau@redhat.com> writes: > Ok, I am adding: > ... > sure, I thought it was already covered. > ... > I think that would be fine, ok I am changing it Thanks, both. The previous round has already hit 'next' (which means that we won't replacing the patch wholesale), so whatever you do, please make the update relative to / on top of what is queued as d74e7860 ("userdiff: add built-in pattern for rust", 2019-05-17).
Am 28.05.19 um 18:34 schrieb Junio C Hamano: > Marc-André Lureau <marcandre.lureau@redhat.com> writes: > >> Ok, I am adding: >> ... >> sure, I thought it was already covered. >> ... >> I think that would be fine, ok I am changing it > > Thanks, both. > > The previous round has already hit 'next' (which means that we won't > replacing the patch wholesale), so whatever you do, please make the > update relative to / on top of what is queued as d74e7860 > ("userdiff: add built-in pattern for rust", 2019-05-17). Ok. So, Marc-André, would you mind resending an incremental patch, because the word-regexp that is currently in 'next' would catch certain expressions that should be multiple words as a single word? -- Hannes
Hi Johannes On Tue, May 28, 2019 at 10:31 PM Johannes Sixt <j6t@kdbg.org> wrote: > > Am 28.05.19 um 18:34 schrieb Junio C Hamano: > > Marc-André Lureau <marcandre.lureau@redhat.com> writes: > > > >> Ok, I am adding: > >> ... > >> sure, I thought it was already covered. > >> ... > >> I think that would be fine, ok I am changing it > > > > Thanks, both. > > > > The previous round has already hit 'next' (which means that we won't > > replacing the patch wholesale), so whatever you do, please make the > > update relative to / on top of what is queued as d74e7860 > > ("userdiff: add built-in pattern for rust", 2019-05-17). > > Ok. So, Marc-André, would you mind resending an incremental patch, > because the word-regexp that is currently in 'next' would catch certain > expressions that should be multiple words as a single word? Beside a few extras tests, the diff is: @@ -134,11 +134,10 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), PATTERNS("rust", - "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl)[< \t]+[^;]*)$", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" - "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" - "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" + "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), So it is simplifying handling of type parameters, and lowering the complexity of literal numbers. Both of these changes were based on your recommendations. Would you mind sending a follow-up patch yourself? I can send a seperate patch for the 3 extra tests. thanks
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 4fb20cd0e9..07da08fb27 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -833,6 +833,8 @@ patterns are available: - `ruby` suitable for source code in the Ruby language. +- `rust` suitable for source code in the Rust language. + - `tex` suitable for source code for LaTeX documents. diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 22f9f88f0a..9261d6d3a0 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -43,6 +43,7 @@ diffpatterns=" php python ruby + rust tex custom1 custom2 diff --git a/t/t4018/rust-fn b/t/t4018/rust-fn new file mode 100644 index 0000000000..cbe02155f1 --- /dev/null +++ b/t/t4018/rust-fn @@ -0,0 +1,5 @@ +pub(self) fn RIGHT<T>(x: &[T]) where T: Debug { + let _ = x; + // a comment + let a = ChangeMe; +} diff --git a/t/t4018/rust-impl b/t/t4018/rust-impl new file mode 100644 index 0000000000..09df3cd93b --- /dev/null +++ b/t/t4018/rust-impl @@ -0,0 +1,5 @@ +impl<'a, T: AsRef<[u8]>> std::RIGHT for Git<'a> { + + pub fn ChangeMe(&self) -> () { + } +} diff --git a/t/t4018/rust-struct b/t/t4018/rust-struct new file mode 100644 index 0000000000..76aff1c0d8 --- /dev/null +++ b/t/t4018/rust-struct @@ -0,0 +1,5 @@ +#[derive(Debug)] +pub(super) struct RIGHT<'a> { + name: &'a str, + age: ChangeMe, +} diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait new file mode 100644 index 0000000000..ea397f09ed --- /dev/null +++ b/t/t4018/rust-trait @@ -0,0 +1,5 @@ +unsafe trait RIGHT<T> { + fn len(&self) -> u32; + fn ChangeMe(&self, n: u32) -> T; + fn iter<F>(&self, f: F) where F: Fn(T); +} diff --git a/t/t4018/rust-unsafe b/t/t4018/rust-unsafe new file mode 100644 index 0000000000..fd4661a934 --- /dev/null +++ b/t/t4018/rust-unsafe @@ -0,0 +1,6 @@ +unsafe fn RIGHT(inc: u32) { + unsafe { + // don't catch unsafe block + ChangeMe += inc; + } +} diff --git a/userdiff.c b/userdiff.c index 3a78fbf504..e45b5920c6 100644 --- a/userdiff.c +++ b/userdiff.c @@ -130,6 +130,12 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), +PATTERNS("rust", + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" + "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", "[={}\"]|[^={}\" \t]+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",