Message ID | 20190516235815.13886-1-marcandre.lureau@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] userdiff: add built-in pattern for rust | expand |
Am 17.05.19 um 01:58 schrieb marcandre.lureau@redhat.com: > From: Marc-André Lureau <mlureau@redhat.com> > > This adds xfuncname and word_regex patterns for Rust, a quite > popular programming language. It also includes test cases for the > xfuncname regex (t4018) and updated documentation. > > The word_regex pattern finds identifiers, integers, floats and > operators, according to the Rust Reference Book. > > Cc: Johannes Sixt <j6t@kdbg.org> In this code base, Cc: footers are disliked. > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > --- > diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait > new file mode 100644 > index 0000000000..ea397f09ed > --- /dev/null > +++ b/t/t4018/rust-trait > @@ -0,0 +1,5 @@ > +unsafe trait RIGHT<T> { > + fn len(&self) -> u32; > + fn ChangeMe(&self, n: u32) -> T; > + fn iter<F>(&self, f: F) where F: Fn(T); > +} You mentioned that 'unsafe' is commonly used for blocks, and these cases should not be picked up. Can we have a test case that demonstrates that this is indeed the case? > diff --git a/userdiff.c b/userdiff.c > index 3a78fbf504..8d7e62e2a5 100644 > --- a/userdiff.c > +++ b/userdiff.c > @@ -130,6 +130,13 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", > "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" > "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." > "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), > +PATTERNS("rust", > + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", > + /* -- */ > + "[a-zA-Z_][a-zA-Z0-9_]*" > + "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" This pattern did not change. Doesn't it still mark "+e_1.e_8-e_2.eu128" as a single word? > + "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" I still think that you should reduce the complexity of these patterns. 
They do not have to be restrictive to dismiss wrong syntax, just liberal enough to catch correct syntax. Let me try again: "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" > + "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::") -- Hannes
Hi On Fri, May 17, 2019 at 8:26 AM Johannes Sixt <j6t@kdbg.org> wrote: > > Am 17.05.19 um 01:58 schrieb marcandre.lureau@redhat.com: > > From: Marc-André Lureau <mlureau@redhat.com> > > > > This adds xfuncname and word_regex patterns for Rust, a quite > > popular programming language. It also includes test cases for the > > xfuncname regex (t4018) and updated documentation. > > > > The word_regex pattern finds identifiers, integers, floats and > > operators, according to the Rust Reference Book. > > > > Cc: Johannes Sixt <j6t@kdbg.org> > > In this code base, Cc: footers are disliked. Noted > > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > > --- > > > diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait > > new file mode 100644 > > index 0000000000..ea397f09ed > > --- /dev/null > > +++ b/t/t4018/rust-trait > > @@ -0,0 +1,5 @@ > > +unsafe trait RIGHT<T> { > > + fn len(&self) -> u32; > > + fn ChangeMe(&self, n: u32) -> T; > > + fn iter<F>(&self, f: F) where F: Fn(T); > > +} > > You mentioned that 'unsafe' is commonly used for blocks, and these cases > should not be picked up. Can we have a test case that demonstrates that > this is indeed the case? Ok, I am adding: unsafe fn RIGHT(inc: u32) { unsafe { // don't catch unsafe block ChangeMe += inc; } } > > > diff --git a/userdiff.c b/userdiff.c > > index 3a78fbf504..8d7e62e2a5 100644 > > --- a/userdiff.c > > +++ b/userdiff.c > > @@ -130,6 +130,13 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", > > "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" > > "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." > > "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), > > +PATTERNS("rust", > > + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", > > + /* -- */ > > + "[a-zA-Z_][a-zA-Z0-9_]*" > > + "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" > > This pattern did not change. 
Doesn't it still mark "+e_1.e_8-e_2.eu128" > as a single word? > > > + "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" > > I still think that you should reduce the complexity of these patterns. > They do not have to be restrictive to dismiss wrong syntax, just liberal > enough to catch correct syntax. Let me try again: > > "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" That seems to be pretty good. It misses 12E+99_f64, but I am not sure it is worth the trouble of adding a second rule for floating-point literals just to cover this case. > > > + "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::") > -- Hannes Thanks!
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 4fb20cd0e9..07da08fb27 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -833,6 +833,8 @@ patterns are available: - `ruby` suitable for source code in the Ruby language. +- `rust` suitable for source code in the Rust language. + - `tex` suitable for source code for LaTeX documents. diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 22f9f88f0a..9261d6d3a0 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -43,6 +43,7 @@ diffpatterns=" php python ruby + rust tex custom1 custom2 diff --git a/t/t4018/rust-fn b/t/t4018/rust-fn new file mode 100644 index 0000000000..cbe02155f1 --- /dev/null +++ b/t/t4018/rust-fn @@ -0,0 +1,5 @@ +pub(self) fn RIGHT<T>(x: &[T]) where T: Debug { + let _ = x; + // a comment + let a = ChangeMe; +} diff --git a/t/t4018/rust-impl b/t/t4018/rust-impl new file mode 100644 index 0000000000..09df3cd93b --- /dev/null +++ b/t/t4018/rust-impl @@ -0,0 +1,5 @@ +impl<'a, T: AsRef<[u8]>> std::RIGHT for Git<'a> { + + pub fn ChangeMe(&self) -> () { + } +} diff --git a/t/t4018/rust-struct b/t/t4018/rust-struct new file mode 100644 index 0000000000..76aff1c0d8 --- /dev/null +++ b/t/t4018/rust-struct @@ -0,0 +1,5 @@ +#[derive(Debug)] +pub(super) struct RIGHT<'a> { + name: &'a str, + age: ChangeMe, +} diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait new file mode 100644 index 0000000000..ea397f09ed --- /dev/null +++ b/t/t4018/rust-trait @@ -0,0 +1,5 @@ +unsafe trait RIGHT<T> { + fn len(&self) -> u32; + fn ChangeMe(&self, n: u32) -> T; + fn iter<F>(&self, f: F) where F: Fn(T); +} diff --git a/userdiff.c b/userdiff.c index 3a78fbf504..8d7e62e2a5 100644 --- a/userdiff.c +++ b/userdiff.c @@ -130,6 +130,13 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." 
"|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), +PATTERNS("rust", + "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" + "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?" + "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", "[={}\"]|[^={}\" \t]+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",