diff mbox series

[v2] userdiff: add built-in pattern for rust

Message ID 20190516235815.13886-1-marcandre.lureau@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v2] userdiff: add built-in pattern for rust | expand

Commit Message

Marc-André Lureau May 16, 2019, 11:58 p.m. UTC
From: Marc-André Lureau <mlureau@redhat.com>

This adds xfuncname and word_regex patterns for Rust, a quite
popular programming language. It also includes test cases for the
xfuncname regex (t4018) and updated documentation.

The word_regex pattern finds identifiers, integers, floats and
operators, according to the Rust Reference Book.

Cc: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 Documentation/gitattributes.txt | 2 ++
 t/t4018-diff-funcname.sh        | 1 +
 t/t4018/rust-fn                 | 5 +++++
 t/t4018/rust-impl               | 5 +++++
 t/t4018/rust-struct             | 5 +++++
 t/t4018/rust-trait              | 5 +++++
 userdiff.c                      | 7 +++++++
 7 files changed, 30 insertions(+)
 create mode 100644 t/t4018/rust-fn
 create mode 100644 t/t4018/rust-impl
 create mode 100644 t/t4018/rust-struct
 create mode 100644 t/t4018/rust-trait


base-commit: ab15ad1a3b4b04a29415aef8c9afa2f64fc194a2

Comments

Johannes Sixt May 17, 2019, 6:26 a.m. UTC | #1
Am 17.05.19 um 01:58 schrieb marcandre.lureau@redhat.com:
> From: Marc-André Lureau <mlureau@redhat.com>
> 
> This adds xfuncname and word_regex patterns for Rust, a quite
> popular programming language. It also includes test cases for the
> xfuncname regex (t4018) and updated documentation.
> 
> The word_regex pattern finds identifiers, integers, floats and
> operators, according to the Rust Reference Book.
> 
> Cc: Johannes Sixt <j6t@kdbg.org>

In this code base, Cc: footers are disliked.

> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---

> diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait
> new file mode 100644
> index 0000000000..ea397f09ed
> --- /dev/null
> +++ b/t/t4018/rust-trait
> @@ -0,0 +1,5 @@
> +unsafe trait RIGHT<T> {
> +    fn len(&self) -> u32;
> +    fn ChangeMe(&self, n: u32) -> T;
> +    fn iter<F>(&self, f: F) where F: Fn(T);
> +}

You mentioned that 'unsafe' is commonly used for blocks, and these cases
should not be picked up. Can we have a test case that demonstrates that
this is indeed the case?

> diff --git a/userdiff.c b/userdiff.c
> index 3a78fbf504..8d7e62e2a5 100644
> --- a/userdiff.c
> +++ b/userdiff.c
> @@ -130,6 +130,13 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
>  	 "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
>  	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
>  	 "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"),
> +PATTERNS("rust",
> +	 "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$",
> +	 /* -- */
> +	 "[a-zA-Z_][a-zA-Z0-9_]*"
> +	 "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?"

This pattern did not change. Doesn't it still mark "+e_1.e_8-e_2.eu128"
as a single word?

> +	 "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?"

I still think that you should reduce the complexity of these patterns.
They do not have to be restrictive to dismiss wrong syntax, just liberal
enough to catch correct syntax. Let me try again:

	"|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?"

> +	 "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::")
-- Hannes
Marc-André Lureau May 17, 2019, 11:11 a.m. UTC | #2
Hi

On Fri, May 17, 2019 at 8:26 AM Johannes Sixt <j6t@kdbg.org> wrote:
>
> Am 17.05.19 um 01:58 schrieb marcandre.lureau@redhat.com:
> > From: Marc-André Lureau <mlureau@redhat.com>
> >
> > This adds xfuncname and word_regex patterns for Rust, a quite
> > popular programming language. It also includes test cases for the
> > xfuncname regex (t4018) and updated documentation.
> >
> > The word_regex pattern finds identifiers, integers, floats and
> > operators, according to the Rust Reference Book.
> >
> > Cc: Johannes Sixt <j6t@kdbg.org>
>
> In this code base, Cc: footers are disliked.

Noted

>
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
>
> > diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait
> > new file mode 100644
> > index 0000000000..ea397f09ed
> > --- /dev/null
> > +++ b/t/t4018/rust-trait
> > @@ -0,0 +1,5 @@
> > +unsafe trait RIGHT<T> {
> > +    fn len(&self) -> u32;
> > +    fn ChangeMe(&self, n: u32) -> T;
> > +    fn iter<F>(&self, f: F) where F: Fn(T);
> > +}
>
> You mentioned that 'unsafe' is commonly used for blocks, and these cases
> should not be picked up. Can we have a test case that demonstrates that
> this is indeed the case?

OK, I am adding the following test case (an unsafe block nested inside an unsafe fn):

unsafe fn RIGHT(inc: u32) {
    unsafe {
        // don't catch unsafe block
        ChangeMe += inc;
    }
}

>
> > diff --git a/userdiff.c b/userdiff.c
> > index 3a78fbf504..8d7e62e2a5 100644
> > --- a/userdiff.c
> > +++ b/userdiff.c
> > @@ -130,6 +130,13 @@ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
> >        "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
> >        "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
> >        "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"),
> > +PATTERNS("rust",
> > +      "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$",
> > +      /* -- */
> > +      "[a-zA-Z_][a-zA-Z0-9_]*"
> > +      "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?"
>
> This pattern did not change. Doesn't it still mark "+e_1.e_8-e_2.eu128"
> as a single word?
>
> > +      "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?"
>
> I still think that you should reduce the complexity of these patterns.
> They do not have to be restrictive to dismiss wrong syntax, just liberal
> enough to catch correct syntax. Let me try again:
>
>         "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?"

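That seems to be pretty good. It misses 12E+99_f64 (the exponent sign is
only accepted after a decimal point, so the literal gets split at the
'+'), but I am not sure it is worth the trouble of adding a second rule
for floating-point literals just for this case.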

>
> > +      "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::")
> -- Hannes

Thanks!
diff mbox series

Patch

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 4fb20cd0e9..07da08fb27 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -833,6 +833,8 @@  patterns are available:
 
 - `ruby` suitable for source code in the Ruby language.
 
+- `rust` suitable for source code in the Rust language.
+
 - `tex` suitable for source code for LaTeX documents.
 
 
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 22f9f88f0a..9261d6d3a0 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -43,6 +43,7 @@  diffpatterns="
 	php
 	python
 	ruby
+	rust
 	tex
 	custom1
 	custom2
diff --git a/t/t4018/rust-fn b/t/t4018/rust-fn
new file mode 100644
index 0000000000..cbe02155f1
--- /dev/null
+++ b/t/t4018/rust-fn
@@ -0,0 +1,5 @@ 
+pub(self) fn RIGHT<T>(x: &[T]) where T: Debug {
+    let _ = x;
+    // a comment
+    let a = ChangeMe;
+}
diff --git a/t/t4018/rust-impl b/t/t4018/rust-impl
new file mode 100644
index 0000000000..09df3cd93b
--- /dev/null
+++ b/t/t4018/rust-impl
@@ -0,0 +1,5 @@ 
+impl<'a, T: AsRef<[u8]>>  std::RIGHT for Git<'a> {
+
+    pub fn ChangeMe(&self) -> () {
+    }
+}
diff --git a/t/t4018/rust-struct b/t/t4018/rust-struct
new file mode 100644
index 0000000000..76aff1c0d8
--- /dev/null
+++ b/t/t4018/rust-struct
@@ -0,0 +1,5 @@ 
+#[derive(Debug)]
+pub(super) struct RIGHT<'a> {
+    name: &'a str,
+    age: ChangeMe,
+}
diff --git a/t/t4018/rust-trait b/t/t4018/rust-trait
new file mode 100644
index 0000000000..ea397f09ed
--- /dev/null
+++ b/t/t4018/rust-trait
@@ -0,0 +1,5 @@ 
+unsafe trait RIGHT<T> {
+    fn len(&self) -> u32;
+    fn ChangeMe(&self, n: u32) -> T;
+    fn iter<F>(&self, f: F) where F: Fn(T);
+}
diff --git a/userdiff.c b/userdiff.c
index 3a78fbf504..8d7e62e2a5 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -130,6 +130,13 @@  PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
 	 "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
 	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
 	 "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"),
+PATTERNS("rust",
+	 "^[\t ]*((pub(\\([^\\)]+\\))?[\t ]+)?((async|const|unsafe|extern([\t ]+\"[^\"]+\"))[\t ]+)?(struct|enum|union|mod|trait|fn|impl(<.+>)?)[ \t]+[^;]*)$",
+	 /* -- */
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+_0-9.eE]+(f32|f64|u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?"
+	 "|0[box]?[0-9a-fA-F_]+(u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize)?"
+	 "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"),
 PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
 	 "[={}\"]|[^={}\" \t]+"),
 PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",