diff mbox series

[GSoC,v2] Add a diff driver for JavaScript languages.

Message ID 20220312164803.57909-1-a97410985new@gmail.com (mailing list archive)
State Superseded
Headers show
Series [GSoC,v2] Add a diff driver for JavaScript languages. | expand

Commit Message

xing zhi jiang March 12, 2022, 4:48 p.m. UTC
The xfunction part matches normal functions, variable declarations that
are assigned a function, function declarations in a class, and functions
that are properties of an object literal[1].

The word regex part matches numbers, punctuation, and JavaScript
identifiers.
This part references the formal ECMA specification[2].

[1]https://github.com/jquery/jquery/blob/de5398a6ad088dc006b46c6a870a2a053f4cd663/src/core.js#L201
[2]https://262.ecma-international.org/12.0/#sec-ecmascript-language-lexical-grammar

Signed-off-by: xing zhi jiang <a97410985new@gmail.com>
---
Range-diff against v1:
1:  7f764f97cf ! 1:  3b326bd2b6 Add a diff driver for JavaScript languages.
    @@ Commit message
     
         In the xfunction part that matches normal functions,
         a variable declaration with an assignment of function, the function declaration
    -    in the class, and also the function is object literal's property.
    +    in the class, and also the function is object literal's property[1].
     
         And in the word regex part, that matches numbers, punctuations, and also the
         JavaScript identifier.
    -    This part heavily references the formal ECMA sepcification[1].
    +    This part reference the formal ECMA specification[2].
     
    -    [1]https://262.ecma-international.org/12.0/#sec-ecmascript-language-lexical-grammar
    +    [1]https://github.com/jquery/jquery/blob/de5398a6ad088dc006b46c6a870a2a053f4cd663/src/core.js#L201
    +    [2]https://262.ecma-international.org/12.0/#sec-ecmascript-language-lexical-grammar
     
         Signed-off-by: xing zhi jiang <a97410985new@gmail.com>
     
    - ## .gitignore ##
    -@@ .gitignore: Release/
    - /git.VC.db
    - *.dSYM
    - /contrib/buildsystems/out
    -+/.cache
    - \ No newline at end of file
    -
      ## Documentation/gitattributes.txt ##
     @@ Documentation/gitattributes.txt: patterns are available:
      
      - `java` suitable for source code in the Java language.
      
    -+- `javascript suitable for source code in the JavaScript language.
    ++- `javascript` suitable for source code in the JavaScript language.
     +
      - `markdown` suitable for Markdown documents.
      
    @@ t/t4018/javascript-assignment-of-anonymous-function (new)
     +	
     +    return a + b; // ChangeMe
     +};
    - \ No newline at end of file
     
      ## t/t4018/javascript-assignment-of-arrow-function (new) ##
     @@
    @@ t/t4018/javascript-assignment-of-arrow-function (new)
     +	
     +    return a + b; // ChangeMe
     +};
    - \ No newline at end of file
    +
    + ## t/t4018/javascript-assignment-of-arrow-function-2 (new) ##
    +@@
    ++const RIGHT = (a, b)=>{
    ++	
    ++    return a + b; // ChangeMe
    ++};
    +
    + ## t/t4018/javascript-assignment-of-arrow-function-3 (new) ##
    +@@
    ++const RIGHT=test=>{
    ++	
    ++    return test + 1; // ChangeMe
    ++};
     
      ## t/t4018/javascript-assignment-of-named-function (new) ##
     @@
    @@ t/t4018/javascript-assignment-of-named-function (new)
     +	
     +    return a + b; // ChangeMe
     +};
    - \ No newline at end of file
     
      ## t/t4018/javascript-async-function (new) ##
     @@
    @@ t/t4018/javascript-async-function (new)
     +  
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-export-async-function (new) ##
     @@
    @@ t/t4018/javascript-export-async-function (new)
     +  
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-export-function (new) ##
     @@
    @@ t/t4018/javascript-export-function (new)
     +  
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-exports-anomyous-function (new) ##
     @@
    @@ t/t4018/javascript-exports-anomyous-function (new)
     +	
     +    return ChangeMe;
     +};
    - \ No newline at end of file
     
      ## t/t4018/javascript-exports-anomyous-function-2 (new) ##
     @@
    @@ t/t4018/javascript-exports-anomyous-function-2 (new)
     +	
     +    return ChangeMe;
     +};
    - \ No newline at end of file
     
      ## t/t4018/javascript-exports-function (new) ##
     @@
     +exports.RIGHT = function(document) {
     +    
    -+    return ChangeMe
    ++    return ChangeMe;
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-function (new) ##
     @@
    @@ t/t4018/javascript-function (new)
     +
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-function-2 (new) ##
     @@
    @@ t/t4018/javascript-function-2 (new)
     +   }
     +  }
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-function-belong-to-IIFE (new) ##
     @@
    @@ t/t4018/javascript-function-belong-to-IIFE (new)
     +      return ChangeMe;
     +  };
     +}).call(aaaa.prototype);
    - \ No newline at end of file
     
      ## t/t4018/javascript-function-in-class (new) ##
     @@
    @@ t/t4018/javascript-function-in-class (new)
     +    let b = ChangeMe;
     +  }
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-function-in-class-2 (new) ##
     @@
    @@ t/t4018/javascript-function-in-class-2 (new)
     +    let b = ChangeMe;
     +  }
     +}
    - \ No newline at end of file
    +
    + ## t/t4018/javascript-function-in-class-3 (new) ##
    +@@
    ++class Test {
    ++  RIGHT(aaaaaaaaaa,
    ++      bbbbbbbbbb,
    ++      cccccccccc,
    ++      dddddddddd
    ++  ) {
    ++    let a = 4;
    ++    let b = ChangeMe;
    ++  }
    ++}
     
      ## t/t4018/javascript-function-in-object-literal (new) ##
     @@
    @@ t/t4018/javascript-function-in-object-literal (new)
     +        return ChangeMe
     +    }
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-generator-function (new) ##
     @@
    @@ t/t4018/javascript-generator-function (new)
     +  
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-generator-function-2 (new) ##
     @@
    @@ t/t4018/javascript-generator-function-2 (new)
     +  
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-getter-function-in-class (new) ##
     @@
    @@ t/t4018/javascript-getter-function-in-class (new)
     +    let b = ChangeMe;
     +  }
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-setter-function-in-class (new) ##
     @@
    @@ t/t4018/javascript-setter-function-in-class (new)
     +    let b = ChangeMe;
     +  }
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-skip-function-call-statement (new) ##
     @@
    @@ t/t4018/javascript-skip-function-call-statement (new)
     +    let b = ChangeMe;
     +  }
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-skip-keywords (new) ##
     @@
    @@ t/t4018/javascript-skip-keywords (new)
     +
     +  return a + b; // ChangeMe
     +}
    - \ No newline at end of file
     
      ## t/t4018/javascript-static-function-in-class (new) ##
     @@
    @@ t/t4018/javascript-static-function-in-class (new)
     +    let b = ChangeMe;
     +  }
     +}
    - \ No newline at end of file
     
      ## t/t4034-diff-words.sh ##
     @@ t/t4034-diff-words.sh: test_language_driver dts
    @@ t/t4034-diff-words.sh: test_language_driver dts
      ## t/t4034/javascript/expect (new) ##
     @@
     +<BOLD>diff --git a/pre b/post<RESET>
    -+<BOLD>index b72238f..8bc3e3a 100644<RESET>
    ++<BOLD>index 18f4796..46f9b62 100644<RESET>
     +<BOLD>--- a/pre<RESET>
     +<BOLD>+++ b/post<RESET>
    -+<CYAN>@@ -1,32 +1,32 @@<RESET>
    ++<CYAN>@@ -1,33 +1,33 @@<RESET>
     +// DecimalLiteral<RESET>
     +<RED>123<RESET>
     +<RED>0.123<RESET>
    ++<RED>.123<RESET>
     +<RED>0.123e+5<RESET>
     +<RED>0.123E+5<RESET>
     +<RED>0.123e5<RESET>
     +<RED>1222222222222222223334444n<RESET><GREEN>124<RESET>
     +<GREEN>0.124<RESET>
    ++<GREEN>.124<RESET>
     +<GREEN>0.123e-5<RESET>
     +<GREEN>0.123E-5<RESET>
     +<GREEN>0.123E5<RESET>
    @@ t/t4034/javascript/post (new)
     +// DecimalLiteral
     +124
     +0.124
    ++.124
     +0.123e-5
     +0.123E-5
     +0.123E5
    @@ t/t4034/javascript/post (new)
     +a-=b a%=b a&&=b a|=b
     +b-c a++ a<<b a>>>=b a<<=b
     +a&b a|b a??=b
    - \ No newline at end of file
     
      ## t/t4034/javascript/pre (new) ##
     @@
     +// DecimalLiteral
     +123
     +0.123
    ++.123
     +0.123e+5
     +0.123E+5
     +0.123e5
    @@ t/t4034/javascript/pre (new)
     +a+=b a*=b a**=b a||=b
     +b+c a-- a>>b a>>>b a>>=b
     +a&&b a||b a&&=b
    - \ No newline at end of file
     
      ## userdiff.c ##
     @@ userdiff.c: PATTERNS("java",
    @@ userdiff.c: PATTERNS("java",
     +	 /* don't match the expression may contain parenthesis, because it is not a function declaration */
     +	 "!^[ \t]*(if|do|while|for|with|switch|catch|import|return)\n"
     +	 /* don't match statement */
    -+	 "!^.*;[ \t]*\n"
    ++	 "!;\n"
     +	 /* match normal function */
    -+	 "^[\t ]*((export[\t ]+)?(async[\t ]+)?function[\t ]*([\t ]*\\*[\t ]*|[\t ]*)?[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)\n"
    ++	 "^((export[\t ]+)?(async[\t ]+)?function[\t ]*[\t *]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)\n"
     +	 /* match JavaScript variable declaration with a lambda expression */
     +	 "^[\t ]*((const|let|var)[\t ]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*"
    -+	 "(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*[\t ])[\t ]*=>[\t ]*\\{?)\n"
    ++	 "(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>[\t ]*\\{?)\n"
     +	 /* match exports for anonymous fucntion */
    -+	 "^[\t ]*(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
    ++	 "^(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
     +	 /* match assign function to LHS */
     +	 "^(.*=[\t ]*function[\t ]*([$_[:alpha:]][$_[:alnum:]]*)?[\t ]*\\(.*)\n"
     +	 /* match normal function in object literal */
    @@ userdiff.c: PATTERNS("java",
     +	 /* match function in class */
     +	 "^[\t ]*((static[\t ]+)?((async|get|set)[\t ]+)?[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)",
     +	 /* word regex */
    -+	 /* hexIntegerLiteral and bigHexIntegerLiteral*/
    -+	 "0[xX][0-9a-fA-F][_0-9a-fA-F]*n?|"
    -+	 /* octalIntegerLiteral and bigOctalIntegerLiteral */
    -+	 "0[oO]?[0-7][_0-7]*n?|"
    -+	 /* binaryIntegerLiteral and bigBinaryIntegerLiteral */
    -+	 "(0[bB][01][_01]*n?)|"
    -+	 /* decimalLiteral */
    -+	 "(0|[1-9][_0-9]*)?\\.?[0-9][_0-9]*([eE][+-]?[_0-9]+)?|"
    -+	 /* bigDecimalLiteral */
    -+	 "(0|[1-9][_0-9]*)n|"
    ++	 /* hexIntegerLiteral, octalIntegerLiteral, binaryIntegerLiteral, DecimalLiteral and its big version */
    ++	 "(0[xXoObB])?[0-9a-fA-F][_0-9a-fA-F]*n?"
    ++	 /* DecimalLiteral may be float */
    ++	 "|(0|[1-9][_0-9]*)?\\.?[0-9][_0-9]*([eE][+-]?[_0-9]+)?"
     +	 /* punctuations */
    -+	 "\\{|\\}|\\(|\\)|\\.|\\.{3}|;|,|<|>|<=|>=|==|!=|={3}|!==|\\+|-|\\*|/|%|\\*{2}|"
    -+	 "\\+{2}|--|<<|>>|>>>|&|\\||\\^|!|~|&&|\\|{2}|\\?{1,2}|:|=|\\+=|-=|\\*=|%=|\\*{2}=|"
    -+	 "<<=|>>=|>>>=|&=|\\|=|\\^=|&&=|\\|{2}=|\\?{2}=|=>|"
    ++	 "|\\.{3}|<=|>=|==|!=|={3}|!==|\\*{2}|\\+{2}|--|<<|>>"
    ++	 "|>>>|&&|\\|{2}|\\?{2}|\\+=|-=|\\*=|%=|\\*{2}="
    ++	 "|<<=|>>=|>>>=|&=|\\|=|\\^=|&&=|\\|{2}=|\\?{2}=|=>"
     +	 /* identifiers */
    -+	 "[$_[:alpha:]][$_[:alnum:]]*"),
    ++	 "|[$_[:alpha:]][$_[:alnum:]]*"),
      PATTERNS("markdown",
      	 "^ {0,3}#{1,6}[ \t].*",
      	 /* -- */

 Documentation/gitattributes.txt               |  2 +
 ...avascript-assignment-of-anonymous-function |  4 ++
 .../javascript-assignment-of-arrow-function   |  4 ++
 .../javascript-assignment-of-arrow-function-2 |  4 ++
 .../javascript-assignment-of-arrow-function-3 |  4 ++
 .../javascript-assignment-of-named-function   |  4 ++
 t/t4018/javascript-async-function             |  4 ++
 t/t4018/javascript-export-async-function      |  4 ++
 t/t4018/javascript-export-function            |  4 ++
 t/t4018/javascript-exports-anomyous-function  |  4 ++
 .../javascript-exports-anomyous-function-2    |  4 ++
 t/t4018/javascript-exports-function           |  4 ++
 t/t4018/javascript-function                   |  4 ++
 t/t4018/javascript-function-2                 | 10 ++++
 t/t4018/javascript-function-belong-to-IIFE    |  6 +++
 t/t4018/javascript-function-in-class          |  6 +++
 t/t4018/javascript-function-in-class-2        | 11 ++++
 t/t4018/javascript-function-in-class-3        | 10 ++++
 t/t4018/javascript-function-in-object-literal |  7 +++
 t/t4018/javascript-generator-function         |  4 ++
 t/t4018/javascript-generator-function-2       |  4 ++
 t/t4018/javascript-getter-function-in-class   |  6 +++
 t/t4018/javascript-setter-function-in-class   |  6 +++
 .../javascript-skip-function-call-statement   |  7 +++
 t/t4018/javascript-skip-keywords              | 34 ++++++++++++
 t/t4018/javascript-static-function-in-class   |  6 +++
 t/t4034-diff-words.sh                         |  1 +
 t/t4034/javascript/expect                     | 54 +++++++++++++++++++
 t/t4034/javascript/post                       | 33 ++++++++++++
 t/t4034/javascript/pre                        | 33 ++++++++++++
 userdiff.c                                    | 32 +++++++++++
 31 files changed, 320 insertions(+)
 create mode 100644 t/t4018/javascript-assignment-of-anonymous-function
 create mode 100644 t/t4018/javascript-assignment-of-arrow-function
 create mode 100644 t/t4018/javascript-assignment-of-arrow-function-2
 create mode 100644 t/t4018/javascript-assignment-of-arrow-function-3
 create mode 100644 t/t4018/javascript-assignment-of-named-function
 create mode 100644 t/t4018/javascript-async-function
 create mode 100644 t/t4018/javascript-export-async-function
 create mode 100644 t/t4018/javascript-export-function
 create mode 100644 t/t4018/javascript-exports-anomyous-function
 create mode 100644 t/t4018/javascript-exports-anomyous-function-2
 create mode 100644 t/t4018/javascript-exports-function
 create mode 100644 t/t4018/javascript-function
 create mode 100644 t/t4018/javascript-function-2
 create mode 100644 t/t4018/javascript-function-belong-to-IIFE
 create mode 100644 t/t4018/javascript-function-in-class
 create mode 100644 t/t4018/javascript-function-in-class-2
 create mode 100644 t/t4018/javascript-function-in-class-3
 create mode 100644 t/t4018/javascript-function-in-object-literal
 create mode 100644 t/t4018/javascript-generator-function
 create mode 100644 t/t4018/javascript-generator-function-2
 create mode 100644 t/t4018/javascript-getter-function-in-class
 create mode 100644 t/t4018/javascript-setter-function-in-class
 create mode 100644 t/t4018/javascript-skip-function-call-statement
 create mode 100644 t/t4018/javascript-skip-keywords
 create mode 100644 t/t4018/javascript-static-function-in-class
 create mode 100644 t/t4034/javascript/expect
 create mode 100644 t/t4034/javascript/post
 create mode 100644 t/t4034/javascript/pre

Comments

Johannes Sixt March 13, 2022, 9:54 p.m. UTC | #1
When you send a new iteration of a patch or patch set, it is customary
on this list to include everyone who took part in the earlier rounds in
the Cc: list.

Am 12.03.22 um 17:48 schrieb xing zhi jiang:
> In the xfunction part that matches normal functions,
> a variable declaration with an assignment of function, the function declaration
> in the class, and also the function is object literal's property[1].
> 
> And in the word regex part, that matches numbers, punctuations, and also the
> JavaScript identifier.
> This part reference the formal ECMA specification[2].
> 
> [1]https://github.com/jquery/jquery/blob/de5398a6ad088dc006b46c6a870a2a053f4cd663/src/core.js#L201
> [2]https://262.ecma-international.org/12.0/#sec-ecmascript-language-lexical-grammar
> 
> Signed-off-by: xing zhi jiang <a97410985new@gmail.com>
> ---

> diff --git a/userdiff.c b/userdiff.c
> index 8578cb0d12..51bfe4021d 100644
> --- a/userdiff.c
> +++ b/userdiff.c
> @@ -168,6 +168,38 @@ PATTERNS("java",
>  	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
>  	 "|[-+*/<>%&^|=!]="
>  	 "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"),
> +
> +PATTERNS("javascript",
> +	 /* don't match the expression may contain parenthesis, because it is not a function declaration */
> +	 "!^[ \t]*(if|do|while|for|with|switch|catch|import|return)\n"
> +	 /* don't match statement */
> +	 "!;\n"
> +	 /* match normal function */
> +	 "^((export[\t ]+)?(async[\t ]+)?function[\t ]*[\t *]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)\n"
> +	 /* match JavaScript variable declaration with a lambda expression */
> +	 "^[\t ]*((const|let|var)[\t ]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*"
> +	 "(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>[\t ]*\\{?)\n"

It would help readability if this second line of this regex were
indented because it is a continuation of the first line.

> +	 /* match exports for anonymous fucntion */
> +	 "^(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
> +	 /* match assign function to LHS */
> +	 "^(.*=[\t ]*function[\t ]*([$_[:alpha:]][$_[:alnum:]]*)?[\t ]*\\(.*)\n"

This should be written as

	 "^(.*=[\t ]*function[\t ]*([$_[:alpha:]][$_[:alnum:]]*[\t ]*)?\\(.*)\n"

Notice that the whitespace after the identifier can only appear when
there is actually an identifier. The point is to reduce the different
matches permitted by the sub-expression "[\t ]*[\t ]*" when there is no
identifier in the text.

Can the keyword function ever be followed by a number? I guess not. Then
[$_[:alpha:]][$_[:alnum:]]* could be reduced to [$_[:alnum:]]+

> +	 /* match normal function in object literal */
> +	 "^[\t ]*([$_[:alpha:]][$_[:alnum:]]*[\t ]*:[\t ]*function[\t ].*)\n"
> +	 /* don't match the function in class, which has more than one ident level */
> +	 "!^(\t{2,}|[ ]{5,})\n"
> +	 /* match function in class */
> +	 "^[\t ]*((static[\t ]+)?((async|get|set)[\t ]+)?[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)",
> +	 /* word regex */
> +	 /* hexIntegerLiteral, octalIntegerLiteral, binaryIntegerLiteral, DecimalLiteral and its big version */
> +	 "(0[xXoObB])?[0-9a-fA-F][_0-9a-fA-F]*n?"
> +	 /* DecimalLiteral may be float */
> +	 "|(0|[1-9][_0-9]*)?\\.?[0-9][_0-9]*([eE][+-]?[_0-9]+)?"

Having alternatives that begin with an optional part makes the regex
evaluation comparatively inefficient. In particular, both alternatives
above match a decimal integer. I suggest having the first alternative
only for hex, octal, and binary integers, and the second for all decimal
numbers including floating-point:

	 /* hexIntegerLiteral, octalIntegerLiteral, binaryIntegerLiteral, and their big versions */
	 "0[xXoObB][_0-9a-fA-F]+n?"
	 /* DecimalLiteral may be float */
	 "|[0-9][_0-9]*(\\.[_0-9]*|n)?([eE][+-]?[_0-9]+)?"

and if floating point literals can begin with a decimal point, then we
also need

	 "|\\.[0-9][_0-9]*([eE][+-]?[_0-9]+)?"

> +	 /* punctuations */
> +	 "|\\.{3}|<=|>=|==|!=|={3}|!==|\\*{2}|\\+{2}|--|<<|>>"
> +	 "|>>>|&&|\\|{2}|\\?{2}|\\+=|-=|\\*=|%=|\\*{2}="
> +	 "|<<=|>>=|>>>=|&=|\\|=|\\^=|&&=|\\|{2}=|\\?{2}=|=>"
> +	 /* identifiers */
> +	 "|[$_[:alpha:]][$_[:alnum:]]*"),
>  PATTERNS("markdown",
>  	 "^ {0,3}#{1,6}[ \t].*",
>  	 /* -- */

-- Hannes
Glen Choo March 14, 2022, 5:20 p.m. UTC | #2
Welcome to the Git project :) I never realized that we didn't include a
built in JS diff driver, so this would be quite welcome.

I'm not entirely familiar with this part of the system or our regex
style, so I won't comment on those. I'll only comment on the patterns we
are trying to detect and whether or not we want them.

I'm not 100% clear on the style around diff drivers e.g. how do we
decide that we want a pattern or not? I'd appreciate any pointers to
docs or commits.

xing zhi jiang <a97410985new@gmail.com> writes:

> diff --git a/t/t4018/javascript-assignment-of-anonymous-function b/t/t4018/javascript-assignment-of-anonymous-function
> new file mode 100644
> index 0000000000..b6f2ccccfc
> --- /dev/null
> +++ b/t/t4018/javascript-assignment-of-anonymous-function
> @@ -0,0 +1,4 @@
> +const RIGHT = function (a, b) {
> +	
> +    return a + b; // ChangeMe
> +};
> diff --git a/t/t4018/javascript-assignment-of-arrow-function b/t/t4018/javascript-assignment-of-arrow-function
> new file mode 100644
> index 0000000000..24ce517b7a
> --- /dev/null
> +++ b/t/t4018/javascript-assignment-of-arrow-function
> @@ -0,0 +1,4 @@
> +const RIGHT = (a, b) => {
> +	
> +    return a + b; // ChangeMe
> +};
> diff --git a/t/t4018/javascript-assignment-of-arrow-function-2 b/t/t4018/javascript-assignment-of-arrow-function-2
> new file mode 100644
> index 0000000000..bbf5de369e
> --- /dev/null
> +++ b/t/t4018/javascript-assignment-of-arrow-function-2
> @@ -0,0 +1,4 @@
> +const RIGHT = (a, b)=>{
> +	
> +    return a + b; // ChangeMe
> +};
> diff --git a/t/t4018/javascript-assignment-of-arrow-function-3 b/t/t4018/javascript-assignment-of-arrow-function-3
> new file mode 100644
> index 0000000000..4a07aa3259
> --- /dev/null
> +++ b/t/t4018/javascript-assignment-of-arrow-function-3
> @@ -0,0 +1,4 @@
> +const RIGHT=test=>{
> +	
> +    return test + 1; // ChangeMe
> +};

These are all variable assignments of anonymous functions, so we won't
'technically' be showing the function name in the diff, but as a
practical matter, they are _probably_ referred to by this name
consistently. So including them makes sense.

> diff --git a/t/t4018/javascript-assignment-of-named-function b/t/t4018/javascript-assignment-of-named-function
> new file mode 100644
> index 0000000000..bfc486ebef
> --- /dev/null
> +++ b/t/t4018/javascript-assignment-of-named-function
> @@ -0,0 +1,4 @@
> +const RIGHT = function test (a, b) {
> +	
> +    return a + b; // ChangeMe
> +};
> diff --git a/t/t4018/javascript-async-function b/t/t4018/javascript-async-function
> new file mode 100644
> index 0000000000..993e6926bf
> --- /dev/null
> +++ b/t/t4018/javascript-async-function
> @@ -0,0 +1,4 @@
> +async function RIGHT(a, b) {
> +  
> +  return a + b; // ChangeMe
> +}
> diff --git a/t/t4018/javascript-export-async-function b/t/t4018/javascript-export-async-function
> new file mode 100644
> index 0000000000..fecbd669d7
> --- /dev/null
> +++ b/t/t4018/javascript-export-async-function
> @@ -0,0 +1,4 @@
> +export async function RIGHT(a, b) {
> +  
> +  return a + b; // ChangeMe
> +}
> diff --git a/t/t4018/javascript-export-function b/t/t4018/javascript-export-function
> new file mode 100644
> index 0000000000..b5acbb2b08
> --- /dev/null
> +++ b/t/t4018/javascript-export-function
> @@ -0,0 +1,4 @@
> +export function RIGHT(a, b) {
> +  
> +  return a + b; // ChangeMe
> +}

These look good; the 'export' statements are part of the ES modules
feature. I'm not sure if it makes sense to explicitly test these cases
unless we have reason to believe that the 'export' keyword will affect
the matching.

> diff --git a/t/t4018/javascript-exports-anomyous-function b/t/t4018/javascript-exports-anomyous-function
> new file mode 100644
> index 0000000000..6786cbda8d
> --- /dev/null
> +++ b/t/t4018/javascript-exports-anomyous-function
> @@ -0,0 +1,4 @@
> +exports.setFlagged = RIGHT => {
> +	
> +    return ChangeMe;
> +};
> diff --git a/t/t4018/javascript-exports-anomyous-function-2 b/t/t4018/javascript-exports-anomyous-function-2
> new file mode 100644
> index 0000000000..883569f40d
> --- /dev/null
> +++ b/t/t4018/javascript-exports-anomyous-function-2
> @@ -0,0 +1,4 @@
> +exports.RIGHT = (a, b, runtime) => {
> +	
> +    return ChangeMe;
> +};
> diff --git a/t/t4018/javascript-exports-function b/t/t4018/javascript-exports-function
> new file mode 100644
> index 0000000000..63b79f5991
> --- /dev/null
> +++ b/t/t4018/javascript-exports-function
> @@ -0,0 +1,4 @@
> +exports.RIGHT = function(document) {
> +    
> +    return ChangeMe;
> +}

I don't think we should include 'exports.foo = bar'. To my knowledge,
this is _not_ a standard ES feature, but rather the CommonJS module
system popularized by Node.js [1] and other frameworks. To confirm this,
I searched the ES spec and did not find any reference to exports.* [2].

Even if we wanted to support nonstandard 'language features' (and this
label is tenuous at best, CommonJS is not trying to replace the ES
modules standard AFAIK), Node.js is also starting to support ES modules,
so I don't think this is a good long term direction for Git.

[1] https://nodejs.org/api/modules.html
[2] https://262.ecma-international.org/12.0/#sec-exports

> diff --git a/t/t4018/javascript-function b/t/t4018/javascript-function
> new file mode 100644
> index 0000000000..0cc0bf54e7
> --- /dev/null
> +++ b/t/t4018/javascript-function
> @@ -0,0 +1,4 @@
> +function RIGHT(a, b) {
> +
> +  return a + b; // ChangeMe
> +}
> diff --git a/t/t4018/javascript-function-2 b/t/t4018/javascript-function-2
> new file mode 100644
> index 0000000000..06cfb779f0
> --- /dev/null
> +++ b/t/t4018/javascript-function-2
> @@ -0,0 +1,10 @@
> +function test(a, b) {
> +  return {
> +			RIGHT: function () {
> +				currentUpdateRemovedChunks.forEach(function (chunkId) {
> +					delete $installedChunks$[chunkId];
> +				});
> +				currentUpdateRemovedChunks = ChangeMe;
> +   }
> +  }
> +}

There is also the ES2015 'method shorthand' syntax [3], e.g. `bar` in:

  const foo = {
    bar() {
      console.log('hi');
    }
  }

[3] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Method_definitions

> diff --git a/t/t4018/javascript-function-belong-to-IIFE b/t/t4018/javascript-function-belong-to-IIFE
> new file mode 100644
> index 0000000000..6e5fe858c0
> --- /dev/null
> +++ b/t/t4018/javascript-function-belong-to-IIFE
> @@ -0,0 +1,6 @@
> +(function () {
> +  this.$RIGHT = function (needle, modifier) {
> +      let a = 5;
> +      return ChangeMe;
> +  };
> +}).call(aaaa.prototype);

 Does the IIFE matter in this case? This line:

  this.$RIGHT = function (needle, modifier) {

looks extremely similar to the previous test of `foo = function bar()`.

Or perhaps this is meant to demonstrate the edge case of "matching in a
complicated construct"? If so, perhaps we should test other edge cases
like:

   function WRONG() {
     let RIGHT = function (needle, modifier) {
         let a = 5;
         return ChangeMe;
     };
   }

> diff --git a/t/t4018/javascript-function-in-class b/t/t4018/javascript-function-in-class
> new file mode 100644
> index 0000000000..0cc0a26612
> --- /dev/null
> +++ b/t/t4018/javascript-function-in-class
> @@ -0,0 +1,6 @@
> +class Test {
> +  RIGHT() {
> +    let a = 4;
> +    let b = ChangeMe;
> +  }
> +}
> diff --git a/t/t4018/javascript-function-in-class-2 b/t/t4018/javascript-function-in-class-2
> new file mode 100644
> index 0000000000..725495fe55
> --- /dev/null
> +++ b/t/t4018/javascript-function-in-class-2
> @@ -0,0 +1,11 @@
> +class Test {
> +  RIGHT(
> +      aaaaaaaaaa,
> +      bbbbbbbbbb,
> +      cccccccccc,
> +      dddddddddd
> +  ) {
> +    let a = 4;
> +    let b = ChangeMe;
> +  }
> +}
> diff --git a/t/t4018/javascript-function-in-class-3 b/t/t4018/javascript-function-in-class-3
> new file mode 100644
> index 0000000000..e9b20728b2
> --- /dev/null
> +++ b/t/t4018/javascript-function-in-class-3
> @@ -0,0 +1,10 @@
> +class Test {
> +  RIGHT(aaaaaaaaaa,
> +      bbbbbbbbbb,
> +      cccccccccc,
> +      dddddddddd
> +  ) {
> +    let a = 4;
> +    let b = ChangeMe;
> +  }
> +}
> diff --git a/t/t4018/javascript-function-in-object-literal b/t/t4018/javascript-function-in-object-literal
> new file mode 100644
> index 0000000000..021cc706dd
> --- /dev/null
> +++ b/t/t4018/javascript-function-in-object-literal
> @@ -0,0 +1,7 @@
> +const obj = {
> +    RIGHT: function (elems, callback, arg) {
> +        var length, value;
> +        // ...
> +        return ChangeMe
> +    }
> +}
> diff --git a/t/t4018/javascript-generator-function b/t/t4018/javascript-generator-function
> new file mode 100644
> index 0000000000..dc7793939f
> --- /dev/null
> +++ b/t/t4018/javascript-generator-function
> @@ -0,0 +1,4 @@
> +function* RIGHT(a, b) {
> +  
> +  return a + b; // ChangeMe
> +}
> diff --git a/t/t4018/javascript-generator-function-2 b/t/t4018/javascript-generator-function-2
> new file mode 100644
> index 0000000000..950676a612
> --- /dev/null
> +++ b/t/t4018/javascript-generator-function-2
> @@ -0,0 +1,4 @@
> +function *RIGHT(a, b) {
> +  
> +  return a + b; // ChangeMe
> +}
> diff --git a/t/t4018/javascript-getter-function-in-class b/t/t4018/javascript-getter-function-in-class
> new file mode 100644
> index 0000000000..9a5aee39f7
> --- /dev/null
> +++ b/t/t4018/javascript-getter-function-in-class
> @@ -0,0 +1,6 @@
> +class Test {
> +  get RIGHT() {
> +    let a = 4;
> +    let b = ChangeMe;
> +  }
> +}
> diff --git a/t/t4018/javascript-setter-function-in-class b/t/t4018/javascript-setter-function-in-class
> new file mode 100644
> index 0000000000..dc5f288665
> --- /dev/null
> +++ b/t/t4018/javascript-setter-function-in-class
> @@ -0,0 +1,6 @@
> +class Test {
> +  set RIGHT() {
> +    let a = 4;
> +    let b = ChangeMe;
> +  }
> +}
> diff --git a/t/t4018/javascript-skip-function-call-statement b/t/t4018/javascript-skip-function-call-statement
> new file mode 100644
> index 0000000000..321993c27e
> --- /dev/null
> +++ b/t/t4018/javascript-skip-function-call-statement
> @@ -0,0 +1,7 @@
> +class Test {
> +  static RIGHT() {
> +    haha();
> +    haha2()
> +    let b = ChangeMe;
> +  }
> +}
> diff --git a/t/t4018/javascript-skip-keywords b/t/t4018/javascript-skip-keywords
> new file mode 100644
> index 0000000000..5584970b58
> --- /dev/null
> +++ b/t/t4018/javascript-skip-keywords
> @@ -0,0 +1,34 @@
> +function RIGHT(a, b) {
> +  import("./async1")
> +  if (a > 1) {
> +    // ...
> +  }
> +  do {
> +    // ...
> +  } while (i < 5);
> +  for (const element of array1) {
> +    console.log(element)
> +  }
> +  with(o) {
> +    console.log(x)
> +  }
> +  switch (expr) {
> +    case 'a':
> +      // ...
> +      break;
> +    case 'b':
> +      // ...
> +      break;
> +    default:
> +      // ...
> +  }
> +  try {
> +    // ...
> +    return (a + c)
> +  } 
> +  catch (error) {
> +    // ...
> +  }
> +
> +  return a + b; // ChangeMe
> +}
> diff --git a/t/t4018/javascript-static-function-in-class b/t/t4018/javascript-static-function-in-class
> new file mode 100644
> index 0000000000..fbf0b7ca3d
> --- /dev/null
> +++ b/t/t4018/javascript-static-function-in-class
> @@ -0,0 +1,6 @@
> +class Test {
> +  static RIGHT() {
> +    let a = 4;
> +    let b = ChangeMe;
> +  }
> +}

The rest of the test cases look good.
Johannes Sixt March 15, 2022, 7:40 a.m. UTC | #3
Am 14.03.22 um 18:20 schrieb Glen Choo:
> xing zhi jiang <a97410985new@gmail.com> writes:
>> diff --git a/t/t4018/javascript-exports-function b/t/t4018/javascript-exports-function
>> new file mode 100644
>> index 0000000000..63b79f5991
>> --- /dev/null
>> +++ b/t/t4018/javascript-exports-function
>> @@ -0,0 +1,4 @@
>> +exports.RIGHT = function(document) {
>> +    
>> +    return ChangeMe;
>> +}
> 
> I don't think we should include 'exports.foo = bar'. To my knowledge,
> this is _not_ a standard ES feature, but rather the CommonJS module
> system popularized by Node.js [1] and other frameworks. To confirm this,
> I searched the ES spec and did not find any reference to exports.* [2].
> 
> Even if we wanted to support nonstandard 'language features' (and this
> label is tenuous at best, CommonJS is not trying to replace the ES
> modules standard AFAIK), Node.js is also starting to support ES modules,
> so I don't think this is a good long term direction for Git.

It is not a priority to model hunk header regular expressions after some
standard and to ignore stuff that is outside the standard. The goal is
to make them useful in a majority of cases. If there exists a noticeable
chunk of code that uses non-standard constructs, then that is worth
being supported.

> 
> [1] https://nodejs.org/api/modules.html
> [2] https://262.ecma-international.org/12.0/#sec-exports

-- Hannes
Glen Choo March 15, 2022, 6:51 p.m. UTC | #4
Johannes Sixt <j6t@kdbg.org> writes:

> Am 14.03.22 um 18:20 schrieb Glen Choo:
>> xing zhi jiang <a97410985new@gmail.com> writes:
>>> diff --git a/t/t4018/javascript-exports-function b/t/t4018/javascript-exports-function
>>> new file mode 100644
>>> index 0000000000..63b79f5991
>>> --- /dev/null
>>> +++ b/t/t4018/javascript-exports-function
>>> @@ -0,0 +1,4 @@
>>> +exports.RIGHT = function(document) {
>>> +    
>>> +    return ChangeMe;
>>> +}
>> 
>> I don't think we should include 'exports.foo = bar'. To my knowledge,
>> this is _not_ a standard ES feature, but rather the CommonJS module
>> system popularized by Node.js [1] and other frameworks. To confirm this,
>> I searched the ES spec and did not find any reference to exports.* [2].
>> 
>> Even if we wanted to support nonstandard 'language features' (and this
>> label is tenuous at best, CommonJS is not trying to replace the ES
>> modules standard AFAIK), Node.js is also starting to support ES modules,
>> so I don't think this is a good long term direction for Git.
>
> It is not a priority to model hunk header regular expressions after some
> standard and to ignore stuff that is outside the standard. The goal is
> to make them useful in a majority of cases. If there exists a noticable
> chunk of code that uses non-standard constructs, then that is worth
> being supported.

Interesting, I'll take note. I'm still personally not keen on supporting
CommonJS-only patterns when we are purportedly trying to show diffs for
JavaScript, but if we think this fits the style, I'm happy to oblige.

So the question becomes "Is there a significant amount of code that uses
this pattern?" Probably - this is a fairly common pattern in Node.js
after all. But in my experience,

 module.exports.RIGHT = function(document) {
     
     return ChangeMe;
 }

is even more common. The difference between 'module.exports' and
'exports' isn't worth going into (StackOverflow has all the answers, for
the curious), but if we're taking the approach of supporting CommonJS,
I'd like to be consistent and also support 'module.exports', i.e.
perhaps change:

  "^(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"

to something like:

  "^((module\\.)?exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
Junio C Hamano March 15, 2022, 7:22 p.m. UTC | #5
Glen Choo <chooglen@google.com> writes:

> Interesting, I'll take note. I'm still personally not keen on supporting
> CommonJS-only patterns when we are purportedly trying to show diffs for
> JavaScript, but if we think this fits the style, I'm happy to oblige.

The question is, with these patterns that are aware of CommonJS
convention, would your bog-standard-and-boring vanilla JS code be
detected incorrectly?  Becoming aware of popular conventions without
hurting others would be a good thing.

And the "popular conventions" does not have to be limited to
CommonJS/Node.
Glen Choo March 15, 2022, 9:34 p.m. UTC | #6
Junio C Hamano <gitster@pobox.com> writes:

> Glen Choo <chooglen@google.com> writes:
>
>> Interesting, I'll take note. I'm still personally not keen on supporting
>> CommonJS-only patterns when we are purportedly trying to show diffs for
>> JavaScript, but if we think this fits the style, I'm happy to oblige.
>
> The question is, with these patterns that are aware of CommonJS
> convention, would your bog-standard-and-boring vanilla JS code be
> detected incorrectly?  Becoming aware of popular conventions without
> hurting others would be a good thing.
>
> And the "popular conventions" does not have to be limited to
> CommonJS/Node.

From the perspective of "'exports' is a special name", yes, we could
detect vanilla JS code 'incorrectly' because, in vanilla JS, the names
'exports' or 'module.exports' are not special. So perhaps, one could
imagine a browser-side script that deals with "imports" and "exports" as
part of their business:

  const exports = {
    quantity: 1,
    type: 'boxes',
  };
  exports.getQuantity = () => {
    foo();
  };

This diff driver would mistakenly detect `exports.getQuantity = () =>
{`.

Although, the more I think about it, the spirit of this patch seems to
be "we want to show headers whenever we think we are in a function", so
we don't actually need to treat 'exports' or 'module.exports' specially
at all, e.g. this case should also pass our diff driver tests:

  const foo = {};
  foo.RIGHT = () => {

    ChangeMe();
  };

and if we do this, we will correctly handle 'exports' and
'module.exports' anyway by virtue of them being plain old JS objects.
xing zhi jiang April 3, 2022, 1:17 p.m. UTC | #7
Sorry for the late reply. I've been busy the last two weeks.

On Mon, 14 Mar 2022 at 05:54, Johannes Sixt <j6t@kdbg.org> wrote:
>
> Am 12.03.22 um 17:48 schrieb xing zhi jiang:
> > diff --git a/userdiff.c b/userdiff.c
> > index 8578cb0d12..51bfe4021d 100644
> > --- a/userdiff.c
> > +++ b/userdiff.c
> > @@ -168,6 +168,38 @@ PATTERNS("java",
> >        "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
> >        "|[-+*/<>%&^|=!]="
> >        "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"),
> > +
> > +PATTERNS("javascript",
> > +      /* don't match the expression may contain parenthesis, because it is not a function declaration */
> > +      "!^[ \t]*(if|do|while|for|with|switch|catch|import|return)\n"
> > +      /* don't match statement */
> > +      "!;\n"
> > +      /* match normal function */
> > +      "^((export[\t ]+)?(async[\t ]+)?function[\t ]*[\t *]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)\n"
> > +      /* match JavaScript variable declaration with a lambda expression */
> > +      "^[\t ]*((const|let|var)[\t ]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*"
> > +      "(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>[\t ]*\\{?)\n"
>
> It would help readability if this second line of this regex were
> indented because it is a continuation of the first line.
>
This will be fixed in the v3 patch.

> > +      /* match exports for anonymous fucntion */
> > +      "^(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
> > +      /* match assign function to LHS */
> > +      "^(.*=[\t ]*function[\t ]*([$_[:alpha:]][$_[:alnum:]]*)?[\t ]*\\(.*)\n"
>
> This should be written as
>
>          "^(.*=[\t ]*function[\t ]*([$_[:alpha:]][$_[:alnum:]]*[\t ]*)?\\(.*)\n"
>
> Notice that the whitespace after the identifier can only appear when
> there is actually an identifier. The point is to reduce the different
> matches permitted by the sub-expression "[\t ]*[\t ]*" when there is no
> identifier in the text.
>
> Can the keyword function ever be followed by a number? I guess not. Then
> [$_[:alpha:]][$_[:alnum:]]* could be reduced to [$_[:alnum:]]+
This will be fixed in the v3 patch.

> > +      /* match normal function in object literal */
> > +      "^[\t ]*([$_[:alpha:]][$_[:alnum:]]*[\t ]*:[\t ]*function[\t ].*)\n"
> > +      /* don't match the function in class, which has more than one ident level */
> > +      "!^(\t{2,}|[ ]{5,})\n"
> > +      /* match function in class */
> > +      "^[\t ]*((static[\t ]+)?((async|get|set)[\t ]+)?[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)",
> > +      /* word regex */
> > +      /* hexIntegerLiteral, octalIntegerLiteral, binaryIntegerLiteral, DecimalLiteral and its big version */
> > +      "(0[xXoObB])?[0-9a-fA-F][_0-9a-fA-F]*n?"
> > +      /* DecimalLiteral may be float */
> > +      "|(0|[1-9][_0-9]*)?\\.?[0-9][_0-9]*([eE][+-]?[_0-9]+)?"
>
> Having alternatives that begin with an optional part make the regex
> evaluation comparatively inefficient. In particular, both alternatives
> above match a decimal integer. I suggest to have the first alternative
> only for hex, octal, and binary integers, and the second for all decimal
> numbers including floatingpoint:
>
>          /* hexIntegerLiteral, octalIntegerLiteral, binaryIntegerLiteral, and
> their big versions */
>          "0[xXoObB][_0-9a-fA-F]+n?"
>          /* DecimalLiteral may be float */
>          "|[0-9][_0-9]*(\\.[_0-9]*|n)?([eE][+-]?[_0-9]+)?"
>
> and if floating point literals can begin with a decimal point, then we
> also need
>
>          "|\\.[0-9][_0-9]*([eE][+-]?[_0-9]+)?"
>
I agree with separating the decimal regex from the others. In JavaScript,
floating-point numbers can also start with a dot, so a new regex is needed:
"|\\.[0-9][_0-9]*([eE][+-]?[_0-9]+)?".
This will be fixed in the v3 patch.
xing zhi jiang April 3, 2022, 1:20 p.m. UTC | #8
On Wed, 16 Mar 2022 at 02:51, Glen Choo <chooglen@google.com> wrote:
>
> Johannes Sixt <j6t@kdbg.org> writes:
> So the question becomes "Is there a significant amount of code that uses
> this pattern?" Probably - this is a fairly common pattern in Node.js
> after all. But in my experience,
>
>  module.exports.RIGHT = function(document) {
>
>      return ChangeMe;
>  }
>
> is even more common. The difference between 'module.exports' and
> 'exports' isn't worth going into (StackOverflow has all the answers, for
> the curious), but if we're taking the approach of supporting CommonJS,
> I'd like to be consistent and also support 'module.exports', i.e.
> perhaps change:
>
>   "^(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
>
> to something like:
>
>   "^((module.)?exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
I agree that "module.exports" should also be supported. This will be applied in
the v3 patch.
xing zhi jiang April 3, 2022, 1:21 p.m. UTC | #9
Sorry for the late reply; I was busy for the last two weeks. Thank you
for your review.

On Tue, 15 Mar 2022 at 01:20, Glen Choo <chooglen@google.com> wrote:
>
> > diff --git a/t/t4018/javascript-function b/t/t4018/javascript-function
> > new file mode 100644
> > index 0000000000..0cc0bf54e7
> > --- /dev/null
> > +++ b/t/t4018/javascript-function
> > @@ -0,0 +1,4 @@
> > +function RIGHT(a, b) {
> > +
> > +  return a + b; // ChangeMe
> > +}
> > diff --git a/t/t4018/javascript-function-2 b/t/t4018/javascript-function-2
> > new file mode 100644
> > index 0000000000..06cfb779f0
> > --- /dev/null
> > +++ b/t/t4018/javascript-function-2
> > @@ -0,0 +1,10 @@
> > +function test(a, b) {
> > +  return {
> > +                     RIGHT: function () {
> > +                             currentUpdateRemovedChunks.forEach(function (chunkId) {
> > +                                     delete $installedChunks$[chunkId];
> > +                             });
> > +                             currentUpdateRemovedChunks = ChangeMe;
> > +   }
> > +  }
> > +}
>
> There is also the ES2015 'method shorthand' syntax [3], e.g. `bar` in:
>
>   const foo = {
>     bar() {
>       console.log('hi');
>     }
>   }
>
> [3] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Method_definitions
>
The ES2015 method shorthand is already matched by the regex for
functions in classes:
"^[\t ]*((static[\t ]+)?((async|get|set)[\t
]+)?[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)"
So I will update the comment for this regex in the v3 patch. The comment
will be "/* match function in class and ES2015 method shorthand */"

> > diff --git a/t/t4018/javascript-function-belong-to-IIFE b/t/t4018/javascript-function-belong-to-IIFE
> > new file mode 100644
> > index 0000000000..6e5fe858c0
> > --- /dev/null
> > +++ b/t/t4018/javascript-function-belong-to-IIFE
> > @@ -0,0 +1,6 @@
> > +(function () {
> > +  this.$RIGHT = function (needle, modifier) {
> > +      let a = 5;
> > +      return ChangeMe;
> > +  };
> > +}).call(aaaa.prototype);
>
>  Does the IIFE matter in this case? This line:
>
>   this.$RIGHT = function (needle, modifier) {
>
> looks extremely similar to the previous test of `foo = function bar()`.
>
> Or perhaps this is meant to demonstrate the edge case of "matching in a
> complicated construct"? If so, perhaps we should test other edge cases
> like:
>
>    function WRONG() {
>      let RIGHT = function (needle, modifier) {
>          let a = 5;
>          return ChangeMe;
>      };
>    }
>
I now realize this test case is redundant, because I trust the
function keyword: the regex "^(.*=[\t ]*function[\t
]*([$_[:alnum:]]+[\t ]*)?\\(.*)\n" is loose about the LHS.
I will remove this case in the v3 patch.
xing zhi jiang April 3, 2022, 1:24 p.m. UTC | #10
On Wed, 16 Mar 2022 at 05:34, Glen Choo <chooglen@google.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> > Glen Choo <chooglen@google.com> writes:
> >
> >> Interesting, I'll take note. I'm still personally not keen on supporting
> >> CommonJS-only patterns when we are purportedly trying to show diffs for
> >> JavaScript, but if we think this fits the style, I'm happy to oblige.
> >
> > The question is, with these patterns that are aware of CommonJS
> > convention, would your bog-standard-and-boring vanilla JS code be
> > detected incorrectly?  Becoming aware of popular conventions without
> > hurting others would be a good thing.
> >
> > And the "popular conventions" does not have to be limited to
> > CommonJS/Node.
>
> From the perspective of "'exports' is a special name", yes, we could
> detect vanilla JS code 'incorrectly' because, in vanilla JS, the names
> 'exports' or 'module.exports' are not special. So perhaps, one could
> imagine a browser-side script that deals with "imports" and "exports" as
> part of their business:
>
>   const exports = {
>     quantity: 1,
>     type: 'boxes',
>   };
>   exports.getQuantity = () => {
>     foo();
>   };
>
> This diff driver would mistakenly detect `exports.getQuantity = () =>
> {`.
>
> Although, the more I think about it, the spirit of this patch seems to
> be "we want to show headers whenever we think we are in a function", so
> we don't actually need to treat 'exports' or 'module.exports' specially
> at all, e.g. this case should also pass our diff driver tests:
>
>   const foo = {};
>   foo.RIGHT = () => {
>
>     ChangeMe();
>   };
>
> and if we do this, we will correctly handle 'exports' and
> 'module.exports' anyway by virtue of them being plain old JS objects.
The spirit of this patch is to show headers when we are in a
function body (which is a large code block), because this helps
users understand the context.
It should also prevent matching an unrelated function, e.g.:
    function WRONG() {
        // ...
    }
    const foo = {};
    foo.RIGHT = () => {

      ChangeMe();
    };
If we don't match "foo.RIGHT = () => {", the hunk header may show "function
WRONG() {" instead, which would be very misleading.
So in the v3 patch I do not treat `exports` as a special keyword, and
code like "foo.RIGHT = () => {" is matched.
The regex would be
"((module\\.)?[$_[:alpha:]][$_[:alnum:]]*\\.[$_[:alpha:]][$_[:alnum:]]*[\t
]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)"
diff mbox series

Patch

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 60984a4682..a8e3e4d735 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -828,6 +828,8 @@  patterns are available:
 
 - `java` suitable for source code in the Java language.
 
+- `javascript` suitable for source code in the JavaScript language.
+
 - `markdown` suitable for Markdown documents.
 
 - `matlab` suitable for source code in the MATLAB and Octave languages.
diff --git a/t/t4018/javascript-assignment-of-anonymous-function b/t/t4018/javascript-assignment-of-anonymous-function
new file mode 100644
index 0000000000..b6f2ccccfc
--- /dev/null
+++ b/t/t4018/javascript-assignment-of-anonymous-function
@@ -0,0 +1,4 @@ 
+const RIGHT = function (a, b) {
+	
+    return a + b; // ChangeMe
+};
diff --git a/t/t4018/javascript-assignment-of-arrow-function b/t/t4018/javascript-assignment-of-arrow-function
new file mode 100644
index 0000000000..24ce517b7a
--- /dev/null
+++ b/t/t4018/javascript-assignment-of-arrow-function
@@ -0,0 +1,4 @@ 
+const RIGHT = (a, b) => {
+	
+    return a + b; // ChangeMe
+};
diff --git a/t/t4018/javascript-assignment-of-arrow-function-2 b/t/t4018/javascript-assignment-of-arrow-function-2
new file mode 100644
index 0000000000..bbf5de369e
--- /dev/null
+++ b/t/t4018/javascript-assignment-of-arrow-function-2
@@ -0,0 +1,4 @@ 
+const RIGHT = (a, b)=>{
+	
+    return a + b; // ChangeMe
+};
diff --git a/t/t4018/javascript-assignment-of-arrow-function-3 b/t/t4018/javascript-assignment-of-arrow-function-3
new file mode 100644
index 0000000000..4a07aa3259
--- /dev/null
+++ b/t/t4018/javascript-assignment-of-arrow-function-3
@@ -0,0 +1,4 @@ 
+const RIGHT=test=>{
+	
+    return test + 1; // ChangeMe
+};
diff --git a/t/t4018/javascript-assignment-of-named-function b/t/t4018/javascript-assignment-of-named-function
new file mode 100644
index 0000000000..bfc486ebef
--- /dev/null
+++ b/t/t4018/javascript-assignment-of-named-function
@@ -0,0 +1,4 @@ 
+const RIGHT = function test (a, b) {
+	
+    return a + b; // ChangeMe
+};
diff --git a/t/t4018/javascript-async-function b/t/t4018/javascript-async-function
new file mode 100644
index 0000000000..993e6926bf
--- /dev/null
+++ b/t/t4018/javascript-async-function
@@ -0,0 +1,4 @@ 
+async function RIGHT(a, b) {
+  
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-export-async-function b/t/t4018/javascript-export-async-function
new file mode 100644
index 0000000000..fecbd669d7
--- /dev/null
+++ b/t/t4018/javascript-export-async-function
@@ -0,0 +1,4 @@ 
+export async function RIGHT(a, b) {
+  
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-export-function b/t/t4018/javascript-export-function
new file mode 100644
index 0000000000..b5acbb2b08
--- /dev/null
+++ b/t/t4018/javascript-export-function
@@ -0,0 +1,4 @@ 
+export function RIGHT(a, b) {
+  
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-exports-anomyous-function b/t/t4018/javascript-exports-anomyous-function
new file mode 100644
index 0000000000..6786cbda8d
--- /dev/null
+++ b/t/t4018/javascript-exports-anomyous-function
@@ -0,0 +1,4 @@ 
+exports.setFlagged = RIGHT => {
+	
+    return ChangeMe;
+};
diff --git a/t/t4018/javascript-exports-anomyous-function-2 b/t/t4018/javascript-exports-anomyous-function-2
new file mode 100644
index 0000000000..883569f40d
--- /dev/null
+++ b/t/t4018/javascript-exports-anomyous-function-2
@@ -0,0 +1,4 @@ 
+exports.RIGHT = (a, b, runtime) => {
+	
+    return ChangeMe;
+};
diff --git a/t/t4018/javascript-exports-function b/t/t4018/javascript-exports-function
new file mode 100644
index 0000000000..63b79f5991
--- /dev/null
+++ b/t/t4018/javascript-exports-function
@@ -0,0 +1,4 @@ 
+exports.RIGHT = function(document) {
+    
+    return ChangeMe;
+}
diff --git a/t/t4018/javascript-function b/t/t4018/javascript-function
new file mode 100644
index 0000000000..0cc0bf54e7
--- /dev/null
+++ b/t/t4018/javascript-function
@@ -0,0 +1,4 @@ 
+function RIGHT(a, b) {
+
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-function-2 b/t/t4018/javascript-function-2
new file mode 100644
index 0000000000..06cfb779f0
--- /dev/null
+++ b/t/t4018/javascript-function-2
@@ -0,0 +1,10 @@ 
+function test(a, b) {
+  return {
+			RIGHT: function () {
+				currentUpdateRemovedChunks.forEach(function (chunkId) {
+					delete $installedChunks$[chunkId];
+				});
+				currentUpdateRemovedChunks = ChangeMe;
+   }
+  }
+}
diff --git a/t/t4018/javascript-function-belong-to-IIFE b/t/t4018/javascript-function-belong-to-IIFE
new file mode 100644
index 0000000000..6e5fe858c0
--- /dev/null
+++ b/t/t4018/javascript-function-belong-to-IIFE
@@ -0,0 +1,6 @@ 
+(function () {
+  this.$RIGHT = function (needle, modifier) {
+      let a = 5;
+      return ChangeMe;
+  };
+}).call(aaaa.prototype);
diff --git a/t/t4018/javascript-function-in-class b/t/t4018/javascript-function-in-class
new file mode 100644
index 0000000000..0cc0a26612
--- /dev/null
+++ b/t/t4018/javascript-function-in-class
@@ -0,0 +1,6 @@ 
+class Test {
+  RIGHT() {
+    let a = 4;
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4018/javascript-function-in-class-2 b/t/t4018/javascript-function-in-class-2
new file mode 100644
index 0000000000..725495fe55
--- /dev/null
+++ b/t/t4018/javascript-function-in-class-2
@@ -0,0 +1,11 @@ 
+class Test {
+  RIGHT(
+      aaaaaaaaaa,
+      bbbbbbbbbb,
+      cccccccccc,
+      dddddddddd
+  ) {
+    let a = 4;
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4018/javascript-function-in-class-3 b/t/t4018/javascript-function-in-class-3
new file mode 100644
index 0000000000..e9b20728b2
--- /dev/null
+++ b/t/t4018/javascript-function-in-class-3
@@ -0,0 +1,10 @@ 
+class Test {
+  RIGHT(aaaaaaaaaa,
+      bbbbbbbbbb,
+      cccccccccc,
+      dddddddddd
+  ) {
+    let a = 4;
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4018/javascript-function-in-object-literal b/t/t4018/javascript-function-in-object-literal
new file mode 100644
index 0000000000..021cc706dd
--- /dev/null
+++ b/t/t4018/javascript-function-in-object-literal
@@ -0,0 +1,7 @@ 
+const obj = {
+    RIGHT: function (elems, callback, arg) {
+        var length, value;
+        // ...
+        return ChangeMe
+    }
+}
diff --git a/t/t4018/javascript-generator-function b/t/t4018/javascript-generator-function
new file mode 100644
index 0000000000..dc7793939f
--- /dev/null
+++ b/t/t4018/javascript-generator-function
@@ -0,0 +1,4 @@ 
+function* RIGHT(a, b) {
+  
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-generator-function-2 b/t/t4018/javascript-generator-function-2
new file mode 100644
index 0000000000..950676a612
--- /dev/null
+++ b/t/t4018/javascript-generator-function-2
@@ -0,0 +1,4 @@ 
+function *RIGHT(a, b) {
+  
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-getter-function-in-class b/t/t4018/javascript-getter-function-in-class
new file mode 100644
index 0000000000..9a5aee39f7
--- /dev/null
+++ b/t/t4018/javascript-getter-function-in-class
@@ -0,0 +1,6 @@ 
+class Test {
+  get RIGHT() {
+    let a = 4;
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4018/javascript-setter-function-in-class b/t/t4018/javascript-setter-function-in-class
new file mode 100644
index 0000000000..dc5f288665
--- /dev/null
+++ b/t/t4018/javascript-setter-function-in-class
@@ -0,0 +1,6 @@ 
+class Test {
+  set RIGHT() {
+    let a = 4;
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4018/javascript-skip-function-call-statement b/t/t4018/javascript-skip-function-call-statement
new file mode 100644
index 0000000000..321993c27e
--- /dev/null
+++ b/t/t4018/javascript-skip-function-call-statement
@@ -0,0 +1,7 @@ 
+class Test {
+  static RIGHT() {
+    haha();
+    haha2()
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4018/javascript-skip-keywords b/t/t4018/javascript-skip-keywords
new file mode 100644
index 0000000000..5584970b58
--- /dev/null
+++ b/t/t4018/javascript-skip-keywords
@@ -0,0 +1,34 @@ 
+function RIGHT(a, b) {
+  import("./async1")
+  if (a > 1) {
+    // ...
+  }
+  do {
+    // ...
+  } while (i < 5);
+  for (const element of array1) {
+    console.log(element)
+  }
+  with(o) {
+    console.log(x)
+  }
+  switch (expr) {
+    case 'a':
+      // ...
+      break;
+    case 'b':
+      // ...
+      break;
+    default:
+      // ...
+  }
+  try {
+    // ...
+    return (a + c)
+  } 
+  catch (error) {
+    // ...
+  }
+
+  return a + b; // ChangeMe
+}
diff --git a/t/t4018/javascript-static-function-in-class b/t/t4018/javascript-static-function-in-class
new file mode 100644
index 0000000000..fbf0b7ca3d
--- /dev/null
+++ b/t/t4018/javascript-static-function-in-class
@@ -0,0 +1,6 @@ 
+class Test {
+  static RIGHT() {
+    let a = 4;
+    let b = ChangeMe;
+  }
+}
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index d5abcf4b4c..33073edeca 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -324,6 +324,7 @@  test_language_driver dts
 test_language_driver fortran
 test_language_driver html
 test_language_driver java
+test_language_driver javascript
 test_language_driver matlab
 test_language_driver objc
 test_language_driver pascal
diff --git a/t/t4034/javascript/expect b/t/t4034/javascript/expect
new file mode 100644
index 0000000000..419d61903b
--- /dev/null
+++ b/t/t4034/javascript/expect
@@ -0,0 +1,54 @@ 
+<BOLD>diff --git a/pre b/post<RESET>
+<BOLD>index 18f4796..46f9b62 100644<RESET>
+<BOLD>--- a/pre<RESET>
+<BOLD>+++ b/post<RESET>
+<CYAN>@@ -1,33 +1,33 @@<RESET>
+// DecimalLiteral<RESET>
+<RED>123<RESET>
+<RED>0.123<RESET>
+<RED>.123<RESET>
+<RED>0.123e+5<RESET>
+<RED>0.123E+5<RESET>
+<RED>0.123e5<RESET>
+<RED>1222222222222222223334444n<RESET><GREEN>124<RESET>
+<GREEN>0.124<RESET>
+<GREEN>.124<RESET>
+<GREEN>0.123e-5<RESET>
+<GREEN>0.123E-5<RESET>
+<GREEN>0.123E5<RESET>
+<GREEN>12222222222222222233344445n<RESET>
+// HexIntegerLiteral<RESET>
+<RED>0x10<RESET>
+<RED>0X6Fa1<RESET>
+<RED>0x123_456<RESET>
+<RED>0x1234182989812f1289an<RESET><GREEN>0x11<RESET>
+<GREEN>0X5Fa1<RESET>
+<GREEN>0x123_756<RESET>
+<GREEN>0x1234182989812f1289bn<RESET>
+// OctalIntegerLiteral<RESET>
+<RED>05<RESET>
+<RED>0o6<RESET>
+<RED>0O7<RESET>
+<RED>0512_567<RESET>
+<RED>0o424242424242424242424242424242666666n<RESET><GREEN>06<RESET>
+<GREEN>0o5<RESET>
+<GREEN>0O4<RESET>
+<GREEN>0511_567<RESET>
+<GREEN>0o424242424242424242424242424242666667n<RESET>
+// BinaryIntegerLiteral<RESET>
+<RED>0b1001<RESET>
+<RED>0B0110<RESET>
+<RED>0b0001_1001_0011<RESET>
+<RED>0b1111111111111111111111111111111111111n<RESET><GREEN>0b1101<RESET>
+<GREEN>0B0010<RESET>
+<GREEN>0b0001_1101_0011<RESET>
+<GREEN>0b11111111111111000011111111111111111n<RESET>
+// punctuations<RESET>
+{<RED>a<RESET><GREEN>b<RESET>} (<RED>a<RESET><GREEN>b<RESET>)
+<RED>a<RESET><GREEN>b<RESET>;
+[<RED>1,<RESET>2<GREEN>,3<RESET>]
+[<RED>1, 2,<RESET> ...<RED>params<RESET><GREEN>params_v2<RESET> ]
+a<RED><=<RESET><GREEN>=<RESET>2 a<RED>>=<RESET><GREEN>=<RESET>2 a<RED>==<RESET><GREEN>=<RESET>2 a<RED>!=<RESET><GREEN>=<RESET>2 a<RED>===<RESET><GREEN>=<RESET>2 a<RED>!==<RESET><GREEN>=<RESET>2 a<RED>^=<RESET><GREEN>=<RESET>2 a<RED>=><RESET><GREEN>=<RESET>2
+a<RED>+=<RESET><GREEN>-=<RESET>b a<RED>*=<RESET><GREEN>%=<RESET>b a<RED>**=<RESET><GREEN>&&=<RESET>b a<RED>||=<RESET><GREEN>|=<RESET>b
+b<RED>+<RESET><GREEN>-<RESET>c a<RED>--<RESET><GREEN>++<RESET> a<RED>>><RESET><GREEN><<<RESET>b a<RED>>>><RESET><GREEN>>>>=<RESET>b a<RED>>>=<RESET><GREEN><<=<RESET>b
+a<RED>&&<RESET><GREEN>&<RESET>b a<RED>||<RESET><GREEN>|<RESET>b a<RED>&&=<RESET><GREEN>??=<RESET>b
diff --git a/t/t4034/javascript/post b/t/t4034/javascript/post
new file mode 100644
index 0000000000..46f9b627e4
--- /dev/null
+++ b/t/t4034/javascript/post
@@ -0,0 +1,33 @@ 
+// DecimalLiteral
+124
+0.124
+.124
+0.123e-5
+0.123E-5
+0.123E5
+12222222222222222233344445n
+// HexIntegerLiteral
+0x11
+0X5Fa1
+0x123_756
+0x1234182989812f1289bn
+// OctalIntegerLiteral
+06
+0o5
+0O4
+0511_567
+0o424242424242424242424242424242666667n
+// BinaryIntegerLiteral
+0b1101
+0B0010
+0b0001_1101_0011
+0b11111111111111000011111111111111111n
+// punctuations
+{b} (b)
+b;
+[2,3]
+[ ...params_v2 ]
+a=2 a=2 a=2 a=2 a=2 a=2 a=2 a=2
+a-=b a%=b a&&=b a|=b
+b-c a++ a<<b a>>>=b a<<=b
+a&b a|b a??=b
diff --git a/t/t4034/javascript/pre b/t/t4034/javascript/pre
new file mode 100644
index 0000000000..18f479688c
--- /dev/null
+++ b/t/t4034/javascript/pre
@@ -0,0 +1,33 @@ 
+// DecimalLiteral
+123
+0.123
+.123
+0.123e+5
+0.123E+5
+0.123e5
+1222222222222222223334444n
+// HexIntegerLiteral
+0x10
+0X6Fa1
+0x123_456
+0x1234182989812f1289an
+// OctalIntegerLiteral
+05
+0o6
+0O7
+0512_567
+0o424242424242424242424242424242666666n
+// BinaryIntegerLiteral
+0b1001
+0B0110
+0b0001_1001_0011
+0b1111111111111111111111111111111111111n
+// punctuations
+{a} (a)
+a;
+[1,2]
+[ 1, 2, ...params ]
+a<=2 a>=2 a==2 a!=2 a===2 a!==2 a^=2 a=>2
+a+=b a*=b a**=b a||=b
+b+c a-- a>>b a>>>b a>>=b
+a&&b a||b a&&=b
diff --git a/userdiff.c b/userdiff.c
index 8578cb0d12..51bfe4021d 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -168,6 +168,38 @@  PATTERNS("java",
 	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
 	 "|[-+*/<>%&^|=!]="
 	 "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"),
+
+PATTERNS("javascript",
+	 /* don't match keywords whose expressions may contain parentheses, because they are not function declarations */
+	 "!^[ \t]*(if|do|while|for|with|switch|catch|import|return)\n"
+	 /* don't match statement */
+	 "!;\n"
+	 /* match normal function */
+	 "^((export[\t ]+)?(async[\t ]+)?function[\t ]*[\t *]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)\n"
+	 /* match JavaScript variable declaration with a lambda expression */
+	 "^[\t ]*((const|let|var)[\t ]*[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*"
+	 "(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>[\t ]*\\{?)\n"
+	 /* match exports for anonymous function */
+	 "^(exports\\.[$_[:alpha:]][$_[:alnum:]]*[\t ]*=[\t ]*(\\(.*\\)|[$_[:alpha:]][$_[:alnum:]]*)[\t ]*=>.*)\n"
+	 /* match assign function to LHS */
+	 "^(.*=[\t ]*function[\t ]*([$_[:alpha:]][$_[:alnum:]]*)?[\t ]*\\(.*)\n"
+	 /* match normal function in object literal */
+	 "^[\t ]*([$_[:alpha:]][$_[:alnum:]]*[\t ]*:[\t ]*function[\t ].*)\n"
+	 /* don't match the function in class, which has more than one indent level */
+	 "!^(\t{2,}|[ ]{5,})\n"
+	 /* match function in class */
+	 "^[\t ]*((static[\t ]+)?((async|get|set)[\t ]+)?[$_[:alpha:]][$_[:alnum:]]*[\t ]*\\(.*)",
+	 /* word regex */
+	 /* hexIntegerLiteral, octalIntegerLiteral, binaryIntegerLiteral, DecimalLiteral and its big version */
+	 "(0[xXoObB])?[0-9a-fA-F][_0-9a-fA-F]*n?"
+	 /* DecimalLiteral may be float */
+	 "|(0|[1-9][_0-9]*)?\\.?[0-9][_0-9]*([eE][+-]?[_0-9]+)?"
+	 /* punctuations */
+	 "|\\.{3}|<=|>=|==|!=|={3}|!==|\\*{2}|\\+{2}|--|<<|>>"
+	 "|>>>|&&|\\|{2}|\\?{2}|\\+=|-=|\\*=|%=|\\*{2}="
+	 "|<<=|>>=|>>>=|&=|\\|=|\\^=|&&=|\\|{2}=|\\?{2}=|=>"
+	 /* identifiers */
+	 "|[$_[:alpha:]][$_[:alnum:]]*"),
 PATTERNS("markdown",
 	 "^ {0,3}#{1,6}[ \t].*",
 	 /* -- */