diff mbox

[i-g-t,v3] lib/igt_core.c: Expand --run-subtest functionality.

Message ID 1454587617-33962-1-git-send-email-derek.j.morton@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Derek Morton Feb. 4, 2016, 12:06 p.m. UTC
Added extended wildcard support when specifying --run-subtest.

Wildcard format is as specified in rfc3977 and the uwildmat() implementation
is taken from libinn.
See https://tools.ietf.org/html/rfc3977#section-4 for a description of
allowed wildcard expressions.

v2: Use comma as list separator (Ville Syrjala)
support both ^ and ! as not operators (Dave Gordon)

v3: Updated to use uwildmat() (Dave Gordon)

Signed-off-by: Derek Morton <derek.j.morton@intel.com>
---
 COPYING                 |  21 +++
 lib/Makefile.sources    |   2 +
 lib/igt_core.c          |  17 +-
 lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/uwildmat/uwildmat.h |  24 +++
 5 files changed, 536 insertions(+), 2 deletions(-)
 create mode 100644 lib/uwildmat/uwildmat.c
 create mode 100644 lib/uwildmat/uwildmat.h

Comments

Dave Gordon Feb. 9, 2016, 2:09 p.m. UTC | #1
On 04/02/16 12:06, Derek Morton wrote:
> Added extended wildcard support when specifying --run-subtest.
>
> Wildcard format is as specified in rfc3977 and the uwildmat() implementation
> is taken from libinn.
> See https://tools.ietf.org/html/rfc3977#section-4 for a description of
> allowed wildcard expressions.
>
> v2: Use comma as list separator (Ville Syrjala)
> support both ^ and ! as not operators (Dave Gordon)
>
> v3: Updated to use uwildmat() (Dave Gordon)
>
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
> ---
>   COPYING                 |  21 +++
>   lib/Makefile.sources    |   2 +
>   lib/igt_core.c          |  17 +-
>   lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
>   lib/uwildmat/uwildmat.h |  24 +++
>   5 files changed, 536 insertions(+), 2 deletions(-)
>   create mode 100644 lib/uwildmat/uwildmat.c
>   create mode 100644 lib/uwildmat/uwildmat.h

LGTM.

Reviewed-by: Dave Gordon <david.s.gordon@intel.com>
Daniel Vetter Feb. 15, 2016, 4:55 p.m. UTC | #2
On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:
> Added extended wildcard support when specifying --run-subtest.
> 
> Wildcard format is as specified in rfc3977 and the uwildmat() implementation
> is taken from libinn.
> See https://tools.ietf.org/html/rfc3977#section-4 for a description of
> allowed wildcard expressions.
> 
> v2: Use comma as list separator (Ville Syrjala)
> support both ^ and ! as not operators (Dave Gordon)
> 
> v3: Updated to use uwildmat() (Dave Gordon)
> 
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
> ---
>  COPYING                 |  21 +++
>  lib/Makefile.sources    |   2 +
>  lib/igt_core.c          |  17 +-
>  lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/uwildmat/uwildmat.h |  24 +++

Not really a fan of copying other sources into ours. Don't we have
something ready-made that's generally available, or can we at least pull
it in as a build-dep?

Thanks, Daniel

>  5 files changed, 536 insertions(+), 2 deletions(-)
>  create mode 100644 lib/uwildmat/uwildmat.c
>  create mode 100644 lib/uwildmat/uwildmat.h
> 
> diff --git a/COPYING b/COPYING
> index b8f6753..16375f2 100644
> --- a/COPYING
> +++ b/COPYING
> @@ -106,3 +106,24 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>  IN THE SOFTWARE.
> +
> +Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> +    2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> +Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> +    2002, 2003 by The Internet Software Consortium and Rich Salz
> +
> +This code is derived from software contributed to the Internet Software
> +Consortium by Rich Salz.
> +
> +Permission to use, copy, modify, and distribute this software for any
> +purpose with or without fee is hereby granted, provided that the above
> +copyright notice and this permission notice appear in all copies.
> +
> +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> +SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 4999868..e33861e 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = 	\
>  	igt_core.h		\
>  	igt_draw.c		\
>  	igt_draw.h		\
> +	uwildmat/uwildmat.h	\
> +	uwildmat/uwildmat.c	\
>  	$(NULL)
>  
>  .PHONY: version.h.tmp
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 6b69bb7..8e0bd2e 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -56,7 +56,7 @@
>  #include <ctype.h>
>  #include <limits.h>
>  #include <locale.h>
> -#include <fnmatch.h>
> +#include <uwildmat/uwildmat.h>
>  
>  #include "drmtest.h"
>  #include "intel_chipset.h"
> @@ -209,6 +209,19 @@
>   * intel gpu to be present). Then individual subtests can be run with
>   * "--run-subtest". Usage help for tests with subtests can be obtained with the
>   * "--help" command line option.
> + *
> + * A wildcard expression can be given to --run-subtest to specify a subset of
> + * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a
> + * description of allowed wildcard expressions.
> + * Some examples of allowed wildcard expressions are:
> + *
> + * - '*basic*' match any subtest containing basic
> + * - 'basic-???' match any subtest named basic- with 3 characters after -
> + * - 'basic-[0-9]' match any subtest named basic- with a single number after -
> + * - 'basic-[^0-9]' match any subtest named basic- with a single non numerical character after -
> + * - 'basic*,advanced*' match any subtest starting basic or advanced
> + * - '*,!basic*' match any subtest not starting basic
> + * - 'basic*,!basic-render*' match any subtest starting basic but not starting basic-render
>   */
>  
>  static unsigned int exit_handler_count;
> @@ -814,7 +827,7 @@ bool __igt_run_subtest(const char *subtest_name)
>  	}
>  
>  	if (run_single_subtest) {
> -		if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
> +		if (uwildmat(subtest_name, run_single_subtest) == 0)
>  			return false;
>  		else
>  			run_single_subtest_found = true;
> diff --git a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c
> new file mode 100644
> index 0000000..2d34742
> --- /dev/null
> +++ b/lib/uwildmat/uwildmat.c
> @@ -0,0 +1,474 @@
> +/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
> +
> +This provides wild card matching originally used in InterNetNews and is
> +described in https://tools.ietf.org/html/rfc3977#section-4
> +
> +INN licence:
> +INN as a whole and all code contained in it not otherwise marked with
> +different licenses and/or copyrights is covered by the following copyright
> +and license:
> +
> +   Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> +       2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> +   Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> +       2002, 2003 by The Internet Software Consortium and Rich Salz
> +
> +   This code is derived from software contributed to the Internet Software
> +   Consortium by Rich Salz.
> +
> +   Permission to use, copy, modify, and distribute this software for any
> +   purpose with or without fee is hereby granted, provided that the above
> +   copyright notice and this permission notice appear in all copies.
> +
> +   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> +   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +   MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> +   SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> +*/
> +
> +/*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
> +**
> +**  wildmat pattern matching with Unicode UTF-8 extensions.
> +**
> +**  Do shell-style pattern matching for ?, \, [], and * characters.  Might not
> +**  be robust in face of malformed patterns; e.g., "foo[a-" could cause a
> +**  segmentation violation.  It is 8-bit clean.  (Robustness hopefully fixed
> +**  July 2000; all malformed patterns should now just fail to match anything.)
> +**
> +**  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
> +**  Rich $alz is now <rsalz@osf.org>.
> +**
> +**  April, 1991:  Replaced mutually-recursive calls with in-line code for the
> +**  star character.
> +**
> +**  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
> +**  This can greatly speed up failing wildcard patterns.  For example:
> +**
> +**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
> +**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
> +**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
> +**
> +**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
> +**  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
> +**  explanation is from Lars:
> +**
> +**  The precondition that must be fulfilled is that DoMatch will consume at
> +**  least one character in text.  This is true if *p is neither '*' nor '\0'.)
> +**  The last return has ABORT instead of false to avoid quadratic behaviour in
> +**  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With false, each
> +**  star-loop has to run to the end of the text; with ABORT only the last one
> +**  does.
> +**
> +**  Once the control of one instance of DoMatch enters the star-loop, that
> +**  instance will return either true or ABORT, and any calling instance will
> +**  therefore return immediately after (without calling recursively again).
> +**  In effect, only one star-loop is ever active.  It would be possible to
> +**  modify the code to maintain this context explicitly, eliminating all
> +**  recursive calls at the cost of some complication and loss of clarity (and
> +**  the ABORT stuff seems to be unclear enough by itself).  I think it would
> +**  be unwise to try to get this into a released version unless you have a
> +**  good test data base to try it out on.
> +**
> +**  June, 1991:  Robert Elz <kre@munnari.oz.au> added minus and close bracket
> +**  handling for character sets.
> +**
> +**  July, 2000:  Largely rewritten by Russ Allbery <rra@stanford.edu> to add
> +**  support for ',', '!', and optionally '@' to the core wildmat routine.
> +**  Broke the character class matching into a separate function for clarity
> +**  since it's infrequently used in practice, and added some simple lookahead
> +**  to significantly decrease the recursive calls in the '*' matching code.
> +**  Added support for UTF-8 as the default character set for any high-bit
> +**  characters.
> +**
> +**  For more information on UTF-8, see RFC 3629.
> +**
> +**  Please note that this file is intentionally written so that conditionally
> +**  executed expressions are on separate lines from the condition to
> +**  facilitate analysis of the coverage of the test suite using purecov.
> +**  Please preserve this.  As of March 11, 2001, purecov reports that the
> +**  accompanying test suite achieves 100% coverage of this file.
> +*/
> +
> +#include <string.h>
> +#include "uwildmat.h"
> +
> +#define ABORT -1
> +
> +/* Whether or not an octet looks like the start of a UTF-8 character. */
> +#define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
> +
> +
> +/*
> +**  Determine the length of a non-ASCII character in octets (for advancing
> +**  pointers when skipping over characters).  Takes a pointer to the start of
> +**  the character and to the last octet of the string.  If end is NULL, expect
> +**  the string pointed to by start to be nul-terminated.  If the character is
> +**  malformed UTF-8, return 1 to treat it like an eight-bit local character.
> +*/
> +static int
> +utf8_length(const unsigned char *start, const unsigned char *end)
> +{
> +    unsigned char mask = 0x80;
> +    const unsigned char *p;
> +    int length = 0;
> +    int left;
> +
> +    for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
> +        length++;
> +    if (length < 2 || length > 6)
> +        return 1;
> +    if (end != NULL && (end - start + 1) < length)
> +        return 1;
> +    left = length - 1;
> +    for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
> +        left--;
> +    return (left == 0) ? length : 1;
> +}
> +
> +
> +/*
> +**  Check whether a string contains only valid UTF-8 characters.
> +*/
> +bool
> +is_valid_utf8(const char *text)
> +{
> +    unsigned char mask;
> +    const unsigned char *p;
> +    int length;
> +    int left;
> +
> +    for (p = (const unsigned char *)text; *p != '\0';) {
> +        mask = 0x80;
> +        length = 0;
> +
> +        /* Find out the expected length of the character. */
> +        for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
> +            length++;
> +
> +        p++;
> +
> +        /* Valid ASCII. */
> +        if (length == 0)
> +            continue;
> +        
> +        /* Invalid length. */
> +        if (length < 2 || length > 6)
> +            return false;
> +
> +        /* Check that each byte looks like 10xxxxxx, except for the first. */
> +        left = length - 1;
> +        for (; left > 0 && (*p & 0xc0) == 0x80; p++)
> +            left--;
> +
> +        if (left > 0)
> +            return false;
> +    }
> +
> +    return true;
> +}
> +
> +
> +/*
> +**  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
> +**  character and to the last octet of the string, and to a uint32_t into
> +**  which to put the decoded UCS-4 value.  If end is NULL, expect the string
> +**  pointed to by start to be nul-terminated.  Returns the number of octets in
> +**  the UTF-8 encoding.  If the UTF-8 character is malformed, set result to
> +**  the decimal value of the first octet; this is wrong, but it will generally
> +**  cause the rest of the wildmat matching to do the right thing for non-UTF-8
> +**  input.
> +*/
> +static int
> +utf8_decode(const unsigned char *start, const unsigned char *end,
> +            uint32_t *result)
> +{
> +    uint32_t value = 0;
> +    int length, i;
> +    const unsigned char *p = start;
> +    unsigned char mask;
> +
> +    length = utf8_length(start, end);
> +    if (length < 2) {
> +        *result = *start;
> +        return 1;
> +    }
> +    mask = (1 << (7 - length)) - 1;
> +    value = *p & mask;
> +    p++;
> +    for (i = length - 1; i > 0; i--) {
> +        value = (value << 6) | (*p & 0x3f);
> +        p++;
> +    }
> +    *result = value;
> +    return length;
> +}
> +
> +
> +/*
> +**  Match a character class against text, a UCS-4 character.  start is a
> +**  pointer to the first character of the character class, end a pointer to
> +**  the last.  Returns whether the class matches that character.
> +*/
> +static bool
> +match_class(uint32_t text, const unsigned char *start,
> +            const unsigned char *end)
> +{
> +    bool reversed, allowrange;
> +    const unsigned char *p = start;
> +    uint32_t first = 0;
> +    uint32_t last;
> +
> +    /* Check for an inverted character class (starting with ^).  If the
> +       character matches the character class, we return !reversed; that way,
> +       we return true if it's a regular character class and false if it's a
> +       reversed one.  If the character doesn't match, we return reversed. */
> +    reversed = (*p == '^');
> +    if (reversed)
> +        p++;
> +
> +    /* Walk through the character class until we reach the end or find a
> +       match, handling character ranges as we go.  Only permit a range to
> +       start when allowrange is true; this allows - to be treated like a
> +       normal character as the first character of the class and catches
> +       malformed ranges like a-e-n.  We treat the character at the beginning
> +       of a range as both a regular member of the class and the beginning of
> +       the range; this is harmless (although it means that malformed ranges
> +       like m-a will match m and nothing else). */
> +    allowrange = false;
> +    while (p <= end) {
> +        if (allowrange && *p == '-' && p < end) {
> +            p++;
> +            p += utf8_decode(p, end, &last);
> +            if (text >= first && text <= last)
> +                return !reversed;
> +            allowrange = false;
> +        } else {
> +            p += utf8_decode(p, end, &first);
> +            if (text == first)
> +                return !reversed;
> +            allowrange = true;
> +        }
> +    }
> +    return reversed;
> +}
> +
> +
> +/*
> +**  Match the text against the pattern between start and end.  This is a
> +**  single pattern; a leading ! or @ must already be taken care of, and
> +**  commas must be dealt with outside of this routine.
> +*/
> +static int
> +match_pattern(const unsigned char *text, const unsigned char *start,
> +              const unsigned char *end)
> +{
> +    const unsigned char *q, *endclass;
> +    const unsigned char *p = start;
> +    bool ismeta;
> +    int matched, width;
> +    uint32_t c;
> +
> +    for (; p <= end; p++) {
> +        if (!*text && *p != '*')
> +            return ABORT;
> +
> +        switch (*p) {
> +        case '\\':
> +            if (!*++p)
> +                return ABORT;
> +            /* Fall through. */
> +
> +        default:
> +            if (*text++ != *p)
> +                return false;
> +            break;
> +
> +        case '?':
> +            text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> +            break;
> +
> +        case '*':
> +            /* Consecutive stars are equivalent to one.  Advance pattern to
> +               the character after the star. */
> +            for (++p; *p == '*'; p++)
> +                ;
> +
> +            /* A trailing star will match anything. */
> +            if (p > end)
> +                return true;
> +
> +            /* Basic algorithm: Recurse at each point where the * could
> +               possibly match.  If the match succeeds or aborts, return
> +               immediately; otherwise, try the next position.
> +
> +               Optimization: If the character after the * in the pattern
> +               isn't a metacharacter (the common case), then the * has to
> +               consume characters at least up to the next occurrence of that
> +               character in the text.  Scan forward for those points rather
> +               than recursing at every possible point to save the extra
> +               function call overhead. */
> +            ismeta = (*p == '[' || *p == '?' || *p == '\\');
> +            while (*text) {
> +                width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> +                if (ismeta) {
> +                    matched = match_pattern(text, p, end);
> +                    text += width;
> +                } else {
> +                    while (*text && *text != *p) {
> +                        text += width;
> +                        width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> +                    }
> +                    if (!*text)
> +                        return ABORT;
> +                    matched = match_pattern(++text, p + 1, end);
> +                }
> +                if (matched != false)
> +                    return matched;
> +            }
> +            return ABORT;
> +
> +        case '[':
> +            /* Find the end of the character class, making sure not to pick
> +               up a close bracket at the beginning of the class. */
> +            p++;
> +            q = p + (*p == '^') + 1;
> +            if (q > end)
> +                return ABORT;
> +            endclass = memchr(q, ']', (size_t) (end - q + 1));
> +            if (!endclass)
> +                return ABORT;
> +
> +            /* Do the heavy lifting in another function for clarity, since
> +               character classes are an uncommon case. */
> +            text += utf8_decode(text, NULL, &c);
> +            if (!match_class(c, p, endclass - 1))
> +                return false;
> +            p = endclass;
> +            break;
> +        }
> +    }
> +
> +    return (*text == '\0');
> +}
> +
> +
> +/*
> +**  Takes text and a wildmat expression; a wildmat expression is a
> +**  comma-separated list of wildmat patterns, optionally preceded by ! to
> +**  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
> +**  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
> +**  set, allow @ to introduce a poison expression (the same as !, but if it
> +**  triggers the failed match the routine returns UWILDMAT_POISON instead).
> +*/
> +static enum uwildmat
> +match_expression(const unsigned char *text, const unsigned char *start,
> +                 bool allowpoison)
> +{
> +    const unsigned char *end, *split;
> +    const unsigned char *p = start;
> +    bool reverse, escaped;
> +    bool match = false;
> +    bool poison = false;
> +    bool poisoned = false;
> +
> +    /* Handle the empty expression separately, since otherwise end will be
> +       set to an invalid pointer. */
> +    if (!*p)
> +        return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> +    end = start + strlen((const char *) start) - 1;
> +
> +    /* Main match loop.  Find each comma that separates patterns, and attempt 
> +       to match the text with each pattern in order.  The last matching
> +       pattern determines whether the whole expression matches. */
> +    for (; p <= end + 1; p = split + 1) {
> +        if (allowpoison)
> +            poison = (*p == '@');
> +        reverse = (*p == '!') || poison;
> +        if (reverse)
> +            p++;
> +
> +        /* Find the first unescaped comma, if any.  If there is none, split
> +           will be one greater than end and point at the nul at the end of
> +           the string. */
> +        for (escaped = false, split = p; split <= end; split++) {
> +            if (*split == '[') {
> +                split++;
> +                if (*split == ']')
> +                    split++;
> +                while (split <= end && *split != ']')
> +                    split++;
> +            }
> +            if (*split == ',' && !escaped)
> +                break;
> +            escaped = (*split == '\\') ? !escaped : false;
> +        }
> +
> +        /* Optimization: If match == !reverse and poison == poisoned, this
> +           pattern can't change the result, so don't do any work. */
> +        if (match == !reverse && poison == poisoned)
> +            continue;
> +        if (match_pattern(text, p, split - 1) == true) {
> +            poisoned = poison;
> +            match = !reverse;
> +        }
> +    }
> +    if (poisoned)
> +        return UWILDMAT_POISON;
> +    return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> +}
> +
> +
> +/*
> +**  User-level routine used for wildmats where @ should be treated as a
> +**  regular character.
> +*/
> +bool
> +uwildmat(const char *text, const char *pat)
> +{
> +    const unsigned char *utext = (const unsigned char *) text;
> +    const unsigned char *upat = (const unsigned char *) pat;
> +
> +    if (upat[0] == '*' && upat[1] == '\0')
> +        return true;
> +    else
> +        return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
> +}
> +
> +
> +/*
> +**  User-level routine used for wildmats that support poison matches.
> +*/
> +enum uwildmat
> +uwildmat_poison(const char *text, const char *pat)
> +{
> +    const unsigned char *utext = (const unsigned char *) text;
> +    const unsigned char *upat = (const unsigned char *) pat;
> +
> +    if (upat[0] == '*' && upat[1] == '\0')
> +        return UWILDMAT_MATCH;
> +    else
> +        return match_expression(utext, upat, true);
> +}
> +
> +
> +/*
> +**  User-level routine for simple expressions (neither , nor ! are special).
> +*/
> +bool
> +uwildmat_simple(const char *text, const char *pat)
> +{
> +    const unsigned char *utext = (const unsigned char *) text;
> +    const unsigned char *upat = (const unsigned char *) pat;
> +    size_t length;
> +
> +    if (upat[0] == '*' && upat[1] == '\0')
> +        return true;
> +    else {
> +        length = strlen(pat);
> +        return (match_pattern(utext, upat, upat + length - 1) == true);
> +    }
> +}
> diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h
> new file mode 100644
> index 0000000..2e47189
> --- /dev/null
> +++ b/lib/uwildmat/uwildmat.h
> @@ -0,0 +1,24 @@
> +
> +#ifndef UWILDMAT_H
> +#define UWILDMAT_H 1
> +
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <stdbool.h>
> +
> +/*
> +**  WILDMAT MATCHING
> +*/
> +enum uwildmat {
> +    UWILDMAT_FAIL   = 0,
> +    UWILDMAT_MATCH  = 1,
> +    UWILDMAT_POISON
> +};
> +
> +extern bool             is_valid_utf8(const char *start);
> +extern bool             uwildmat(const char *text, const char *pat);
> +extern bool             uwildmat_simple(const char *text, const char *pat);
> +extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);
> +
> +
> +#endif /* UWILDMAT_H */
> -- 
> 1.9.1
>
Dave Gordon Feb. 15, 2016, 7:29 p.m. UTC | #3
On 15/02/16 16:55, Daniel Vetter wrote:
> On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:
>> Added extended wildcard support when specifying --run-subtest.
>>
>> Wildcard format is as specified in rfc3977 and the uwildmat() implementation
>> is taken from libinn.
>> See https://tools.ietf.org/html/rfc3977#section-4 for a description of
>> allowed wildcard expressions.
>>
>> v2: Use comma as list separator (Ville Syrjala)
>> support both ^ and ! as not operators (Dave Gordon)
>>
>> v3: Updated to use uwildmat() (Dave Gordon)
>>
>> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
>> ---
>>   COPYING                 |  21 +++
>>   lib/Makefile.sources    |   2 +
>>   lib/igt_core.c          |  17 +-
>>   lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   lib/uwildmat/uwildmat.h |  24 +++
>
> Not really a fan of copying other sources into ours. Don't we have
> something ready-made that's generally available, or can we at least pull
> it in as a build-dep?
>
> Thanks, Daniel

It's a standard, RFC3977. The source is readily available for download, 
for example from the INN master site:

https://inn.eyrie.org/trac/browser/trunk/lib/uwildmat.c

It seems to be very stable (last change was 2014: "Change Russ' email 
address").

It *is* already available (in Ubuntu at least), but it's in a fairly 
obscure package that not many people will have installed. On Ubuntu, 
/usr/lib/news/libinn.a is part of package 'inn2-dev'; I understand that 
on some other distros it's in a different package (e.g. 'inn-devel' on 
CentOS).

So this is probably why Derek chose to include the source from INN 
rather than working out how to make it a prerequisite for building 
i-g-t. And it almost certainly isn't otherwise available for Android!

.Dave.
Derek Morton Feb. 16, 2016, 10:17 a.m. UTC | #4
>

>

>-----Original Message-----

>From: Gordon, David S 

>Sent: Monday, February 15, 2016 7:29 PM

>To: Daniel Vetter <daniel@ffwll.ch>; Morton, Derek J <derek.j.morton@intel.com>

>Cc: intel-gfx@lists.freedesktop.org; Gore, Tim <tim.gore@intel.com>; david.weinehall@linux.intel.com; ville.syrjala@linux.intel.com; daniel.vetter@ffwll.ch

>Subject: Re: [PATCH i-g-t v3] lib/igt_core.c: Expand --run-subtest functionality.

>

>On 15/02/16 16:55, Daniel Vetter wrote:

>> On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:

>>> Added extended wildcard support when specifying --run-subtest.

>>>

>>> Wildcard format is as specified in rfc3977 and the uwildmat() 

>>> implementation is taken from libinn.

>>> See https://tools.ietf.org/html/rfc3977#section-4 for a description 

>>> of allowed wildcard expressions.

>>>

>>> v2: Use comma as list separator (Ville Syrjala) support both ^ and ! 

>>> as not operators (Dave Gordon)

>>>

>>> v3: Updated to use uwildmat() (Dave Gordon)

>>>

>>> Signed-off-by: Derek Morton <derek.j.morton@intel.com>

>>> ---

>>>   COPYING                 |  21 +++

>>>   lib/Makefile.sources    |   2 +

>>>   lib/igt_core.c          |  17 +-

>>>   lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++

>>>   lib/uwildmat/uwildmat.h |  24 +++

>>

>> Not really a fan of copying other sources into ours. Don't we have 

>> something ready-made that's generally available, or can we at least 

>> pull it in as a build-dep?

>>

>> Thanks, Daniel

>

>It's a standard, RFC3977. The source is readily available for download, for example from the INN master site

>

>https://inn.eyrie.org/trac/browser/trunk/lib/uwildmat.c

>

>It seems to be very stable (last change was 2014: "Change Russ' email address").

>

>It *is* already available (in Ubuntu at least), but it's in a fairly obscure package that not many people will have installed. On Ubuntu, /usr/lib/news/libinn.a is part of package 'inn2-dev'; I understand that on some other distros its in a different package (e.g. 'inn-devel' on CentOS).

>

>So this is probably why Derek chose to include the source from INN rather than working out how to make it a prerequisite for building i-g-t. And it almost certainly isn't otherwise available for Android!


It is not available in Android by default, and it seemed excessive to require anyone who wanted to use IGT to compile and install the InterNetNews (INN) package just to pick up this file, so I added the file to the patch.

The only 'simple' alternative I can see would be to introduce a new command line parameter, 'run-subtest-regex', to allow regular expressions to be used. However, using regular expressions to specify 'run all subtests except...' is not particularly pretty, and the uwildmat solution is, in my opinion, better for expressing a subset of tests to run.

//Derek

>

>.Dave.

>
Daniel Vetter Feb. 17, 2016, 5:18 p.m. UTC | #5
On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:
> Added extended wildcard support when specifying --run-subtest.
> 
> Wildcard format is as specified in rfc3977 and the uwildmat() implementation
> is taken from libinn.
> See https://tools.ietf.org/html/rfc3977#section-4 for a description of
> allowed wildcard expressions.
> 
> v2: Use comma as list separator (Ville Syrjala)
> support both ^ and ! as not operators (Dave Gordon)
> 
> v3: Updated to use uwildmat() (Dave Gordon)
> 
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>

So really not happy that we perpetuate the interface differences in how
igt is run between android and linux. But oh well, maybe this helps in
getting the android team more on board with upstream igt tests than also
reinventing those wheels.

Applied, thanks for the patch.
-Daniel

> ---
>  COPYING                 |  21 +++
>  lib/Makefile.sources    |   2 +
>  lib/igt_core.c          |  17 +-
>  lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/uwildmat/uwildmat.h |  24 +++
>  5 files changed, 536 insertions(+), 2 deletions(-)
>  create mode 100644 lib/uwildmat/uwildmat.c
>  create mode 100644 lib/uwildmat/uwildmat.h
> 
> diff --git a/COPYING b/COPYING
> index b8f6753..16375f2 100644
> --- a/COPYING
> +++ b/COPYING
> @@ -106,3 +106,24 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>  IN THE SOFTWARE.
> +
> +Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> +    2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> +Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> +    2002, 2003 by The Internet Software Consortium and Rich Salz
> +
> +This code is derived from software contributed to the Internet Software
> +Consortium by Rich Salz.
> +
> +Permission to use, copy, modify, and distribute this software for any
> +purpose with or without fee is hereby granted, provided that the above
> +copyright notice and this permission notice appear in all copies.
> +
> +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> +SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 4999868..e33861e 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = 	\
>  	igt_core.h		\
>  	igt_draw.c		\
>  	igt_draw.h		\
> +	uwildmat/uwildmat.h	\
> +	uwildmat/uwildmat.c	\
>  	$(NULL)
>  
>  .PHONY: version.h.tmp
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 6b69bb7..8e0bd2e 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -56,7 +56,7 @@
>  #include <ctype.h>
>  #include <limits.h>
>  #include <locale.h>
> -#include <fnmatch.h>
> +#include <uwildmat/uwildmat.h>
>  
>  #include "drmtest.h"
>  #include "intel_chipset.h"
> @@ -209,6 +209,19 @@
>   * intel gpu to be present). Then individual subtests can be run with
>   * "--run-subtest". Usage help for tests with subtests can be obtained with the
>   * "--help" command line option.
> + *
> + * A wildcard expression can be given to --run-subtest to specify a subset of
> + * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a
> + * description of allowed wildcard expressions.
> + * Some examples of allowed wildcard expressions are:
> + *
> + * - '*basic*' match any subtest containing basic
> + * - 'basic-???' match any subtest named basic- with 3 characters after -
> + * - 'basic-[0-9]' match any subtest named basic- with a single number after -
> + * - 'basic-[^0-9]' match any subtest named basic- with a single non numerical character after -
> + * - 'basic*,advanced*' match any subtest starting basic or advanced
> + * - '*,!basic*' match any subtest not starting basic
> + * - 'basic*,!basic-render*' match any subtest starting basic but not starting basic-render
>   */
>  
>  static unsigned int exit_handler_count;
> @@ -814,7 +827,7 @@ bool __igt_run_subtest(const char *subtest_name)
>  	}
>  
>  	if (run_single_subtest) {
> -		if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
> +		if (uwildmat(subtest_name, run_single_subtest) == 0)
>  			return false;
>  		else
>  			run_single_subtest_found = true;
> diff --git a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c
> new file mode 100644
> index 0000000..2d34742
> --- /dev/null
> +++ b/lib/uwildmat/uwildmat.c
> @@ -0,0 +1,474 @@
> +/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
> +
> +This provides wild card matching originally used in InterNetNews and is
> +described in https://tools.ietf.org/html/rfc3977#section-4
> +
> +INN licence:
> +INN as a whole and all code contained in it not otherwise marked with
> +different licenses and/or copyrights is covered by the following copyright
> +and license:
> +
> +   Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> +       2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> +   Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> +       2002, 2003 by The Internet Software Consortium and Rich Salz
> +
> +   This code is derived from software contributed to the Internet Software
> +   Consortium by Rich Salz.
> +
> +   Permission to use, copy, modify, and distribute this software for any
> +   purpose with or without fee is hereby granted, provided that the above
> +   copyright notice and this permission notice appear in all copies.
> +
> +   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> +   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +   MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> +   SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> +*/
> +
> +/*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
> +**
> +**  wildmat pattern matching with Unicode UTF-8 extensions.
> +**
> +**  Do shell-style pattern matching for ?, \, [], and * characters.  Might not
> +**  be robust in face of malformed patterns; e.g., "foo[a-" could cause a
> +**  segmentation violation.  It is 8-bit clean.  (Robustness hopefully fixed
> +**  July 2000; all malformed patterns should now just fail to match anything.)
> +**
> +**  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
> +**  Rich $alz is now <rsalz@osf.org>.
> +**
> +**  April, 1991:  Replaced mutually-recursive calls with in-line code for the
> +**  star character.
> +**
> +**  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
> +**  This can greatly speed up failing wildcard patterns.  For example:
> +**
> +**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
> +**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
> +**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
> +**
> +**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
> +**  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
> +**  explanation is from Lars:
> +**
> +**  The precondition that must be fulfilled is that DoMatch will consume at
> +**  least one character in text.  This is true if *p is neither '*' nor '\0'.)
> +**  The last return has ABORT instead of false to avoid quadratic behaviour in
> +**  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With false, each
> +**  star-loop has to run to the end of the text; with ABORT only the last one
> +**  does.
> +**
> +**  Once the control of one instance of DoMatch enters the star-loop, that
> +**  instance will return either true or ABORT, and any calling instance will
> +**  therefore return immediately after (without calling recursively again).
> +**  In effect, only one star-loop is ever active.  It would be possible to
> +**  modify the code to maintain this context explicitly, eliminating all
> +**  recursive calls at the cost of some complication and loss of clarity (and
> +**  the ABORT stuff seems to be unclear enough by itself).  I think it would
> +**  be unwise to try to get this into a released version unless you have a
> +**  good test data base to try it out on.
> +**
> +**  June, 1991:  Robert Elz <kre@munnari.oz.au> added minus and close bracket
> +**  handling for character sets.
> +**
> +**  July, 2000:  Largely rewritten by Russ Allbery <rra@stanford.edu> to add
> +**  support for ',', '!', and optionally '@' to the core wildmat routine.
> +**  Broke the character class matching into a separate function for clarity
> +**  since it's infrequently used in practice, and added some simple lookahead
> +**  to significantly decrease the recursive calls in the '*' matching code.
> +**  Added support for UTF-8 as the default character set for any high-bit
> +**  characters.
> +**
> +**  For more information on UTF-8, see RFC 3629.
> +**
> +**  Please note that this file is intentionally written so that conditionally
> +**  executed expressions are on separate lines from the condition to
> +**  facilitate analysis of the coverage of the test suite using purecov.
> +**  Please preserve this.  As of March 11, 2001, purecov reports that the
> +**  accompanying test suite achieves 100% coverage of this file.
> +*/
> +
> +#include <string.h>
> +#include "uwildmat.h"
> +
> +#define ABORT -1
> +
> +/* Whether or not an octet looks like the start of a UTF-8 character. */
> +#define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
> +
> +
> +/*
> +**  Determine the length of a non-ASCII character in octets (for advancing
> +**  pointers when skipping over characters).  Takes a pointer to the start of
> +**  the character and to the last octet of the string.  If end is NULL, expect
> +**  the string pointed to by start to be nul-terminated.  If the character is
> +**  malformed UTF-8, return 1 to treat it like an eight-bit local character.
> +*/
> +static int
> +utf8_length(const unsigned char *start, const unsigned char *end)
> +{
> +    unsigned char mask = 0x80;
> +    const unsigned char *p;
> +    int length = 0;
> +    int left;
> +
> +    for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
> +        length++;
> +    if (length < 2 || length > 6)
> +        return 1;
> +    if (end != NULL && (end - start + 1) < length)
> +        return 1;
> +    left = length - 1;
> +    for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
> +        left--;
> +    return (left == 0) ? length : 1;
> +}
> +
> +
> +/*
> +**  Check whether a string contains only valid UTF-8 characters.
> +*/
> +bool
> +is_valid_utf8(const char *text)
> +{
> +    unsigned char mask;
> +    const unsigned char *p;
> +    int length;
> +    int left;
> +
> +    for (p = (const unsigned char *)text; *p != '\0';) {
> +        mask = 0x80;
> +        length = 0;
> +
> +        /* Find out the expected length of the character. */
> +        for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
> +            length++;
> +
> +        p++;
> +
> +        /* Valid ASCII. */
> +        if (length == 0)
> +            continue;
> +        
> +        /* Invalid length. */
> +        if (length < 2 || length > 6)
> +            return false;
> +
> +        /* Check that each byte looks like 10xxxxxx, except for the first. */
> +        left = length - 1;
> +        for (; left > 0 && (*p & 0xc0) == 0x80; p++)
> +            left--;
> +
> +        if (left > 0)
> +            return false;
> +    }
> +
> +    return true;
> +}
> +
> +
> +/*
> +**  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
> +**  character and to the last octet of the string, and to a uint32_t into
> +**  which to put the decoded UCS-4 value.  If end is NULL, expect the string
> +**  pointed to by start to be nul-terminated.  Returns the number of octets in
> +**  the UTF-8 encoding.  If the UTF-8 character is malformed, set result to
> +**  the decimal value of the first octet; this is wrong, but it will generally
> +**  cause the rest of the wildmat matching to do the right thing for non-UTF-8
> +**  input.
> +*/
> +static int
> +utf8_decode(const unsigned char *start, const unsigned char *end,
> +            uint32_t *result)
> +{
> +    uint32_t value = 0;
> +    int length, i;
> +    const unsigned char *p = start;
> +    unsigned char mask;
> +
> +    length = utf8_length(start, end);
> +    if (length < 2) {
> +        *result = *start;
> +        return 1;
> +    }
> +    mask = (1 << (7 - length)) - 1;
> +    value = *p & mask;
> +    p++;
> +    for (i = length - 1; i > 0; i--) {
> +        value = (value << 6) | (*p & 0x3f);
> +        p++;
> +    }
> +    *result = value;
> +    return length;
> +}
> +
> +
> +/*
> +**  Match a character class against text, a UCS-4 character.  start is a
> +**  pointer to the first character of the character class, end a pointer to
> +**  the last.  Returns whether the class matches that character.
> +*/
> +static bool
> +match_class(uint32_t text, const unsigned char *start,
> +            const unsigned char *end)
> +{
> +    bool reversed, allowrange;
> +    const unsigned char *p = start;
> +    uint32_t first = 0;
> +    uint32_t last;
> +
> +    /* Check for an inverted character class (starting with ^).  If the
> +       character matches the character class, we return !reversed; that way,
> +       we return true if it's a regular character class and false if it's a
> +       reversed one.  If the character doesn't match, we return reversed. */
> +    reversed = (*p == '^');
> +    if (reversed)
> +        p++;
> +
> +    /* Walk through the character class until we reach the end or find a
> +       match, handling character ranges as we go.  Only permit a range to
> +       start when allowrange is true; this allows - to be treated like a
> +       normal character as the first character of the class and catches
> +       malformed ranges like a-e-n.  We treat the character at the beginning
> +       of a range as both a regular member of the class and the beginning of
> +       the range; this is harmless (although it means that malformed ranges
> +       like m-a will match m and nothing else). */
> +    allowrange = false;
> +    while (p <= end) {
> +        if (allowrange && *p == '-' && p < end) {
> +            p++;
> +            p += utf8_decode(p, end, &last);
> +            if (text >= first && text <= last)
> +                return !reversed;
> +            allowrange = false;
> +        } else {
> +            p += utf8_decode(p, end, &first);
> +            if (text == first)
> +                return !reversed;
> +            allowrange = true;
> +        }
> +    }
> +    return reversed;
> +}
> +
> +
> +/*
> +**  Match the text against the pattern between start and end.  This is a
> +**  single pattern; a leading ! or @ must already be taken care of, and
> +**  commas must be dealt with outside of this routine.
> +*/
> +static int
> +match_pattern(const unsigned char *text, const unsigned char *start,
> +              const unsigned char *end)
> +{
> +    const unsigned char *q, *endclass;
> +    const unsigned char *p = start;
> +    bool ismeta;
> +    int matched, width;
> +    uint32_t c;
> +
> +    for (; p <= end; p++) {
> +        if (!*text && *p != '*')
> +            return ABORT;
> +
> +        switch (*p) {
> +        case '\\':
> +            if (!*++p)
> +                return ABORT;
> +            /* Fall through. */
> +
> +        default:
> +            if (*text++ != *p)
> +                return false;
> +            break;
> +
> +        case '?':
> +            text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> +            break;
> +
> +        case '*':
> +            /* Consecutive stars are equivalent to one.  Advance pattern to
> +               the character after the star. */
> +            for (++p; *p == '*'; p++)
> +                ;
> +
> +            /* A trailing star will match anything. */
> +            if (p > end)
> +                return true;
> +
> +            /* Basic algorithm: Recurse at each point where the * could
> +               possibly match.  If the match succeeds or aborts, return
> +               immediately; otherwise, try the next position.
> +
> +               Optimization: If the character after the * in the pattern
> +               isn't a metacharacter (the common case), then the * has to
> +               consume characters at least up to the next occurrence of that
> +               character in the text.  Scan forward for those points rather
> +               than recursing at every possible point to save the extra
> +               function call overhead. */
> +            ismeta = (*p == '[' || *p == '?' || *p == '\\');
> +            while (*text) {
> +                width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> +                if (ismeta) {
> +                    matched = match_pattern(text, p, end);
> +                    text += width;
> +                } else {
> +                    while (*text && *text != *p) {
> +                        text += width;
> +                        width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> +                    }
> +                    if (!*text)
> +                        return ABORT;
> +                    matched = match_pattern(++text, p + 1, end);
> +                }
> +                if (matched != false)
> +                    return matched;
> +            }
> +            return ABORT;
> +
> +        case '[':
> +            /* Find the end of the character class, making sure not to pick
> +               up a close bracket at the beginning of the class. */
> +            p++;
> +            q = p + (*p == '^') + 1;
> +            if (q > end)
> +                return ABORT;
> +            endclass = memchr(q, ']', (size_t) (end - q + 1));
> +            if (!endclass)
> +                return ABORT;
> +
> +            /* Do the heavy lifting in another function for clarity, since
> +               character classes are an uncommon case. */
> +            text += utf8_decode(text, NULL, &c);
> +            if (!match_class(c, p, endclass - 1))
> +                return false;
> +            p = endclass;
> +            break;
> +        }
> +    }
> +
> +    return (*text == '\0');
> +}
> +
> +
> +/*
> +**  Takes text and a wildmat expression; a wildmat expression is a
> +**  comma-separated list of wildmat patterns, optionally preceded by ! to
> +**  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
> +**  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
> +**  set, allow @ to introduce a poison expression (the same as !, but if it
> +**  triggers the failed match the routine returns UWILDMAT_POISON instead).
> +*/
> +static enum uwildmat
> +match_expression(const unsigned char *text, const unsigned char *start,
> +                 bool allowpoison)
> +{
> +    const unsigned char *end, *split;
> +    const unsigned char *p = start;
> +    bool reverse, escaped;
> +    bool match = false;
> +    bool poison = false;
> +    bool poisoned = false;
> +
> +    /* Handle the empty expression separately, since otherwise end will be
> +       set to an invalid pointer. */
> +    if (!*p)
> +        return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> +    end = start + strlen((const char *) start) - 1;
> +
> +    /* Main match loop.  Find each comma that separates patterns, and attempt 
> +       to match the text with each pattern in order.  The last matching
> +       pattern determines whether the whole expression matches. */
> +    for (; p <= end + 1; p = split + 1) {
> +        if (allowpoison)
> +            poison = (*p == '@');
> +        reverse = (*p == '!') || poison;
> +        if (reverse)
> +            p++;
> +
> +        /* Find the first unescaped comma, if any.  If there is none, split
> +           will be one greater than end and point at the nul at the end of
> +           the string. */
> +        for (escaped = false, split = p; split <= end; split++) {
> +            if (*split == '[') {
> +                split++;
> +                if (*split == ']')
> +                    split++;
> +                while (split <= end && *split != ']')
> +                    split++;
> +            }
> +            if (*split == ',' && !escaped)
> +                break;
> +            escaped = (*split == '\\') ? !escaped : false;
> +        }
> +
> +        /* Optimization: If match == !reverse and poison == poisoned, this
> +           pattern can't change the result, so don't do any work. */
> +        if (match == !reverse && poison == poisoned)
> +            continue;
> +        if (match_pattern(text, p, split - 1) == true) {
> +            poisoned = poison;
> +            match = !reverse;
> +        }
> +    }
> +    if (poisoned)
> +        return UWILDMAT_POISON;
> +    return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> +}
> +
> +
> +/*
> +**  User-level routine used for wildmats where @ should be treated as a
> +**  regular character.
> +*/
> +bool
> +uwildmat(const char *text, const char *pat)
> +{
> +    const unsigned char *utext = (const unsigned char *) text;
> +    const unsigned char *upat = (const unsigned char *) pat;
> +
> +    if (upat[0] == '*' && upat[1] == '\0')
> +        return true;
> +    else
> +        return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
> +}
> +
> +
> +/*
> +**  User-level routine used for wildmats that support poison matches.
> +*/
> +enum uwildmat
> +uwildmat_poison(const char *text, const char *pat)
> +{
> +    const unsigned char *utext = (const unsigned char *) text;
> +    const unsigned char *upat = (const unsigned char *) pat;
> +
> +    if (upat[0] == '*' && upat[1] == '\0')
> +        return UWILDMAT_MATCH;
> +    else
> +        return match_expression(utext, upat, true);
> +}
> +
> +
> +/*
> +**  User-level routine for simple expressions (neither , nor ! are special).
> +*/
> +bool
> +uwildmat_simple(const char *text, const char *pat)
> +{
> +    const unsigned char *utext = (const unsigned char *) text;
> +    const unsigned char *upat = (const unsigned char *) pat;
> +    size_t length;
> +
> +    if (upat[0] == '*' && upat[1] == '\0')
> +        return true;
> +    else {
> +        length = strlen(pat);
> +        return (match_pattern(utext, upat, upat + length - 1) == true);
> +    }
> +}
> diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h
> new file mode 100644
> index 0000000..2e47189
> --- /dev/null
> +++ b/lib/uwildmat/uwildmat.h
> @@ -0,0 +1,24 @@
> +
> +#ifndef UWILDMAT_H
> +#define UWILDMAT_H 1
> +
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <stdbool.h>
> +
> +/*
> +**  WILDMAT MATCHING
> +*/
> +enum uwildmat {
> +    UWILDMAT_FAIL   = 0,
> +    UWILDMAT_MATCH  = 1,
> +    UWILDMAT_POISON
> +};
> +
> +extern bool             is_valid_utf8(const char *start);
> +extern bool             uwildmat(const char *text, const char *pat);
> +extern bool             uwildmat_simple(const char *text, const char *pat);
> +extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);
> +
> +
> +#endif /* UWILDMAT_H */
> -- 
> 1.9.1
>
Daniel Vetter Feb. 17, 2016, 11:10 p.m. UTC | #6
On Wed, Feb 17, 2016 at 06:18:21PM +0100, Daniel Vetter wrote:
> On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:
> > Added extended wildcard support when specifying --run-subtest.
> > 
> > Wildcard format is as specified in rfc3977 and the uwildmat() implementation
> > is taken from libinn.
> > See https://tools.ietf.org/html/rfc3977#section-4 for a description of
> > allowed wildcard expressions.
> > 
> > v2: Use comma as list separator (Ville Syrjala)
> > support both ^ and ! as not operators (Dave Gordon)
> > 
> > v3: Updated to use uwildmat() (Dave Gordon)
> > 
> > Signed-off-by: Derek Morton <derek.j.morton@intel.com>
> 
> So really not happy that we perpetuate the interface differences in how
> igt is run between android and linux. But oh well, maybe this helps in
> getting the android team more on board with upstream igt tests than also
> reinventing those wheels.
> 
> Applied, thanks for the patch.

Ok, this breaks the normal partial match functionality, which is pretty
handy for running a few tests. Now we need *foo* instead of foo. Can you
pls add a patch on top to restore that?

Thanks, Daniel

> -Daniel
> 
> > ---
> >  COPYING                 |  21 +++
> >  lib/Makefile.sources    |   2 +
> >  lib/igt_core.c          |  17 +-
> >  lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/uwildmat/uwildmat.h |  24 +++
> >  5 files changed, 536 insertions(+), 2 deletions(-)
> >  create mode 100644 lib/uwildmat/uwildmat.c
> >  create mode 100644 lib/uwildmat/uwildmat.h
> > 
> > diff --git a/COPYING b/COPYING
> > index b8f6753..16375f2 100644
> > --- a/COPYING
> > +++ b/COPYING
> > @@ -106,3 +106,24 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> >  IN THE SOFTWARE.
> > +
> > +Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> > +    2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> > +Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> > +    2002, 2003 by The Internet Software Consortium and Rich Salz
> > +
> > +This code is derived from software contributed to the Internet Software
> > +Consortium by Rich Salz.
> > +
> > +Permission to use, copy, modify, and distribute this software for any
> > +purpose with or without fee is hereby granted, provided that the above
> > +copyright notice and this permission notice appear in all copies.
> > +
> > +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> > +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > +MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> > +SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > +
> > diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> > index 4999868..e33861e 100644
> > --- a/lib/Makefile.sources
> > +++ b/lib/Makefile.sources
> > @@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = 	\
> >  	igt_core.h		\
> >  	igt_draw.c		\
> >  	igt_draw.h		\
> > +	uwildmat/uwildmat.h	\
> > +	uwildmat/uwildmat.c	\
> >  	$(NULL)
> >  
> >  .PHONY: version.h.tmp
> > diff --git a/lib/igt_core.c b/lib/igt_core.c
> > index 6b69bb7..8e0bd2e 100644
> > --- a/lib/igt_core.c
> > +++ b/lib/igt_core.c
> > @@ -56,7 +56,7 @@
> >  #include <ctype.h>
> >  #include <limits.h>
> >  #include <locale.h>
> > -#include <fnmatch.h>
> > +#include <uwildmat/uwildmat.h>
> >  
> >  #include "drmtest.h"
> >  #include "intel_chipset.h"
> > @@ -209,6 +209,19 @@
> >   * intel gpu to be present). Then individual subtests can be run with
> >   * "--run-subtest". Usage help for tests with subtests can be obtained with the
> >   * "--help" command line option.
> > + *
> > + * A wildcard expression can be given to --run-subtest to specify a subset of
> > + * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a
> > + * description of allowed wildcard expressions.
> > + * Some examples of allowed wildcard expressions are:
> > + *
> > + * - '*basic*' match any subtest containing basic
> > + * - 'basic-???' match any subtest named basic- with 3 characters after -
> > + * - 'basic-[0-9]' match any subtest named basic- with a single number after -
> > + * - 'basic-[^0-9]' match any subtest named basic- with a single non numerical character after -
> > + * - 'basic*,advanced*' match any subtest starting basic or advanced
> > + * - '*,!basic*' match any subtest not starting basic
> > + * - 'basic*,!basic-render*' match any subtest starting basic but not starting basic-render
> >   */
> >  
> >  static unsigned int exit_handler_count;
> > @@ -814,7 +827,7 @@ bool __igt_run_subtest(const char *subtest_name)
> >  	}
> >  
> >  	if (run_single_subtest) {
> > -		if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
> > +		if (uwildmat(subtest_name, run_single_subtest) == 0)
> >  			return false;
> >  		else
> >  			run_single_subtest_found = true;
> > diff --git a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c
> > new file mode 100644
> > index 0000000..2d34742
> > --- /dev/null
> > +++ b/lib/uwildmat/uwildmat.c
> > @@ -0,0 +1,474 @@
> > +/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
> > +
> > +This provides wild card matching originally used in InterNetNews and is
> > +described in https://tools.ietf.org/html/rfc3977#section-4
> > +
> > +INN licence:
> > +INN as a whole and all code contained in it not otherwise marked with
> > +different licenses and/or copyrights is covered by the following copyright
> > +and license:
> > +
> > +   Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
> > +       2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
> > +   Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
> > +       2002, 2003 by The Internet Software Consortium and Rich Salz
> > +
> > +   This code is derived from software contributed to the Internet Software
> > +   Consortium by Rich Salz.
> > +
> > +   Permission to use, copy, modify, and distribute this software for any
> > +   purpose with or without fee is hereby granted, provided that the above
> > +   copyright notice and this permission notice appear in all copies.
> > +
> > +   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
> > +   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > +   MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
> > +   SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > +
> > +*/
> > +
> > +/*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
> > +**
> > +**  wildmat pattern matching with Unicode UTF-8 extensions.
> > +**
> > +**  Do shell-style pattern matching for ?, \, [], and * characters.  Might not
> > +**  be robust in face of malformed patterns; e.g., "foo[a-" could cause a
> > +**  segmentation violation.  It is 8-bit clean.  (Robustness hopefully fixed
> > +**  July 2000; all malformed patterns should now just fail to match anything.)
> > +**
> > +**  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
> > +**  Rich $alz is now <rsalz@osf.org>.
> > +**
> > +**  April, 1991:  Replaced mutually-recursive calls with in-line code for the
> > +**  star character.
> > +**
> > +**  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
> > +**  This can greatly speed up failing wildcard patterns.  For example:
> > +**
> > +**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
> > +**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
> > +**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
> > +**
> > +**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
> > +**  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
> > +**  explanation is from Lars:
> > +**
> > +**  The precondition that must be fulfilled is that DoMatch will consume at
> > +**  least one character in text.  This is true if *p is neither '*' nor '\0'.)
> > +**  The last return has ABORT instead of false to avoid quadratic behaviour in
> > +**  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With false, each
> > +**  star-loop has to run to the end of the text; with ABORT only the last one
> > +**  does.
> > +**
> > +**  Once the control of one instance of DoMatch enters the star-loop, that
> > +**  instance will return either true or ABORT, and any calling instance will
> > +**  therefore return immediately after (without calling recursively again).
> > +**  In effect, only one star-loop is ever active.  It would be possible to
> > +**  modify the code to maintain this context explicitly, eliminating all
> > +**  recursive calls at the cost of some complication and loss of clarity (and
> > +**  the ABORT stuff seems to be unclear enough by itself).  I think it would
> > +**  be unwise to try to get this into a released version unless you have a
> > +**  good test data base to try it out on.
> > +**
> > +**  June, 1991:  Robert Elz <kre@munnari.oz.au> added minus and close bracket
> > +**  handling for character sets.
> > +**
> > +**  July, 2000:  Largely rewritten by Russ Allbery <rra@stanford.edu> to add
> > +**  support for ',', '!', and optionally '@' to the core wildmat routine.
> > +**  Broke the character class matching into a separate function for clarity
> > +**  since it's infrequently used in practice, and added some simple lookahead
> > +**  to significantly decrease the recursive calls in the '*' matching code.
> > +**  Added support for UTF-8 as the default character set for any high-bit
> > +**  characters.
> > +**
> > +**  For more information on UTF-8, see RFC 3629.
> > +**
> > +**  Please note that this file is intentionally written so that conditionally
> > +**  executed expressions are on separate lines from the condition to
> > +**  facilitate analysis of the coverage of the test suite using purecov.
> > +**  Please preserve this.  As of March 11, 2001, purecov reports that the
> > +**  accompanying test suite achieves 100% coverage of this file.
> > +*/
> > +
> > +#include <string.h>
> > +#include "uwildmat.h"
> > +
> > +#define ABORT -1
> > +
> > +/* Whether or not an octet looks like the start of a UTF-8 character. */
> > +#define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
> > +
> > +
> > +/*
> > +**  Determine the length of a non-ASCII character in octets (for advancing
> > +**  pointers when skipping over characters).  Takes a pointer to the start of
> > +**  the character and to the last octet of the string.  If end is NULL, expect
> > +**  the string pointed to by start to be nul-terminated.  If the character is
> > +**  malformed UTF-8, return 1 to treat it like an eight-bit local character.
> > +*/
> > +static int
> > +utf8_length(const unsigned char *start, const unsigned char *end)
> > +{
> > +    unsigned char mask = 0x80;
> > +    const unsigned char *p;
> > +    int length = 0;
> > +    int left;
> > +
> > +    for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
> > +        length++;
> > +    if (length < 2 || length > 6)
> > +        return 1;
> > +    if (end != NULL && (end - start + 1) < length)
> > +        return 1;
> > +    left = length - 1;
> > +    for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
> > +        left--;
> > +    return (left == 0) ? length : 1;
> > +}
> > +
> > +
> > +/*
> > +**  Check whether a string contains only valid UTF-8 characters.
> > +*/
> > +bool
> > +is_valid_utf8(const char *text)
> > +{
> > +    unsigned char mask;
> > +    const unsigned char *p;
> > +    int length;
> > +    int left;
> > +
> > +    for (p = (const unsigned char *)text; *p != '\0';) {
> > +        mask = 0x80;
> > +        length = 0;
> > +
> > +        /* Find out the expected length of the character. */
> > +        for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
> > +            length++;
> > +
> > +        p++;
> > +
> > +        /* Valid ASCII. */
> > +        if (length == 0)
> > +            continue;
> > +        
> > +        /* Invalid length. */
> > +        if (length < 2 || length > 6)
> > +            return false;
> > +
> > +        /* Check that each byte looks like 10xxxxxx, except for the first. */
> > +        left = length - 1;
> > +        for (; left > 0 && (*p & 0xc0) == 0x80; p++)
> > +            left--;
> > +
> > +        if (left > 0)
> > +            return false;
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +
> > +/*
> > +**  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
> > +**  character and to the last octet of the string, and to a uint32_t into
> > +**  which to put the decoded UCS-4 value.  If end is NULL, expect the string
> > +**  pointed to by start to be nul-terminated.  Returns the number of octets in
> > +**  the UTF-8 encoding.  If the UTF-8 character is malformed, set result to
> > +**  the decimal value of the first octet; this is wrong, but it will generally
> > +**  cause the rest of the wildmat matching to do the right thing for non-UTF-8
> > +**  input.
> > +*/
> > +static int
> > +utf8_decode(const unsigned char *start, const unsigned char *end,
> > +            uint32_t *result)
> > +{
> > +    uint32_t value = 0;
> > +    int length, i;
> > +    const unsigned char *p = start;
> > +    unsigned char mask;
> > +
> > +    length = utf8_length(start, end);
> > +    if (length < 2) {
> > +        *result = *start;
> > +        return 1;
> > +    }
> > +    mask = (1 << (7 - length)) - 1;
> > +    value = *p & mask;
> > +    p++;
> > +    for (i = length - 1; i > 0; i--) {
> > +        value = (value << 6) | (*p & 0x3f);
> > +        p++;
> > +    }
> > +    *result = value;
> > +    return length;
> > +}
> > +
> > +
> > +/*
> > +**  Match a character class against text, a UCS-4 character.  start is a
> > +**  pointer to the first character of the character class, end a pointer to
> > +**  the last.  Returns whether the class matches that character.
> > +*/
> > +static bool
> > +match_class(uint32_t text, const unsigned char *start,
> > +            const unsigned char *end)
> > +{
> > +    bool reversed, allowrange;
> > +    const unsigned char *p = start;
> > +    uint32_t first = 0;
> > +    uint32_t last;
> > +
> > +    /* Check for an inverted character class (starting with ^).  If the
> > +       character matches the character class, we return !reversed; that way,
> > +       we return true if it's a regular character class and false if it's a
> > +       reversed one.  If the character doesn't match, we return reversed. */
> > +    reversed = (*p == '^');
> > +    if (reversed)
> > +        p++;
> > +
> > +    /* Walk through the character class until we reach the end or find a
> > +       match, handling character ranges as we go.  Only permit a range to
> > +       start when allowrange is true; this allows - to be treated like a
> > +       normal character as the first character of the class and catches
> > +       malformed ranges like a-e-n.  We treat the character at the beginning
> > +       of a range as both a regular member of the class and the beginning of
> > +       the range; this is harmless (although it means that malformed ranges
> > +       like m-a will match m and nothing else). */
> > +    allowrange = false;
> > +    while (p <= end) {
> > +        if (allowrange && *p == '-' && p < end) {
> > +            p++;
> > +            p += utf8_decode(p, end, &last);
> > +            if (text >= first && text <= last)
> > +                return !reversed;
> > +            allowrange = false;
> > +        } else {
> > +            p += utf8_decode(p, end, &first);
> > +            if (text == first)
> > +                return !reversed;
> > +            allowrange = true;
> > +        }
> > +    }
> > +    return reversed;
> > +}
> > +
> > +
> > +/*
> > +**  Match the text against the pattern between start and end.  This is a
> > +**  single pattern; a leading ! or @ must already be taken care of, and
> > +**  commas must be dealt with outside of this routine.
> > +*/
> > +static int
> > +match_pattern(const unsigned char *text, const unsigned char *start,
> > +              const unsigned char *end)
> > +{
> > +    const unsigned char *q, *endclass;
> > +    const unsigned char *p = start;
> > +    bool ismeta;
> > +    int matched, width;
> > +    uint32_t c;
> > +
> > +    for (; p <= end; p++) {
> > +        if (!*text && *p != '*')
> > +            return ABORT;
> > +
> > +        switch (*p) {
> > +        case '\\':
> > +            if (!*++p)
> > +                return ABORT;
> > +            /* Fall through. */
> > +
> > +        default:
> > +            if (*text++ != *p)
> > +                return false;
> > +            break;
> > +
> > +        case '?':
> > +            text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> > +            break;
> > +
> > +        case '*':
> > +            /* Consecutive stars are equivalent to one.  Advance pattern to
> > +               the character after the star. */
> > +            for (++p; *p == '*'; p++)
> > +                ;
> > +
> > +            /* A trailing star will match anything. */
> > +            if (p > end)
> > +                return true;
> > +
> > +            /* Basic algorithm: Recurse at each point where the * could
> > +               possibly match.  If the match succeeds or aborts, return
> > +               immediately; otherwise, try the next position.
> > +
> > +               Optimization: If the character after the * in the pattern
> > +               isn't a metacharacter (the common case), then the * has to
> > +               consume characters at least up to the next occurrence of that
> > +               character in the text.  Scan forward for those points rather
> > +               than recursing at every possible point to save the extra
> > +               function call overhead. */
> > +            ismeta = (*p == '[' || *p == '?' || *p == '\\');
> > +            while (*text) {
> > +                width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> > +                if (ismeta) {
> > +                    matched = match_pattern(text, p, end);
> > +                    text += width;
> > +                } else {
> > +                    while (*text && *text != *p) {
> > +                        text += width;
> > +                        width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
> > +                    }
> > +                    if (!*text)
> > +                        return ABORT;
> > +                    matched = match_pattern(++text, p + 1, end);
> > +                }
> > +                if (matched != false)
> > +                    return matched;
> > +            }
> > +            return ABORT;
> > +
> > +        case '[':
> > +            /* Find the end of the character class, making sure not to pick
> > +               up a close bracket at the beginning of the class. */
> > +            p++;
> > +            q = p + (*p == '^') + 1;
> > +            if (q > end)
> > +                return ABORT;
> > +            endclass = memchr(q, ']', (size_t) (end - q + 1));
> > +            if (!endclass)
> > +                return ABORT;
> > +
> > +            /* Do the heavy lifting in another function for clarity, since
> > +               character classes are an uncommon case. */
> > +            text += utf8_decode(text, NULL, &c);
> > +            if (!match_class(c, p, endclass - 1))
> > +                return false;
> > +            p = endclass;
> > +            break;
> > +        }
> > +    }
> > +
> > +    return (*text == '\0');
> > +}
> > +
> > +
> > +/*
> > +**  Takes text and a wildmat expression; a wildmat expression is a
> > +**  comma-separated list of wildmat patterns, optionally preceded by ! to
> > +**  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
> > +**  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
> > +**  set, allow @ to introduce a poison expression (the same as !, but if it
> > +**  triggers the failed match the routine returns UWILDMAT_POISON instead).
> > +*/
> > +static enum uwildmat
> > +match_expression(const unsigned char *text, const unsigned char *start,
> > +                 bool allowpoison)
> > +{
> > +    const unsigned char *end, *split;
> > +    const unsigned char *p = start;
> > +    bool reverse, escaped;
> > +    bool match = false;
> > +    bool poison = false;
> > +    bool poisoned = false;
> > +
> > +    /* Handle the empty expression separately, since otherwise end will be
> > +       set to an invalid pointer. */
> > +    if (!*p)
> > +        return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> > +    end = start + strlen((const char *) start) - 1;
> > +
> > +    /* Main match loop.  Find each comma that separates patterns, and attempt 
> > +       to match the text with each pattern in order.  The last matching
> > +       pattern determines whether the whole expression matches. */
> > +    for (; p <= end + 1; p = split + 1) {
> > +        if (allowpoison)
> > +            poison = (*p == '@');
> > +        reverse = (*p == '!') || poison;
> > +        if (reverse)
> > +            p++;
> > +
> > +        /* Find the first unescaped comma, if any.  If there is none, split
> > +           will be one greater than end and point at the nul at the end of
> > +           the string. */
> > +        for (escaped = false, split = p; split <= end; split++) {
> > +            if (*split == '[') {
> > +                split++;
> > +                if (*split == ']')
> > +                    split++;
> > +                while (split <= end && *split != ']')
> > +                    split++;
> > +            }
> > +            if (*split == ',' && !escaped)
> > +                break;
> > +            escaped = (*split == '\\') ? !escaped : false;
> > +        }
> > +
> > +        /* Optimization: If match == !reverse and poison == poisoned, this
> > +           pattern can't change the result, so don't do any work. */
> > +        if (match == !reverse && poison == poisoned)
> > +            continue;
> > +        if (match_pattern(text, p, split - 1) == true) {
> > +            poisoned = poison;
> > +            match = !reverse;
> > +        }
> > +    }
> > +    if (poisoned)
> > +        return UWILDMAT_POISON;
> > +    return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
> > +}
> > +
> > +
> > +/*
> > +**  User-level routine used for wildmats where @ should be treated as a
> > +**  regular character.
> > +*/
> > +bool
> > +uwildmat(const char *text, const char *pat)
> > +{
> > +    const unsigned char *utext = (const unsigned char *) text;
> > +    const unsigned char *upat = (const unsigned char *) pat;
> > +
> > +    if (upat[0] == '*' && upat[1] == '\0')
> > +        return true;
> > +    else
> > +        return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
> > +}
> > +
> > +
> > +/*
> > +**  User-level routine used for wildmats that support poison matches.
> > +*/
> > +enum uwildmat
> > +uwildmat_poison(const char *text, const char *pat)
> > +{
> > +    const unsigned char *utext = (const unsigned char *) text;
> > +    const unsigned char *upat = (const unsigned char *) pat;
> > +
> > +    if (upat[0] == '*' && upat[1] == '\0')
> > +        return UWILDMAT_MATCH;
> > +    else
> > +        return match_expression(utext, upat, true);
> > +}
> > +
> > +
> > +/*
> > +**  User-level routine for simple expressions (neither , nor ! are special).
> > +*/
> > +bool
> > +uwildmat_simple(const char *text, const char *pat)
> > +{
> > +    const unsigned char *utext = (const unsigned char *) text;
> > +    const unsigned char *upat = (const unsigned char *) pat;
> > +    size_t length;
> > +
> > +    if (upat[0] == '*' && upat[1] == '\0')
> > +        return true;
> > +    else {
> > +        length = strlen(pat);
> > +        return (match_pattern(utext, upat, upat + length - 1) == true);
> > +    }
> > +}
> > diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h
> > new file mode 100644
> > index 0000000..2e47189
> > --- /dev/null
> > +++ b/lib/uwildmat/uwildmat.h
> > @@ -0,0 +1,24 @@
> > +
> > +#ifndef UWILDMAT_H
> > +#define UWILDMAT_H 1
> > +
> > +#include <stdio.h>
> > +#include <sys/types.h>
> > +#include <stdbool.h>
> > +
> > +/*
> > +**  WILDMAT MATCHING
> > +*/
> > +enum uwildmat {
> > +    UWILDMAT_FAIL   = 0,
> > +    UWILDMAT_MATCH  = 1,
> > +    UWILDMAT_POISON
> > +};
> > +
> > +extern bool             is_valid_utf8(const char *start);
> > +extern bool             uwildmat(const char *text, const char *pat);
> > +extern bool             uwildmat_simple(const char *text, const char *pat);
> > +extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);
> > +
> > +
> > +#endif /* UWILDMAT_H */
> > -- 
> > 1.9.1
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Derek Morton Feb. 18, 2016, 10:58 a.m. UTC | #7
>
>
>-----Original Message-----
>From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel Vetter
>Sent: Wednesday, February 17, 2016 11:10 PM
>To: Morton, Derek J <derek.j.morton@intel.com>
>Cc: intel-gfx@lists.freedesktop.org; Gore, Tim <tim.gore@intel.com>; Gordon, David S <david.s.gordon@intel.com>; david.weinehall@linux.intel.com; ville.syrjala@linux.intel.com; daniel.vetter@ffwll.ch
>Subject: Re: [PATCH i-g-t v3] lib/igt_core.c: Expand --run-subtest functionality.
>
>On Wed, Feb 17, 2016 at 06:18:21PM +0100, Daniel Vetter wrote:
>> On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote:
>> > Added extended wildcard support when specifying --run-subtest.
>> > 
>> > Wildcard format is as specified in rfc3977 and the uwildmat() 
>> > implementation is taken from libinn.
>> > See https://tools.ietf.org/html/rfc3977#section-4 for a description 
>> > of allowed wildcard expressions.
>> > 
>> > v2: Use comma as list separator (Ville Syrjala) support both ^ and ! 
>> > as not operators (Dave Gordon)
>> > 
>> > v3: Updated to use uwildmat() (Dave Gordon)
>> > 
>> > Signed-off-by: Derek Morton <derek.j.morton@intel.com>
>> 
>> So really not happy that we perpetuate the interface differences in 
>> how igt is run between android and linux. But oh well, maybe this 
>> helps in getting the android team more on board with upstream igt 
>> tests than also reinventing those wheels.
>> 
>> Applied, thanks for the patch.
>
>Ok, this breaks the normal partial match functionality, which is pretty handy for running a few tests. Now we need *foo* instead of foo. Can you please add a patch on top to restore that?

Hi Daniel,

Do you know when this partial match functionality was added? I have tried with the commit adding my patch, and with the prior commit (i.e. without my patch), and the behaviour I see is the same.
./gem_basic --run-subtest fd fails
./gem_basic --run-subtest '*fd*' runs create-fd-close

I also went back to:
commit 982934625ac67234c6d85c6cf29a5a487e54d4f0
lib: allow wildcard matching when specifying subtests

That also has the same behaviour.

The commit prior to that fails both cases as there is no wildcard support.

I did all my testing on Android, so if you see something different on Linux, perhaps there is a difference in the fnmatch() implementation between Linux and Android?

I can create a patch to add this partial match functionality as well as the wildcard match if you want. It should be a simple case of calling strstr() if the wildcard match fails.

//Derek


>
>Thanks, Daniel
>
>> -Daniel
>> 
>> > ---
>> >  COPYING                 |  21 +++
>> >  lib/Makefile.sources    |   2 +
>> >  lib/igt_core.c          |  17 +-
>> >  lib/uwildmat/uwildmat.c | 474 
>> > ++++++++++++++++++++++++++++++++++++++++++++++++
>> >  lib/uwildmat/uwildmat.h |  24 +++
>> >  5 files changed, 536 insertions(+), 2 deletions(-)  create mode 
>> > 100644 lib/uwildmat/uwildmat.c  create mode 100644 
>> > lib/uwildmat/uwildmat.h
>> > 
>> > diff --git a/COPYING b/COPYING
>> > index b8f6753..16375f2 100644
>> > --- a/COPYING
>> > +++ b/COPYING
>> > @@ -106,3 +106,24 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 
>> > ANY CLAIM, DAMAGES OR OTHER  LIABILITY, WHETHER IN AN ACTION OF 
>> > CONTRACT, TORT OR OTHERWISE, ARISING  FROM, OUT OF OR IN CONNECTION 
>> > WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS  IN THE SOFTWARE.
>> > +
>> > +Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
>> > +    2013, 2014 by Internet Systems Consortium, Inc. ("ISC") 
>> > +Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
>> > +    2002, 2003 by The Internet Software Consortium and Rich Salz
>> > +
>> > +This code is derived from software contributed to the Internet 
>> > +Software Consortium by Rich Salz.
>> > +
>> > +Permission to use, copy, modify, and distribute this software for 
>> > +any purpose with or without fee is hereby granted, provided that 
>> > +the above copyright notice and this permission notice appear in all copies.
>> > +
>> > +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES 
>> > +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 
>> > +MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR 
>> > +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY 
>> > +DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
>> > +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
>> > +ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>> > +
>> > diff --git a/lib/Makefile.sources b/lib/Makefile.sources index 
>> > 4999868..e33861e 100644
>> > --- a/lib/Makefile.sources
>> > +++ b/lib/Makefile.sources
>> > @@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = 	\
>> >  	igt_core.h		\
>> >  	igt_draw.c		\
>> >  	igt_draw.h		\
>> > +	uwildmat/uwildmat.h	\
>> > +	uwildmat/uwildmat.c	\
>> >  	$(NULL)
>> >  
>> >  .PHONY: version.h.tmp
>> > diff --git a/lib/igt_core.c b/lib/igt_core.c index 6b69bb7..8e0bd2e 
>> > 100644
>> > --- a/lib/igt_core.c
>> > +++ b/lib/igt_core.c
>> > @@ -56,7 +56,7 @@
>> >  #include <ctype.h>
>> >  #include <limits.h>
>> >  #include <locale.h>
>> > -#include <fnmatch.h>
>> > +#include <uwildmat/uwildmat.h>
>> >  
>> >  #include "drmtest.h"
>> >  #include "intel_chipset.h"
>> > @@ -209,6 +209,19 @@
>> >   * intel gpu to be present). Then individual subtests can be run with
>> >   * "--run-subtest". Usage help for tests with subtests can be obtained with the
>> >   * "--help" command line option.
>> > + *
>> > + * A wildcard expression can be given to --run-subtest to specify a 
>> > + subset of
>> > + * subtests to run. See 
>> > + https://tools.ietf.org/html/rfc3977#section-4 for a
>> > + * description of allowed wildcard expressions.
>> > + * Some examples of allowed wildcard expressions are:
>> > + *
>> > + * - '*basic*' match any subtest containing basic
>> > + * - 'basic-???' match any subtest named basic- with 3 characters 
>> > + after -
>> > + * - 'basic-[0-9]' match any subtest named basic- with a single 
>> > + number after -
>> > + * - 'basic-[^0-9]' match any subtest named basic- with a single 
>> > + non numerical character after -
>> > + * - 'basic*,advanced*' match any subtest starting basic or 
>> > + advanced
>> > + * - '*,!basic*' match any subtest not starting basic
>> > + * - 'basic*,!basic-render*' match any subtest starting basic but 
>> > + not starting basic-render
>> >   */
>> >  
>> >  static unsigned int exit_handler_count; @@ -814,7 +827,7 @@ bool 
>> > __igt_run_subtest(const char *subtest_name)
>> >  	}
>> >  
>> >  	if (run_single_subtest) {
>> > -		if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
>> > +		if (uwildmat(subtest_name, run_single_subtest) == 0)
>> >  			return false;
>> >  		else
>> >  			run_single_subtest_found = true; diff --git 
>> > a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c new file mode 
>> > 100644 index 0000000..2d34742
>> > --- /dev/null
>> > +++ b/lib/uwildmat/uwildmat.c
>> > @@ -0,0 +1,474 @@
>> > +/* uwildmat.c is reused from libinn - 
>> > +https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
>> > +
>> > +This provides wild card matching originally used in InterNetNews 
>> > +and is described in https://tools.ietf.org/html/rfc3977#section-4
>> > +
>> > +INN licence:
>> > +INN as a whole and all code contained in it not otherwise marked 
>> > +with different licenses and/or copyrights is covered by the 
>> > +following copyright and license:
>> > +
>> > +   Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
>> > +       2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
>> > +   Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
>> > +       2002, 2003 by The Internet Software Consortium and Rich Salz
>> > +
>> > +   This code is derived from software contributed to the Internet Software
>> > +   Consortium by Rich Salz.
>> > +
>> > +   Permission to use, copy, modify, and distribute this software for any
>> > +   purpose with or without fee is hereby granted, provided that the above
>> > +   copyright notice and this permission notice appear in all copies.
>> > +
>> > +   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
>> > +   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>> > +   MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
>> > +   SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>> > +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>> > +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>> > +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>> > +
>> > +*/
>> > +
>> > +/*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
>> > +**
>> > +**  wildmat pattern matching with Unicode UTF-8 extensions.
>> > +**
>> > +**  Do shell-style pattern matching for ?, \, [], and * characters.  
>> > +Might not
>> > +**  be robust in face of malformed patterns; e.g., "foo[a-" could 
>> > +cause a
>> > +**  segmentation violation.  It is 8-bit clean.  (Robustness 
>> > +hopefully fixed
>> > +**  July 2000; all malformed patterns should now just fail to match 
>> > +anything.)
>> > +**
>> > +**  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
>> > +**  Rich $alz is now <rsalz@osf.org>.
>> > +**
>> > +**  April, 1991:  Replaced mutually-recursive calls with in-line 
>> > +code for the
>> > +**  star character.
>> > +**
>> > +**  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
>> > +**  This can greatly speed up failing wildcard patterns.  For example:
>> > +**
>> > +**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
>> > +**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
>> > +**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
>> > +**
>> > +**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  
>> > +Without
>> > +**  the ABORT code, it takes 22310 calls to fail.  Ugh.  The 
>> > +following
>> > +**  explanation is from Lars:
>> > +**
>> > +**  The precondition that must be fulfilled is that DoMatch will 
>> > +consume at
>> > +**  least one character in text.  This is true if *p is neither '*' 
>> > +nor '\0'.)
>> > +**  The last return has ABORT instead of false to avoid quadratic 
>> > +behaviour in
>> > +**  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With 
>> > +false, each
>> > +**  star-loop has to run to the end of the text; with ABORT only 
>> > +the last one
>> > +**  does.
>> > +**
>> > +**  Once the control of one instance of DoMatch enters the 
>> > +star-loop, that
>> > +**  instance will return either true or ABORT, and any calling 
>> > +instance will
>> > +**  therefore return immediately after (without calling recursively again).
>> > +**  In effect, only one star-loop is ever active.  It would be 
>> > +possible to
>> > +**  modify the code to maintain this context explicitly, 
>> > +eliminating all
>> > +**  recursive calls at the cost of some complication and loss of 
>> > +clarity (and
>> > +**  the ABORT stuff seems to be unclear enough by itself).  I think 
>> > +it would
>> > +**  be unwise to try to get this into a released version unless you 
>> > +have a
>> > +**  good test data base to try it out on.
>> > +**
>> > +**  June, 1991:  Robert Elz <kre@munnari.oz.au> added minus and 
>> > +close bracket
>> > +**  handling for character sets.
>> > +**
>> > +**  July, 2000:  Largely rewritten by Russ Allbery 
>> > +<rra@stanford.edu> to add
>> > +**  support for ',', '!', and optionally '@' to the core wildmat routine.
>> > +**  Broke the character class matching into a separate function for 
>> > +clarity
>> > +**  since it's infrequently used in practice, and added some simple 
>> > +lookahead
>> > +**  to significantly decrease the recursive calls in the '*' matching code.
>> > +**  Added support for UTF-8 as the default character set for any 
>> > +high-bit
>> > +**  characters.
>> > +**
>> > +**  For more information on UTF-8, see RFC 3629.
>> > +**
>> > +**  Please note that this file is intentionally written so that 
>> > +conditionally
>> > +**  executed expressions are on separate lines from the condition 
>> > +to
>> > +**  facilitate analysis of the coverage of the test suite using purecov.
>> > +**  Please preserve this.  As of March 11, 2001, purecov reports 
>> > +that the
>> > +**  accompanying test suite achieves 100% coverage of this file.
>> > +*/
>> > +
>> > +#include <string.h>
>> > +#include "uwildmat.h"
>> > +
>> > +#define ABORT -1
>> > +
>> > +/* Whether or not an octet looks like the start of a UTF-8 character. */
>> > +#define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
>> > +
>> > +
>> > +/*
>> > +**  Determine the length of a non-ASCII character in octets (for 
>> > +advancing
>> > +**  pointers when skipping over characters).  Takes a pointer to 
>> > +the start of
>> > +**  the character and to the last octet of the string.  If end is 
>> > +NULL, expect
>> > +**  the string pointed to by start to be nul-terminated.  If the 
>> > +character is
>> > +**  malformed UTF-8, return 1 to treat it like an eight-bit local character.
>> > +*/
>> > +static int
>> > +utf8_length(const unsigned char *start, const unsigned char *end) {
>> > +    unsigned char mask = 0x80;
>> > +    const unsigned char *p;
>> > +    int length = 0;
>> > +    int left;
>> > +
>> > +    for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
>> > +        length++;
>> > +    if (length < 2 || length > 6)
>> > +        return 1;
>> > +    if (end != NULL && (end - start + 1) < length)
>> > +        return 1;
>> > +    left = length - 1;
>> > +    for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
>> > +        left--;
>> > +    return (left == 0) ? length : 1; }
>> > +
>> > +
>> > +/*
>> > +**  Check whether a string contains only valid UTF-8 characters.
>> > +*/
>> > +bool
>> > +is_valid_utf8(const char *text)
>> > +{
>> > +    unsigned char mask;
>> > +    const unsigned char *p;
>> > +    int length;
>> > +    int left;
>> > +
>> > +    for (p = (const unsigned char *)text; *p != '\0';) {
>> > +        mask = 0x80;
>> > +        length = 0;
>> > +
>> > +        /* Find out the expected length of the character. */
>> > +        for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
>> > +            length++;
>> > +
>> > +        p++;
>> > +
>> > +        /* Valid ASCII. */
>> > +        if (length == 0)
>> > +            continue;
>> > +        
>> > +        /* Invalid length. */
>> > +        if (length < 2 || length > 6)
>> > +            return false;
>> > +
>> > +        /* Check that each byte looks like 10xxxxxx, except for the first. */
>> > +        left = length - 1;
>> > +        for (; left > 0 && (*p & 0xc0) == 0x80; p++)
>> > +            left--;
>> > +
>> > +        if (left > 0)
>> > +            return false;
>> > +    }
>> > +
>> > +    return true;
>> > +}
>> > +
>> > +
>> > +/*
>> > +**  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
>> > +**  character and to the last octet of the string, and to a uint32_t into
>> > +**  which to put the decoded UCS-4 value.  If end is NULL, expect the string
>> > +**  pointed to by start to be nul-terminated.  Returns the number of octets in
>> > +**  the UTF-8 encoding.  If the UTF-8 character is malformed, set result to
>> > +**  the decimal value of the first octet; this is wrong, but it will generally
>> > +**  cause the rest of the wildmat matching to do the right thing for non-UTF-8
>> > +**  input.
>> > +*/
>> > +static int
>> > +utf8_decode(const unsigned char *start, const unsigned char *end,
>> > +            uint32_t *result)
>> > +{
>> > +    uint32_t value = 0;
>> > +    int length, i;
>> > +    const unsigned char *p = start;
>> > +    unsigned char mask;
>> > +
>> > +    length = utf8_length(start, end);
>> > +    if (length < 2) {
>> > +        *result = *start;
>> > +        return 1;
>> > +    }
>> > +    mask = (1 << (7 - length)) - 1;
>> > +    value = *p & mask;
>> > +    p++;
>> > +    for (i = length - 1; i > 0; i--) {
>> > +        value = (value << 6) | (*p & 0x3f);
>> > +        p++;
>> > +    }
>> > +    *result = value;
>> > +    return length;
>> > +}
>> > +
>> > +
>> > +/*
>> > +**  Match a character class against text, a UCS-4 character.  start is a
>> > +**  pointer to the first character of the character class, end a pointer to
>> > +**  the last.  Returns whether the class matches that character.
>> > +*/
>> > +static bool
>> > +match_class(uint32_t text, const unsigned char *start,
>> > +            const unsigned char *end) {
>> > +    bool reversed, allowrange;
>> > +    const unsigned char *p = start;
>> > +    uint32_t first = 0;
>> > +    uint32_t last;
>> > +
>> > +    /* Check for an inverted character class (starting with ^).  If the
>> > +       character matches the character class, we return !reversed; that way,
>> > +       we return true if it's a regular character class and false if it's a
>> > +       reversed one.  If the character doesn't match, we return reversed. */
>> > +    reversed = (*p == '^');
>> > +    if (reversed)
>> > +        p++;
>> > +
>> > +    /* Walk through the character class until we reach the end or find a
>> > +       match, handling character ranges as we go.  Only permit a range to
>> > +       start when allowrange is true; this allows - to be treated like a
>> > +       normal character as the first character of the class and catches
>> > +       malformed ranges like a-e-n.  We treat the character at the beginning
>> > +       of a range as both a regular member of the class and the beginning of
>> > +       the range; this is harmless (although it means that malformed ranges
>> > +       like m-a will match m and nothing else). */
>> > +    allowrange = false;
>> > +    while (p <= end) {
>> > +        if (allowrange && *p == '-' && p < end) {
>> > +            p++;
>> > +            p += utf8_decode(p, end, &last);
>> > +            if (text >= first && text <= last)
>> > +                return !reversed;
>> > +            allowrange = false;
>> > +        } else {
>> > +            p += utf8_decode(p, end, &first);
>> > +            if (text == first)
>> > +                return !reversed;
>> > +            allowrange = true;
>> > +        }
>> > +    }
>> > +    return reversed;
>> > +}
>> > +
>> > +
>> > +/*
>> > +**  Match the text against the pattern between start and end.  This is a
>> > +**  single pattern; a leading ! or @ must already be taken care of, and
>> > +**  commas must be dealt with outside of this routine.
>> > +*/
>> > +static int
>> > +match_pattern(const unsigned char *text, const unsigned char *start,
>> > +              const unsigned char *end) {
>> > +    const unsigned char *q, *endclass;
>> > +    const unsigned char *p = start;
>> > +    bool ismeta;
>> > +    int matched, width;
>> > +    uint32_t c;
>> > +
>> > +    for (; p <= end; p++) {
>> > +        if (!*text && *p != '*')
>> > +            return ABORT;
>> > +
>> > +        switch (*p) {
>> > +        case '\\':
>> > +            if (!*++p)
>> > +                return ABORT;
>> > +            /* Fall through. */
>> > +
>> > +        default:
>> > +            if (*text++ != *p)
>> > +                return false;
>> > +            break;
>> > +
>> > +        case '?':
>> > +            text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
>> > +            break;
>> > +
>> > +        case '*':
>> > +            /* Consecutive stars are equivalent to one.  Advance pattern to
>> > +               the character after the star. */
>> > +            for (++p; *p == '*'; p++)
>> > +                ;
>> > +
>> > +            /* A trailing star will match anything. */
>> > +            if (p > end)
>> > +                return true;
>> > +
>> > +            /* Basic algorithm: Recurse at each point where the * could
>> > +               possibly match.  If the match succeeds or aborts, return
>> > +               immediately; otherwise, try the next position.
>> > +
>> > +               Optimization: If the character after the * in the pattern
>> > +               isn't a metacharacter (the common case), then the * has to
>> > +               consume characters at least up to the next occurrence of that
>> > +               character in the text.  Scan forward for those points rather
>> > +               than recursing at every possible point to save the extra
>> > +               function call overhead. */
>> > +            ismeta = (*p == '[' || *p == '?' || *p == '\\');
>> > +            while (*text) {
>> > +                width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
>> > +                if (ismeta) {
>> > +                    matched = match_pattern(text, p, end);
>> > +                    text += width;
>> > +                } else {
>> > +                    while (*text && *text != *p) {
>> > +                        text += width;
>> > +                        width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
>> > +                    }
>> > +                    if (!*text)
>> > +                        return ABORT;
>> > +                    matched = match_pattern(++text, p + 1, end);
>> > +                }
>> > +                if (matched != false)
>> > +                    return matched;
>> > +            }
>> > +            return ABORT;
>> > +
>> > +        case '[':
>> > +            /* Find the end of the character class, making sure not to pick
>> > +               up a close bracket at the beginning of the class. */
>> > +            p++;
>> > +            q = p + (*p == '^') + 1;
>> > +            if (q > end)
>> > +                return ABORT;
>> > +            endclass = memchr(q, ']', (size_t) (end - q + 1));
>> > +            if (!endclass)
>> > +                return ABORT;
>> > +
>> > +            /* Do the heavy lifting in another function for clarity, since
>> > +               character classes are an uncommon case. */
>> > +            text += utf8_decode(text, NULL, &c);
>> > +            if (!match_class(c, p, endclass - 1))
>> > +                return false;
>> > +            p = endclass;
>> > +            break;
>> > +        }
>> > +    }
>> > +
>> > +    return (*text == '\0');
>> > +}
>> > +
>> > +
>> > +/*
>> > +**  Takes text and a wildmat expression; a wildmat expression is a
>> > +**  comma-separated list of wildmat patterns, optionally preceded by ! to
>> > +**  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
>> > +**  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
>> > +**  set, allow @ to introduce a poison expression (the same as !, but if it
>> > +**  triggers the failed match the routine returns UWILDMAT_POISON instead).
>> > +*/
>> > +static enum uwildmat
>> > +match_expression(const unsigned char *text, const unsigned char *start,
>> > +                 bool allowpoison)
>> > +{
>> > +    const unsigned char *end, *split;
>> > +    const unsigned char *p = start;
>> > +    bool reverse, escaped;
>> > +    bool match = false;
>> > +    bool poison = false;
>> > +    bool poisoned = false;
>> > +
>> > +    /* Handle the empty expression separately, since otherwise end will be
>> > +       set to an invalid pointer. */
>> > +    if (!*p)
>> > +        return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
>> > +    end = start + strlen((const char *) start) - 1;
>> > +
>> > +    /* Main match loop.  Find each comma that separates patterns, and attempt 
>> > +       to match the text with each pattern in order.  The last matching
>> > +       pattern determines whether the whole expression matches. */
>> > +    for (; p <= end + 1; p = split + 1) {
>> > +        if (allowpoison)
>> > +            poison = (*p == '@');
>> > +        reverse = (*p == '!') || poison;
>> > +        if (reverse)
>> > +            p++;
>> > +
>> > +        /* Find the first unescaped comma, if any.  If there is none, split
>> > +           will be one greater than end and point at the nul at the end of
>> > +           the string. */
>> > +        for (escaped = false, split = p; split <= end; split++) {
>> > +            if (*split == '[') {
>> > +                split++;
>> > +                if (*split == ']')
>> > +                    split++;
>> > +                while (split <= end && *split != ']')
>> > +                    split++;
>> > +            }
>> > +            if (*split == ',' && !escaped)
>> > +                break;
>> > +            escaped = (*split == '\\') ? !escaped : false;
>> > +        }
>> > +
>> > +        /* Optimization: If match == !reverse and poison == poisoned, this
>> > +           pattern can't change the result, so don't do any work. */
>> > +        if (match == !reverse && poison == poisoned)
>> > +            continue;
>> > +        if (match_pattern(text, p, split - 1) == true) {
>> > +            poisoned = poison;
>> > +            match = !reverse;
>> > +        }
>> > +    }
>> > +    if (poisoned)
>> > +        return UWILDMAT_POISON;
>> > +    return match ? UWILDMAT_MATCH : UWILDMAT_FAIL; }
>> > +
>> > +
>> > +/*
>> > +**  User-level routine used for wildmats where @ should be treated as a
>> > +**  regular character.
>> > +*/
>> > +bool
>> > +uwildmat(const char *text, const char *pat) {
>> > +    const unsigned char *utext = (const unsigned char *) text;
>> > +    const unsigned char *upat = (const unsigned char *) pat;
>> > +
>> > +    if (upat[0] == '*' && upat[1] == '\0')
>> > +        return true;
>> > +    else
>> > +        return (match_expression(utext, upat, false) == 
>> > +UWILDMAT_MATCH); }
>> > +
>> > +
>> > +/*
>> > +**  User-level routine used for wildmats that support poison matches.
>> > +*/
>> > +enum uwildmat
>> > +uwildmat_poison(const char *text, const char *pat) {
>> > +    const unsigned char *utext = (const unsigned char *) text;
>> > +    const unsigned char *upat = (const unsigned char *) pat;
>> > +
>> > +    if (upat[0] == '*' && upat[1] == '\0')
>> > +        return UWILDMAT_MATCH;
>> > +    else
>> > +        return match_expression(utext, upat, true); }
>> > +
>> > +
>> > +/*
>> > +**  User-level routine for simple expressions (neither , nor ! are special).
>> > +*/
>> > +bool
>> > +uwildmat_simple(const char *text, const char *pat) {
>> > +    const unsigned char *utext = (const unsigned char *) text;
>> > +    const unsigned char *upat = (const unsigned char *) pat;
>> > +    size_t length;
>> > +
>> > +    if (upat[0] == '*' && upat[1] == '\0')
>> > +        return true;
>> > +    else {
>> > +        length = strlen(pat);
>> > +        return (match_pattern(utext, upat, upat + length - 1) == true);
>> > +    }
>> > +}
>> > diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h
>> > new file mode 100644
>> > index 0000000..2e47189
>> > --- /dev/null
>> > +++ b/lib/uwildmat/uwildmat.h
>> > @@ -0,0 +1,24 @@
>> > +
>> > +#ifndef UWILDMAT_H
>> > +#define UWILDMAT_H 1
>> > +
>> > +#include <stdio.h>
>> > +#include <sys/types.h>
>> > +#include <stdbool.h>
>> > +
>> > +/*
>> > +**  WILDMAT MATCHING
>> > +*/
>> > +enum uwildmat {
>> > +    UWILDMAT_FAIL   = 0,
>> > +    UWILDMAT_MATCH  = 1,
>> > +    UWILDMAT_POISON
>> > +};
>> > +
>> > +extern bool             is_valid_utf8(const char *start);
>> > +extern bool             uwildmat(const char *text, const char *pat);
>> > +extern bool             uwildmat_simple(const char *text, const char *pat);
>> > +extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);
>> > +
>> > +
>> > +#endif /* UWILDMAT_H */
>> > --
>> > 1.9.1
>> > 
>> 
>> --
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch
>
>--
>Daniel Vetter
>Software Engineer, Intel Corporation
>http://blog.ffwll.ch
>
diff mbox

Patch

diff --git a/COPYING b/COPYING
index b8f6753..16375f2 100644
--- a/COPYING
+++ b/COPYING
@@ -106,3 +106,24 @@  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 IN THE SOFTWARE.
+
+Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+    2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
+Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+    2002, 2003 by The Internet Software Consortium and Rich Salz
+
+This code is derived from software contributed to the Internet Software
+Consortium by Rich Salz.
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
+SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 4999868..e33861e 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -60,6 +60,8 @@  libintel_tools_la_SOURCES = 	\
 	igt_core.h		\
 	igt_draw.c		\
 	igt_draw.h		\
+	uwildmat/uwildmat.h	\
+	uwildmat/uwildmat.c	\
 	$(NULL)
 
 .PHONY: version.h.tmp
diff --git a/lib/igt_core.c b/lib/igt_core.c
index 6b69bb7..8e0bd2e 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -56,7 +56,7 @@ 
 #include <ctype.h>
 #include <limits.h>
 #include <locale.h>
-#include <fnmatch.h>
+#include <uwildmat/uwildmat.h>
 
 #include "drmtest.h"
 #include "intel_chipset.h"
@@ -209,6 +209,19 @@ 
  * intel gpu to be present). Then individual subtests can be run with
  * "--run-subtest". Usage help for tests with subtests can be obtained with the
  * "--help" command line option.
+ *
+ * A wildcard expression can be given to --run-subtest to specify a subset of
+ * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a
+ * description of allowed wildcard expressions.
+ * Some examples of allowed wildcard expressions are:
+ *
+ * - '*basic*' matches any subtest whose name contains basic
+ * - 'basic-???' matches any subtest named basic- followed by exactly 3 characters
+ * - 'basic-[0-9]' matches any subtest named basic- followed by a single digit
+ * - 'basic-[^0-9]' matches any subtest named basic- followed by a single non-digit character
+ * - 'basic*,advanced*' matches any subtest starting with basic or advanced
+ * - '*,!basic*' matches any subtest not starting with basic
+ * - 'basic*,!basic-render*' matches any subtest starting with basic but not with basic-render
  */
 
 static unsigned int exit_handler_count;
@@ -814,7 +827,7 @@  bool __igt_run_subtest(const char *subtest_name)
 	}
 
 	if (run_single_subtest) {
-		if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
+		if (uwildmat(subtest_name, run_single_subtest) == 0)
 			return false;
 		else
 			run_single_subtest_found = true;
diff --git a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c
new file mode 100644
index 0000000..2d34742
--- /dev/null
+++ b/lib/uwildmat/uwildmat.c
@@ -0,0 +1,474 @@ 
+/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
+
+This provides wild card matching originally used in InterNetNews and is
+described in https://tools.ietf.org/html/rfc3977#section-4
+
+INN licence:
+INN as a whole and all code contained in it not otherwise marked with
+different licenses and/or copyrights is covered by the following copyright
+and license:
+
+   Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+       2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
+   Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+       2002, 2003 by The Internet Software Consortium and Rich Salz
+
+   This code is derived from software contributed to the Internet Software
+   Consortium by Rich Salz.
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
+   SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+/*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
+**
+**  wildmat pattern matching with Unicode UTF-8 extensions.
+**
+**  Do shell-style pattern matching for ?, \, [], and * characters.  Might not
+**  be robust in face of malformed patterns; e.g., "foo[a-" could cause a
+**  segmentation violation.  It is 8-bit clean.  (Robustness hopefully fixed
+**  July 2000; all malformed patterns should now just fail to match anything.)
+**
+**  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+**  Rich $alz is now <rsalz@osf.org>.
+**
+**  April, 1991:  Replaced mutually-recursive calls with in-line code for the
+**  star character.
+**
+**  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
+**  This can greatly speed up failing wildcard patterns.  For example:
+**
+**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
+**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
+**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
+**
+**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
+**  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
+**  explanation is from Lars:
+**
+**  (The precondition that must be fulfilled is that DoMatch will consume at
+**  least one character in text.  This is true if *p is neither '*' nor '\0'.)
+**  The last return has ABORT instead of false to avoid quadratic behaviour in
+**  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With false, each
+**  star-loop has to run to the end of the text; with ABORT only the last one
+**  does.
+**
+**  Once the control of one instance of DoMatch enters the star-loop, that
+**  instance will return either true or ABORT, and any calling instance will
+**  therefore return immediately after (without calling recursively again).
+**  In effect, only one star-loop is ever active.  It would be possible to
+**  modify the code to maintain this context explicitly, eliminating all
+**  recursive calls at the cost of some complication and loss of clarity (and
+**  the ABORT stuff seems to be unclear enough by itself).  I think it would
+**  be unwise to try to get this into a released version unless you have a
+**  good test data base to try it out on.
+**
+**  June, 1991:  Robert Elz <kre@munnari.oz.au> added minus and close bracket
+**  handling for character sets.
+**
+**  July, 2000:  Largely rewritten by Russ Allbery <rra@stanford.edu> to add
+**  support for ',', '!', and optionally '@' to the core wildmat routine.
+**  Broke the character class matching into a separate function for clarity
+**  since it's infrequently used in practice, and added some simple lookahead
+**  to significantly decrease the recursive calls in the '*' matching code.
+**  Added support for UTF-8 as the default character set for any high-bit
+**  characters.
+**
+**  For more information on UTF-8, see RFC 3629.
+**
+**  Please note that this file is intentionally written so that conditionally
+**  executed expressions are on separate lines from the condition to
+**  facilitate analysis of the coverage of the test suite using purecov.
+**  Please preserve this.  As of March 11, 2001, purecov reports that the
+**  accompanying test suite achieves 100% coverage of this file.
+*/
+
+#include <string.h>
+#include "uwildmat.h"
+
+#define ABORT -1
+
+/* Whether or not an octet looks like the start of a UTF-8 character. */
+#define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
+
+
+/*
+**  Determine the length of a non-ASCII character in octets (for advancing
+**  pointers when skipping over characters).  Takes a pointer to the start of
+**  the character and to the last octet of the string.  If end is NULL, expect
+**  the string pointed to by start to be nul-terminated.  If the character is
+**  malformed UTF-8, return 1 to treat it like an eight-bit local character.
+*/
+static int
+utf8_length(const unsigned char *start, const unsigned char *end)
+{
+    unsigned char mask = 0x80;
+    const unsigned char *p;
+    int length = 0;
+    int left;
+
+    for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
+        length++;
+    if (length < 2 || length > 6)
+        return 1;
+    if (end != NULL && (end - start + 1) < length)
+        return 1;
+    left = length - 1;
+    for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
+        left--;
+    return (left == 0) ? length : 1;
+}
+
+
+/*
+**  Check whether a string contains only valid UTF-8 characters.
+*/
+bool
+is_valid_utf8(const char *text)
+{
+    unsigned char mask;
+    const unsigned char *p;
+    int length;
+    int left;
+
+    for (p = (const unsigned char *)text; *p != '\0';) {
+        mask = 0x80;
+        length = 0;
+
+        /* Find out the expected length of the character. */
+        for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
+            length++;
+
+        p++;
+
+        /* Valid ASCII. */
+        if (length == 0)
+            continue;
+        
+        /* Invalid length. */
+        if (length < 2 || length > 6)
+            return false;
+
+        /* Check that each byte looks like 10xxxxxx, except for the first. */
+        left = length - 1;
+        for (; left > 0 && (*p & 0xc0) == 0x80; p++)
+            left--;
+
+        if (left > 0)
+            return false;
+    }
+
+    return true;
+}
+
+
+/*
+**  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
+**  character and to the last octet of the string, and to a uint32_t into
+**  which to put the decoded UCS-4 value.  If end is NULL, expect the string
+**  pointed to by start to be nul-terminated.  Returns the number of octets in
+**  the UTF-8 encoding.  If the UTF-8 character is malformed, set result to
+**  the numeric value of the first octet; this is wrong, but it will generally
+**  cause the rest of the wildmat matching to do the right thing for non-UTF-8
+**  input.
+*/
+static int
+utf8_decode(const unsigned char *start, const unsigned char *end,
+            uint32_t *result)
+{
+    uint32_t value = 0;
+    int length, i;
+    const unsigned char *p = start;
+    unsigned char mask;
+
+    length = utf8_length(start, end);
+    if (length < 2) {
+        *result = *start;
+        return 1;
+    }
+    mask = (1 << (7 - length)) - 1;
+    value = *p & mask;
+    p++;
+    for (i = length - 1; i > 0; i--) {
+        value = (value << 6) | (*p & 0x3f);
+        p++;
+    }
+    *result = value;
+    return length;
+}
+
+
+/*
+**  Match a character class against text, a UCS-4 character.  start is a
+**  pointer to the first character of the character class, end a pointer to
+**  the last.  Returns whether the class matches that character.
+*/
+static bool
+match_class(uint32_t text, const unsigned char *start,
+            const unsigned char *end)
+{
+    bool reversed, allowrange;
+    const unsigned char *p = start;
+    uint32_t first = 0;
+    uint32_t last;
+
+    /* Check for an inverted character class (starting with ^).  If the
+       character matches the character class, we return !reversed; that way,
+       we return true if it's a regular character class and false if it's a
+       reversed one.  If the character doesn't match, we return reversed. */
+    reversed = (*p == '^');
+    if (reversed)
+        p++;
+
+    /* Walk through the character class until we reach the end or find a
+       match, handling character ranges as we go.  Only permit a range to
+       start when allowrange is true; this allows - to be treated like a
+       normal character as the first character of the class and catches
+       malformed ranges like a-e-n.  We treat the character at the beginning
+       of a range as both a regular member of the class and the beginning of
+       the range; this is harmless (although it means that malformed ranges
+       like m-a will match m and nothing else). */
+    allowrange = false;
+    while (p <= end) {
+        if (allowrange && *p == '-' && p < end) {
+            p++;
+            p += utf8_decode(p, end, &last);
+            if (text >= first && text <= last)
+                return !reversed;
+            allowrange = false;
+        } else {
+            p += utf8_decode(p, end, &first);
+            if (text == first)
+                return !reversed;
+            allowrange = true;
+        }
+    }
+    return reversed;
+}
+
+
+/*
+**  Match the text against the pattern between start and end.  This is a
+**  single pattern; a leading ! or @ must already be taken care of, and
+**  commas must be dealt with outside of this routine.
+*/
+static int
+match_pattern(const unsigned char *text, const unsigned char *start,
+              const unsigned char *end)
+{
+    const unsigned char *q, *endclass;
+    const unsigned char *p = start;
+    bool ismeta;
+    int matched, width;
+    uint32_t c;
+
+    for (; p <= end; p++) {
+        if (!*text && *p != '*')
+            return ABORT;
+
+        switch (*p) {
+        case '\\':
+            if (!*++p)
+                return ABORT;
+            /* Fall through. */
+
+        default:
+            if (*text++ != *p)
+                return false;
+            break;
+
+        case '?':
+            text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
+            break;
+
+        case '*':
+            /* Consecutive stars are equivalent to one.  Advance pattern to
+               the character after the star. */
+            for (++p; *p == '*'; p++)
+                ;
+
+            /* A trailing star will match anything. */
+            if (p > end)
+                return true;
+
+            /* Basic algorithm: Recurse at each point where the * could
+               possibly match.  If the match succeeds or aborts, return
+               immediately; otherwise, try the next position.
+
+               Optimization: If the character after the * in the pattern
+               isn't a metacharacter (the common case), then the * has to
+               consume characters at least up to the next occurrence of that
+               character in the text.  Scan forward for those points rather
+               than recursing at every possible point to save the extra
+               function call overhead. */
+            ismeta = (*p == '[' || *p == '?' || *p == '\\');
+            while (*text) {
+                width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
+                if (ismeta) {
+                    matched = match_pattern(text, p, end);
+                    text += width;
+                } else {
+                    while (*text && *text != *p) {
+                        text += width;
+                        width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
+                    }
+                    if (!*text)
+                        return ABORT;
+                    matched = match_pattern(++text, p + 1, end);
+                }
+                if (matched != false)
+                    return matched;
+            }
+            return ABORT;
+
+        case '[':
+            /* Find the end of the character class, making sure not to pick
+               up a close bracket at the beginning of the class. */
+            p++;
+            q = p + (*p == '^') + 1;
+            if (q > end)
+                return ABORT;
+            endclass = memchr(q, ']', (size_t) (end - q + 1));
+            if (!endclass)
+                return ABORT;
+
+            /* Do the heavy lifting in another function for clarity, since
+               character classes are an uncommon case. */
+            text += utf8_decode(text, NULL, &c);
+            if (!match_class(c, p, endclass - 1))
+                return false;
+            p = endclass;
+            break;
+        }
+    }
+
+    return (*text == '\0');
+}
+
+
+/*
+**  Takes text and a wildmat expression; a wildmat expression is a
+**  comma-separated list of wildmat patterns, optionally preceded by ! to
+**  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
+**  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
+**  set, allow @ to introduce a poison expression (the same as !, but if it
+**  triggers the failed match the routine returns UWILDMAT_POISON instead).
+*/
+static enum uwildmat
+match_expression(const unsigned char *text, const unsigned char *start,
+                 bool allowpoison)
+{
+    const unsigned char *end, *split;
+    const unsigned char *p = start;
+    bool reverse, escaped;
+    bool match = false;
+    bool poison = false;
+    bool poisoned = false;
+
+    /* Handle the empty expression separately, since otherwise end will be
+       set to an invalid pointer. */
+    if (!*p)
+        return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
+    end = start + strlen((const char *) start) - 1;
+
+    /* Main match loop.  Find each comma that separates patterns, and attempt 
+       to match the text with each pattern in order.  The last matching
+       pattern determines whether the whole expression matches. */
+    for (; p <= end + 1; p = split + 1) {
+        if (allowpoison)
+            poison = (*p == '@');
+        reverse = (*p == '!') || poison;
+        if (reverse)
+            p++;
+
+        /* Find the first unescaped comma, if any.  If there is none, split
+           will be one greater than end and point at the nul at the end of
+           the string. */
+        for (escaped = false, split = p; split <= end; split++) {
+            if (*split == '[') {
+                split++;
+                if (*split == ']')
+                    split++;
+                while (split <= end && *split != ']')
+                    split++;
+            }
+            if (*split == ',' && !escaped)
+                break;
+            escaped = (*split == '\\') ? !escaped : false;
+        }
+
+        /* Optimization: If match == !reverse and poison == poisoned, this
+           pattern can't change the result, so don't do any work. */
+        if (match == !reverse && poison == poisoned)
+            continue;
+        if (match_pattern(text, p, split - 1) == true) {
+            poisoned = poison;
+            match = !reverse;
+        }
+    }
+    if (poisoned)
+        return UWILDMAT_POISON;
+    return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
+}
+
+
+/*
+**  User-level routine used for wildmats where @ should be treated as a
+**  regular character.
+*/
+bool
+uwildmat(const char *text, const char *pat)
+{
+    const unsigned char *utext = (const unsigned char *) text;
+    const unsigned char *upat = (const unsigned char *) pat;
+
+    if (upat[0] == '*' && upat[1] == '\0')
+        return true;
+    else
+        return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
+}
+
+
+/*
+**  User-level routine used for wildmats that support poison matches.
+*/
+enum uwildmat
+uwildmat_poison(const char *text, const char *pat)
+{
+    const unsigned char *utext = (const unsigned char *) text;
+    const unsigned char *upat = (const unsigned char *) pat;
+
+    if (upat[0] == '*' && upat[1] == '\0')
+        return UWILDMAT_MATCH;
+    else
+        return match_expression(utext, upat, true);
+}
+
+
+/*
+**  User-level routine for simple expressions (neither , nor ! are special).
+*/
+bool
+uwildmat_simple(const char *text, const char *pat)
+{
+    const unsigned char *utext = (const unsigned char *) text;
+    const unsigned char *upat = (const unsigned char *) pat;
+    size_t length;
+
+    if (upat[0] == '*' && upat[1] == '\0')
+        return true;
+    else {
+        length = strlen(pat);
+        return (match_pattern(utext, upat, upat + length - 1) == true);
+    }
+}
diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h
new file mode 100644
index 0000000..2e47189
--- /dev/null
+++ b/lib/uwildmat/uwildmat.h
@@ -0,0 +1,24 @@ 
+
+#ifndef UWILDMAT_H
+#define UWILDMAT_H 1
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdbool.h>
+
+/*
+**  WILDMAT MATCHING
+*/
+enum uwildmat {
+    UWILDMAT_FAIL   = 0,
+    UWILDMAT_MATCH  = 1,
+    UWILDMAT_POISON
+};
+
+extern bool             is_valid_utf8(const char *start);
+extern bool             uwildmat(const char *text, const char *pat);
+extern bool             uwildmat_simple(const char *text, const char *pat);
+extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);
+
+
+#endif /* UWILDMAT_H */