git.cworth.org Git - tar/blob - gnu/fnmatch_loop.c

   1 /* -*- buffer-read-only: t -*- vi: set ro: */
   2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
   3 /* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   4    2003, 2004, 2005, 2006, 2009, 2010 Free Software Foundation, Inc.
   5    This file is part of the GNU C Library.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software Foundation,
  19    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  20
  21 /* Match STRING against the file name pattern PATTERN, returning zero if
  22    it matches, nonzero if not.  */
  23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  24                 const CHAR *string_end, bool no_leading_period, int flags)
  25      internal_function;
     /* EXT (declared above) is defined outside this part of the file.  FCT
        calls it when a '?' or '*' in the pattern is immediately followed by
        '(' and FNM_EXTMATCH is set in FLAGS, passing the triggering
        character as OPT.  If EXT returns anything other than -1, FCT
        returns that value as its own result; a return of -1 makes FCT fall
        through and treat the character as an ordinary wildcard.  */
  26 static const CHAR *END (const CHAR *patternp) internal_function;
     /* END (declared above) is defined outside this part of the file.  FCT
        calls it while skipping the run of '?' and '*' characters that
        follows a '*', when the next pattern character is '(' and
        FNM_EXTMATCH is set.  If the returned pointer differs from PATTERNP,
        FCT treats the text as an extended pattern and resumes scanning at
        the returned position; if it equals PATTERNP, FCT handles the
        character as a plain wildcard.  */
  27
  28 static int
  29 internal_function
  30 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  31      bool no_leading_period, int flags)
  32 {
  33   register const CHAR *p = pattern, *n = string;
  34   register UCHAR c;
  35 #ifdef _LIBC
  36 # if WIDE_CHAR_VERSION
  37   const char *collseq = (const char *)
  38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  39 # else
  40   const UCHAR *collseq = (const UCHAR *)
  41     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  42 # endif
  43 #endif
  44
  45   while ((c = *p++) != L_('\0'))
  46     {
  47       bool new_no_leading_period = false;
  48       c = FOLD (c);
  49
  50       switch (c)
  51         {
  52         case L_('?'):
  53           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  54             {
  55               int res;
  56
  57               res = EXT (c, p, n, string_end, no_leading_period,
  58                          flags);
  59               if (res != -1)
  60                 return res;
  61             }
  62
  63           if (n == string_end)
  64             return FNM_NOMATCH;
  65           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  66             return FNM_NOMATCH;
  67           else if (*n == L_('.') && no_leading_period)
  68             return FNM_NOMATCH;
  69           break;
  70
  71         case L_('\\'):
  72           if (!(flags & FNM_NOESCAPE))
  73             {
  74               c = *p++;
  75               if (c == L_('\0'))
  76                 /* Trailing \ loses.  */
  77                 return FNM_NOMATCH;
  78               c = FOLD (c);
  79             }
  80           if (n == string_end || FOLD ((UCHAR) *n) != c)
  81             return FNM_NOMATCH;
  82           break;
  83
  84         case L_('*'):
  85           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  86             {
  87               int res;
  88
  89               res = EXT (c, p, n, string_end, no_leading_period,
  90                          flags);
  91               if (res != -1)
  92                 return res;
  93             }
  94
  95           if (n != string_end && *n == L_('.') && no_leading_period)
  96             return FNM_NOMATCH;
  97
  98           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
  99             {
 100               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
 101                 {
 102                   const CHAR *endp = END (p);
 103                   if (endp != p)
 104                     {
 105                       /* This is a pattern.  Skip over it.  */
 106                       p = endp;
 107                       continue;
 108                     }
 109                 }
 110
 111               if (c == L_('?'))
 112                 {
 113                   /* A ? needs to match one character.  */
 114                   if (n == string_end)
 115                     /* There isn't another character; no match.  */
 116                     return FNM_NOMATCH;
 117                   else if (*n == L_('/')
 118                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 119                     /* A slash does not match a wildcard under
 120                        FNM_FILE_NAME.  */
 121                     return FNM_NOMATCH;
 122                   else
 123                     /* One character of the string is consumed in matching
 124                        this ? wildcard, so *??? won't match if there are
 125                        fewer than three characters.  */
 126                     ++n;
 127                 }
 128             }
 129
 130           if (c == L_('\0'))
 131             /* The wildcard(s) is/are the last element of the pattern.
 132                If the name is a file name and contains another slash
 133                this means it cannot match, unless the FNM_LEADING_DIR
 134                flag is set.  */
 135             {
 136               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 137
 138               if (flags & FNM_FILE_NAME)
 139                 {
 140                   if (flags & FNM_LEADING_DIR)
 141                     result = 0;
 142                   else
 143                     {
 144                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 145                         result = 0;
 146                     }
 147                 }
 148
 149               return result;
 150             }
 151           else
 152             {
 153               const CHAR *endp;
 154
 155               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 156                              string_end - n);
 157               if (endp == NULL)
 158                 endp = string_end;
 159
 160               if (c == L_('[')
 161                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 162                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 163                       && *p == L_('(')))
 164                 {
 165                   int flags2 = ((flags & FNM_FILE_NAME)
 166                                 ? flags : (flags & ~FNM_PERIOD));
 167                   bool no_leading_period2 = no_leading_period;
 168
 169                   for (--p; n < endp; ++n, no_leading_period2 = false)
 170                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 171                         == 0)
 172                       return 0;
 173                 }
 174               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 175                 {
 176                   while (n < string_end && *n != L_('/'))
 177                     ++n;
 178                   if (n < string_end && *n == L_('/')
 179                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 180                           == 0))
 181                     return 0;
 182                 }
 183               else
 184                 {
 185                   int flags2 = ((flags & FNM_FILE_NAME)
 186                                 ? flags : (flags & ~FNM_PERIOD));
 187                   int no_leading_period2 = no_leading_period;
 188
 189                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 190                     c = *p;
 191                   c = FOLD (c);
 192                   for (--p; n < endp; ++n, no_leading_period2 = false)
 193                     if (FOLD ((UCHAR) *n) == c
 194                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 195                             == 0))
 196                       return 0;
 197                 }
 198             }
 199
 200           /* If we come here no match is possible with the wildcard.  */
 201           return FNM_NOMATCH;
 202
 203         case L_('['):
 204           {
 205             /* Nonzero if the sense of the character class is inverted.  */
 206             register bool not;
 207             CHAR cold;
 208             UCHAR fn;
 209
 210             if (posixly_correct == 0)
 211               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 212
 213             if (n == string_end)
 214               return FNM_NOMATCH;
 215
 216             if (*n == L_('.') && no_leading_period)
 217               return FNM_NOMATCH;
 218
 219             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 220               /* `/' cannot be matched.  */
 221               return FNM_NOMATCH;
 222
 223             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 224             if (not)
 225               ++p;
 226
 227             fn = FOLD ((UCHAR) *n);
 228
 229             c = *p++;
 230             for (;;)
 231               {
 232                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 233                   {
 234                     if (*p == L_('\0'))
 235                       return FNM_NOMATCH;
 236                     c = FOLD ((UCHAR) *p);
 237                     ++p;
 238
 239                     goto normal_bracket;
 240                   }
 241                 else if (c == L_('[') && *p == L_(':'))
 242                   {
 243                     /* Leave room for the null.  */
 244                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 245                     size_t c1 = 0;
 246 #if defined _LIBC || WIDE_CHAR_SUPPORT
 247                     wctype_t wt;
 248 #endif
 249                     const CHAR *startp = p;
 250
 251                     for (;;)
 252                       {
 253                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 254                           /* The name is too long and therefore the pattern
 255                              is ill-formed.  */
 256                           return FNM_NOMATCH;
 257
 258                         c = *++p;
 259                         if (c == L_(':') && p[1] == L_(']'))
 260                           {
 261                             p += 2;
 262                             break;
 263                           }
 264                         if (c < L_('a') || c >= L_('z'))
 265                           {
 266                             /* This cannot possibly be a character class name.
 267                                Match it as a normal range.  */
 268                             p = startp;
 269                             c = L_('[');
 270                             goto normal_bracket;
 271                           }
 272                         str[c1++] = c;
 273                       }
 274                     str[c1] = L_('\0');
 275
 276 #if defined _LIBC || WIDE_CHAR_SUPPORT
 277                     wt = IS_CHAR_CLASS (str);
 278                     if (wt == 0)
 279                       /* Invalid character class name.  */
 280                       return FNM_NOMATCH;
 281
 282 # if defined _LIBC && ! WIDE_CHAR_VERSION
 283                     /* The following code is glibc-specific, but it does
 284                        a good job of speeding up the code since we can
 285                        avoid the btowc() call.  */
 286                     if (_ISCTYPE ((UCHAR) *n, wt))
 287                       goto matched;
 288 # else
 289                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 290                       goto matched;
 291 # endif
 292 #else
 293                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
 294                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
 295                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
 296                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
 297                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
 298                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
 299                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
 300                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
 301                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
 302                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
 303                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
 304                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
 305                       goto matched;
 306 #endif
 307                     c = *p++;
 308                   }
 309 #ifdef _LIBC
 310                 else if (c == L_('[') && *p == L_('='))
 311                   {
 312                     UCHAR str[1];
 313                     uint32_t nrules =
 314                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 315                     const CHAR *startp = p;
 316
 317                     c = *++p;
 318                     if (c == L_('\0'))
 319                       {
 320                         p = startp;
 321                         c = L_('[');
 322                         goto normal_bracket;
 323                       }
 324                     str[0] = c;
 325
 326                     c = *++p;
 327                     if (c != L_('=') || p[1] != L_(']'))
 328                       {
 329                         p = startp;
 330                         c = L_('[');
 331                         goto normal_bracket;
 332                       }
 333                     p += 2;
 334
 335                     if (nrules == 0)
 336                       {
 337                         if ((UCHAR) *n == str[0])
 338                           goto matched;
 339                       }
 340                     else
 341                       {
 342                         const int32_t *table;
 343 # if WIDE_CHAR_VERSION
 344                         const int32_t *weights;
 345                         const int32_t *extra;
 346 # else
 347                         const unsigned char *weights;
 348                         const unsigned char *extra;
 349 # endif
 350                         const int32_t *indirect;
 351                         int32_t idx;
 352                         const UCHAR *cp = (const UCHAR *) str;
 353
 354                         /* This #include defines a local function!  */
 355 # if WIDE_CHAR_VERSION
 356 #  include <locale/weightwc.h>
 357 # else
 358 #  include <locale/weight.h>
 359 # endif
 360
 361 # if WIDE_CHAR_VERSION
 362                         table = (const int32_t *)
 363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 364                         weights = (const int32_t *)
 365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 366                         extra = (const int32_t *)
 367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 368                         indirect = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 370 # else
 371                         table = (const int32_t *)
 372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 373                         weights = (const unsigned char *)
 374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 375                         extra = (const unsigned char *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 377                         indirect = (const int32_t *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 379 # endif
 380
 381                         idx = findidx (&cp);
 382                         if (idx != 0)
 383                           {
 384                             /* We found a table entry.  Now see whether the
 385                                character we are currently at has the same
 386                                equivalence class value.  */
 387                             int len = weights[idx & 0xffffff];
 388                             int32_t idx2;
 389                             const UCHAR *np = (const UCHAR *) n;
 390
 391                             idx2 = findidx (&np);
 392                             if (idx2 != 0
 393                                 && (idx >> 24) == (idx2 >> 24)
 394                                 && len == weights[idx2 & 0xffffff])
 395                               {
 396                                 int cnt = 0;
 397
 398                                 idx &= 0xffffff;
 399                                 idx2 &= 0xffffff;
 400
 401                                 while (cnt < len
 402                                        && (weights[idx + 1 + cnt]
 403                                            == weights[idx2 + 1 + cnt]))
 404                                   ++cnt;
 405
 406                                 if (cnt == len)
 407                                   goto matched;
 408                               }
 409                           }
 410                       }
 411
 412                     c = *p++;
 413                   }
 414 #endif
 415                 else if (c == L_('\0'))
 416                   /* [ (unterminated) loses.  */
 417                   return FNM_NOMATCH;
 418                 else
 419                   {
 420                     bool is_range = false;
 421
 422 #ifdef _LIBC
 423                     bool is_seqval = false;
 424
 425                     if (c == L_('[') && *p == L_('.'))
 426                       {
 427                         uint32_t nrules =
 428                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 429                         const CHAR *startp = p;
 430                         size_t c1 = 0;
 431
 432                         while (1)
 433                           {
 434                             c = *++p;
 435                             if (c == L_('.') && p[1] == L_(']'))
 436                               {
 437                                 p += 2;
 438                                 break;
 439                               }
 440                             if (c == '\0')
 441                               return FNM_NOMATCH;
 442                             ++c1;
 443                           }
 444
 445                         /* We have to handle the symbols differently in
 446                            ranges since then the collation sequence is
 447                            important.  */
 448                         is_range = *p == L_('-') && p[1] != L_('\0');
 449
 450                         if (nrules == 0)
 451                           {
 452                             /* There are no names defined in the collation
 453                                data.  Therefore we only accept the trivial
 454                                names consisting of the character itself.  */
 455                             if (c1 != 1)
 456                               return FNM_NOMATCH;
 457
 458                             if (!is_range && *n == startp[1])
 459                               goto matched;
 460
 461                             cold = startp[1];
 462                             c = *p++;
 463                           }
 464                         else
 465                           {
 466                             int32_t table_size;
 467                             const int32_t *symb_table;
 468 # ifdef WIDE_CHAR_VERSION
 469                             char str[c1];
 470                             size_t strcnt;
 471 # else
 472 #  define str (startp + 1)
 473 # endif
 474                             const unsigned char *extra;
 475                             int32_t idx;
 476                             int32_t elem;
 477                             int32_t second;
 478                             int32_t hash;
 479
 480 # ifdef WIDE_CHAR_VERSION
 481                             /* We have to convert the name to a single-byte
 482                                string.  This is possible since the names
 483                                consist of ASCII characters and the internal
 484                                representation is UCS4.  */
 485                             for (strcnt = 0; strcnt < c1; ++strcnt)
 486                               str[strcnt] = startp[1 + strcnt];
 487 # endif
 488
 489                             table_size =
 490                               _NL_CURRENT_WORD (LC_COLLATE,
 491                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 492                             symb_table = (const int32_t *)
 493                               _NL_CURRENT (LC_COLLATE,
 494                                            _NL_COLLATE_SYMB_TABLEMB);
 495                             extra = (const unsigned char *)
 496                               _NL_CURRENT (LC_COLLATE,
 497                                            _NL_COLLATE_SYMB_EXTRAMB);
 498
 499                             /* Locate the character in the hashing table.  */
 500                             hash = elem_hash (str, c1);
 501
 502                             idx = 0;
 503                             elem = hash % table_size;
 504                             if (symb_table[2 * elem] != 0)
 505                               {
 506                                 second = hash % (table_size - 2) + 1;
 507
 508                                 do
 509                                   {
 510                                     /* First compare the hashing value.  */
 511                                     if (symb_table[2 * elem] == hash
 512                                         && (c1
 513                                             == extra[symb_table[2 * elem + 1]])
 514                                         && memcmp (str,
 515                                                    &extra[symb_table[2 * elem
 516                                                                      + 1]
 517                                                           + 1], c1) == 0)
 518                                       {
 519                                         /* Yep, this is the entry.  */
 520                                         idx = symb_table[2 * elem + 1];
 521                                         idx += 1 + extra[idx];
 522                                         break;
 523                                       }
 524
 525                                     /* Next entry.  */
 526                                     elem += second;
 527                                   }
 528                                 while (symb_table[2 * elem] != 0);
 529                               }
 530
 531                             if (symb_table[2 * elem] != 0)
 532                               {
 533                                 /* Compare the byte sequence but only if
 534                                    this is not part of a range.  */
 535 # ifdef WIDE_CHAR_VERSION
 536                                 int32_t *wextra;
 537
 538                                 idx += 1 + extra[idx];
 539                                 /* Adjust for the alignment.  */
 540                                 idx = (idx + 3) & ~3;
 541
 542                                 wextra = (int32_t *) &extra[idx + 4];
 543 # endif
 544
 545                                 if (! is_range)
 546                                   {
 547 # ifdef WIDE_CHAR_VERSION
 548                                     for (c1 = 0;
 549                                          (int32_t) c1 < wextra[idx];
 550                                          ++c1)
 551                                       if (n[c1] != wextra[1 + c1])
 552                                         break;
 553
 554                                     if ((int32_t) c1 == wextra[idx])
 555                                       goto matched;
 556 # else
 557                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 558                                       if (n[c1] != extra[1 + c1])
 559                                         break;
 560
 561                                     if (c1 == extra[idx])
 562                                       goto matched;
 563 # endif
 564                                   }
 565
 566                                 /* Get the collation sequence value.  */
 567                                 is_seqval = true;
 568 # ifdef WIDE_CHAR_VERSION
 569                                 cold = wextra[1 + wextra[idx]];
 570 # else
 571                                 /* Adjust for the alignment.  */
 572                                 idx += 1 + extra[idx];
 573                                 idx = (idx + 3) & ~4;
 574                                 cold = *((int32_t *) &extra[idx]);
 575 # endif
 576
 577                                 c = *p++;
 578                               }
 579                             else if (c1 == 1)
 580                               {
 581                                 /* No valid character.  Match it as a
 582                                    single byte.  */
 583                                 if (!is_range && *n == str[0])
 584                                   goto matched;
 585
 586                                 cold = str[0];
 587                                 c = *p++;
 588                               }
 589                             else
 590                               return FNM_NOMATCH;
 591                           }
 592                       }
 593                     else
 594 # undef str
 595 #endif
 596                       {
 597                         c = FOLD (c);
 598                       normal_bracket:
 599
 600                         /* We have to handle the symbols differently in
 601                            ranges since then the collation sequence is
 602                            important.  */
 603                         is_range = (*p == L_('-') && p[1] != L_('\0')
 604                                     && p[1] != L_(']'));
 605
 606                         if (!is_range && c == fn)
 607                           goto matched;
 608
 609 #if _LIBC
 610                         /* This is needed if we goto normal_bracket; from
 611                            outside of is_seqval's scope.  */
 612                         is_seqval = false;
 613 #endif
 614
 615                         cold = c;
 616                         c = *p++;
 617                       }
 618
 619                     if (c == L_('-') && *p != L_(']'))
 620                       {
 621 #if _LIBC
 622                         /* We have to find the collation sequence
 623                            value for C.  Collation sequence is nothing
 624                            we can regularly access.  The sequence
 625                            value is defined by the order in which the
 626                            definitions of the collation values for the
 627                            various characters appear in the source
 628                            file.  A strange concept, nowhere
 629                            documented.  */
 630                         uint32_t fcollseq;
 631                         uint32_t lcollseq;
 632                         UCHAR cend = *p++;
 633
 634 # ifdef WIDE_CHAR_VERSION
 635                         /* Search in the `names' array for the characters.  */
 636                         fcollseq = __collseq_table_lookup (collseq, fn);
 637                         if (fcollseq == ~((uint32_t) 0))
 638                           /* XXX We don't know anything about the character
 639                              we are supposed to match.  This means we are
 640                              failing.  */
 641                           goto range_not_matched;
 642
 643                         if (is_seqval)
 644                           lcollseq = cold;
 645                         else
 646                           lcollseq = __collseq_table_lookup (collseq, cold);
 647 # else
 648                         fcollseq = collseq[fn];
 649                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 650 # endif
 651
 652                         is_seqval = false;
 653                         if (cend == L_('[') && *p == L_('.'))
 654                           {
 655                             uint32_t nrules =
 656                               _NL_CURRENT_WORD (LC_COLLATE,
 657                                                 _NL_COLLATE_NRULES);
 658                             const CHAR *startp = p;
 659                             size_t c1 = 0;
 660
 661                             while (1)
 662                               {
 663                                 c = *++p;
 664                                 if (c == L_('.') && p[1] == L_(']'))
 665                                   {
 666                                     p += 2;
 667                                     break;
 668                                   }
 669                                 if (c == '\0')
 670                                   return FNM_NOMATCH;
 671                                 ++c1;
 672                               }
 673
 674                             if (nrules == 0)
 675                               {
 676                                 /* There are no names defined in the
 677                                    collation data.  Therefore we only
 678                                    accept the trivial names consisting
 679                                    of the character itself.  */
 680                                 if (c1 != 1)
 681                                   return FNM_NOMATCH;
 682
 683                                 cend = startp[1];
 684                               }
 685                             else
 686                               {
 687                                 int32_t table_size;
 688                                 const int32_t *symb_table;
 689 # ifdef WIDE_CHAR_VERSION
 690                                 char str[c1];
 691                                 size_t strcnt;
 692 # else
 693 #  define str (startp + 1)
 694 # endif
 695                                 const unsigned char *extra;
 696                                 int32_t idx;
 697                                 int32_t elem;
 698                                 int32_t second;
 699                                 int32_t hash;
 700
 701 # ifdef WIDE_CHAR_VERSION
 702                                 /* We have to convert the name to a single-byte
 703                                    string.  This is possible since the names
 704                                    consist of ASCII characters and the internal
 705                                    representation is UCS4.  */
 706                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 707                                   str[strcnt] = startp[1 + strcnt];
 708 # endif
 709
 710                                 table_size =
 711                                   _NL_CURRENT_WORD (LC_COLLATE,
 712                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 713                                 symb_table = (const int32_t *)
 714                                   _NL_CURRENT (LC_COLLATE,
 715                                                _NL_COLLATE_SYMB_TABLEMB);
 716                                 extra = (const unsigned char *)
 717                                   _NL_CURRENT (LC_COLLATE,
 718                                                _NL_COLLATE_SYMB_EXTRAMB);
 719
 720                                 /* Locate the character in the hashing
 721                                    table.  */
 722                                 hash = elem_hash (str, c1);
 723
 724                                 idx = 0;
 725                                 elem = hash % table_size;
 726                                 if (symb_table[2 * elem] != 0)
 727                                   {
 728                                     second = hash % (table_size - 2) + 1;
 729
 730                                     do
 731                                       {
 732                                         /* First compare the hashing value.  */
 733                                         if (symb_table[2 * elem] == hash
 734                                             && (c1
 735                                                 == extra[symb_table[2 * elem + 1]])
 736                                             && memcmp (str,
 737                                                        &extra[symb_table[2 * elem + 1]
 738                                                               + 1], c1) == 0)
 739                                           {
 740                                             /* Yep, this is the entry.  */
 741                                             idx = symb_table[2 * elem + 1];
 742                                             idx += 1 + extra[idx];
 743                                             break;
 744                                           }
 745
 746                                         /* Next entry.  */
 747                                         elem += second;
 748                                       }
 749                                     while (symb_table[2 * elem] != 0);
 750                                   }
 751
 752                                 if (symb_table[2 * elem] != 0)
 753                                   {
 754                                     /* Compare the byte sequence but only if
 755                                        this is not part of a range.  */
 756 # ifdef WIDE_CHAR_VERSION
 757                                     int32_t *wextra;
 758
 759                                     idx += 1 + extra[idx];
 760                                     /* Adjust for the alignment.  */
 761                                     idx = (idx + 3) & ~4;
 762
 763                                     wextra = (int32_t *) &extra[idx + 4];
 764 # endif
 765                                     /* Get the collation sequence value.  */
 766                                     is_seqval = true;
 767 # ifdef WIDE_CHAR_VERSION
 768                                     cend = wextra[1 + wextra[idx]];
 769 # else
 770                                     /* Adjust for the alignment.  */
 771                                     idx += 1 + extra[idx];
 772                                     idx = (idx + 3) & ~4;
 773                                     cend = *((int32_t *) &extra[idx]);
 774 # endif
 775                                   }
 776                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 777                                   {
 778                                     cend = str[0];
 779                                     c = *p++;
 780                                   }
 781                                 else
 782                                   return FNM_NOMATCH;
 783                               }
 784 # undef str
 785                           }
 786                         else
 787                           {
 788                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 789                               cend = *p++;
 790                             if (cend == L_('\0'))
 791                               return FNM_NOMATCH;
 792                             cend = FOLD (cend);
 793                           }
 794
 795                         /* XXX It is not entirely clear to me how to handle
 796                            characters which are not mentioned in the
 797                            collation specification.  */
 798                         if (
 799 # ifdef WIDE_CHAR_VERSION
 800                             lcollseq == 0xffffffff ||
 801 # endif
 802                             lcollseq <= fcollseq)
 803                           {
 804                             /* We have to look at the upper bound.  */
 805                             uint32_t hcollseq;
 806
 807                             if (is_seqval)
 808                               hcollseq = cend;
 809                             else
 810                               {
 811 # ifdef WIDE_CHAR_VERSION
 812                                 hcollseq =
 813                                   __collseq_table_lookup (collseq, cend);
 814                                 if (hcollseq == ~((uint32_t) 0))
 815                                   {
 816                                     /* Hum, no information about the upper
 817                                        bound.  The matching succeeds if the
 818                                        lower bound is matched exactly.  */
 819                                     if (lcollseq != fcollseq)
 820                                       goto range_not_matched;
 821
 822                                     goto matched;
 823                                   }
 824 # else
 825                                 hcollseq = collseq[cend];
 826 # endif
 827                               }
 828
 829                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 830                               goto matched;
 831                           }
 832 # ifdef WIDE_CHAR_VERSION
 833                       range_not_matched:
 834 # endif
 835 #else
 836                         /* We use a boring value comparison of the character
 837                            values.  This is better than comparing using
 838                            `strcoll' since the latter would have surprising
 839                            and sometimes fatal consequences.  */
 840                         UCHAR cend = *p++;
 841
 842                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 843                           cend = *p++;
 844                         if (cend == L_('\0'))
 845                           return FNM_NOMATCH;
 846
 847                         /* It is a range.  */
 848                         if (cold <= fn && fn <= cend)
 849                           goto matched;
 850 #endif
 851
 852                         c = *p++;
 853                       }
 854                   }
 855
 856                 if (c == L_(']'))
 857                   break;
 858               }
 859
 860             if (!not)
 861               return FNM_NOMATCH;
 862             break;
 863
 864           matched:
 865             /* Skip the rest of the [...] that already matched.  */
 866             do
 867               {
 868               ignore_next:
 869                 c = *p++;
 870
 871                 if (c == L_('\0'))
 872                   /* [... (unterminated) loses.  */
 873                   return FNM_NOMATCH;
 874
 875                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 876                   {
 877                     if (*p == L_('\0'))
 878                       return FNM_NOMATCH;
 879                     /* XXX 1003.2d11 is unclear if this is right.  */
 880                     ++p;
 881                   }
 882                 else if (c == L_('[') && *p == L_(':'))
 883                   {
 884                     int c1 = 0;
 885                     const CHAR *startp = p;
 886
 887                     while (1)
 888                       {
 889                         c = *++p;
 890                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 891                           return FNM_NOMATCH;
 892
 893                         if (*p == L_(':') && p[1] == L_(']'))
 894                           break;
 895
 896                         if (c < L_('a') || c >= L_('z'))
 897                           {
 898                             p = startp;
 899                             goto ignore_next;
 900                           }
 901                       }
 902                     p += 2;
 903                     c = *p++;
 904                   }
 905                 else if (c == L_('[') && *p == L_('='))
 906                   {
 907                     c = *++p;
 908                     if (c == L_('\0'))
 909                       return FNM_NOMATCH;
 910                     c = *++p;
 911                     if (c != L_('=') || p[1] != L_(']'))
 912                       return FNM_NOMATCH;
 913                     p += 2;
 914                     c = *p++;
 915                   }
 916                 else if (c == L_('[') && *p == L_('.'))
 917                   {
 918                     ++p;
 919                     while (1)
 920                       {
 921                         c = *++p;
 922                         if (c == '\0')
 923                           return FNM_NOMATCH;
 924
 925                         if (*p == L_('.') && p[1] == L_(']'))
 926                           break;
 927                       }
 928                     p += 2;
 929                     c = *p++;
 930                   }
 931               }
 932             while (c != L_(']'));
 933             if (not)
 934               return FNM_NOMATCH;
 935           }
 936           break;
 937
 938         case L_('+'):
 939         case L_('@'):
 940         case L_('!'):
 941           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 942             {
 943               int res;
 944
 945               res = EXT (c, p, n, string_end, no_leading_period, flags);
 946               if (res != -1)
 947                 return res;
 948             }
 949           goto normal_match;
 950
 951         case L_('/'):
 952           if (NO_LEADING_PERIOD (flags))
 953             {
 954               if (n == string_end || c != (UCHAR) *n)
 955                 return FNM_NOMATCH;
 956
 957               new_no_leading_period = true;
 958               break;
 959             }
 960           /* FALLTHROUGH */
 961         default:
 962         normal_match:
 963           if (n == string_end || c != FOLD ((UCHAR) *n))
 964             return FNM_NOMATCH;
 965         }
 966
 967       no_leading_period = new_no_leading_period;
 968       ++n;
 969     }
 970
 971   if (n == string_end)
 972     return 0;
 973
 974   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 975     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 976     return 0;
 977
 978   return FNM_NOMATCH;
 979 }
 980
 981
 982 static const CHAR *
 983 internal_function
 984 END (const CHAR *pattern)
 985 {
 986   const CHAR *p = pattern;
 987
 988   while (1)
 989     if (*++p == L_('\0'))
 990       /* This is an invalid pattern.  */
 991       return pattern;
 992     else if (*p == L_('['))
 993       {
 994         /* Handle brackets special.  */
 995         if (posixly_correct == 0)
 996           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 997
 998         /* Skip the not sign.  We have to recognize it because of a possibly
 999            following ']'.  */
1000         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1001           ++p;
1002         /* A leading ']' is recognized as such.  */
1003         if (*p == L_(']'))
1004           ++p;
1005         /* Skip over all characters of the list.  */
1006         while (*p != L_(']'))
1007           if (*p++ == L_('\0'))
1008             /* This is no valid pattern.  */
1009             return pattern;
1010       }
1011     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1012               || *p == L_('!')) && p[1] == L_('('))
1013       p = END (p + 1);
1014     else if (*p == L_(')'))
1015       break;
1016
1017   return p + 1;
1018 }
1019
1020
1021 static int
1022 internal_function
1023 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1024      bool no_leading_period, int flags)
1025 {
1026   const CHAR *startp;
1027   size_t level;
1028   struct patternlist
1029   {
1030     struct patternlist *next;
1031     CHAR str[1];
1032   } *list = NULL;
1033   struct patternlist **lastp = &list;
1034   size_t pattern_len = STRLEN (pattern);
1035   const CHAR *p;
1036   const CHAR *rs;
1037   enum { ALLOCA_LIMIT = 8000 };
1038
1039   /* Parse the pattern.  Store the individual parts in the list.  */
1040   level = 0;
1041   for (startp = p = pattern + 1; ; ++p)
1042     if (*p == L_('\0'))
1043       /* This is an invalid pattern.  */
1044       return -1;
1045     else if (*p == L_('['))
1046       {
1047         /* Handle brackets special.  */
1048         if (posixly_correct == 0)
1049           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1050
1051         /* Skip the not sign.  We have to recognize it because of a possibly
1052            following ']'.  */
1053         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1054           ++p;
1055         /* A leading ']' is recognized as such.  */
1056         if (*p == L_(']'))
1057           ++p;
1058         /* Skip over all characters of the list.  */
1059         while (*p != L_(']'))
1060           if (*p++ == L_('\0'))
1061             /* This is no valid pattern.  */
1062             return -1;
1063       }
1064     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1065               || *p == L_('!')) && p[1] == L_('('))
1066       /* Remember the nesting level.  */
1067       ++level;
1068     else if (*p == L_(')'))
1069       {
1070         if (level-- == 0)
1071           {
1072             /* This means we found the end of the pattern.  */
1073 #define NEW_PATTERN \
1074             struct patternlist *newp;                                         \
1075             size_t plen;                                                      \
1076             size_t plensize;                                                  \
1077             size_t newpsize;                                                  \
1078                                                                               \
1079             plen = (opt == L_('?') || opt == L_('@')                          \
1080                     ? pattern_len                                             \
1081                     : p - startp + 1UL);                                      \
1082             plensize = plen * sizeof (CHAR);                                  \
1083             newpsize = offsetof (struct patternlist, str) + plensize;         \
1084             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1085                 || newpsize < offsetof (struct patternlist, str)              \
1086                 || ALLOCA_LIMIT <= newpsize)                                  \
1087               return -1;                                                      \
1088             newp = (struct patternlist *) alloca (newpsize);                  \
1089             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1090             newp->next = NULL;                                                \
1091             *lastp = newp;                                                    \
1092             lastp = &newp->next
1093             NEW_PATTERN;
1094             break;
1095           }
1096       }
1097     else if (*p == L_('|'))
1098       {
1099         if (level == 0)
1100           {
1101             NEW_PATTERN;
1102             startp = p + 1;
1103           }
1104       }
1105   assert (list != NULL);
1106   assert (p[-1] == L_(')'));
1107 #undef NEW_PATTERN
1108
1109   switch (opt)
1110     {
1111     case L_('*'):
1112       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1113         return 0;
1114       /* FALLTHROUGH */
1115
1116     case L_('+'):
1117       do
1118         {
1119           for (rs = string; rs <= string_end; ++rs)
1120             /* First match the prefix with the current pattern with the
1121                current pattern.  */
1122             if (FCT (list->str, string, rs, no_leading_period,
1123                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1124                 /* This was successful.  Now match the rest with the rest
1125                    of the pattern.  */
1126                 && (FCT (p, rs, string_end,
1127                          rs == string
1128                          ? no_leading_period
1129                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1130                          flags & FNM_FILE_NAME
1131                          ? flags : flags & ~FNM_PERIOD) == 0
1132                     /* This didn't work.  Try the whole pattern.  */
1133                     || (rs != string
1134                         && FCT (pattern - 1, rs, string_end,
1135                                 rs == string
1136                                 ? no_leading_period
1137                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1138                                 flags & FNM_FILE_NAME
1139                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1140               /* It worked.  Signal success.  */
1141               return 0;
1142         }
1143       while ((list = list->next) != NULL);
1144
1145       /* None of the patterns lead to a match.  */
1146       return FNM_NOMATCH;
1147
1148     case L_('?'):
1149       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1150         return 0;
1151       /* FALLTHROUGH */
1152
1153     case L_('@'):
1154       do
1155         /* I cannot believe it but `strcat' is actually acceptable
1156            here.  Match the entire string with the prefix from the
1157            pattern list and the rest of the pattern following the
1158            pattern list.  */
1159         if (FCT (STRCAT (list->str, p), string, string_end,
1160                  no_leading_period,
1161                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1162           /* It worked.  Signal success.  */
1163           return 0;
1164       while ((list = list->next) != NULL);
1165
1166       /* None of the patterns lead to a match.  */
1167       return FNM_NOMATCH;
1168
1169     case L_('!'):
1170       for (rs = string; rs <= string_end; ++rs)
1171         {
1172           struct patternlist *runp;
1173
1174           for (runp = list; runp != NULL; runp = runp->next)
1175             if (FCT (runp->str, string, rs,  no_leading_period,
1176                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1177               break;
1178
1179           /* If none of the patterns matched see whether the rest does.  */
1180           if (runp == NULL
1181               && (FCT (p, rs, string_end,
1182                        rs == string
1183                        ? no_leading_period
1184                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1185                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1186                   == 0))
1187             /* This is successful.  */
1188             return 0;
1189         }
1190
1191       /* None of the patterns together with the rest of the pattern
1192          lead to a match.  */
1193       return FNM_NOMATCH;
1194
1195     default:
1196       assert (! "Invalid extended matching operator");
1197       break;
1198     }
1199
1200   return -1;
1201 }
1202
1203
/* Remove the macros that the code in this file is written in terms of.
   Presumably the including file defines them again before every
   inclusion (for instance once per character type) -- TODO confirm
   against the includer.  */
1204 #undef FOLD
1205 #undef CHAR
1206 #undef UCHAR
1207 #undef INT
1208 #undef FCT
1209 #undef EXT
1210 #undef END
1211 #undef MEMPCPY
1212 #undef MEMCHR
1213 #undef STRCOLL
1214 #undef STRLEN
1215 #undef STRCAT
1216 #undef L_
1217 #undef BTOWC