]> git.cworth.org Git - tar/blob - lib/fnmatch_loop.c
Imported Upstream version 1.20
[tar] / lib / fnmatch_loop.c
1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
4    Free Software Foundation, Inc.
5    This file is part of the GNU C Library.
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software Foundation,
19    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
20
21 /* Match STRING against the file name pattern PATTERN, returning zero if
22    it matches, nonzero if not.  */
/* Forward declaration of the extended-pattern matcher.  FCT calls it as
   EXT (c, p, n, string_end, no_leading_period, flags) when FNM_EXTMATCH
   is set in FLAGS and the character following a `?' or `*' in the pattern
   is `('.  A result of -1 makes FCT fall through to its ordinary wildcard
   handling; any other result is returned by FCT unchanged.  */
23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
24                 const CHAR *string_end, bool no_leading_period, int flags)
25      internal_function;
/* Forward declaration.  While skipping a run of `?' and `*' wildcards
   under FNM_EXTMATCH, FCT calls END (p) with P pointing at a `('.  If the
   returned pointer differs from P, FCT treats the text as an extended
   pattern and resumes scanning the pattern at the returned pointer.  */
26 static const CHAR *END (const CHAR *patternp) internal_function;
27
28 static int
29 internal_function
30 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
31      bool no_leading_period, int flags)
32 {
33   register const CHAR *p = pattern, *n = string;
34   register UCHAR c;
35 #ifdef _LIBC
36 # if WIDE_CHAR_VERSION
37   const char *collseq = (const char *)
38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
39 # else
40   const UCHAR *collseq = (const UCHAR *)
41     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 # endif
43 #endif
44
45   while ((c = *p++) != L_('\0'))
46     {
47       bool new_no_leading_period = false;
48       c = FOLD (c);
49
50       switch (c)
51         {
52         case L_('?'):
53           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54             {
55               int res;
56
57               res = EXT (c, p, n, string_end, no_leading_period,
58                          flags);
59               if (res != -1)
60                 return res;
61             }
62
63           if (n == string_end)
64             return FNM_NOMATCH;
65           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
66             return FNM_NOMATCH;
67           else if (*n == L_('.') && no_leading_period)
68             return FNM_NOMATCH;
69           break;
70
71         case L_('\\'):
72           if (!(flags & FNM_NOESCAPE))
73             {
74               c = *p++;
75               if (c == L_('\0'))
76                 /* Trailing \ loses.  */
77                 return FNM_NOMATCH;
78               c = FOLD (c);
79             }
80           if (n == string_end || FOLD ((UCHAR) *n) != c)
81             return FNM_NOMATCH;
82           break;
83
84         case L_('*'):
85           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86             {
87               int res;
88
89               res = EXT (c, p, n, string_end, no_leading_period,
90                          flags);
91               if (res != -1)
92                 return res;
93             }
94
95           if (n != string_end && *n == L_('.') && no_leading_period)
96             return FNM_NOMATCH;
97
98           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
99             {
100               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
101                 {
102                   const CHAR *endp = END (p);
103                   if (endp != p)
104                     {
105                       /* This is a pattern.  Skip over it.  */
106                       p = endp;
107                       continue;
108                     }
109                 }
110
111               if (c == L_('?'))
112                 {
113                   /* A ? needs to match one character.  */
114                   if (n == string_end)
115                     /* There isn't another character; no match.  */
116                     return FNM_NOMATCH;
117                   else if (*n == L_('/')
118                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
119                     /* A slash does not match a wildcard under
120                        FNM_FILE_NAME.  */
121                     return FNM_NOMATCH;
122                   else
123                     /* One character of the string is consumed in matching
124                        this ? wildcard, so *??? won't match if there are
125                        less than three characters.  */
126                     ++n;
127                 }
128             }
129
130           if (c == L_('\0'))
131             /* The wildcard(s) is/are the last element of the pattern.
132                If the name is a file name and contains another slash
133                this means it cannot match, unless the FNM_LEADING_DIR
134                flag is set.  */
135             {
136               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
137
138               if (flags & FNM_FILE_NAME)
139                 {
140                   if (flags & FNM_LEADING_DIR)
141                     result = 0;
142                   else
143                     {
144                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
145                         result = 0;
146                     }
147                 }
148
149               return result;
150             }
151           else
152             {
153               const CHAR *endp;
154
155               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
156                              string_end - n);
157               if (endp == NULL)
158                 endp = string_end;
159
160               if (c == L_('[')
161                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
162                       && (c == L_('@') || c == L_('+') || c == L_('!'))
163                       && *p == L_('(')))
164                 {
165                   int flags2 = ((flags & FNM_FILE_NAME)
166                                 ? flags : (flags & ~FNM_PERIOD));
167                   bool no_leading_period2 = no_leading_period;
168
169                   for (--p; n < endp; ++n, no_leading_period2 = false)
170                     if (FCT (p, n, string_end, no_leading_period2, flags2)
171                         == 0)
172                       return 0;
173                 }
174               else if (c == L_('/') && (flags & FNM_FILE_NAME))
175                 {
176                   while (n < string_end && *n != L_('/'))
177                     ++n;
178                   if (n < string_end && *n == L_('/')
179                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
180                           == 0))
181                     return 0;
182                 }
183               else
184                 {
185                   int flags2 = ((flags & FNM_FILE_NAME)
186                                 ? flags : (flags & ~FNM_PERIOD));
187                   int no_leading_period2 = no_leading_period;
188
189                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
190                     c = *p;
191                   c = FOLD (c);
192                   for (--p; n < endp; ++n, no_leading_period2 = false)
193                     if (FOLD ((UCHAR) *n) == c
194                         && (FCT (p, n, string_end, no_leading_period2, flags2)
195                             == 0))
196                       return 0;
197                 }
198             }
199
200           /* If we come here no match is possible with the wildcard.  */
201           return FNM_NOMATCH;
202
203         case L_('['):
204           {
205             /* Nonzero if the sense of the character class is inverted.  */
206             register bool not;
207             CHAR cold;
208             UCHAR fn;
209
210             if (posixly_correct == 0)
211               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
212
213             if (n == string_end)
214               return FNM_NOMATCH;
215
216             if (*n == L_('.') && no_leading_period)
217               return FNM_NOMATCH;
218
219             if (*n == L_('/') && (flags & FNM_FILE_NAME))
220               /* `/' cannot be matched.  */
221               return FNM_NOMATCH;
222
223             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
224             if (not)
225               ++p;
226
227             fn = FOLD ((UCHAR) *n);
228
229             c = *p++;
230             for (;;)
231               {
232                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
233                   {
234                     if (*p == L_('\0'))
235                       return FNM_NOMATCH;
236                     c = FOLD ((UCHAR) *p);
237                     ++p;
238
239                     goto normal_bracket;
240                   }
241                 else if (c == L_('[') && *p == L_(':'))
242                   {
243                     /* Leave room for the null.  */
244                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
245                     size_t c1 = 0;
246 #if defined _LIBC || WIDE_CHAR_SUPPORT
247                     wctype_t wt;
248 #endif
249                     const CHAR *startp = p;
250
251                     for (;;)
252                       {
253                         if (c1 == CHAR_CLASS_MAX_LENGTH)
254                           /* The name is too long and therefore the pattern
255                              is ill-formed.  */
256                           return FNM_NOMATCH;
257
258                         c = *++p;
259                         if (c == L_(':') && p[1] == L_(']'))
260                           {
261                             p += 2;
262                             break;
263                           }
264                         if (c < L_('a') || c >= L_('z'))
265                           {
266                             /* This cannot possibly be a character class name.
267                                Match it as a normal range.  */
268                             p = startp;
269                             c = L_('[');
270                             goto normal_bracket;
271                           }
272                         str[c1++] = c;
273                       }
274                     str[c1] = L_('\0');
275
276 #if defined _LIBC || WIDE_CHAR_SUPPORT
277                     wt = IS_CHAR_CLASS (str);
278                     if (wt == 0)
279                       /* Invalid character class name.  */
280                       return FNM_NOMATCH;
281
282 # if defined _LIBC && ! WIDE_CHAR_VERSION
283                     /* The following code is glibc specific, but it does
284                        a good job of speeding things up there since
285                        we can avoid the btowc() call.  */
286                     if (_ISCTYPE ((UCHAR) *n, wt))
287                       goto matched;
288 # else
289                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
290                       goto matched;
291 # endif
292 #else
293                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
294                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
295                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
296                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
297                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
298                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
299                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
300                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
301                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
302                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
303                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
304                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
305                       goto matched;
306 #endif
307                     c = *p++;
308                   }
309 #ifdef _LIBC
310                 else if (c == L_('[') && *p == L_('='))
311                   {
312                     UCHAR str[1];
313                     uint32_t nrules =
314                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
315                     const CHAR *startp = p;
316
317                     c = *++p;
318                     if (c == L_('\0'))
319                       {
320                         p = startp;
321                         c = L_('[');
322                         goto normal_bracket;
323                       }
324                     str[0] = c;
325
326                     c = *++p;
327                     if (c != L_('=') || p[1] != L_(']'))
328                       {
329                         p = startp;
330                         c = L_('[');
331                         goto normal_bracket;
332                       }
333                     p += 2;
334
335                     if (nrules == 0)
336                       {
337                         if ((UCHAR) *n == str[0])
338                           goto matched;
339                       }
340                     else
341                       {
342                         const int32_t *table;
343 # if WIDE_CHAR_VERSION
344                         const int32_t *weights;
345                         const int32_t *extra;
346 # else
347                         const unsigned char *weights;
348                         const unsigned char *extra;
349 # endif
350                         const int32_t *indirect;
351                         int32_t idx;
352                         const UCHAR *cp = (const UCHAR *) str;
353
354                         /* This #include defines a local function!  */
355 # if WIDE_CHAR_VERSION
356 #  include <locale/weightwc.h>
357 # else
358 #  include <locale/weight.h>
359 # endif
360
361 # if WIDE_CHAR_VERSION
362                         table = (const int32_t *)
363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
364                         weights = (const int32_t *)
365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
366                         extra = (const int32_t *)
367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
368                         indirect = (const int32_t *)
369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
370 # else
371                         table = (const int32_t *)
372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
373                         weights = (const unsigned char *)
374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
375                         extra = (const unsigned char *)
376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
377                         indirect = (const int32_t *)
378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
379 # endif
380
381                         idx = findidx (&cp);
382                         if (idx != 0)
383                           {
384                             /* We found a table entry.  Now see whether the
385                                character we are currently at has the same
386                                equivalence class value.  */
387                             int len = weights[idx];
388                             int32_t idx2;
389                             const UCHAR *np = (const UCHAR *) n;
390
391                             idx2 = findidx (&np);
392                             if (idx2 != 0 && len == weights[idx2])
393                               {
394                                 int cnt = 0;
395
396                                 while (cnt < len
397                                        && (weights[idx + 1 + cnt]
398                                            == weights[idx2 + 1 + cnt]))
399                                   ++cnt;
400
401                                 if (cnt == len)
402                                   goto matched;
403                               }
404                           }
405                       }
406
407                     c = *p++;
408                   }
409 #endif
410                 else if (c == L_('\0'))
411                   /* [ (unterminated) loses.  */
412                   return FNM_NOMATCH;
413                 else
414                   {
415                     bool is_range = false;
416
417 #ifdef _LIBC
418                     bool is_seqval = false;
419
420                     if (c == L_('[') && *p == L_('.'))
421                       {
422                         uint32_t nrules =
423                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
424                         const CHAR *startp = p;
425                         size_t c1 = 0;
426
427                         while (1)
428                           {
429                             c = *++p;
430                             if (c == L_('.') && p[1] == L_(']'))
431                               {
432                                 p += 2;
433                                 break;
434                               }
435                             if (c == '\0')
436                               return FNM_NOMATCH;
437                             ++c1;
438                           }
439
440                         /* We have to handle the symbols differently in
441                            ranges since then the collation sequence is
442                            important.  */
443                         is_range = *p == L_('-') && p[1] != L_('\0');
444
445                         if (nrules == 0)
446                           {
447                             /* There are no names defined in the collation
448                                data.  Therefore we only accept the trivial
449                                names consisting of the character itself.  */
450                             if (c1 != 1)
451                               return FNM_NOMATCH;
452
453                             if (!is_range && *n == startp[1])
454                               goto matched;
455
456                             cold = startp[1];
457                             c = *p++;
458                           }
459                         else
460                           {
461                             int32_t table_size;
462                             const int32_t *symb_table;
463 # ifdef WIDE_CHAR_VERSION
464                             char str[c1];
465                             size_t strcnt;
466 # else
467 #  define str (startp + 1)
468 # endif
469                             const unsigned char *extra;
470                             int32_t idx;
471                             int32_t elem;
472                             int32_t second;
473                             int32_t hash;
474
475 # ifdef WIDE_CHAR_VERSION
476                             /* We have to convert the name to a single-byte
477                                string.  This is possible since the names
478                                consist of ASCII characters and the internal
479                                representation is UCS4.  */
480                             for (strcnt = 0; strcnt < c1; ++strcnt)
481                               str[strcnt] = startp[1 + strcnt];
482 # endif
483
484                             table_size =
485                               _NL_CURRENT_WORD (LC_COLLATE,
486                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
487                             symb_table = (const int32_t *)
488                               _NL_CURRENT (LC_COLLATE,
489                                            _NL_COLLATE_SYMB_TABLEMB);
490                             extra = (const unsigned char *)
491                               _NL_CURRENT (LC_COLLATE,
492                                            _NL_COLLATE_SYMB_EXTRAMB);
493
494                             /* Locate the character in the hashing table.  */
495                             hash = elem_hash (str, c1);
496
497                             idx = 0;
498                             elem = hash % table_size;
499                             if (symb_table[2 * elem] != 0)
500                               {
501                                 second = hash % (table_size - 2) + 1;
502
503                                 do
504                                   {
505                                     /* First compare the hashing value.  */
506                                     if (symb_table[2 * elem] == hash
507                                         && (c1
508                                             == extra[symb_table[2 * elem + 1]])
509                                         && memcmp (str,
510                                                    &extra[symb_table[2 * elem
511                                                                      + 1]
512                                                           + 1], c1) == 0)
513                                       {
514                                         /* Yep, this is the entry.  */
515                                         idx = symb_table[2 * elem + 1];
516                                         idx += 1 + extra[idx];
517                                         break;
518                                       }
519
520                                     /* Next entry.  */
521                                     elem += second;
522                                   }
523                                 while (symb_table[2 * elem] != 0);
524                               }
525
526                             if (symb_table[2 * elem] != 0)
527                               {
528                                 /* Compare the byte sequence but only if
529                                    this is not part of a range.  */
530 # ifdef WIDE_CHAR_VERSION
531                                 int32_t *wextra;
532
533                                 idx += 1 + extra[idx];
534                                 /* Adjust for the alignment.  */
535                                 idx = (idx + 3) & ~3;
536
537                                 wextra = (int32_t *) &extra[idx + 4];
538 # endif
539
540                                 if (! is_range)
541                                   {
542 # ifdef WIDE_CHAR_VERSION
543                                     for (c1 = 0;
544                                          (int32_t) c1 < wextra[idx];
545                                          ++c1)
546                                       if (n[c1] != wextra[1 + c1])
547                                         break;
548
549                                     if ((int32_t) c1 == wextra[idx])
550                                       goto matched;
551 # else
552                                     for (c1 = 0; c1 < extra[idx]; ++c1)
553                                       if (n[c1] != extra[1 + c1])
554                                         break;
555
556                                     if (c1 == extra[idx])
557                                       goto matched;
558 # endif
559                                   }
560
561                                 /* Get the collation sequence value.  */
562                                 is_seqval = true;
563 # ifdef WIDE_CHAR_VERSION
564                                 cold = wextra[1 + wextra[idx]];
565 # else
566                                 /* Adjust for the alignment.  */
567                                 idx += 1 + extra[idx];
568                                 idx = (idx + 3) & ~4;
569                                 cold = *((int32_t *) &extra[idx]);
570 # endif
571
572                                 c = *p++;
573                               }
574                             else if (c1 == 1)
575                               {
576                                 /* No valid character.  Match it as a
577                                    single byte.  */
578                                 if (!is_range && *n == str[0])
579                                   goto matched;
580
581                                 cold = str[0];
582                                 c = *p++;
583                               }
584                             else
585                               return FNM_NOMATCH;
586                           }
587                       }
588                     else
589 # undef str
590 #endif
591                       {
592                         c = FOLD (c);
593                       normal_bracket:
594
595                         /* We have to handle the symbols differently in
596                            ranges since then the collation sequence is
597                            important.  */
598                         is_range = (*p == L_('-') && p[1] != L_('\0')
599                                     && p[1] != L_(']'));
600
601                         if (!is_range && c == fn)
602                           goto matched;
603
604 #if _LIBC
605                         /* This is needed if we goto normal_bracket; from
606                            outside of is_seqval's scope.  */
607                         is_seqval = false;
608 #endif
609
610                         cold = c;
611                         c = *p++;
612                       }
613
614                     if (c == L_('-') && *p != L_(']'))
615                       {
616 #if _LIBC
617                         /* We have to find the collation sequence
618                            value for C.  Collation sequence is nothing
619                            we can regularly access.  The sequence
620                            value is defined by the order in which the
621                            definitions of the collation values for the
622                            various characters appear in the source
623                            file.  A strange concept, nowhere
624                            documented.  */
625                         uint32_t fcollseq;
626                         uint32_t lcollseq;
627                         UCHAR cend = *p++;
628
629 # ifdef WIDE_CHAR_VERSION
630                         /* Search in the `names' array for the characters.  */
631                         fcollseq = __collseq_table_lookup (collseq, fn);
632                         if (fcollseq == ~((uint32_t) 0))
633                           /* XXX We don't know anything about the character
634                              we are supposed to match.  This means we are
635                              failing.  */
636                           goto range_not_matched;
637
638                         if (is_seqval)
639                           lcollseq = cold;
640                         else
641                           lcollseq = __collseq_table_lookup (collseq, cold);
642 # else
643                         fcollseq = collseq[fn];
644                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
645 # endif
646
647                         is_seqval = false;
648                         if (cend == L_('[') && *p == L_('.'))
649                           {
650                             uint32_t nrules =
651                               _NL_CURRENT_WORD (LC_COLLATE,
652                                                 _NL_COLLATE_NRULES);
653                             const CHAR *startp = p;
654                             size_t c1 = 0;
655
656                             while (1)
657                               {
658                                 c = *++p;
659                                 if (c == L_('.') && p[1] == L_(']'))
660                                   {
661                                     p += 2;
662                                     break;
663                                   }
664                                 if (c == '\0')
665                                   return FNM_NOMATCH;
666                                 ++c1;
667                               }
668
669                             if (nrules == 0)
670                               {
671                                 /* There are no names defined in the
672                                    collation data.  Therefore we only
673                                    accept the trivial names consisting
674                                    of the character itself.  */
675                                 if (c1 != 1)
676                                   return FNM_NOMATCH;
677
678                                 cend = startp[1];
679                               }
680                             else
681                               {
682                                 int32_t table_size;
683                                 const int32_t *symb_table;
684 # ifdef WIDE_CHAR_VERSION
685                                 char str[c1];
686                                 size_t strcnt;
687 # else
688 #  define str (startp + 1)
689 # endif
690                                 const unsigned char *extra;
691                                 int32_t idx;
692                                 int32_t elem;
693                                 int32_t second;
694                                 int32_t hash;
695
696 # ifdef WIDE_CHAR_VERSION
697                                 /* We have to convert the name to a single-byte
698                                    string.  This is possible since the names
699                                    consist of ASCII characters and the internal
700                                    representation is UCS4.  */
701                                 for (strcnt = 0; strcnt < c1; ++strcnt)
702                                   str[strcnt] = startp[1 + strcnt];
703 # endif
704
705                                 table_size =
706                                   _NL_CURRENT_WORD (LC_COLLATE,
707                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
708                                 symb_table = (const int32_t *)
709                                   _NL_CURRENT (LC_COLLATE,
710                                                _NL_COLLATE_SYMB_TABLEMB);
711                                 extra = (const unsigned char *)
712                                   _NL_CURRENT (LC_COLLATE,
713                                                _NL_COLLATE_SYMB_EXTRAMB);
714
715                                 /* Locate the character in the hashing
716                                    table.  */
717                                 hash = elem_hash (str, c1);
718
719                                 idx = 0;
720                                 elem = hash % table_size;
721                                 if (symb_table[2 * elem] != 0)
722                                   {
723                                     second = hash % (table_size - 2) + 1;
724
725                                     do
726                                       {
727                                         /* First compare the hashing value.  */
728                                         if (symb_table[2 * elem] == hash
729                                             && (c1
730                                                 == extra[symb_table[2 * elem + 1]])
731                                             && memcmp (str,
732                                                        &extra[symb_table[2 * elem + 1]
733                                                               + 1], c1) == 0)
734                                           {
735                                             /* Yep, this is the entry.  */
736                                             idx = symb_table[2 * elem + 1];
737                                             idx += 1 + extra[idx];
738                                             break;
739                                           }
740
741                                         /* Next entry.  */
742                                         elem += second;
743                                       }
744                                     while (symb_table[2 * elem] != 0);
745                                   }
746
747                                 if (symb_table[2 * elem] != 0)
748                                   {
749                                     /* Compare the byte sequence but only if
750                                        this is not part of a range.  */
751 # ifdef WIDE_CHAR_VERSION
752                                     int32_t *wextra;
753
754                                     idx += 1 + extra[idx];
755                                     /* Adjust for the alignment.  */
756                                     idx = (idx + 3) & ~4;
757
758                                     wextra = (int32_t *) &extra[idx + 4];
759 # endif
760                                     /* Get the collation sequence value.  */
761                                     is_seqval = true;
762 # ifdef WIDE_CHAR_VERSION
763                                     cend = wextra[1 + wextra[idx]];
764 # else
765                                     /* Adjust for the alignment.  */
766                                     idx += 1 + extra[idx];
767                                     idx = (idx + 3) & ~4;
768                                     cend = *((int32_t *) &extra[idx]);
769 # endif
770                                   }
771                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
772                                   {
773                                     cend = str[0];
774                                     c = *p++;
775                                   }
776                                 else
777                                   return FNM_NOMATCH;
778                               }
779 # undef str
780                           }
781                         else
782                           {
783                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
784                               cend = *p++;
785                             if (cend == L_('\0'))
786                               return FNM_NOMATCH;
787                             cend = FOLD (cend);
788                           }
789
790                         /* XXX It is not entirely clear to me how to handle
791                            characters which are not mentioned in the
792                            collation specification.  */
793                         if (
794 # ifdef WIDE_CHAR_VERSION
795                             lcollseq == 0xffffffff ||
796 # endif
797                             lcollseq <= fcollseq)
798                           {
799                             /* We have to look at the upper bound.  */
800                             uint32_t hcollseq;
801
802                             if (is_seqval)
803                               hcollseq = cend;
804                             else
805                               {
806 # ifdef WIDE_CHAR_VERSION
807                                 hcollseq =
808                                   __collseq_table_lookup (collseq, cend);
809                                 if (hcollseq == ~((uint32_t) 0))
810                                   {
811                                     /* Hum, no information about the upper
812                                        bound.  The matching succeeds if the
813                                        lower bound is matched exactly.  */
814                                     if (lcollseq != fcollseq)
815                                       goto range_not_matched;
816
817                                     goto matched;
818                                   }
819 # else
820                                 hcollseq = collseq[cend];
821 # endif
822                               }
823
824                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
825                               goto matched;
826                           }
827 # ifdef WIDE_CHAR_VERSION
828                       range_not_matched:
829 # endif
830 #else
831                         /* We use a boring value comparison of the character
832                            values.  This is better than comparing using
833                            `strcoll' since the latter would have surprising
834                            and sometimes fatal consequences.  */
835                         UCHAR cend = *p++;
836
837                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
838                           cend = *p++;
839                         if (cend == L_('\0'))
840                           return FNM_NOMATCH;
841
842                         /* It is a range.  */
843                         if (cold <= fn && fn <= cend)
844                           goto matched;
845 #endif
846
847                         c = *p++;
848                       }
849                   }
850
851                 if (c == L_(']'))
852                   break;
853               }
854
855             if (!not)
856               return FNM_NOMATCH;
857             break;
858
859           matched:
860             /* Skip the rest of the [...] that already matched.  */
861             do
862               {
863               ignore_next:
864                 c = *p++;
865
866                 if (c == L_('\0'))
867                   /* [... (unterminated) loses.  */
868                   return FNM_NOMATCH;
869
870                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
871                   {
872                     if (*p == L_('\0'))
873                       return FNM_NOMATCH;
874                     /* XXX 1003.2d11 is unclear if this is right.  */
875                     ++p;
876                   }
877                 else if (c == L_('[') && *p == L_(':'))
878                   {
879                     int c1 = 0;
880                     const CHAR *startp = p;
881
882                     while (1)
883                       {
884                         c = *++p;
885                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
886                           return FNM_NOMATCH;
887
888                         if (*p == L_(':') && p[1] == L_(']'))
889                           break;
890
891                         if (c < L_('a') || c >= L_('z'))
892                           {
893                             p = startp;
894                             goto ignore_next;
895                           }
896                       }
897                     p += 2;
898                     c = *p++;
899                   }
900                 else if (c == L_('[') && *p == L_('='))
901                   {
902                     c = *++p;
903                     if (c == L_('\0'))
904                       return FNM_NOMATCH;
905                     c = *++p;
906                     if (c != L_('=') || p[1] != L_(']'))
907                       return FNM_NOMATCH;
908                     p += 2;
909                     c = *p++;
910                   }
911                 else if (c == L_('[') && *p == L_('.'))
912                   {
913                     ++p;
914                     while (1)
915                       {
916                         c = *++p;
917                         if (c == '\0')
918                           return FNM_NOMATCH;
919
920                         if (*p == L_('.') && p[1] == L_(']'))
921                           break;
922                       }
923                     p += 2;
924                     c = *p++;
925                   }
926               }
927             while (c != L_(']'));
928             if (not)
929               return FNM_NOMATCH;
930           }
931           break;
932
933         case L_('+'):
934         case L_('@'):
935         case L_('!'):
936           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
937             {
938               int res;
939
940               res = EXT (c, p, n, string_end, no_leading_period, flags);
941               if (res != -1)
942                 return res;
943             }
944           goto normal_match;
945
946         case L_('/'):
947           if (NO_LEADING_PERIOD (flags))
948             {
949               if (n == string_end || c != (UCHAR) *n)
950                 return FNM_NOMATCH;
951
952               new_no_leading_period = true;
953               break;
954             }
955           /* FALLTHROUGH */
956         default:
957         normal_match:
958           if (n == string_end || c != FOLD ((UCHAR) *n))
959             return FNM_NOMATCH;
960         }
961
962       no_leading_period = new_no_leading_period;
963       ++n;
964     }
965
966   if (n == string_end)
967     return 0;
968
969   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
970     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
971     return 0;
972
973   return FNM_NOMATCH;
974 }
975
976
977 static const CHAR *
978 internal_function
979 END (const CHAR *pattern)
980 {
981   const CHAR *p = pattern;
982
983   while (1)
984     if (*++p == L_('\0'))
985       /* This is an invalid pattern.  */
986       return pattern;
987     else if (*p == L_('['))
988       {
989         /* Handle brackets special.  */
990         if (posixly_correct == 0)
991           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
992
993         /* Skip the not sign.  We have to recognize it because of a possibly
994            following ']'.  */
995         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
996           ++p;
997         /* A leading ']' is recognized as such.  */
998         if (*p == L_(']'))
999           ++p;
1000         /* Skip over all characters of the list.  */
1001         while (*p != L_(']'))
1002           if (*p++ == L_('\0'))
1003             /* This is no valid pattern.  */
1004             return pattern;
1005       }
1006     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1007               || *p == L_('!')) && p[1] == L_('('))
1008       p = END (p + 1);
1009     else if (*p == L_(')'))
1010       break;
1011
1012   return p + 1;
1013 }
1014
1015
1016 static int
1017 internal_function
1018 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1019      bool no_leading_period, int flags)
1020 {
1021   const CHAR *startp;
1022   size_t level;
1023   struct patternlist
1024   {
1025     struct patternlist *next;
1026     CHAR str[1];
1027   } *list = NULL;
1028   struct patternlist **lastp = &list;
1029   size_t pattern_len = STRLEN (pattern);
1030   const CHAR *p;
1031   const CHAR *rs;
1032   enum { ALLOCA_LIMIT = 8000 };
1033
1034   /* Parse the pattern.  Store the individual parts in the list.  */
1035   level = 0;
1036   for (startp = p = pattern + 1; ; ++p)
1037     if (*p == L_('\0'))
1038       /* This is an invalid pattern.  */
1039       return -1;
1040     else if (*p == L_('['))
1041       {
1042         /* Handle brackets special.  */
1043         if (posixly_correct == 0)
1044           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1045
1046         /* Skip the not sign.  We have to recognize it because of a possibly
1047            following ']'.  */
1048         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1049           ++p;
1050         /* A leading ']' is recognized as such.  */
1051         if (*p == L_(']'))
1052           ++p;
1053         /* Skip over all characters of the list.  */
1054         while (*p != L_(']'))
1055           if (*p++ == L_('\0'))
1056             /* This is no valid pattern.  */
1057             return -1;
1058       }
1059     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1060               || *p == L_('!')) && p[1] == L_('('))
1061       /* Remember the nesting level.  */
1062       ++level;
1063     else if (*p == L_(')'))
1064       {
1065         if (level-- == 0)
1066           {
1067             /* This means we found the end of the pattern.  */
1068 #define NEW_PATTERN \
1069             struct patternlist *newp;                                         \
1070             size_t plen;                                                      \
1071             size_t plensize;                                                  \
1072             size_t newpsize;                                                  \
1073                                                                               \
1074             plen = (opt == L_('?') || opt == L_('@')                          \
1075                     ? pattern_len                                             \
1076                     : p - startp + 1);                                        \
1077             plensize = plen * sizeof (CHAR);                                  \
1078             newpsize = offsetof (struct patternlist, str) + plensize;         \
1079             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1080                 || newpsize < offsetof (struct patternlist, str)              \
1081                 || ALLOCA_LIMIT <= newpsize)                                  \
1082               return -1;                                                      \
1083             newp = (struct patternlist *) alloca (newpsize);                  \
1084             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1085             newp->next = NULL;                                                \
1086             *lastp = newp;                                                    \
1087             lastp = &newp->next
1088             NEW_PATTERN;
1089             break;
1090           }
1091       }
1092     else if (*p == L_('|'))
1093       {
1094         if (level == 0)
1095           {
1096             NEW_PATTERN;
1097             startp = p + 1;
1098           }
1099       }
1100   assert (list != NULL);
1101   assert (p[-1] == L_(')'));
1102 #undef NEW_PATTERN
1103
1104   switch (opt)
1105     {
1106     case L_('*'):
1107       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1108         return 0;
1109       /* FALLTHROUGH */
1110
1111     case L_('+'):
1112       do
1113         {
1114           for (rs = string; rs <= string_end; ++rs)
1115             /* First match the prefix with the current pattern with the
1116                current pattern.  */
1117             if (FCT (list->str, string, rs, no_leading_period,
1118                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1119                 /* This was successful.  Now match the rest with the rest
1120                    of the pattern.  */
1121                 && (FCT (p, rs, string_end,
1122                          rs == string
1123                          ? no_leading_period
1124                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1125                          flags & FNM_FILE_NAME
1126                          ? flags : flags & ~FNM_PERIOD) == 0
1127                     /* This didn't work.  Try the whole pattern.  */
1128                     || (rs != string
1129                         && FCT (pattern - 1, rs, string_end,
1130                                 rs == string
1131                                 ? no_leading_period
1132                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1133                                 flags & FNM_FILE_NAME
1134                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1135               /* It worked.  Signal success.  */
1136               return 0;
1137         }
1138       while ((list = list->next) != NULL);
1139
1140       /* None of the patterns lead to a match.  */
1141       return FNM_NOMATCH;
1142
1143     case L_('?'):
1144       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1145         return 0;
1146       /* FALLTHROUGH */
1147
1148     case L_('@'):
1149       do
1150         /* I cannot believe it but `strcat' is actually acceptable
1151            here.  Match the entire string with the prefix from the
1152            pattern list and the rest of the pattern following the
1153            pattern list.  */
1154         if (FCT (STRCAT (list->str, p), string, string_end,
1155                  no_leading_period,
1156                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1157           /* It worked.  Signal success.  */
1158           return 0;
1159       while ((list = list->next) != NULL);
1160
1161       /* None of the patterns lead to a match.  */
1162       return FNM_NOMATCH;
1163
1164     case L_('!'):
1165       for (rs = string; rs <= string_end; ++rs)
1166         {
1167           struct patternlist *runp;
1168
1169           for (runp = list; runp != NULL; runp = runp->next)
1170             if (FCT (runp->str, string, rs,  no_leading_period,
1171                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1172               break;
1173
1174           /* If none of the patterns matched see whether the rest does.  */
1175           if (runp == NULL
1176               && (FCT (p, rs, string_end,
1177                        rs == string
1178                        ? no_leading_period
1179                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1180                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1181                   == 0))
1182             /* This is successful.  */
1183             return 0;
1184         }
1185
1186       /* None of the patterns together with the rest of the pattern
1187          lead to a match.  */
1188       return FNM_NOMATCH;
1189
1190     default:
1191       assert (! "Invalid extended matching operator");
1192       break;
1193     }
1194
1195   return -1;
1196 }
1197
1198
/* Forget every parameter macro this file relies on.  The code above uses
   these names without defining them; presumably the including file
   defines them before each inclusion and this cleanup lets the file be
   included again with a different set -- confirm against the includer.  */

/* Character types and literal/conversion helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L_
#undef FOLD
#undef BTOWC

/* Names of the functions defined in this file.  */
#undef FCT
#undef EXT
#undef END

/* String and memory primitives.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT