1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 #ifndef D_FILE_OFFSET_BITS
28 #define _FILE_OFFSET_BITS 64
30 #ifndef _LARGEFILE64_SOURCE
31 #define _LARGEFILE64_SOURCE 1
34 #include <sys/types.h>
50 * parse structure, passed up and down to avoid global variables and
55 char *next; /* next character in RE */
56 char *end; /* end of string (-> NUL normally) */
57 int error; /* has an error been seen? */
58 sop *strip; /* malloced strip */
59 sopno ssize; /* malloced strip size (allocated) */
60 sopno slen; /* malloced strip length (used) */
61 int ncsalloc; /* number of csets allocated */
63 #define NPAREN 10 /* we need to remember () 1-9 for back refs */
64 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
65 sopno pend[NPAREN]; /* -> ) ([0] unused) */
70 static char vogl_nuls[10]; /* place to point scanner in event of error */
73 * macros for use with parse structure
74 * BEWARE: these know that the parse structure is named `p' !!!
76 #define PEEK() (*p->next)
77 #define PEEK2() (*(p->next + 1))
78 #define MORE() (p->next < p->end)
79 #define MORE2() (p->next + 1 < p->end)
80 #define SEE(c) (MORE() && PEEK() == (c))
81 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
82 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
83 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
84 #define NEXT() (p->next++)
85 #define NEXT2() (p->next += 2)
86 #define NEXTn(n) (p->next += (n))
87 #define GETNEXT() (*p->next++)
88 #define SETERROR(e) vogl_seterr(p, (e))
89 #define REQUIRE(co, e) ((co) || SETERROR(e))
90 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
91 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
92 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
93 #define EMIT(op, sopnd) vogl_doemit(p, (sop)(op), (size_t)(sopnd))
94 #define INSERT(op, pos) vogl_doinsert(p, (sop)(op), HERE() - (pos) + 1, pos)
95 #define AHEAD(pos) vogl_dofwd(p, pos, HERE() - (pos))
96 #define ASTERN(sop, pos) EMIT(sop, HERE() - pos)
97 #define HERE() (p->slen)
98 #define THERE() (p->slen - 1)
99 #define THERETHERE() (p->slen - 2)
100 #define DROP(n) (p->slen -= (n))
103 static int vogl_never = 0; /* for use in asserts; shuts lint up */
105 #define vogl_never 0 /* some <assert.h>s have bugs too */
108 // Attempt to work around locale issues, not sure if I've got them all here.
109 static inline int regcomp_isdigit(int c)
111 return (c >= '0') && (c <= '9');
114 static inline int regcomp_islower(int c)
116 return (c >= 'a') && (c <= 'z');
119 static inline int regcomp_isupper(int c)
121 return (c >= 'A') && (c <= 'Z');
124 static inline int regcomp_isalpha(int c)
126 return regcomp_islower(c) || regcomp_isupper(c);
129 static inline int regcomp_toupper(int c)
131 return ((c >= 'a') && (c <= 'z')) ? (c - 'a' + 'A') : c;
134 static inline int regcomp_tolower(int c)
136 return ((c >= 'A') && (c <= 'Z')) ? (c - 'A' + 'a') : c;
140 - regcomp - interface for parser and compilation
141 = extern int vogl_regcomp(regex_t *, const char *, int);
142 = #define REG_BASIC 0000
143 = #define REG_EXTENDED 0001
144 = #define REG_ICASE 0002
145 = #define REG_NOSUB 0004
146 = #define REG_NEWLINE 0010
147 = #define REG_NOSPEC 0020
148 = #define REG_PEND 0040
149 = #define REG_DUMP 0200
151 int /* 0 success, otherwise REG_something */
152 vogl_regcomp(preg, pattern, cflags)
158 register struct re_guts *g;
159 register struct parse *p = &pa;
163 #define GOODFLAGS(f) (f)
165 #define GOODFLAGS(f) ((f) & ~REG_DUMP)
168 cflags = GOODFLAGS(cflags);
169 if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC))
172 if (cflags & REG_PEND)
174 if (preg->re_endp < pattern)
176 len = preg->re_endp - pattern;
179 len = strlen((char *)pattern);
181 /* do the mallocs early so failure handling is easy */
182 g = (struct re_guts *)regex_malloc(sizeof(struct re_guts) +
183 (NC - 1) * sizeof(cat_t));
186 p->ssize = len / (size_t)2 * (size_t)3 + (size_t)1; /* ugh */
187 p->strip = (sop *)regex_malloc(p->ssize * sizeof(sop));
189 if (p->strip == NULL)
191 regex_free((char *)g);
197 p->next = (char *)pattern; /* convenience; we do not modify it */
198 p->end = p->next + len;
201 for (i = 0; i < NPAREN; i++)
217 g->ncategories = 1; /* category 0 is "everything else" */
218 g->categories = &g->catspace[-(CHAR_MIN)];
219 (void)memset((char *)g->catspace, 0, NC * sizeof(cat_t));
224 g->firststate = THERE();
225 if (cflags & REG_EXTENDED)
227 else if (cflags & REG_NOSPEC)
230 vogl_p_bre(p, OUT, OUT);
232 g->laststate = THERE();
234 /* tidy up loose ends and fill things in */
235 vogl_categorize(p, g);
236 vogl_stripsnug(p, g);
238 g->nplus = vogl_pluscount(p, g);
240 preg->re_nsub = g->nsub;
242 preg->re_magic = MAGIC1;
244 /* not debugging, so can't rely on the assert() in vogl_regexec() */
246 SETERROR(REG_ASSERT);
249 /* win or lose, we're done */
250 if (p->error != 0) /* lose */
256 - vogl_p_ere - ERE parser top level, concatenation and alternation
257 == static void vogl_p_ere(register struct parse *p, int stop);
260 vogl_p_ere(p, stop) register struct parse *p;
261 int stop; /* character this ERE should end at */
264 register sopno prevback = 0;
265 register sopno prevfwd = 0;
267 register int first = 1; /* is this the first alternative? */
271 /* do a bunch of concatenated expressions */
273 while (MORE() && (c = PEEK()) != '|' && c != stop)
275 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
278 break; /* NOTE BREAK OUT */
282 INSERT(OCH_, conc); /* offset is wrong */
287 ASTERN(OOR1, prevback);
289 AHEAD(prevfwd); /* fix previous offset */
291 EMIT(OOR2, 0); /* offset is very wrong */
294 if (!first) /* tail-end fixups */
297 ASTERN(O_CH, prevback);
300 assert(!MORE() || SEE(stop));
304 - vogl_p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
305 == static void vogl_p_ere_exp(register struct parse *p);
308 vogl_p_ere_exp(p) register struct parse *p;
314 register sopno subno;
317 assert(MORE()); /* caller should have ensured this */
324 REQUIRE(MORE(), REG_EPAREN);
328 p->pbegin[subno] = HERE();
329 EMIT(OLPAREN, subno);
334 p->pend[subno] = HERE();
335 assert(p->pend[subno] != 0);
337 EMIT(ORPAREN, subno);
338 MUSTEAT(')', REG_EPAREN);
340 #ifndef POSIX_MISTAKE
341 case ')': /* happens only if no current unmatched ( */
343 * You may ask, why the ifndef? Because I didn't notice
344 * this until slightly too late for 1003.2, and none of the
345 * other 1003.2 regular-expression reviewers noticed it at
346 * all. So an unmatched ) is legal POSIX, at least until
347 * we can get it fixed.
349 SETERROR(REG_EPAREN);
354 p->g->iflags |= USEBOL;
360 p->g->iflags |= USEEOL;
369 SETERROR(REG_BADRPT);
372 if (p->g->cflags & REG_NEWLINE)
381 REQUIRE(MORE(), REG_EESCAPE);
385 case '{': /* okay as ordinary except if digit follows */
386 REQUIRE(!MORE() || !regcomp_isdigit(PEEK()), REG_BADRPT);
396 /* we call { a repetition if followed by a digit */
397 if (!(c == '*' || c == '+' || c == '?' ||
398 (c == '{' && MORE2() && regcomp_isdigit(PEEK2()))))
399 return; /* no repetition, we're done */
402 REQUIRE(!wascaret, REG_BADRPT);
405 case '*': /* implemented as +? */
406 /* this case does not require the (y|) trick, noKLUDGE */
409 INSERT(OQUEST_, pos);
410 ASTERN(O_QUEST, pos);
417 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
418 INSERT(OCH_, pos); /* offset slightly wrong */
419 ASTERN(OOR1, pos); /* this one's right */
420 AHEAD(pos); /* fix the OCH_ */
421 EMIT(OOR2, 0); /* offset very wrong... */
422 AHEAD(THERE()); /* ...so fix it */
423 ASTERN(O_CH, THERETHERE());
426 count = vogl_p_count(p);
429 if (regcomp_isdigit(PEEK()))
431 count2 = vogl_p_count(p);
432 REQUIRE(count <= count2, REG_BADBR);
434 else /* single number with comma */
437 else /* just a single number */
439 vogl_repeat(p, pos, count, count2);
440 if (!EAT('}')) /* error heuristics */
442 while (MORE() && PEEK() != '}')
444 REQUIRE(MORE(), REG_EBRACE);
453 if (!(c == '*' || c == '+' || c == '?' ||
454 (c == '{' && MORE2() && regcomp_isdigit(PEEK2()))))
456 SETERROR(REG_BADRPT);
460 - p_str - string (no metacharacters) "parser"
461 == static void p_str(register struct parse *p);
464 vogl_p_str(p) register struct parse *p;
466 REQUIRE(MORE(), REG_EMPTY);
468 vogl_ordinary(p, GETNEXT());
472 - p_bre - BRE parser top level, anchoring and concatenation
473 == static void p_bre(register struct parse *p, register int end1, \
474 == register int end2);
475 * Giving end1 as OUT essentially eliminates the end1/end2 check.
477 * This implementation is a bit of a kludge, in that a trailing $ is first
478 * taken as an ordinary character and then revised to be an anchor. The
479 * only undesirable side effect is that '$' gets included as a character
480 * category in such cases. This is fairly harmless; not worth fixing.
481 * The amount of lookahead needed to avoid this kludge is excessive.
484 vogl_p_bre(p, end1, end2) register struct parse *p;
485 register int end1; /* first terminating character */
486 register int end2; /* second terminating character */
488 register sopno start = HERE();
489 register int first = 1; /* first subexpression? */
490 register int wasdollar = 0;
495 p->g->iflags |= USEBOL;
498 while (MORE() && !SEETWO(end1, end2))
500 wasdollar = vogl_p_simp_re(p, first);
503 if (wasdollar) /* oops, that was a trailing anchor */
507 p->g->iflags |= USEEOL;
511 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
515 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
516 == static int p_simp_re(register struct parse *p, int starordinary);
518 static int /* was the simple RE an unbackslashed $? */
519 vogl_p_simp_re(p, starordinary) register struct parse *p;
520 int starordinary; /* is a leading * an ordinary character? */
527 register sopno subno;
528 #define BACKSL (1 << CHAR_BIT)
530 pos = HERE(); /* repetion op, if any, covers from here */
532 assert(MORE()); /* caller should have ensured this */
536 REQUIRE(MORE(), REG_EESCAPE);
537 c = BACKSL | (unsigned char)GETNEXT();
542 if (p->g->cflags & REG_NEWLINE)
551 SETERROR(REG_BADRPT);
557 p->pbegin[subno] = HERE();
558 EMIT(OLPAREN, subno);
559 /* the MORE here is an error heuristic */
560 if (MORE() && !SEETWO('\\', ')'))
561 vogl_p_bre(p, '\\', ')');
564 p->pend[subno] = HERE();
565 assert(p->pend[subno] != 0);
567 EMIT(ORPAREN, subno);
568 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
570 case BACKSL | ')': /* should not get here -- must be user */
572 SETERROR(REG_EPAREN);
583 i = (c & ~BACKSL) - '0';
587 assert(i <= p->g->nsub);
589 assert(p->pbegin[i] != 0);
590 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
591 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
592 (void)vogl_dupl(p, p->pbegin[i] + 1, p->pend[i]);
596 SETERROR(REG_ESUBREG);
600 REQUIRE(starordinary, REG_BADRPT);
603 vogl_ordinary(p, (char)c); /* takes off BACKSL, if any */
607 if (EAT('*')) /* implemented as +? */
609 /* this case does not require the (y|) trick, noKLUDGE */
612 INSERT(OQUEST_, pos);
613 ASTERN(O_QUEST, pos);
615 else if (EATTWO('\\', '{'))
617 count = vogl_p_count(p);
620 if (MORE() && regcomp_isdigit(PEEK()))
622 count2 = vogl_p_count(p);
623 REQUIRE(count <= count2, REG_BADBR);
625 else /* single number with comma */
628 else /* just a single number */
630 vogl_repeat(p, pos, count, count2);
631 if (!EATTWO('\\', '}')) /* error heuristics */
633 while (MORE() && !SEETWO('\\', '}'))
635 REQUIRE(MORE(), REG_EBRACE);
639 else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
646 - p_count - parse a repetition count
647 == static int p_count(register struct parse *p);
649 static int /* the value */
650 vogl_p_count(p) register struct parse *p;
652 register int count = 0;
653 register int ndigits = 0;
655 while (MORE() && regcomp_isdigit(PEEK()) && count <= DUPMAX)
657 count = count * 10 + (GETNEXT() - '0');
661 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
666 - p_bracket - parse a bracketed character list
667 == static void p_bracket(register struct parse *p);
669 * Note a significant property of this code: if the allocset() did SETERROR,
670 * no set operations are done.
673 vogl_p_bracket(p) register struct parse *p;
675 register cset *cs = vogl_allocset(p);
676 register int invert = 0;
678 /* Dept of Truly Sickening Special-Case Kludges */
679 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0)
685 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0)
693 invert++; /* make note to invert set at end */
698 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
699 vogl_p_b_term(p, cs);
702 MUSTEAT(']', REG_EBRACK);
704 if (p->error != 0) /* don't mess things up further */
707 if (p->g->cflags & REG_ICASE)
712 for (i = p->g->csetsize - 1; i >= 0; i--)
713 if (CHIN(cs, i) && regcomp_isalpha(i))
715 ci = vogl_othercase(i);
719 if (cs->multis != NULL)
726 for (i = p->g->csetsize - 1; i >= 0; i--)
731 if (p->g->cflags & REG_NEWLINE)
733 if (cs->multis != NULL)
734 vogl_mcinvert(p, cs);
737 assert(cs->multis == NULL); /* xxx */
739 if (vogl_nch(p, cs) == 1) /* optimize singleton sets */
741 vogl_ordinary(p, vogl_firstch(p, cs));
745 EMIT(OANYOF, vogl_freezeset(p, cs));
749 - p_b_term - parse one term of a bracketed character list
750 == static void p_b_term(register struct parse *p, register cset *cs);
753 vogl_p_b_term(p, cs) register struct parse *p;
757 register char start, finish;
760 /* classify what we've got */
761 switch ((MORE()) ? PEEK() : '\0')
764 c = (MORE2()) ? PEEK2() : '\0';
767 SETERROR(REG_ERANGE);
768 return; /* NOTE RETURN */
777 case ':': /* character class */
779 REQUIRE(MORE(), REG_EBRACK);
781 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
782 vogl_p_b_cclass(p, cs);
783 REQUIRE(MORE(), REG_EBRACK);
784 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
786 case '=': /* equivalence class */
788 REQUIRE(MORE(), REG_EBRACK);
790 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
791 vogl_p_b_eclass(p, cs);
792 REQUIRE(MORE(), REG_EBRACK);
793 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
795 default: /* symbol, ordinary character, or range */
796 /* xxx revision needed for multichar stuff */
797 start = vogl_p_b_symbol(p);
798 if (SEE('-') && MORE2() && PEEK2() != ']')
805 finish = vogl_p_b_symbol(p);
809 /* xxx what about signed chars here... */
810 REQUIRE(start <= finish, REG_ERANGE);
811 for (i = start; i <= finish; i++)
818 - p_b_cclass - parse a character-class name and deal with it
819 == static void p_b_cclass(register struct parse *p, register cset *cs);
822 vogl_p_b_cclass(p, cs) register struct parse *p;
825 register char *sp = p->next;
826 register struct cclass *cp;
831 while (MORE() && regcomp_isalpha(PEEK()))
834 for (cp = cclasses; cp->name != NULL; cp++)
835 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
837 if (cp->name == NULL)
839 /* oops, didn't find it */
840 SETERROR(REG_ECTYPE);
845 while ((c = *u++) != '\0')
847 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
852 - p_b_eclass - parse an equivalence-class name and deal with it
853 == static void p_b_eclass(register struct parse *p, register cset *cs);
855 * This implementation is incomplete. xxx
858 vogl_p_b_eclass(p, cs) register struct parse *p;
863 c = vogl_p_b_coll_elem(p, '=');
868 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
869 == static char p_b_symbol(register struct parse *p);
871 static char /* value of symbol */
872 vogl_p_b_symbol(p) register struct parse *p;
876 REQUIRE(MORE(), REG_EBRACK);
877 if (!EATTWO('[', '.'))
880 /* collating symbol */
881 value = vogl_p_b_coll_elem(p, '.');
882 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
887 - p_b_coll_elem - parse a collating-element name and look it up
888 == static char p_b_coll_elem(register struct parse *p, int endc);
890 static char /* value of collating element */
891 vogl_p_b_coll_elem(p, endc) register struct parse *p;
892 int endc; /* name ended by endc,']' */
894 register char *sp = p->next;
895 register struct cname *cp;
898 while (MORE() && !SEETWO(endc, ']'))
902 SETERROR(REG_EBRACK);
906 for (cp = cnames; cp->name != NULL; cp++)
907 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
908 return (cp->code); /* known name */
910 return (*sp); /* single character */
911 SETERROR(REG_ECOLLATE); /* neither */
916 - othercase - return the case counterpart of an alphabetic
917 == static char othercase(int ch);
919 static char /* if no counterpart, return ch */
920 vogl_othercase(ch) int ch;
922 assert(regcomp_isalpha(ch));
923 if (regcomp_isupper(ch))
924 return (regcomp_tolower(ch));
925 else if (regcomp_islower(ch))
926 return (regcomp_toupper(ch));
927 else /* peculiar, but could happen */
932 - bothcases - emit a dualcase version of a two-case character
933 == static void bothcases(register struct parse *p, int ch);
935 * Boy, is this implementation ever a kludge...
938 vogl_bothcases(p, ch) register struct parse *p;
941 register char *oldnext = p->next;
942 register char *oldend = p->end;
945 assert(othercase(ch) != ch); /* p_bracket() would recurse */
947 p->end = bracket + 2;
952 assert(p->next == bracket + 2);
958 - ordinary - emit an ordinary character
959 == static void ordinary(register struct parse *p, register int ch);
962 vogl_ordinary(p, ch) register struct parse *p;
965 register cat_t *cap = p->g->categories;
967 if ((p->g->cflags & REG_ICASE) && regcomp_isalpha(ch) && vogl_othercase(ch) != ch)
968 vogl_bothcases(p, ch);
971 EMIT(OCHAR, (unsigned char)ch);
973 cap[ch] = p->g->ncategories++;
978 - nonnewline - emit REG_NEWLINE version of OANY
979 == static void nonnewline(register struct parse *p);
981 * Boy, is this implementation ever a kludge...
984 vogl_nonnewline(p) register struct parse *p;
986 register char *oldnext = p->next;
987 register char *oldend = p->end;
991 p->end = bracket + 3;
997 assert(p->next == bracket + 3);
1003 - repeat - generate code for a bounded repetition, recursively if needed
1004 == static void repeat(register struct parse *p, sopno start, int from, int to);
1007 vogl_repeat(p, start, from, to) register struct parse *p;
1008 sopno start; /* operand from here to end of strip */
1009 int from; /* repeated from this number */
1010 int to; /* to this number of times (maybe INFINITY) */
1012 register sopno finish = HERE();
1015 #define REP(f, t) ((f) * 8 + (t))
1016 #define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1017 register sopno copy;
1019 if (p->error != 0) /* head off possible runaway recursion */
1024 switch (REP(MAP(from), MAP(to)))
1027 : /* must be user doing this */
1028 DROP(finish - start); /* drop the operand */
1036 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1037 INSERT(OCH_, start); /* offset is wrong... */
1038 vogl_repeat(p, start + 1, 1, to);
1039 ASTERN(OOR1, start);
1040 AHEAD(start); /* ... fix it */
1043 ASTERN(O_CH, THERETHERE());
1046 : /* trivial case */
1050 : /* as x?x{1,n-1} */
1051 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1052 INSERT(OCH_, start);
1053 ASTERN(OOR1, start);
1055 EMIT(OOR2, 0); /* offset very wrong... */
1056 AHEAD(THERE()); /* ...so fix it */
1057 ASTERN(O_CH, THERETHERE());
1058 copy = vogl_dupl(p, start + 1, finish + 1);
1059 assert(copy == finish + 4);
1060 vogl_repeat(p, copy, 1, to - 1);
1064 INSERT(OPLUS_, start);
1065 ASTERN(O_PLUS, start);
1068 : /* as xx{m-1,n-1} */
1069 copy = vogl_dupl(p, start, finish);
1070 vogl_repeat(p, copy, from - 1, to - 1);
1073 : /* as xx{n-1,INF} */
1074 copy = vogl_dupl(p, start, finish);
1075 vogl_repeat(p, copy, from - 1, to);
1077 default: /* "can't happen" */
1078 SETERROR(REG_ASSERT); /* just in case */
1084 - seterr - set an error condition
1085 == static int seterr(register struct parse *p, int e);
1087 static int /* useless but makes type checking happy */
1088 vogl_seterr(p, e) register struct parse *p;
1091 if (p->error == 0) /* keep earliest error condition */
1093 p->next = vogl_nuls; /* try to bring things to a halt */
1095 return (0); /* make the return value well-defined */
1099 - allocset - allocate a set of characters for []
1100 == static cset *allocset(register struct parse *p);
1103 vogl_allocset(p) register struct parse *p;
1105 register int no = p->g->ncsets++;
1107 register size_t nbytes;
1109 register size_t css = (size_t)p->g->csetsize;
1112 if (no >= p->ncsalloc) /* need another column of space */
1114 p->ncsalloc += CHAR_BIT;
1116 assert(nc % CHAR_BIT == 0);
1117 nbytes = nc / CHAR_BIT * css;
1118 if (p->g->sets == NULL)
1119 p->g->sets = (cset *)regex_malloc(nc * sizeof(cset));
1121 p->g->sets = (cset *)regex_realloc((char *)p->g->sets,
1123 if (p->g->setbits == NULL)
1124 p->g->setbits = (uch *)regex_malloc(nbytes);
1127 p->g->setbits = (uch *)regex_realloc((char *)p->g->setbits,
1129 /* xxx this isn't right if setbits is now NULL */
1130 for (i = 0; i < no; i++)
1131 p->g->sets[i].ptr = p->g->setbits + css * (i / CHAR_BIT);
1133 if (p->g->sets != NULL && p->g->setbits != NULL)
1134 (void)memset((char *)p->g->setbits + (nbytes - css),
1139 SETERROR(REG_ESPACE);
1140 /* caller's responsibility not to do set ops */
1144 assert(p->g->sets != NULL); /* xxx */
1145 cs = &p->g->sets[no];
1146 cs->ptr = p->g->setbits + css * ((no) / CHAR_BIT);
1147 cs->mask = 1 << ((no) % CHAR_BIT);
1156 - freeset - free a now-unused set
1157 == static void freeset(register struct parse *p, register cset *cs);
1160 vogl_freeset(p, cs) register struct parse *p;
1164 register cset *top = &p->g->sets[p->g->ncsets];
1165 register size_t css = (size_t)p->g->csetsize;
1167 for (i = 0; (size_t)i < css; i++)
1169 if (cs == top - 1) /* recover only the easy case */
1174 - freezeset - final processing on a set of characters
1175 == static int freezeset(register struct parse *p, register cset *cs);
1177 * The main task here is merging identical sets. This is usually a waste
1178 * of time (although the hash code minimizes the overhead), but can win
1179 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1180 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1183 static int /* set number */
1184 vogl_freezeset(p, cs) register struct parse *p;
1187 register uch h = cs->hash;
1189 register cset *top = &p->g->sets[p->g->ncsets];
1191 register size_t css = (size_t)p->g->csetsize;
1193 /* look for an earlier one which is the same */
1194 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1195 if (cs2->hash == h && cs2 != cs)
1198 for (i = 0; (size_t)i < css; i++)
1199 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1201 if ((size_t)i == css)
1205 if (cs2 < top) /* found one */
1207 vogl_freeset(p, cs);
1211 return ((int)(cs - p->g->sets));
1215 - firstch - return first character in a set (which must have at least one)
1216 == static int firstch(register struct parse *p, register cset *cs);
1218 static int /* character; there is no "none" value */
1219 vogl_firstch(p, cs) register struct parse *p;
1223 register size_t css = (size_t)p->g->csetsize;
1225 for (i = 0; (size_t)i < css; i++)
1229 return (0); /* arbitrary */
1233 - nch - number of characters in a set
1234 == static int nch(register struct parse *p, register cset *cs);
1237 vogl_nch(p, cs) register struct parse *p;
1241 register size_t css = (size_t)p->g->csetsize;
1244 for (i = 0; (size_t)i < css; i++)
1251 - mcadd - add a collating element to a cset
1252 == static void mcadd(register struct parse *p, register cset *cs, \
1253 == register char *cp);
1256 vogl_mcadd(p, cs, cp) register struct parse *p;
1260 register size_t oldend = cs->smultis;
1262 cs->smultis += strlen(cp) + 1;
1263 if (cs->multis == NULL)
1264 cs->multis = regex_malloc(cs->smultis);
1266 cs->multis = regex_realloc(cs->multis, cs->smultis);
1267 if (cs->multis == NULL)
1269 SETERROR(REG_ESPACE);
1273 (void)strcpy(cs->multis + oldend - 1, cp);
1274 cs->multis[cs->smultis - 1] = '\0';
1278 - mcsub - subtract a collating element from a cset
1279 == static void mcsub(register cset *cs, register char *cp);
1282 vogl_mcsub(cs, cp) register cset *cs;
1285 register char *fp = vogl_mcfind(cs, cp);
1286 register size_t len = strlen(fp);
1289 (void)memmove(fp, fp + len + 1,
1290 cs->smultis - (fp + len + 1 - cs->multis));
1293 if (cs->smultis == 0)
1295 regex_free(cs->multis);
1300 cs->multis = regex_realloc(cs->multis, cs->smultis);
1301 assert(cs->multis != NULL);
1305 - mcin - is a collating element in a cset?
1306 == static int mcin(register cset *cs, register char *cp);
1309 vogl_mcin(cs, cp) register cset *cs;
1312 return (vogl_mcfind(cs, cp) != NULL);
1316 - mcfind - find a collating element in a cset
1317 == static char *mcfind(register cset *cs, register char *cp);
1320 vogl_mcfind(cs, cp) register cset *cs;
1325 if (cs->multis == NULL)
1327 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1328 if (strcmp(cp, p) == 0)
1334 - mcinvert - invert the list of collating elements in a cset
1335 == static void mcinvert(register struct parse *p, register cset *cs);
1337 * This would have to know the set of possibilities. Implementation
1341 vogl_mcinvert(p, cs) register struct parse *p;
1346 assert(cs->multis == NULL); /* xxx */
1350 - mccase - add case counterparts of the list of collating elements in a cset
1351 == static void mccase(register struct parse *p, register cset *cs);
1353 * This would have to know the set of possibilities. Implementation
1357 vogl_mccase(p, cs) register struct parse *p;
1362 assert(cs->multis == NULL); /* xxx */
1366 - isinsets - is this character in any sets?
1367 == static int isinsets(register struct re_guts *g, int c);
1369 static int /* predicate */
1370 vogl_isinsets(g, c) register struct re_guts *g;
1375 register int ncols = (g->ncsets + (CHAR_BIT - 1)) / CHAR_BIT;
1376 register unsigned uc = (unsigned char)c;
1378 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1385 - samesets - are these two characters in exactly the same sets?
1386 == static int samesets(register struct re_guts *g, int c1, int c2);
1388 static int /* predicate */
1389 vogl_samesets(g, c1, c2) register struct re_guts *g;
1395 register int ncols = (g->ncsets + (CHAR_BIT - 1)) / CHAR_BIT;
1396 register unsigned uc1 = (unsigned char)c1;
1397 register unsigned uc2 = (unsigned char)c2;
1399 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1400 if (col[uc1] != col[uc2])
1406 - categorize - sort out character categories
1407 == static void categorize(struct parse *p, register struct re_guts *g);
1410 vogl_categorize(p, g) struct parse *p;
1411 register struct re_guts *g;
1413 register cat_t *cats = g->categories;
1418 /* avoid making error situations worse */
1422 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1423 if (cats[c] == 0 && vogl_isinsets(g, c))
1425 cat = g->ncategories++;
1427 for (c2 = c + 1; c2 <= CHAR_MAX; c2++)
1428 if (cats[c2] == 0 && vogl_samesets(g, c, c2))
1434 - dupl - emit a duplicate of a bunch of sops
1435 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1437 static sopno /* start of duplicate */
1438 vogl_dupl(p, start, finish) register struct parse *p;
1439 sopno start; /* from here */
1440 sopno finish; /* to this less one */
1442 register sopno ret = HERE();
1443 register sopno len = finish - start;
1445 assert(finish >= start);
1448 vogl_enlarge(p, p->ssize + len); /* this many unexpected additions */
1449 assert(p->ssize >= p->slen + len);
1450 (void)memcpy((char *)(p->strip + p->slen),
1451 (char *)(p->strip + start), (size_t)len * sizeof(sop));
1457 - doemit - emit a strip operator
1458 == static void doemit(register struct parse *p, sop op, size_t opnd);
1460 * It might seem better to implement this as a macro with a function as
1461 * hard-case backup, but it's just too big and messy unless there are
1462 * some changes to the data structures. Maybe later.
1465 vogl_doemit(p, op, opnd) register struct parse *p;
1469 /* avoid making error situations worse */
1473 /* deal with oversize operands ("can't happen", more or less) */
1474 assert(opnd < 1 << OPSHIFT);
1476 /* deal with undersized strip */
1477 if (p->slen >= p->ssize)
1478 vogl_enlarge(p, (p->ssize + 1) / 2 * 3); /* +50% */
1479 assert(p->slen < p->ssize);
1481 /* finally, it's all reduced to the easy case */
1482 p->strip[p->slen++] = SOP(op, opnd);
1486 - doinsert - insert a sop into the strip
1487 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1490 vogl_doinsert(p, op, opnd, pos) register struct parse *p;
1499 /* avoid making error situations worse */
1504 EMIT(op, opnd); /* do checks, ensure space */
1505 assert(HERE() == sn + 1);
1508 /* adjust paren pointers */
1510 for (i = 1; i < NPAREN; i++)
1512 if (p->pbegin[i] >= pos)
1516 if (p->pend[i] >= pos)
1522 memmove((char *)&p->strip[pos + 1], (char *)&p->strip[pos],
1523 (HERE() - pos - 1) * sizeof(sop));
1528 - dofwd - complete a forward reference
1529 == static void dofwd(register struct parse *p, sopno pos, sop value);
1532 vogl_dofwd(p, pos, value) register struct parse *p;
1536 /* avoid making error situations worse */
1540 assert(value < 1 << OPSHIFT);
1541 p->strip[pos] = OP(p->strip[pos]) | value;
1545 - enlarge - enlarge the strip
1546 == static void enlarge(register struct parse *p, sopno size);
1549 vogl_enlarge(p, size) register struct parse *p;
1550 register sopno size;
1554 if (p->ssize >= size)
1557 sp = (sop *)regex_realloc(p->strip, size * sizeof(sop));
1560 SETERROR(REG_ESPACE);
1568 - stripsnug - compact the strip
1569 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1572 vogl_stripsnug(p, g) register struct parse *p;
1573 register struct re_guts *g;
1575 g->nstates = p->slen;
1576 g->strip = (sop *)regex_realloc((char *)p->strip, p->slen * sizeof(sop));
1577 if (g->strip == NULL)
1579 SETERROR(REG_ESPACE);
1580 g->strip = p->strip;
1585 - findmust - fill in must and mlen with longest mandatory literal string
1586 == static void findmust(register struct parse *p, register struct re_guts *g);
1588 * This algorithm could do fancy things like analyzing the operands of |
1589 * for common subsequences. Someday. This code is simple and finds most
1590 * of the interesting cases.
1592 * Note that must and mlen got initialized during setup.
1595 vogl_findmust(p, g) struct parse *p;
1596 register struct re_guts *g;
1600 register sop *newstart = NULL;
1601 register sopno newlen;
1606 /* avoid making error situations worse */
1610 /* find the longest OCHAR sequence in strip */
1612 scan = g->strip + 1;
1618 case OCHAR: /* sequence member */
1619 if (newlen == 0) /* new sequence */
1620 newstart = scan - 1;
1623 case OPLUS_: /* things that don't break one */
1627 case OQUEST_: /* things that must be skipped */
1634 /* assert() interferes w debug printouts */
1635 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1641 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1643 default: /* things that break a sequence */
1644 if (newlen > g->mlen) /* ends one */
1652 } while (OP(s) != OEND);
1654 if (g->mlen == 0) /* there isn't one */
1657 /* turn it into a character string */
1658 g->must = regex_malloc((size_t)g->mlen + 1);
1659 if (g->must == NULL) /* argh; just forget it */
1666 for (i = g->mlen; i > 0; i--)
1668 while (OP(s = *scan++) != OCHAR)
1670 assert(cp < g->must + g->mlen);
1671 *cp++ = (char)OPND(s);
1673 assert(cp == g->must + g->mlen);
1674 *cp++ = '\0'; /* just on general principles */
1678 - pluscount - count + nesting
1679 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1681 static sopno /* nesting depth */
1682 vogl_pluscount(p, g) struct parse *p;
1683 register struct re_guts *g;
1687 register sopno plusnest = 0;
1688 register sopno maxnest = 0;
1691 return (0); /* there may not be an OEND */
1693 scan = g->strip + 1;
1703 if (plusnest > maxnest)
1708 } while (OP(s) != OEND);