1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
5 Software Foundation, Inc.
6 This file is part of the GNU C Library.
7 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License along
20 with this program; if not, write to the Free Software Foundation,
21 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Forward declarations of file-local (static) helpers that are used
   before their definitions.  */
23 static void re_string_construct_common (const char *str, Idx len,
25 RE_TRANSLATE_TYPE trans, bool icase,
26 const re_dfa_t *dfa) internal_function;
27 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
28 const re_node_set *nodes,
29 re_hashval_t hash) internal_function;
30 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
31 const re_node_set *nodes,
33 re_hashval_t hash) internal_function;
35 /* Functions for string operation. */
37 /* This function allocates the buffers. It is necessary to call
38 re_string_reconstruct before using the object.
   PSTR is the string object to set up, STR and LEN describe the raw
   input, INIT_LEN is the requested initial buffer length, TRANS and
   ICASE are the translation table and the case-folding flag, and DFA
   supplies the locale/word settings that are copied into PSTR.
   The buffer length actually requested is INIT_LEN raised to at least
   DFA->mb_cur_max and then capped at LEN + 1. */
41 internal_function __attribute_warn_unused_result__
42 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
43 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
48 /* Ensure at least one character fits into the buffers. */
49 if (init_len < dfa->mb_cur_max)
50 init_len = dfa->mb_cur_max;
/* Never ask for more than the input length plus one. */
51 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
52 re_string_construct_common (str, len, pstr, trans, icase, dfa);
54 ret = re_string_realloc_buffers (pstr, init_buf_len);
55 if (BE (ret != REG_NOERROR, 0))
58 pstr->word_char = dfa->word_char;
59 pstr->word_ops_used = dfa->word_ops_used;
/* When no private copy was allocated, MBS simply aliases the caller's
   string. */
60 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
/* The whole input counts as already valid only when MBS aliases STR and
   the locale is single-byte; otherwise nothing has been built yet. */
61 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
62 pstr->valid_raw_len = pstr->valid_len;
66 /* This function allocates the buffers, and initializes them.
   *PSTR is zeroed first, the common fields are filled in by
   re_string_construct_common, buffers for LEN + 1 elements are requested,
   and the contents are then produced by one of build_wcs_upper_buffer,
   build_upper_buffer, build_wcs_buffer or re_string_translate_buffer.
   NOTE(review): the conditions selecting between those builders are only
   partly visible in this listing -- confirm against the full source. */
69 internal_function __attribute_warn_unused_result__
70 re_string_construct (re_string_t *pstr, const char *str, Idx len,
71 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
74 memset (pstr, '\0', sizeof (re_string_t));
75 re_string_construct_common (str, len, pstr, trans, icase, dfa);
79 ret = re_string_realloc_buffers (pstr, len + 1);
80 if (BE (ret != REG_NOERROR, 0))
/* Without a private copy, MBS aliases the caller's string. */
83 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
88 if (dfa->mb_cur_max > 1)
92 ret = build_wcs_upper_buffer (pstr);
93 if (BE (ret != REG_NOERROR, 0))
/* Compare how much of the raw input has been consumed with LEN. */
95 if (pstr->valid_raw_len >= len)
/* Check whether the buffers still have room for one more multibyte
   character. */
97 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
/* Double the buffer size. */
99 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
100 if (BE (ret != REG_NOERROR, 0))
105 #endif /* RE_ENABLE_I18N */
106 build_upper_buffer (pstr);
110 #ifdef RE_ENABLE_I18N
111 if (dfa->mb_cur_max > 1)
112 build_wcs_buffer (pstr);
114 #endif /* RE_ENABLE_I18N */
117 re_string_translate_buffer (pstr);
/* Mark the whole buffer as valid. */
120 pstr->valid_len = pstr->bufs_len;
121 pstr->valid_raw_len = pstr->bufs_len;
129 /* Helper functions for re_string_allocate, and re_string_construct. */
/* Resize the buffers of PSTR to NEW_BUF_LEN elements and record the new
   length in PSTR->bufs_len.  In the multibyte case the wide-character
   buffer is resized, and the offsets array too if it already exists;
   the byte buffer is resized only when PSTR owns one (mbs_allocated). */
132 internal_function __attribute_warn_unused_result__
133 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
135 #ifdef RE_ENABLE_I18N
136 if (pstr->mb_cur_max > 1)
140 /* Avoid overflow. */
/* The guard uses the larger of the two element sizes that are
   reallocated in this branch. */
141 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
142 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
145 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
146 if (BE (new_wcs == NULL, 0))
/* The offsets array is only resized if it has been created already. */
149 if (pstr->offsets != NULL)
151 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
152 if (BE (new_offsets == NULL, 0))
154 pstr->offsets = new_offsets;
157 #endif /* RE_ENABLE_I18N */
158 if (pstr->mbs_allocated)
160 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
162 if (BE (new_mbs == NULL, 0))
166 pstr->bufs_len = new_buf_len;
/* Fill in the fields of PSTR shared by re_string_allocate and
   re_string_construct: the raw string pointer, whether a private byte
   buffer is needed, the locale properties copied from DFA, and the stop
   positions. */
173 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
174 RE_TRANSLATE_TYPE trans, bool icase,
177 pstr->raw_mbs = (const unsigned char *) str;
/* A private copy of the bytes is needed whenever they may be modified,
   i.e. when translating or folding case. */
182 pstr->mbs_allocated = (trans != NULL || icase);
183 pstr->mb_cur_max = dfa->mb_cur_max;
184 pstr->is_utf8 = dfa->is_utf8;
185 pstr->map_notascii = dfa->map_notascii;
186 pstr->stop = pstr->len;
187 pstr->raw_stop = pstr->stop;
190 #ifdef RE_ENABLE_I18N
192 /* Build wide character buffer PSTR->WCS.
193 If the byte sequence of the string is:
194 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
195 Then the wide character buffer will be:
196 <wc1> , WEOF , <wc2> , WEOF , <wc3>
197 We use WEOF for padding; it indicates that the position isn't
198 the first byte of a multibyte character.
200 Note that this function assumes PSTR->VALID_LEN elements are already
201 built and starts from PSTR->VALID_LEN. */
205 build_wcs_buffer (re_string_t *pstr)
208 unsigned char buf[MB_LEN_MAX];
209 assert (MB_LEN_MAX >= pstr->mb_cur_max);
211 unsigned char buf[64];
214 Idx byte_idx, end_idx, remain_len;
217 /* Build the buffers from pstr->valid_len to either pstr->len or
219 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
220 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
225 remain_len = end_idx - byte_idx;
226 prev_st = pstr->cur_state;
227 /* Apply the translation if we need. */
228 if (BE (pstr->trans != NULL, 0))
/* Translate up to mb_cur_max bytes into BUF (and into PSTR->mbs) so the
   conversion below sees translated input. */
232 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
234 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
235 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
237 p = (const char *) buf;
240 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
241 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
242 if (BE (mbclen == (size_t) -2, 0))
244 /* The buffer doesn't have enough space, finish to build. */
245 pstr->cur_state = prev_st;
248 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
250 /* We treat these cases as a singlebyte character. */
252 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
253 if (BE (pstr->trans != NULL, 0))
254 wc = pstr->trans[wc];
/* Undo any change the failed conversion made to the shift state. */
255 pstr->cur_state = prev_st;
258 /* Write wide character and padding. */
259 pstr->wcs[byte_idx++] = wc;
260 /* Write paddings. */
/* REMAIN_LEN is reused here as the index one past the character's last
   byte. */
261 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
262 pstr->wcs[byte_idx++] = WEOF;
264 pstr->valid_len = byte_idx;
265 pstr->valid_raw_len = byte_idx;
268 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
269 but for REG_ICASE.
   Two paths are visible.  A fast path is used when there is no
   translation table, no non-ASCII mapping and no offsets array in use;
   it indexes the raw string and the buffers with the same BYTE_IDX.
   The general path keeps a separate raw index SRC_IDX, because the
   re-encoded character (MBCDLEN bytes) may differ in length from the
   original (MBCLEN bytes); when that happens PSTR->offsets is created to
   map buffer positions back to raw positions, and PSTR->len / PSTR->stop
   are adjusted by the difference. */
272 internal_function __attribute_warn_unused_result__
273 build_wcs_upper_buffer (re_string_t *pstr)
276 Idx src_idx, byte_idx, end_idx, remain_len;
279 char buf[MB_LEN_MAX];
280 assert (MB_LEN_MAX >= pstr->mb_cur_max);
285 byte_idx = pstr->valid_len;
286 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
288 /* The following optimization assumes that ASCII characters can be
289 mapped to wide characters with a simple cast. */
290 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
292 while (byte_idx < end_idx)
/* ASCII byte in the initial shift state: no multibyte conversion is
   needed. */
296 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
297 && mbsinit (&pstr->cur_state))
299 /* In case of a singlebyte character. */
301 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
302 /* The next step uses the assumption that wchar_t is encoded
303 ASCII-safe: all ASCII values can be converted like this. */
304 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
309 remain_len = end_idx - byte_idx;
310 prev_st = pstr->cur_state;
311 mbclen = __mbrtowc (&wc,
312 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
313 + byte_idx), remain_len, &pstr->cur_state);
/* Any value below (size_t) -2 is a successfully decoded character. */
314 if (BE (mbclen < (size_t) -2, 1))
/* Re-encode WCU; its bytes are used only if the length is unchanged,
   otherwise the original bytes are copied. */
322 mbcdlen = wcrtomb (buf, wcu, &prev_st);
323 if (BE (mbclen == mbcdlen, 1))
324 memcpy (pstr->mbs + byte_idx, buf, mbclen);
332 memcpy (pstr->mbs + byte_idx,
333 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
334 pstr->wcs[byte_idx++] = wcu;
335 /* Write paddings. */
336 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
337 pstr->wcs[byte_idx++] = WEOF;
339 else if (mbclen == (size_t) -1 || mbclen == 0)
341 /* It is an invalid character or '\0'. Just use the byte. */
342 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
343 pstr->mbs[byte_idx] = ch;
344 /* And also cast it to wide char. */
345 pstr->wcs[byte_idx++] = (wchar_t) ch;
346 if (BE (mbclen == (size_t) -1, 0))
347 pstr->cur_state = prev_st;
351 /* The buffer doesn't have enough space, finish to build. */
352 pstr->cur_state = prev_st;
356 pstr->valid_len = byte_idx;
357 pstr->valid_raw_len = byte_idx;
/* General path: SRC_IDX walks the raw string, BYTE_IDX walks the
   buffers. */
361 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
366 remain_len = end_idx - byte_idx;
367 prev_st = pstr->cur_state;
368 if (BE (pstr->trans != NULL, 0))
/* Translate up to mb_cur_max raw bytes into BUF before decoding. */
372 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
374 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
375 buf[i] = pstr->trans[ch];
377 p = (const char *) buf;
380 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
381 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
382 if (BE (mbclen < (size_t) -2, 1))
390 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
391 if (BE (mbclen == mbcdlen, 1))
392 memcpy (pstr->mbs + byte_idx, buf, mbclen);
/* The re-encoded character has a different byte length. */
393 else if (mbcdlen != (size_t) -1)
/* Check whether the re-encoded character still fits in the buffers. */
397 if (byte_idx + mbcdlen > pstr->bufs_len)
399 pstr->cur_state = prev_st;
/* Create the offsets array on first use. */
403 if (pstr->offsets == NULL)
405 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
407 if (pstr->offsets == NULL)
/* Up to this point buffer and raw positions coincided, so the mapping
   for everything already built is the identity. */
410 if (!pstr->offsets_needed)
412 for (i = 0; i < (size_t) byte_idx; ++i)
413 pstr->offsets[i] = i;
414 pstr->offsets_needed = 1;
417 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
418 pstr->wcs[byte_idx] = wcu;
419 pstr->offsets[byte_idx] = src_idx;
/* Map each trailing byte of the re-encoded character to a byte of the
   original one, clamped to the original's last byte. */
420 for (i = 1; i < mbcdlen; ++i)
422 pstr->offsets[byte_idx + i]
423 = src_idx + (i < mbclen ? i : mbclen - 1);
424 pstr->wcs[byte_idx + i] = WEOF;
/* The buffer string grew or shrank relative to the raw string. */
426 pstr->len += mbcdlen - mbclen;
427 if (pstr->raw_stop > src_idx)
428 pstr->stop += mbcdlen - mbclen;
429 end_idx = (pstr->bufs_len > pstr->len)
430 ? pstr->len : pstr->bufs_len;
436 memcpy (pstr->mbs + byte_idx, p, mbclen);
439 memcpy (pstr->mbs + byte_idx, p, mbclen);
441 if (BE (pstr->offsets_needed != 0, 0))
444 for (i = 0; i < mbclen; ++i)
445 pstr->offsets[byte_idx + i] = src_idx + i;
449 pstr->wcs[byte_idx++] = wcu;
450 /* Write paddings. */
451 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
452 pstr->wcs[byte_idx++] = WEOF;
454 else if (mbclen == (size_t) -1 || mbclen == 0)
456 /* It is an invalid character or '\0'. Just use the byte. */
457 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
459 if (BE (pstr->trans != NULL, 0))
460 ch = pstr->trans [ch];
461 pstr->mbs[byte_idx] = ch;
463 if (BE (pstr->offsets_needed != 0, 0))
464 pstr->offsets[byte_idx] = src_idx;
467 /* And also cast it to wide char. */
468 pstr->wcs[byte_idx++] = (wchar_t) ch;
469 if (BE (mbclen == (size_t) -1, 0))
470 pstr->cur_state = prev_st;
474 /* The buffer doesn't have enough space, finish to build. */
475 pstr->cur_state = prev_st;
479 pstr->valid_len = byte_idx;
480 pstr->valid_raw_len = src_idx;
/* re_string_skip_chars: starting at raw index
   PSTR->raw_mbs_idx + PSTR->valid_raw_len, decode characters with
   __mbrtowc and advance until the raw index is no longer below
   NEW_RAW_IDX.  A conversion result of (size_t) -2, (size_t) -1 or 0 is
   handled as a single-byte character and the saved shift state is
   restored.  LAST_WC is an output parameter; NOTE(review): the statement
   that stores into it is not visible in this listing -- confirm. */
484 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
489 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
496 /* Skip the characters which are not necessary to check. */
497 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
498 rawbuf_idx < new_raw_idx;)
502 remain_len = pstr->len - rawbuf_idx;
503 prev_st = pstr->cur_state;
504 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
505 remain_len, &pstr->cur_state);
506 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
508 /* We treat these cases as a single byte character. */
509 if (mbclen == 0 || remain_len == 0)
512 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
/* Discard whatever the failed conversion did to the shift state. */
514 pstr->cur_state = prev_st;
518 /* Then proceed the next character. */
519 rawbuf_idx += mbclen;
524 #endif /* RE_ENABLE_I18N */
526 /* Build the buffer PSTR->MBS, and apply the translation if we need.
527 This function is used in case of REG_ICASE.
   Processing starts at PSTR->valid_len and stops at the smaller of
   PSTR->bufs_len and PSTR->len; on return valid_len and valid_raw_len
   are both set to the index reached. */
531 build_upper_buffer (re_string_t *pstr)
533 Idx char_idx, end_idx;
534 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
536 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
538 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
539 if (BE (pstr->trans != NULL, 0))
540 ch = pstr->trans[ch];
/* NOTE(review): the condition choosing between the upper-cased and the
   unchanged store below is not visible in this listing. */
542 pstr->mbs[char_idx] = toupper (ch);
544 pstr->mbs[char_idx] = ch;
546 pstr->valid_len = char_idx;
547 pstr->valid_raw_len = char_idx;
550 /* Apply TRANS to the buffer in PSTR.
   Each raw byte from PSTR->valid_len up to the smaller of PSTR->bufs_len
   and PSTR->len is mapped through PSTR->trans into PSTR->mbs; valid_len
   and valid_raw_len are then set to the index reached.  PSTR->trans is
   dereferenced unconditionally here. */
554 re_string_translate_buffer (re_string_t *pstr)
556 Idx buf_idx, end_idx;
557 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
559 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
561 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
562 pstr->mbs[buf_idx] = pstr->trans[ch];
565 pstr->valid_len = buf_idx;
566 pstr->valid_raw_len = buf_idx;
569 /* This function re-constructs the buffers.
570 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
571 convert to upper case in case of REG_ICASE, apply translation.
   IDX is the raw-string index that becomes the new start of the buffers
   (PSTR->raw_mbs_idx is set to IDX below).  EFLAGS is consulted for
   REG_NOTBOL when the initial context is reset and is passed on to
   re_string_context_at.  OFFSET is the distance from the current
   raw_mbs_idx to IDX; already-built contents at or after OFFSET are moved
   to the front of the buffers when possible, otherwise they are skipped
   and rebuilt. */
574 internal_function __attribute_warn_unused_result__
575 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
579 if (BE (pstr->raw_mbs_idx <= idx, 0))
580 offset = idx - pstr->raw_mbs_idx;
/* The statements below reset the string to its initial state. */
584 #ifdef RE_ENABLE_I18N
585 if (pstr->mb_cur_max > 1)
586 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
587 #endif /* RE_ENABLE_I18N */
588 pstr->len = pstr->raw_len;
589 pstr->stop = pstr->raw_stop;
591 pstr->raw_mbs_idx = 0;
592 pstr->valid_raw_len = 0;
593 pstr->offsets_needed = 0;
594 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
595 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
596 if (!pstr->mbs_allocated)
597 pstr->mbs = (unsigned char *) pstr->raw_mbs;
601 if (BE (offset != 0, 1))
603 /* Should the already checked characters be kept? */
604 if (BE (offset < pstr->valid_raw_len, 1))
606 /* Yes, move them to the front of the buffer. */
607 #ifdef RE_ENABLE_I18N
608 if (BE (pstr->offsets_needed, 0))
/* Binary search PSTR->offsets for the buffer position whose raw offset
   is OFFSET. */
610 Idx low = 0, high = pstr->valid_len, mid;
613 mid = (high + low) / 2;
614 if (pstr->offsets[mid] > offset)
616 else if (pstr->offsets[mid] < offset)
622 if (pstr->offsets[mid] < offset)
624 pstr->tip_context = re_string_context_at (pstr, mid - 1,
626 /* This can be quite complicated, so handle specially
627 only the common and easy case where the character with
628 different length representation of lower and upper
629 case is present at or after offset. */
630 if (pstr->valid_len > offset
631 && mid == offset && pstr->offsets[mid] == offset)
633 memmove (pstr->wcs, pstr->wcs + offset,
634 (pstr->valid_len - offset) * sizeof (wint_t));
635 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
636 pstr->valid_len -= offset;
637 pstr->valid_raw_len -= offset;
/* Rebase the remaining offsets so that they are relative to the new
   buffer start. */
638 for (low = 0; low < pstr->valid_len; low++)
639 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
643 /* Otherwise, just find out how long the partial multibyte
644 character at offset is and fill it with WEOF/255. */
645 pstr->len = pstr->raw_len - idx + offset;
646 pstr->stop = pstr->raw_stop - idx + offset;
647 pstr->offsets_needed = 0;
648 while (mid > 0 && pstr->offsets[mid - 1] == offset)
650 while (mid < pstr->valid_len)
651 if (pstr->wcs[mid] != WEOF)
655 if (mid == pstr->valid_len)
659 pstr->valid_len = pstr->offsets[mid] - offset;
662 for (low = 0; low < pstr->valid_len; ++low)
663 pstr->wcs[low] = WEOF;
664 memset (pstr->mbs, 255, pstr->valid_len);
667 pstr->valid_raw_len = pstr->valid_len;
673 pstr->tip_context = re_string_context_at (pstr, offset - 1,
675 #ifdef RE_ENABLE_I18N
676 if (pstr->mb_cur_max > 1)
677 memmove (pstr->wcs, pstr->wcs + offset,
678 (pstr->valid_len - offset) * sizeof (wint_t));
679 #endif /* RE_ENABLE_I18N */
/* MBS is moved only when it is a private buffer. */
680 if (BE (pstr->mbs_allocated, 0))
681 memmove (pstr->mbs, pstr->mbs + offset,
682 pstr->valid_len - offset);
683 pstr->valid_len -= offset;
684 pstr->valid_raw_len -= offset;
686 assert (pstr->valid_len > 0);
692 #ifdef RE_ENABLE_I18N
693 /* No, skip all characters until IDX. */
694 Idx prev_valid_len = pstr->valid_len;
696 if (BE (pstr->offsets_needed, 0))
698 pstr->len = pstr->raw_len - idx + offset;
699 pstr->stop = pstr->raw_stop - idx + offset;
700 pstr->offsets_needed = 0;
704 #ifdef RE_ENABLE_I18N
705 if (pstr->mb_cur_max > 1)
712 const unsigned char *raw, *p, *end;
714 /* Special case UTF-8. Multi-byte chars start with any
715 byte other than 0x80 - 0xbf. */
716 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
717 end = raw + (offset - pstr->mb_cur_max);
718 if (end < pstr->raw_mbs)
720 p = raw + offset - 1;
722 /* We know the wchar_t encoding is UCS4, so for the simple
723 case, ASCII characters, skip the conversion step. */
724 if (isascii (*p) && BE (pstr->trans == NULL, 1))
726 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
727 /* pstr->valid_len = 0; */
/* Scan backwards from P for a byte that is not a UTF-8 continuation
   byte (10xxxxxx). */
732 for (; p >= end; --p)
733 if ((*p & 0xc0) != 0x80)
737 Idx mlen = raw + pstr->len - p;
740 #if 0 /* dead code: buf is set but never used */
741 unsigned char buf[6];
742 if (BE (pstr->trans != NULL, 0))
744 int i = mlen < 6 ? mlen : 6;
746 buf[i] = pstr->trans[p[i]];
749 /* XXX Don't use mbrtowc, we know which conversion
750 to use (UTF-8 -> UCS4). */
751 memset (&cur_state, 0, sizeof (cur_state));
752 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
754 if (raw + offset - p <= mbclen
755 && mbclen < (size_t) -2)
757 memset (&pstr->cur_state, '\0',
/* Number of bytes of this character that lie at or after OFFSET. */
759 pstr->valid_len = mbclen - (raw + offset - p);
767 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
770 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
772 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
773 && IS_WIDE_WORD_CHAR (wc))
775 : ((IS_WIDE_NEWLINE (wc)
776 && pstr->newline_anchor)
777 ? CONTEXT_NEWLINE : 0));
/* Fill the leading VALID_LEN positions with WEOF (and 255 in a private
   MBS). */
778 if (BE (pstr->valid_len, 0))
780 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
781 pstr->wcs[wcs_idx] = WEOF;
782 if (pstr->mbs_allocated)
783 memset (pstr->mbs, 255, pstr->valid_len);
785 pstr->valid_raw_len = pstr->valid_len;
788 #endif /* RE_ENABLE_I18N */
/* The new context is derived from the byte just before the new start. */
790 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
791 pstr->valid_raw_len = 0;
794 pstr->tip_context = (bitset_contain (pstr->word_char, c)
796 : ((IS_NEWLINE (c) && pstr->newline_anchor)
797 ? CONTEXT_NEWLINE : 0));
800 if (!BE (pstr->mbs_allocated, 0))
803 pstr->raw_mbs_idx = idx;
805 pstr->stop -= offset;
807 /* Then build the buffers. */
808 #ifdef RE_ENABLE_I18N
809 if (pstr->mb_cur_max > 1)
813 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
814 if (BE (ret != REG_NOERROR, 0))
818 build_wcs_buffer (pstr);
821 #endif /* RE_ENABLE_I18N */
822 if (BE (pstr->mbs_allocated, 0))
825 build_upper_buffer (pstr);
826 else if (pstr->trans != NULL)
827 re_string_translate_buffer (pstr);
830 pstr->valid_len = pstr->len;
/* Peek at the byte IDX positions after PSTR->cur_idx without advancing.
   When PSTR has no private byte buffer, or the position is not a
   single-byte character in a multibyte locale, the result comes from
   re_string_peek_byte.  Otherwise the byte is read from the raw string,
   going through PSTR->offsets when offsets are in use. */
837 internal_function __attribute ((pure))
838 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
843 /* Handle the common (easiest) cases first. */
844 if (BE (!pstr->mbs_allocated, 1))
845 return re_string_peek_byte (pstr, idx);
847 #ifdef RE_ENABLE_I18N
848 if (pstr->mb_cur_max > 1
849 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
850 return re_string_peek_byte (pstr, idx);
853 off = pstr->cur_idx + idx;
854 #ifdef RE_ENABLE_I18N
/* Map the buffer position back to its raw-string position. */
855 if (pstr->offsets_needed)
856 off = pstr->offsets[off];
859 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
861 #ifdef RE_ENABLE_I18N
862 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
863 this function returns CAPITAL LETTER I instead of first byte of
864 DOTLESS SMALL LETTER I. The latter would confuse the parser,
865 since peek_byte_case doesn't advance cur_idx in any way. */
866 if (pstr->offsets_needed && !isascii (ch))
867 return re_string_peek_byte (pstr, idx);
/* Fetch the byte at PSTR->cur_idx and advance past it.  When PSTR has no
   private byte buffer this is re_string_fetch_byte; otherwise the byte is
   taken from the raw string, going through PSTR->offsets when offsets are
   in use.
   NOTE(review): this function is declared with the `pure' attribute, yet
   the final return statement increments PSTR->cur_idx and
   re_string_skip_bytes is called on PSTR -- confirm that the attribute is
   appropriate. */
874 internal_function __attribute ((pure))
875 re_string_fetch_byte_case (re_string_t *pstr)
877 if (BE (!pstr->mbs_allocated, 1))
878 return re_string_fetch_byte (pstr);
880 #ifdef RE_ENABLE_I18N
881 if (pstr->offsets_needed)
886 /* For tr_TR.UTF-8 [[:islower:]] there is
887 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
888 in that case the whole multi-byte character and return
889 the original letter. On the other side, with
890 [[: DOTLESS SMALL LETTER I return [[:I, as doing
891 anything else would complicate things too much. */
893 if (!re_string_first_byte (pstr, pstr->cur_idx))
894 return re_string_fetch_byte (pstr);
/* Look up the corresponding raw byte through the offsets map. */
896 off = pstr->offsets[pstr->cur_idx];
897 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
900 return re_string_fetch_byte (pstr);
/* Skip the whole character at the current position. */
902 re_string_skip_bytes (pstr,
903 re_string_char_size_at (pstr, pstr->cur_idx));
908 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
/* Release the buffers owned by PSTR.  The offsets array is freed when
   RE_ENABLE_I18N is defined; the byte buffer is handled only when PSTR
   owns it (mbs_allocated). */
913 re_string_destruct (re_string_t *pstr)
915 #ifdef RE_ENABLE_I18N
917 re_free (pstr->offsets);
918 #endif /* RE_ENABLE_I18N */
919 if (pstr->mbs_allocated)
923 /* Return the context at IDX in INPUT.
   An invalid IDX yields INPUT->tip_context.  IDX equal to INPUT->len
   yields CONTEXT_ENDBUF, combined with CONTEXT_NEWLINE unless EFLAGS has
   REG_NOTEOL.  Otherwise the character at IDX is classified: in a
   multibyte locale the wide character is located by stepping over WEOF
   padding entries, in a single-byte locale the byte itself is used. */
927 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
930 if (BE (! REG_VALID_INDEX (idx), 0))
931 /* In this case, we use the value stored in input->tip_context,
932 since we can't know the character in input->mbs[-1] here. */
933 return input->tip_context;
934 if (BE (idx == input->len, 0))
935 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
936 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
937 #ifdef RE_ENABLE_I18N
938 if (input->mb_cur_max > 1)
/* WEOF marks a position that is not the first byte of a character. */
942 while(input->wcs[wc_idx] == WEOF)
945 /* It must not happen. */
946 assert (REG_VALID_INDEX (wc_idx));
949 if (! REG_VALID_INDEX (wc_idx))
950 return input->tip_context;
952 wc = input->wcs[wc_idx];
953 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
955 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
956 ? CONTEXT_NEWLINE : 0);
961 c = re_string_byte_at (input, idx);
962 if (bitset_contain (input->word_char, c))
964 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
968 /* Functions for set operation. */
/* Allocate storage for SIZE elements in SET->elems.  A NULL result from
   re_malloc is checked for. */
971 internal_function __attribute_warn_unused_result__
972 re_node_set_alloc (re_node_set *set, Idx size)
976 set->elems = re_malloc (Idx, size);
977 if (BE (set->elems == NULL, 0))
/* Initialize SET with storage for one element and store ELEM in it.  If
   the allocation fails, SET->alloc and SET->nelem are reset to 0. */
983 internal_function __attribute_warn_unused_result__
984 re_node_set_init_1 (re_node_set *set, Idx elem)
988 set->elems = re_malloc (Idx, 1);
989 if (BE (set->elems == NULL, 0))
991 set->alloc = set->nelem = 0;
994 set->elems[0] = elem;
/* Initialize SET with storage for two elements and store ELEM1 and ELEM2.
   NOTE(review): the visible stores suggest three cases (a single element,
   ELEM1 then ELEM2, ELEM2 then ELEM1) chosen by comparing the two values;
   the comparisons themselves are not visible in this listing -- confirm. */
999 internal_function __attribute_warn_unused_result__
1000 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1003 set->elems = re_malloc (Idx, 2);
1004 if (BE (set->elems == NULL, 0))
1009 set->elems[0] = elem1;
1016 set->elems[0] = elem1;
1017 set->elems[1] = elem2;
1021 set->elems[0] = elem2;
1022 set->elems[1] = elem1;
/* Initialize DEST as a copy of SRC: DEST gets exactly SRC->nelem slots
   and the elements are copied with memcpy.  If the allocation fails,
   DEST->alloc and DEST->nelem are reset to 0.  The other visible path
   initializes DEST as an empty set. */
1028 static reg_errcode_t
1029 internal_function __attribute_warn_unused_result__
1030 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1032 dest->nelem = src->nelem;
1035 dest->alloc = dest->nelem;
1036 dest->elems = re_malloc (Idx, dest->alloc);
1037 if (BE (dest->elems == NULL, 0))
1039 dest->alloc = dest->nelem = 0;
1042 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1045 re_node_set_init_empty (dest);
1049 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1050 DEST. Return value indicates the error code or REG_NOERROR if succeeded.
1051 Note: We assume dest->elems is NULL, when dest->alloc is 0.
   The work is done in two phases.  First SRC1 and SRC2 are walked from
   their highest elements downwards, and every common element not already
   in DEST is written into the unused top of DEST->elems (SBASE moves
   downwards).  Then that staged run is merged with the existing DEST
   elements in place. */
1053 static reg_errcode_t
1054 internal_function __attribute_warn_unused_result__
1055 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1056 const re_node_set *src2)
1058 Idx i1, i2, is, id, delta, sbase;
1059 if (src1->nelem == 0 || src2->nelem == 0)
1062 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1063 conservative estimate. */
1064 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1066 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1067 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1068 if (BE (new_elems == NULL, 0))
1070 dest->elems = new_elems;
1071 dest->alloc = new_alloc;
1074 /* Find the items in the intersection of SRC1 and SRC2, and copy
1075 into the top of DEST those that are not already in DEST itself. */
1076 sbase = dest->nelem + src1->nelem + src2->nelem;
1077 i1 = src1->nelem - 1;
1078 i2 = src2->nelem - 1;
1079 id = dest->nelem - 1;
1082 if (src1->elems[i1] == src2->elems[i2])
1084 /* Try to find the item in DEST. Maybe we could binary search? */
1085 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1088 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1089 dest->elems[--sbase] = src1->elems[i1];
1091 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1095 /* Lower the highest of the two items. */
1096 else if (src1->elems[i1] < src2->elems[i2])
1098 if (! REG_VALID_INDEX (--i2))
1103 if (! REG_VALID_INDEX (--i1))
/* ID is the last existing DEST element, IS the last staged element, and
   DELTA the number of staged elements. */
1108 id = dest->nelem - 1;
1109 is = dest->nelem + src1->nelem + src2->nelem - 1;
1110 delta = is - sbase + 1;
1112 /* Now copy. When DELTA becomes zero, the remaining
1113 DEST elements are already in place; this is more or
1114 less the same loop that is in re_node_set_merge. */
1115 dest->nelem += delta;
1116 if (delta > 0 && REG_VALID_INDEX (id))
1119 if (dest->elems[is] > dest->elems[id])
1121 /* Copy from the top. */
1122 dest->elems[id + delta--] = dest->elems[is--];
1128 /* Slide from the bottom. */
1129 dest->elems[id + delta] = dest->elems[id];
1130 if (! REG_VALID_INDEX (--id))
1135 /* Copy remaining SRC elements. */
1136 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1141 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1142 DEST. Return value indicates the error code or REG_NOERROR if succeeded.
   When both sources are non-NULL and non-empty, DEST is allocated with
   room for SRC1->nelem + SRC2->nelem elements and filled by a two-way
   merge.  When only one source is non-empty, DEST becomes a copy of it
   via re_node_set_init_copy; when neither is, DEST is initialized empty. */
1144 static reg_errcode_t
1145 internal_function __attribute_warn_unused_result__
1146 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1147 const re_node_set *src2)
1150 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1152 dest->alloc = src1->nelem + src2->nelem;
1153 dest->elems = re_malloc (Idx, dest->alloc);
1154 if (BE (dest->elems == NULL, 0))
1159 if (src1 != NULL && src1->nelem > 0)
1160 return re_node_set_init_copy (dest, src1);
1161 else if (src2 != NULL && src2->nelem > 0)
1162 return re_node_set_init_copy (dest, src2);
1164 re_node_set_init_empty (dest);
/* Two-way merge while both inputs still have elements. */
1167 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1169 if (src1->elems[i1] > src2->elems[i2])
1171 dest->elems[id++] = src2->elems[i2++];
1174 if (src1->elems[i1] == src2->elems[i2])
1176 dest->elems[id++] = src1->elems[i1++];
/* Append whatever is left of the input that was not exhausted. */
1178 if (i1 < src1->nelem)
1180 memcpy (dest->elems + id, src1->elems + i1,
1181 (src1->nelem - i1) * sizeof (Idx));
1182 id += src1->nelem - i1;
1184 else if (i2 < src2->nelem)
1186 memcpy (dest->elems + id, src2->elems + i2,
1187 (src2->nelem - i2) * sizeof (Idx));
1188 id += src2->nelem - i2;
1194 /* Calculate the union set of the sets DEST and SRC. And store it to
1195 DEST. Return value indicates the error code or REG_NOERROR if succeeded.
   DEST is grown when it has fewer than 2 * SRC->nelem + DEST->nelem
   slots.  The SRC elements missing from DEST are first staged at the top
   of DEST->elems (SBASE moves downwards) and then merged with the
   existing DEST elements in place. */
1197 static reg_errcode_t
1198 internal_function __attribute_warn_unused_result__
1199 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1201 Idx is, id, sbase, delta;
1202 if (src == NULL || src->nelem == 0)
1204 if (dest->alloc < 2 * src->nelem + dest->nelem)
1206 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1207 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1208 if (BE (new_buffer == NULL, 0))
1210 dest->elems = new_buffer;
1211 dest->alloc = new_alloc;
/* An empty DEST simply becomes a copy of SRC. */
1214 if (BE (dest->nelem == 0, 0))
1216 dest->nelem = src->nelem;
1217 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1221 /* Copy into the top of DEST the items of SRC that are not
1222 found in DEST. Maybe we could binary search in DEST? */
1223 for (sbase = dest->nelem + 2 * src->nelem,
1224 is = src->nelem - 1, id = dest->nelem - 1;
1225 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1227 if (dest->elems[id] == src->elems[is])
1229 else if (dest->elems[id] < src->elems[is])
1230 dest->elems[--sbase] = src->elems[is--];
1231 else /* if (dest->elems[id] > src->elems[is]) */
1235 if (REG_VALID_INDEX (is))
1237 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1239 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
/* ID is the last existing DEST element, IS the last staged element, and
   DELTA the number of staged elements. */
1242 id = dest->nelem - 1;
1243 is = dest->nelem + 2 * src->nelem - 1;
1244 delta = is - sbase + 1;
1248 /* Now copy. When DELTA becomes zero, the remaining
1249 DEST elements are already in place. */
1250 dest->nelem += delta;
1253 if (dest->elems[is] > dest->elems[id])
1255 /* Copy from the top. */
1256 dest->elems[id + delta--] = dest->elems[is--];
1262 /* Slide from the bottom. */
1263 dest->elems[id + delta] = dest->elems[id];
1264 if (! REG_VALID_INDEX (--id))
1266 /* Copy remaining SRC elements. */
1267 memcpy (dest->elems, dest->elems + sbase,
1268 delta * sizeof (Idx));
1277 /* Insert the new element ELEM into the re_node_set* SET.
1278 SET should not already have ELEM.
1279 Return true if successful.
   The elements are kept in ascending order: larger elements are shifted
   up by one slot and ELEM is stored in the gap.  The storage is doubled
   when SET is full. */
1282 internal_function __attribute_warn_unused_result__
1283 re_node_set_insert (re_node_set *set, Idx elem)
1286 /* In case the set is empty. */
1287 if (set->alloc == 0)
1288 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1290 if (BE (set->nelem, 0) == 0)
1292 /* We already guaranteed above that set->alloc != 0. */
1293 set->elems[0] = elem;
1298 /* Realloc if we need. */
1299 if (set->alloc == set->nelem)
1302 set->alloc = set->alloc * 2;
1303 new_elems = re_realloc (set->elems, Idx, set->alloc);
1304 if (BE (new_elems == NULL, 0))
1306 set->elems = new_elems;
1309 /* Move the elements which follows the new element. Test the
1310 first element separately to skip a check in the inner loop. */
1311 if (elem < set->elems[0])
/* ELEM is smaller than every element: shift the whole array up. */
1314 for (idx = set->nelem; idx > 0; idx--)
1315 set->elems[idx] = set->elems[idx - 1];
/* Here elems[0] is not greater than ELEM, so this loop stops before
   IDX reaches 0. */
1319 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1320 set->elems[idx] = set->elems[idx - 1];
1323 /* Insert the new element. */
1324 set->elems[idx] = elem;
1329 /* Insert the new element ELEM into the re_node_set* SET.
1330 SET should not already have any element greater than or equal to ELEM.
1331 Return true if successful.
   ELEM is appended at index SET->nelem without any search or shifting.
   When SET is full, the storage is grown to (alloc + 1) * 2 slots. */
1334 internal_function __attribute_warn_unused_result__
1335 re_node_set_insert_last (re_node_set *set, Idx elem)
1337 /* Realloc if we need. */
1338 if (set->alloc == set->nelem)
1341 set->alloc = (set->alloc + 1) * 2;
1342 new_elems = re_realloc (set->elems, Idx, set->alloc);
1343 if (BE (new_elems == NULL, 0))
1345 set->elems = new_elems;
1348 /* Insert the new element. */
1349 set->elems[set->nelem++] = elem;
1353 /* Compare two node sets SET1 and SET2.
1354 Return true if SET1 and SET2 are equivalent.
   A NULL set or differing element counts are tested first; otherwise the
   elements are compared pairwise from the last index down to 0. */
1357 internal_function __attribute ((pure))
1358 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1361 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1363 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1364 if (set1->elems[i] != set2->elems[i])
1369 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.
   The lookup is a binary search over SET->elems between indices IDX and
   RIGHT; the final comparison at IDX decides the result. */
1372 internal_function __attribute ((pure))
1373 re_node_set_contains (const re_node_set *set, Idx elem)
1375 __re_size_t idx, right, mid;
1376 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1379 /* Binary search the element. */
1381 right = set->nelem - 1;
1384 mid = (idx + right) / 2;
1385 if (set->elems[mid] < elem)
1390 return set->elems[idx] == elem ? idx + 1 : 0;
/* Remove the element at index IDX from SET by shifting the following
   elements down by one slot.  An IDX outside [0, SET->nelem) is tested
   for first.
   NOTE(review): the loop reads elems[idx + 1], so it relies on
   SET->nelem having been decremented before the loop; that statement is
   not visible in this listing -- confirm. */
1395 re_node_set_remove_at (re_node_set *set, Idx idx)
1397 if (idx < 0 || idx >= set->nelem)
1400 for (; idx < set->nelem; idx++)
1401 set->elems[idx] = set->elems[idx + 1];
1405 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1406 Or return REG_MISSING if an error occurred. */
1410 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1412 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1414 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1415 Idx *new_nexts, *new_indices;
1416 re_node_set *new_edests, *new_eclosures;
1417 re_token_t *new_nodes;
1418 size_t max_object_size =
1419 MAX (sizeof (re_token_t),
1420 MAX (sizeof (re_node_set),
1423 /* Avoid overflows. */
1424 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1427 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1428 if (BE (new_nodes == NULL, 0))
1430 dfa->nodes = new_nodes;
1431 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1432 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1433 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1434 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1435 if (BE (new_nexts == NULL || new_indices == NULL
1436 || new_edests == NULL || new_eclosures == NULL, 0))
1438 dfa->nexts = new_nexts;
1439 dfa->org_indices = new_indices;
1440 dfa->edests = new_edests;
1441 dfa->eclosures = new_eclosures;
1442 dfa->nodes_alloc = new_nodes_alloc;
1444 dfa->nodes[dfa->nodes_len] = token;
1445 dfa->nodes[dfa->nodes_len].constraint = 0;
1446 #ifdef RE_ENABLE_I18N
1448 int type = token.type;
1449 dfa->nodes[dfa->nodes_len].accept_mb =
1450 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1453 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1454 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1455 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1456 return dfa->nodes_len++;
1459 static inline re_hashval_t
1461 calc_state_hash (const re_node_set *nodes, unsigned int context)
1463 re_hashval_t hash = nodes->nelem + context;
1465 for (i = 0 ; i < nodes->nelem ; i++)
1466 hash += nodes->elems[i];
1470 /* Search for the state whose node_set is equivalent to NODES.
1471 Return the pointer to the state, if we found it in the DFA.
1472 Otherwise create the new one and return it. In case of an error
1473 return NULL and set the error code in ERR.
1474 Note: - We assume NULL as the invalid state, then it is possible that
1475 return value is NULL and ERR is REG_NOERROR.
1476 - We never return non-NULL value in case of any errors, it is for
1479 static re_dfastate_t *
1480 internal_function __attribute_warn_unused_result__
1481 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1482 const re_node_set *nodes)
1485 re_dfastate_t *new_state;
1486 struct re_state_table_entry *spot;
1489 /* Suppress bogus uninitialized-variable warnings. */
1492 if (BE (nodes->nelem == 0, 0))
1497 hash = calc_state_hash (nodes, 0);
1498 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1500 for (i = 0 ; i < spot->num ; i++)
1502 re_dfastate_t *state = spot->array[i];
1503 if (hash != state->hash)
1505 if (re_node_set_compare (&state->nodes, nodes))
1509 /* There are no appropriate state in the dfa, create the new one. */
1510 new_state = create_ci_newstate (dfa, nodes, hash);
1511 if (BE (new_state == NULL, 0))
1517 /* Search for the state whose node_set is equivalent to NODES and
1518 whose context is equivalent to CONTEXT.
1519 Return the pointer to the state, if we found it in the DFA.
1520 Otherwise create the new one and return it. In case of an error
1521 return NULL and set the error code in ERR.
1522 Note: - We assume NULL as the invalid state, then it is possible that
1523 return value is NULL and ERR is REG_NOERROR.
1524 - We never return non-NULL value in case of any errors, it is for
1527 static re_dfastate_t *
1528 internal_function __attribute_warn_unused_result__
1529 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1530 const re_node_set *nodes, unsigned int context)
1533 re_dfastate_t *new_state;
1534 struct re_state_table_entry *spot;
1537 /* Suppress bogus uninitialized-variable warnings. */
1540 if (nodes->nelem == 0)
1545 hash = calc_state_hash (nodes, context);
1546 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1548 for (i = 0 ; i < spot->num ; i++)
1550 re_dfastate_t *state = spot->array[i];
1551 if (state->hash == hash
1552 && state->context == context
1553 && re_node_set_compare (state->entrance_nodes, nodes))
1556 /* There are no appropriate state in `dfa', create the new one. */
1557 new_state = create_cd_newstate (dfa, nodes, context, hash);
1558 if (BE (new_state == NULL, 0))
1564 /* Finish initialization of the new state NEWSTATE, and using its hash value
1565 HASH put in the appropriate bucket of DFA's state table. Return value
1566 indicates the error code if failed. */
1568 static reg_errcode_t
1569 __attribute_warn_unused_result__
1570 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1573 struct re_state_table_entry *spot;
1577 newstate->hash = hash;
1578 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1579 if (BE (err != REG_NOERROR, 0))
1581 for (i = 0; i < newstate->nodes.nelem; i++)
1583 Idx elem = newstate->nodes.elems[i];
1584 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1585 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1589 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1590 if (BE (spot->alloc <= spot->num, 0))
1592 Idx new_alloc = 2 * spot->num + 2;
1593 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1595 if (BE (new_array == NULL, 0))
1597 spot->array = new_array;
1598 spot->alloc = new_alloc;
1600 spot->array[spot->num++] = newstate;
1605 free_state (re_dfastate_t *state)
1607 re_node_set_free (&state->non_eps_nodes);
1608 re_node_set_free (&state->inveclosure);
1609 if (state->entrance_nodes != &state->nodes)
1611 re_node_set_free (state->entrance_nodes);
1612 re_free (state->entrance_nodes);
1614 re_node_set_free (&state->nodes);
1615 re_free (state->word_trtable);
1616 re_free (state->trtable);
1620 /* Create the new state which is independ of contexts.
1621 Return the new state if succeeded, otherwise return NULL. */
1623 static re_dfastate_t *
1624 internal_function __attribute_warn_unused_result__
1625 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1630 re_dfastate_t *newstate;
1632 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1633 if (BE (newstate == NULL, 0))
1635 err = re_node_set_init_copy (&newstate->nodes, nodes);
1636 if (BE (err != REG_NOERROR, 0))
1642 newstate->entrance_nodes = &newstate->nodes;
1643 for (i = 0 ; i < nodes->nelem ; i++)
1645 re_token_t *node = dfa->nodes + nodes->elems[i];
1646 re_token_type_t type = node->type;
1647 if (type == CHARACTER && !node->constraint)
1649 #ifdef RE_ENABLE_I18N
1650 newstate->accept_mb |= node->accept_mb;
1651 #endif /* RE_ENABLE_I18N */
1653 /* If the state has the halt node, the state is a halt state. */
1654 if (type == END_OF_RE)
1656 else if (type == OP_BACK_REF)
1657 newstate->has_backref = 1;
1658 else if (type == ANCHOR || node->constraint)
1659 newstate->has_constraint = 1;
1661 err = register_state (dfa, newstate, hash);
1662 if (BE (err != REG_NOERROR, 0))
1664 free_state (newstate);
1670 /* Create the new state which is depend on the context CONTEXT.
1671 Return the new state if succeeded, otherwise return NULL. */
1673 static re_dfastate_t *
1674 internal_function __attribute_warn_unused_result__
1675 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1676 unsigned int context, re_hashval_t hash)
1678 Idx i, nctx_nodes = 0;
1680 re_dfastate_t *newstate;
1682 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1683 if (BE (newstate == NULL, 0))
1685 err = re_node_set_init_copy (&newstate->nodes, nodes);
1686 if (BE (err != REG_NOERROR, 0))
1692 newstate->context = context;
1693 newstate->entrance_nodes = &newstate->nodes;
1695 for (i = 0 ; i < nodes->nelem ; i++)
1697 re_token_t *node = dfa->nodes + nodes->elems[i];
1698 re_token_type_t type = node->type;
1699 unsigned int constraint = node->constraint;
1701 if (type == CHARACTER && !constraint)
1703 #ifdef RE_ENABLE_I18N
1704 newstate->accept_mb |= node->accept_mb;
1705 #endif /* RE_ENABLE_I18N */
1707 /* If the state has the halt node, the state is a halt state. */
1708 if (type == END_OF_RE)
1710 else if (type == OP_BACK_REF)
1711 newstate->has_backref = 1;
1715 if (newstate->entrance_nodes == &newstate->nodes)
1717 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1718 if (BE (newstate->entrance_nodes == NULL, 0))
1720 free_state (newstate);
1723 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1727 newstate->has_constraint = 1;
1730 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1732 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1737 err = register_state (dfa, newstate, hash);
1738 if (BE (err != REG_NOERROR, 0))
1740 free_state (newstate);