2 www.sourceforge.net/projects/tinyxml
\r
3 Original code by Lee Thomason (www.grinninglizard.com)
\r
5 This software is provided 'as-is', without any express or implied
\r
6 warranty. In no event will the authors be held liable for any
\r
7 damages arising from the use of this software.
\r
9 Permission is granted to anyone to use this software for any
\r
10 purpose, including commercial applications, and to alter it and
\r
11 redistribute it freely, subject to the following restrictions:
\r
13 1. The origin of this software must not be misrepresented; you must
\r
14 not claim that you wrote the original software. If you use this
\r
15 software in a product, an acknowledgment in the product documentation
\r
16 would be appreciated but is not required.
\r
18 2. Altered source versions must be plainly marked as such, and
\r
19 must not be misrepresented as being the original software.
\r
21 3. This notice may not be removed or altered from any source
\r
28 #include "tinyxml.h"
\r
30 //#define DEBUG_PARSER
\r
31 #if defined(DEBUG_PARSER)
\r
32 #if defined(DEBUG) && defined(_MSC_VER)
\r
33 #include <windows.h>
\r
34 #define TIXML_LOG OutputDebugString
\r
36 #define TIXML_LOG printf
\r
40 // Note that "PutString" hardcodes the same list. This
\r
41 // is less flexible than it appears. Changing the entries
\r
42 // or order will break putstring.
\r
43 TiXmlBase::Entity TiXmlBase::entity[TiXmlBase::NUM_ENTITY] =
\r
45 { "&", 5, '&' },
\r
48 { """, 6, '\"' },
\r
49 { "'", 6, '\'' }
\r
52 // Bunch of unicode info at:
\r
53 // http://www.unicode.org/faq/utf_bom.html
\r
54 // Including the basic of this table, which determines the #bytes in the
\r
55 // sequence from the lead byte. 1 placed for invalid sequences --
\r
56 // although the result will be junk, pass it through as much as possible.
\r
57 // Beware of the non-characters in UTF-8:
\r
58 // ef bb bf (Microsoft "lead bytes")
\r
62 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
\r
63 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
\r
64 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
\r
66 const int TiXmlBase::utf8ByteTable[256] =
\r
68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
\r
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
\r
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
\r
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
\r
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
\r
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
\r
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
\r
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
\r
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
\r
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
\r
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
\r
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
\r
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
\r
81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
\r
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
\r
83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
\r
84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
\r
87 void TiXmlBase::ConvertUTF32ToUTF8(unsigned long input, char *output, int *length)
\r
89 const unsigned long BYTE_MASK = 0xBF;
\r
90 const unsigned long BYTE_MARK = 0x80;
\r
91 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
\r
95 else if (input < 0x800)
\r
97 else if (input < 0x10000)
\r
99 else if (input < 0x200000)
\r
103 *length = 0; // This code won't covert this correctly anyway.
\r
109 // Scary scary fall throughs.
\r
114 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
\r
118 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
\r
122 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
\r
126 *output = (char)(input | FIRST_BYTE_MARK[*length]);
\r
130 /*static*/ int TiXmlBase::IsAlpha(unsigned char anyByte, TiXmlEncoding /*encoding*/)
\r
132 // This will only work for low-ascii, everything else is assumed to be a valid
\r
133 // letter. I'm not sure this is the best approach, but it is quite tricky trying
\r
134 // to figure out alphabetical vs. not across encoding. So take a very
\r
135 // conservative approach.
\r
137 // if ( encoding == TIXML_ENCODING_UTF8 )
\r
140 return isalpha(anyByte);
\r
142 return 1; // What else to do? The unicode set is huge...get the english ones right.
\r
146 // return isalpha( anyByte );
\r
150 /*static*/ int TiXmlBase::IsAlphaNum(unsigned char anyByte, TiXmlEncoding /*encoding*/)
\r
152 // This will only work for low-ascii, everything else is assumed to be a valid
\r
153 // letter. I'm not sure this is the best approach, but it is quite tricky trying
\r
154 // to figure out alphabetical vs. not across encoding. So take a very
\r
155 // conservative approach.
\r
157 // if ( encoding == TIXML_ENCODING_UTF8 )
\r
160 return isalnum(anyByte);
\r
162 return 1; // What else to do? The unicode set is huge...get the english ones right.
\r
166 // return isalnum( anyByte );
\r
170 class TiXmlParsingData
\r
172 friend class TiXmlDocument;
\r
175 void Stamp(const char *now, TiXmlEncoding encoding);
\r
177 const TiXmlCursor &Cursor() const
\r
183 // Only used by the document!
\r
184 TiXmlParsingData(const char *start, int _tabsize, int row, int col)
\r
188 tabsize = _tabsize;
\r
193 TiXmlCursor cursor;
\r
198 void TiXmlParsingData::Stamp(const char *now, TiXmlEncoding encoding)
\r
202 // Do nothing if the tabsize is 0.
\r
208 // Get the current row, column.
\r
209 int row = cursor.row;
\r
210 int col = cursor.col;
\r
211 const char *p = stamp;
\r
216 // Treat p as unsigned, so we have a happy compiler.
\r
217 const unsigned char *pU = (const unsigned char *)p;
\r
219 // Code contributed by Fletcher Dunn: (modified by lee)
\r
223 // We *should* never get here, but in case we do, don't
\r
224 // advance past the terminating null character, ever
\r
228 // bump down to the next line
\r
231 // Eat the character
\r
234 // Check for \r\n sequence, and treat this as a single character
\r
242 // bump down to the next line
\r
246 // Eat the character
\r
249 // Check for \n\r sequence, and treat this as a single
\r
250 // character. (Yes, this bizarre thing does occur still
\r
251 // on some arcane platforms...)
\r
259 // Eat the character
\r
262 // Skip to next tab stop
\r
263 col = (col / tabsize + 1) * tabsize;
\r
266 case TIXML_UTF_LEAD_0:
\r
267 if (encoding == TIXML_ENCODING_UTF8)
\r
269 if (*(p + 1) && *(p + 2))
\r
271 // In these cases, don't advance the column. These are
\r
273 if (*(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2) == TIXML_UTF_LEAD_2)
\r
275 else if (*(pU + 1) == 0xbfU && *(pU + 2) == 0xbeU)
\r
277 else if (*(pU + 1) == 0xbfU && *(pU + 2) == 0xbfU)
\r
281 p += 3; // A normal character.
\r
294 if (encoding == TIXML_ENCODING_UTF8)
\r
296 // Eat the 1 to 4 byte utf8 character.
\r
297 int step = TiXmlBase::utf8ByteTable[*((const unsigned char *)p)];
\r
299 step = 1; // Error case from bad encoding, but handle gracefully.
\r
302 // Just advance one column, of course.
\r
315 assert(cursor.row >= -1);
\r
316 assert(cursor.col >= -1);
\r
321 const char *TiXmlBase::SkipWhiteSpace(const char *p, TiXmlEncoding encoding)
\r
327 if (encoding == TIXML_ENCODING_UTF8)
\r
331 const unsigned char *pU = (const unsigned char *)p;
\r
333 // Skip the stupid Microsoft UTF-8 Byte order marks
\r
334 if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2) == TIXML_UTF_LEAD_2)
\r
339 else if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == 0xbfU && *(pU + 2) == 0xbeU)
\r
344 else if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == 0xbfU && *(pU + 2) == 0xbfU)
\r
350 if (IsWhiteSpace(*p)) // Still using old rules for white space.
\r
358 while (*p && IsWhiteSpace(*p))
\r
365 #ifdef TIXML_USE_STL
\r
366 /*static*/ bool TiXmlBase::StreamWhiteSpace(std::istream *in, TIXML_STRING *tag)
\r
373 int c = in->peek();
\r
374 // At this scope, we can't get to a document. So fail silently.
\r
375 if (!IsWhiteSpace(c) || c <= 0)
\r
378 *tag += (char)in->get();
\r
382 /*static*/ bool TiXmlBase::StreamTo(std::istream *in, int character, TIXML_STRING *tag)
\r
384 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
\r
387 int c = in->peek();
\r
388 if (c == character)
\r
390 if (c <= 0) // Silent failure: can't get document at this scope
\r
400 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
\r
401 // "assign" optimization removes over 10% of the execution time.
\r
403 const char *TiXmlBase::ReadName(const char *p, TIXML_STRING *name, TiXmlEncoding encoding)
\r
405 // Oddly, not supported on some compilers,
\r
411 // Names start with letters or underscores.
\r
412 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
\r
413 // algorithm is generous.
\r
415 // After that, they can be letters, underscores, numbers,
\r
416 // hyphens, or colons. (Colons are valid only for namespaces,
\r
417 // but tinyxml can't tell namespaces from names.)
\r
418 if (p && *p && (IsAlpha((unsigned char)*p, encoding) || *p == '_'))
\r
420 const char *start = p;
\r
421 while (p && *p && (IsAlphaNum((unsigned char)*p, encoding) || *p == '_' || *p == '-' || *p == '.' || *p == ':'))
\r
423 //(*name) += *p; // expensive
\r
428 name->assign(start, p - start);
\r
435 const char *TiXmlBase::GetEntity(const char *p, char *value, int *length, TiXmlEncoding encoding)
\r
437 // Presume an entity, and pull it out.
\r
442 if (*(p + 1) && *(p + 1) == '#' && *(p + 2))
\r
444 unsigned long ucs = 0;
\r
445 ptrdiff_t delta = 0;
\r
448 if (*(p + 2) == 'x')
\r
454 const char *q = p + 3;
\r
455 q = strchr(q, ';');
\r
465 if (*q >= '0' && *q <= '9')
\r
466 ucs += mult * (*q - '0');
\r
467 else if (*q >= 'a' && *q <= 'f')
\r
468 ucs += mult * (*q - 'a' + 10);
\r
469 else if (*q >= 'A' && *q <= 'F')
\r
470 ucs += mult * (*q - 'A' + 10);
\r
483 const char *q = p + 2;
\r
484 q = strchr(q, ';');
\r
494 if (*q >= '0' && *q <= '9')
\r
495 ucs += mult * (*q - '0');
\r
502 if (encoding == TIXML_ENCODING_UTF8)
\r
504 // convert the UCS to UTF-8
\r
505 ConvertUTF32ToUTF8(ucs, value, length);
\r
509 *value = (char)ucs;
\r
512 return p + delta + 1;
\r
515 // Now try to match it.
\r
516 for (i = 0; i < NUM_ENTITY; ++i)
\r
518 if (strncmp(entity[i].str, p, entity[i].strLength) == 0)
\r
520 assert(strlen(entity[i].str) == entity[i].strLength);
\r
521 *value = entity[i].chr;
\r
523 return (p + entity[i].strLength);
\r
527 // So it wasn't an entity, it's unrecognized, or something like that.
\r
528 *value = *p; // Don't put back the last one, since we return it!
\r
529 //*length = 1; // Leave unrecognized entities - this doesn't really work.
\r
530 // Just writes strange XML.
\r
534 bool TiXmlBase::StringEqual(const char *p,
\r
537 TiXmlEncoding encoding)
\r
551 while (*q && *tag && ToLower(*q, encoding) == ToLower(*tag, encoding))
\r
562 while (*q && *tag && *q == *tag)
\r
568 if (*tag == 0) // Have we found the end of the tag, and everything equal?
\r
574 const char *TiXmlBase::ReadText(const char *p,
\r
575 TIXML_STRING *text,
\r
576 bool trimWhiteSpace,
\r
577 const char *endTag,
\r
578 bool caseInsensitive,
\r
579 TiXmlEncoding encoding)
\r
582 if (!trimWhiteSpace // certain tags always keep whitespace
\r
583 || !condenseWhiteSpace) // if true, whitespace is always kept
\r
585 // Keep all the white space.
\r
586 while (p && *p && !StringEqual(p, endTag, caseInsensitive, encoding))
\r
589 char cArr[4] = { 0, 0, 0, 0 };
\r
590 p = GetChar(p, cArr, &len, encoding);
\r
591 text->append(cArr, len);
\r
596 bool whitespace = false;
\r
598 // Remove leading white space:
\r
599 p = SkipWhiteSpace(p, encoding);
\r
600 while (p && *p && !StringEqual(p, endTag, caseInsensitive, encoding))
\r
602 if (*p == '\r' || *p == '\n')
\r
607 else if (IsWhiteSpace(*p))
\r
614 // If we've found whitespace, add it before the
\r
615 // new character. Any whitespace just becomes a space.
\r
619 whitespace = false;
\r
622 char cArr[4] = { 0, 0, 0, 0 };
\r
623 p = GetChar(p, cArr, &len, encoding);
\r
625 (*text) += cArr[0]; // more efficient
\r
627 text->append(cArr, len);
\r
632 p += strlen(endTag);
\r
633 return (p && *p) ? p : 0;
\r
636 #ifdef TIXML_USE_STL
\r
638 void TiXmlDocument::StreamIn(std::istream *in, TIXML_STRING *tag)
\r
640 // The basic issue with a document is that we don't know what we're
\r
641 // streaming. Read something presumed to be a tag (and hope), then
\r
642 // identify it, and call the appropriate stream method on the tag.
\r
644 // This "pre-streaming" will never read the closing ">" so the
\r
645 // sub-tag can orient itself.
\r
647 if (!StreamTo(in, '<', tag))
\r
649 SetError(TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
655 int tagIndex = (int)tag->length();
\r
656 while (in->good() && in->peek() != '>')
\r
661 SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
669 // We now have something we presume to be a node of
\r
670 // some sort. Identify it, and call the node to
\r
671 // continue streaming.
\r
672 TiXmlNode *node = Identify(tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING);
\r
676 node->StreamIn(in, tag);
\r
677 bool isElement = node->ToElement() != 0;
\r
681 // If this is the root element, we're done. Parsing will be
\r
682 // done by the >> operator.
\r
690 SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
695 // We should have returned sooner.
\r
696 SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
701 const char *TiXmlDocument::Parse(const char *p, TiXmlParsingData *prevData, TiXmlEncoding encoding)
\r
705 // Parse away, at the document level. Since a document
\r
706 // contains nothing but other tags, most of what happens
\r
707 // here is skipping white space.
\r
710 SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
714 // Note that, for a document, this needs to come
\r
715 // before the white space skip, so that parsing
\r
716 // starts from the pointer we are given.
\r
720 location.row = prevData->cursor.row;
\r
721 location.col = prevData->cursor.col;
\r
728 TiXmlParsingData data(p, TabSize(), location.row, location.col);
\r
729 location = data.Cursor();
\r
731 if (encoding == TIXML_ENCODING_UNKNOWN)
\r
733 // Check for the Microsoft UTF-8 lead bytes.
\r
734 const unsigned char *pU = (const unsigned char *)p;
\r
735 if (*(pU + 0) && *(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) && *(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2) && *(pU + 2) == TIXML_UTF_LEAD_2)
\r
737 encoding = TIXML_ENCODING_UTF8;
\r
738 useMicrosoftBOM = true;
\r
742 p = SkipWhiteSpace(p, encoding);
\r
745 SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
751 TiXmlNode *node = Identify(p, encoding);
\r
754 p = node->Parse(p, &data, encoding);
\r
755 LinkEndChild(node);
\r
762 // Did we get encoding info?
\r
763 if (encoding == TIXML_ENCODING_UNKNOWN && node->ToDeclaration())
\r
765 TiXmlDeclaration *dec = node->ToDeclaration();
\r
766 const char *enc = dec->Encoding();
\r
770 encoding = TIXML_ENCODING_UTF8;
\r
771 else if (StringEqual(enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN))
\r
772 encoding = TIXML_ENCODING_UTF8;
\r
773 else if (StringEqual(enc, "UTF8", true, TIXML_ENCODING_UNKNOWN))
\r
774 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
\r
776 encoding = TIXML_ENCODING_LEGACY;
\r
779 p = SkipWhiteSpace(p, encoding);
\r
785 SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding);
\r
793 void TiXmlDocument::SetError(int err, const char *pError, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
795 // The first error in a chain is more accurate - don't set again!
\r
799 assert(err > 0 && err < TIXML_ERROR_STRING_COUNT);
\r
802 errorDesc = errorString[errorId];
\r
804 errorLocation.Clear();
\r
805 if (pError && data)
\r
807 data->Stamp(pError, encoding);
\r
808 errorLocation = data->Cursor();
\r
812 TiXmlNode *TiXmlNode::Identify(const char *p, TiXmlEncoding encoding)
\r
814 TiXmlNode *returnNode = 0;
\r
816 p = SkipWhiteSpace(p, encoding);
\r
817 if (!p || !*p || *p != '<')
\r
822 p = SkipWhiteSpace(p, encoding);
\r
829 // What is this thing?
\r
830 // - Elements start with a letter or underscore, but xml is reserved.
\r
831 // - Comments: <!--
\r
832 // - Declaration: <?xml
\r
833 // - Everything else is unknown to tinyxml.
\r
836 const char *xmlHeader = { "<?xml" };
\r
837 const char *commentHeader = { "<!--" };
\r
838 const char *dtdHeader = { "<!" };
\r
839 const char *cdataHeader = { "<![CDATA[" };
\r
841 if (StringEqual(p, xmlHeader, true, encoding))
\r
843 #ifdef DEBUG_PARSER
\r
844 TIXML_LOG("XML parsing Declaration\n");
\r
846 returnNode = new TiXmlDeclaration();
\r
848 else if (StringEqual(p, commentHeader, false, encoding))
\r
850 #ifdef DEBUG_PARSER
\r
851 TIXML_LOG("XML parsing Comment\n");
\r
853 returnNode = new TiXmlComment();
\r
855 else if (StringEqual(p, cdataHeader, false, encoding))
\r
857 #ifdef DEBUG_PARSER
\r
858 TIXML_LOG("XML parsing CDATA\n");
\r
860 TiXmlText *text = new TiXmlText("");
\r
861 text->SetCDATA(true);
\r
864 else if (StringEqual(p, dtdHeader, false, encoding))
\r
866 #ifdef DEBUG_PARSER
\r
867 TIXML_LOG("XML parsing Unknown(1)\n");
\r
869 returnNode = new TiXmlUnknown();
\r
871 else if (IsAlpha(*(p + 1), encoding) || *(p + 1) == '_')
\r
873 #ifdef DEBUG_PARSER
\r
874 TIXML_LOG("XML parsing Element\n");
\r
876 returnNode = new TiXmlElement("");
\r
880 #ifdef DEBUG_PARSER
\r
881 TIXML_LOG("XML parsing Unknown(2)\n");
\r
883 returnNode = new TiXmlUnknown();
\r
888 // Set the parent, so it can report errors
\r
889 returnNode->parent = this;
\r
894 #ifdef TIXML_USE_STL
\r
896 void TiXmlElement::StreamIn(std::istream *in, TIXML_STRING *tag)
\r
898 // We're called with some amount of pre-parsing. That is, some of "this"
\r
899 // element is in "tag". Go ahead and stream to the closing ">"
\r
905 TiXmlDocument *document = GetDocument();
\r
907 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
916 if (tag->length() < 3)
\r
919 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
\r
920 // If not, identify and stream.
\r
922 if (tag->at(tag->length() - 1) == '>' && tag->at(tag->length() - 2) == '/')
\r
927 else if (tag->at(tag->length() - 1) == '>')
\r
929 // There is more. Could be:
\r
931 // cdata text (which looks like another node)
\r
936 StreamWhiteSpace(in, tag);
\r
938 // Do we have text?
\r
939 if (in->good() && in->peek() != '<')
\r
942 TiXmlText text("");
\r
943 text.StreamIn(in, tag);
\r
945 // What follows text is a closing tag or another node.
\r
946 // Go around again and figure it out.
\r
950 // We now have either a closing tag...or another node.
\r
951 // We should be at a "<", regardless.
\r
954 assert(in->peek() == '<');
\r
955 int tagIndex = (int)tag->length();
\r
957 bool closingTag = false;
\r
958 bool firstCharFound = false;
\r
965 int c = in->peek();
\r
968 TiXmlDocument *document = GetDocument();
\r
970 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
980 // Early out if we find the CDATA id.
\r
981 if (c == '[' && tag->size() >= 9)
\r
983 size_t len = tag->size();
\r
984 const char *start = tag->c_str() + len - 9;
\r
985 if (strcmp(start, "<![CDATA[") == 0)
\r
987 assert(!closingTag);
\r
992 if (!firstCharFound && c != '<' && !IsWhiteSpace(c))
\r
994 firstCharFound = true;
\r
999 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
\r
1000 // If it was not, the streaming will be done by the tag.
\r
1006 int c = in->get();
\r
1009 TiXmlDocument *document = GetDocument();
\r
1011 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
1017 // We are done, once we've found our closing tag.
\r
1022 // If not a closing tag, id it, and stream.
\r
1023 const char *tagloc = tag->c_str() + tagIndex;
\r
1024 TiXmlNode *node = Identify(tagloc, TIXML_DEFAULT_ENCODING);
\r
1027 node->StreamIn(in, tag);
\r
1031 // No return: go around from the beginning: text, closing tag, or node.
\r
1038 const char *TiXmlElement::Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
1040 p = SkipWhiteSpace(p, encoding);
\r
1041 TiXmlDocument *document = GetDocument();
\r
1046 document->SetError(TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding);
\r
1052 data->Stamp(p, encoding);
\r
1053 location = data->Cursor();
\r
1059 document->SetError(TIXML_ERROR_PARSING_ELEMENT, p, data, encoding);
\r
1063 p = SkipWhiteSpace(p + 1, encoding);
\r
1066 const char *pErr = p;
\r
1068 p = ReadName(p, &value, encoding);
\r
1072 document->SetError(TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding);
\r
1076 TIXML_STRING endTag("</");
\r
1079 // Check for and read attributes. Also look for an empty
\r
1080 // tag or an end tag.
\r
1084 p = SkipWhiteSpace(p, encoding);
\r
1088 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding);
\r
1098 document->SetError(TIXML_ERROR_PARSING_EMPTY, p, data, encoding);
\r
1103 else if (*p == '>')
\r
1105 // Done with attributes (if there were any.)
\r
1106 // Read the value -- which can include other
\r
1107 // elements -- read the end tag, and return.
\r
1109 p = ReadValue(p, data, encoding); // Note this is an Element method, and will set the error if one happens.
\r
1112 // We were looking for the end tag, but found nothing.
\r
1113 // Fix for [ 1663758 ] Failure to report error on bad XML
\r
1115 document->SetError(TIXML_ERROR_READING_END_TAG, p, data, encoding);
\r
1119 // We should find the end tag now
\r
1123 // are both valid end tags.
\r
1124 if (StringEqual(p, endTag.c_str(), false, encoding))
\r
1126 p += endTag.length();
\r
1127 p = SkipWhiteSpace(p, encoding);
\r
1128 if (p && *p && *p == '>')
\r
1134 document->SetError(TIXML_ERROR_READING_END_TAG, p, data, encoding);
\r
1140 document->SetError(TIXML_ERROR_READING_END_TAG, p, data, encoding);
\r
1146 // Try to read an attribute:
\r
1147 TiXmlAttribute *attrib = new TiXmlAttribute();
\r
1153 attrib->SetDocument(document);
\r
1155 p = attrib->Parse(p, data, encoding);
\r
1160 document->SetError(TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding);
\r
1165 // Handle the strange case of double attributes:
\r
1166 #ifdef TIXML_USE_STL
\r
1167 TiXmlAttribute *node = attributeSet.Find(attrib->NameTStr());
\r
1169 TiXmlAttribute *node = attributeSet.Find(attrib->Name());
\r
1174 document->SetError(TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding);
\r
1179 attributeSet.Add(attrib);
\r
1185 const char *TiXmlElement::ReadValue(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
1187 TiXmlDocument *document = GetDocument();
\r
1189 // Read in text and elements in any order.
\r
1190 const char *pWithWhiteSpace = p;
\r
1191 p = SkipWhiteSpace(p, encoding);
\r
1197 // Take what we have, make a text element.
\r
1198 TiXmlText *textNode = new TiXmlText("");
\r
1205 if (TiXmlBase::IsWhiteSpaceCondensed())
\r
1207 p = textNode->Parse(p, data, encoding);
\r
1211 // Special case: we want to keep the white space
\r
1212 // so that leading spaces aren't removed.
\r
1213 p = textNode->Parse(pWithWhiteSpace, data, encoding);
\r
1216 if (!textNode->Blank())
\r
1217 LinkEndChild(textNode);
\r
1224 // Have we hit a new element or an end tag? This could also be
\r
1225 // a TiXmlText in the "CDATA" style.
\r
1226 if (StringEqual(p, "</", false, encoding))
\r
1232 TiXmlNode *node = Identify(p, encoding);
\r
1235 p = node->Parse(p, data, encoding);
\r
1236 LinkEndChild(node);
\r
1244 pWithWhiteSpace = p;
\r
1245 p = SkipWhiteSpace(p, encoding);
\r
1251 document->SetError(TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding);
\r
1256 #ifdef TIXML_USE_STL
\r
1257 void TiXmlUnknown::StreamIn(std::istream *in, TIXML_STRING *tag)
\r
1259 while (in->good())
\r
1261 int c = in->get();
\r
1264 TiXmlDocument *document = GetDocument();
\r
1266 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
1269 (*tag) += (char)c;
\r
1280 const char *TiXmlUnknown::Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
1282 TiXmlDocument *document = GetDocument();
\r
1283 p = SkipWhiteSpace(p, encoding);
\r
1287 data->Stamp(p, encoding);
\r
1288 location = data->Cursor();
\r
1290 if (!p || !*p || *p != '<')
\r
1293 document->SetError(TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding);
\r
1299 while (p && *p && *p != '>')
\r
1308 document->SetError(TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding);
\r
1310 if (p && *p == '>')
\r
1315 #ifdef TIXML_USE_STL
\r
1316 void TiXmlComment::StreamIn(std::istream *in, TIXML_STRING *tag)
\r
1318 while (in->good())
\r
1320 int c = in->get();
\r
1323 TiXmlDocument *document = GetDocument();
\r
1325 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
1329 (*tag) += (char)c;
\r
1331 if (c == '>' && tag->at(tag->length() - 2) == '-' && tag->at(tag->length() - 3) == '-')
\r
1340 const char *TiXmlComment::Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
1342 TiXmlDocument *document = GetDocument();
\r
1345 p = SkipWhiteSpace(p, encoding);
\r
1349 data->Stamp(p, encoding);
\r
1350 location = data->Cursor();
\r
1352 const char *startTag = "<!--";
\r
1353 const char *endTag = "-->";
\r
1355 if (!StringEqual(p, startTag, false, encoding))
\r
1358 document->SetError(TIXML_ERROR_PARSING_COMMENT, p, data, encoding);
\r
1361 p += strlen(startTag);
\r
1363 // [ 1475201 ] TinyXML parses entities in comments
\r
1364 // Oops - ReadText doesn't work, because we don't want to parse the entities.
\r
1365 // p = ReadText( p, &value, false, endTag, false, encoding );
\r
1367 // from the XML spec:
\r
1369 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
\r
1370 they may appear within the document type declaration at places allowed by the grammar.
\r
1371 They are not part of the document's character data; an XML processor MAY, but need not,
\r
1372 make it possible for an application to retrieve the text of comments. For compatibility,
\r
1373 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
\r
1374 references MUST NOT be recognized within comments.
\r
1376 An example of a comment:
\r
1378 <!-- declarations for <head> & <body> -->
\r
1382 // Keep all the white space.
\r
1383 while (p && *p && !StringEqual(p, endTag, false, encoding))
\r
1385 value.append(p, 1);
\r
1389 p += strlen(endTag);
\r
1394 const char *TiXmlAttribute::Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
1396 p = SkipWhiteSpace(p, encoding);
\r
1402 data->Stamp(p, encoding);
\r
1403 location = data->Cursor();
\r
1405 // Read the name, the '=' and the value.
\r
1406 const char *pErr = p;
\r
1407 p = ReadName(p, &name, encoding);
\r
1411 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding);
\r
1414 p = SkipWhiteSpace(p, encoding);
\r
1415 if (!p || !*p || *p != '=')
\r
1418 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
\r
1423 p = SkipWhiteSpace(p, encoding);
\r
1427 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
\r
1432 const char SINGLE_QUOTE = '\'';
\r
1433 const char DOUBLE_QUOTE = '\"';
\r
1435 if (*p == SINGLE_QUOTE)
\r
1438 end = "\'"; // single quote in string
\r
1439 p = ReadText(p, &value, false, end, false, encoding);
\r
1441 else if (*p == DOUBLE_QUOTE)
\r
1444 end = "\""; // double quote in string
\r
1445 p = ReadText(p, &value, false, end, false, encoding);
\r
1449 // All attribute values should be in single or double quotes.
\r
1450 // But this is such a common error that the parser will try
\r
1451 // its best, even without them.
\r
1453 while (p && *p // existence
\r
1454 && !IsWhiteSpace(*p) // whitespace
\r
1455 && *p != '/' && *p != '>') // tag end
\r
1457 if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE)
\r
1459 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
\r
1460 // We did not have an opening quote but seem to have a
\r
1461 // closing one. Give up and throw an error.
\r
1463 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
\r
1473 #ifdef TIXML_USE_STL
\r
1474 void TiXmlText::StreamIn(std::istream *in, TIXML_STRING *tag)
\r
1476 while (in->good())
\r
1478 int c = in->peek();
\r
1479 if (!cdata && (c == '<'))
\r
1485 TiXmlDocument *document = GetDocument();
\r
1487 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
1491 (*tag) += (char)c;
\r
1492 in->get(); // "commits" the peek made above
\r
1494 if (cdata && c == '>' && tag->size() >= 3)
\r
1496 size_t len = tag->size();
\r
1497 if ((*tag)[len - 2] == ']' && (*tag)[len - 3] == ']')
\r
1499 // terminator of cdata.
\r
1507 const char *TiXmlText::Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
\r
1510 TiXmlDocument *document = GetDocument();
\r
1514 data->Stamp(p, encoding);
\r
1515 location = data->Cursor();
\r
1518 const char *const startTag = "<![CDATA[";
\r
1519 const char *const endTag = "]]>";
\r
1521 if (cdata || StringEqual(p, startTag, false, encoding))
\r
1525 if (!StringEqual(p, startTag, false, encoding))
\r
1528 document->SetError(TIXML_ERROR_PARSING_CDATA, p, data, encoding);
\r
1531 p += strlen(startTag);
\r
1533 // Keep all the white space, ignore the encoding, etc.
\r
1534 while (p && *p && !StringEqual(p, endTag, false, encoding))
\r
1540 TIXML_STRING dummy;
\r
1541 p = ReadText(p, &dummy, false, endTag, false, encoding);
\r
1546 bool ignoreWhite = true;
\r
1548 const char *end = "<";
\r
1549 p = ReadText(p, &value, ignoreWhite, end, false, encoding);
\r
1551 return p - 1; // don't truncate the '<'
\r
1556 #ifdef TIXML_USE_STL
\r
1557 void TiXmlDeclaration::StreamIn(std::istream *in, TIXML_STRING *tag)
\r
1559 while (in->good())
\r
1561 int c = in->get();
\r
1564 TiXmlDocument *document = GetDocument();
\r
1566 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
\r
1569 (*tag) += (char)c;
\r
1580 const char *TiXmlDeclaration::Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding _encoding)
\r
1582 p = SkipWhiteSpace(p, _encoding);
\r
1583 // Find the beginning, find the end, and look for
\r
1584 // the stuff in-between.
\r
1585 TiXmlDocument *document = GetDocument();
\r
1586 if (!p || !*p || !StringEqual(p, "<?xml", true, _encoding))
\r
1589 document->SetError(TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding);
\r
1594 data->Stamp(p, _encoding);
\r
1595 location = data->Cursor();
\r
1611 p = SkipWhiteSpace(p, _encoding);
\r
1612 if (StringEqual(p, "version", true, _encoding))
\r
1614 TiXmlAttribute attrib;
\r
1615 p = attrib.Parse(p, data, _encoding);
\r
1616 version = attrib.Value();
\r
1618 else if (StringEqual(p, "encoding", true, _encoding))
\r
1620 TiXmlAttribute attrib;
\r
1621 p = attrib.Parse(p, data, _encoding);
\r
1622 encoding = attrib.Value();
\r
1624 else if (StringEqual(p, "standalone", true, _encoding))
\r
1626 TiXmlAttribute attrib;
\r
1627 p = attrib.Parse(p, data, _encoding);
\r
1628 standalone = attrib.Value();
\r
1632 // Read over whatever it is.
\r
1633 while (p && *p && *p != '>' && !IsWhiteSpace(*p))
\r
1640 bool TiXmlText::Blank() const
\r
1642 for (unsigned i = 0; i < value.length(); i++)
\r
1643 if (!IsWhiteSpace(value[i]))
\r