00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "XmlReader.h"
00025
00026
00027 #include <string.h>
00028 #include <stdlib.h>
00029
00030
00031 int yyFlexLexer::yylex() { return 0; }
00032
00033 using namespace std;
00034
00035 namespace Tanl {
00036 namespace XML {
00037
00038 XmlReader::XmlReader(istream& stream) :
00039 NodeType(None),
00040 state(Initial),
00041 depth(0),
00042 isEmptyElement(false),
00043 scanner(&stream),
00044 encoding(Text::Encoding::get("UTF-8"))
00045 {
00046 }
00047
00048 static inline string decode(char const* start, size_t len, Text::Encoding const* from)
00049 {
00050 static Text::Encoding const* utf8Encoding = Text::Encoding::get("UTF-8");
00051
00052 if (from == utf8Encoding)
00053 return string(start, len);
00054 else {
00055 char* conv;
00056 size_t l = utf8Encoding->Encode(from, start, len, conv);
00057 string out = string(conv, l);
00058 free(conv);
00059 return out;
00060 }
00061 }
00062
00063 bool XmlReader::readAttributes()
00064 {
00065 attrIndex = -1;
00066 while (true) {
00067 int tokType = scanner.scan();
00068 switch (tokType) {
00069 case ID: {
00070 string name = scanner.YYText();
00071 switch (scanner.scan()) {
00072 case EQ:
00073 switch (scanner.scan()) {
00074 case VAL: {
00075 string value = scanner.YYText();
00076 attrs.push_back(NodeAttr(name, value));
00077 continue;
00078 }
00079 case EOF:
00080 state = Error;
00081 throw XmlException("Premature EOF");
00082 }
00083 break;
00084 case EOF:
00085 state = Error;
00086 throw XmlException("Premature EOF");
00087 default:
00088 state = Error;
00089 throw XmlException("Syntax error: no equal sign");
00090 }
00091 }
00092 case TAG_EMPTY:
00093 isEmptyElement = true;
00094 tagName.clear();
00095
00096 case TAG_END:
00097 return true;
00098 }
00099 }
00100 return false;
00101 }
00102
00103 bool XmlReader::Read() throw(XmlException)
00104 {
00105 if (Eof())
00106 return false;
00107 state = Interactive;
00108 while (true) {
00109 Name.clear();
00110 Value.clear();
00111 int tokType = scanner.scan();
00112 switch (tokType) {
00113 case XML_DECL:
00114 NodeType = XmlDeclaration;
00115 Name = "xml";
00116 return readAttributes();
00117 case PROC_INST:
00118 NodeType = ProcessingInstruction;
00119
00120 Value = decode(scanner.YYText()+2, scanner.YYLeng()-4, encoding);
00121 return true;
00122 case DOCTYPE:
00123 NodeType = DocumentType;
00124
00125 Value = decode(scanner.YYText()+10, scanner.YYLeng()-11, encoding);
00126 return true;
00127 case COMMENT:
00128 NodeType = Comment;
00129 Value = decode(scanner.YYText()+4, scanner.YYLeng()-7, encoding);
00130 return true;
00131 case TAG_OPEN: {
00132 attrs.clear();
00133 attrIndex = 0;
00134 tokType = scanner.scan();
00135 if (tokType == ID) {
00136 NodeType = Element;
00137 Name = tagName = scanner.YYText();
00138 } else if (tokType == EndOfFile) {
00139 state = Error;
00140 throw XmlException("Premature EOF");
00141 } else {
00142 state = Error;
00143 throw XmlException("Syntax error: no element name");
00144 }
00145 if (readAttributes()) {
00146 ++depth;
00147 return true;
00148 }
00149 return false;
00150 }
00151 case TAG_CLOSE:
00152 tagName.clear();
00153 if (depth) {
00154 NodeType = EndElement;
00155 char const* text = scanner.YYText()+2;
00156
00157 char const* end = strpbrk(text, " \n\r\t>");
00158 Name = string(text, end - text);
00159 --depth;
00160 return true;
00161 } else {
00162 state = Error;
00163 throw XmlException("Syntax error: extra close tag");
00164 }
00165 case CHARACTERS:
00166 NodeType = Text;
00167 Value = decode(scanner.YYText(), scanner.YYLeng(), encoding);
00168 return true;
00169 case RAW_CHARS:
00170 NodeType = CDATA;
00171 Value = decode(scanner.YYText(), scanner.YYLeng(), encoding);
00172 return true;
00173 case WHITESPACE:
00174 NodeType = Whitespace;
00175 Value = scanner.YYText();
00176 return true;
00177 case EoF:
00178 state = EndOfFile;
00179 return false;
00180 default:
00181 state = Error;
00182 throw XmlException("Syntax error: ");
00183 }
00184 }
00185 }
00186
00187 bool XmlReader::MoveToFirstAttribute()
00188 {
00189 if (!attrs.size())
00190 return false;
00191 attrIndex = 0;
00192 NodeType = Attribute;
00193 Name = attrs[attrIndex].name;
00194 Value = attrs[attrIndex].value;
00195 return true;
00196 }
00197
00198 bool XmlReader::MoveToNextAttribute()
00199 {
00200 if (++attrIndex < attrs.size()) {
00201 NodeType = Attribute;
00202 Name = attrs[attrIndex].name;
00203 Value = attrs[attrIndex].value;
00204 return true;
00205 } else
00206 return false;
00207 }
00208
00209 }
00210 }