| #include "XMLHandler.h" |
| |
| #include <algorithm> |
| #include <expat.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <unistd.h> |
| #include <errno.h> |
| |
// Character used to separate a namespace from a local name: it is handed to
// XML_ParserCreateNS and parse_namespace() splits reported names on it.
#define NS_SEPARATOR 1
// One unit of indentation appended per nesting level when serializing nodes.
#define MORE_INDENT " "
| |
// Escapes a string for use as XML character data.
//
// '<', '>' and '&' are replaced by the entity references "&lt;", "&gt;" and
// "&amp;"; every other character is copied through unchanged.
static string
xml_text_escape(const string& s)
{
    string result;
    result.reserve(s.length());
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        const char c = s[i];
        switch (c) {
            case '<':
                result += "&lt;";
                break;
            case '>':
                result += "&gt;";
                break;
            case '&':
                result += "&amp;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
| |
// Escapes a string for use inside a double-quoted XML attribute value.
//
// Only the double quote is rewritten (to "&quot;"); all other characters are
// copied through unchanged.
// NOTE(review): '&' and '<' are not escaped here; confirm whether callers
// rely on attribute values being passed through otherwise verbatim.
static string
xml_attr_escape(const string& s)
{
    string result;
    result.reserve(s.length());
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        const char c = s[i];
        switch (c) {
            case '\"':
                result += "&quot;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
| |
| XMLNamespaceMap::XMLNamespaceMap() |
| { |
| } |
| |
| XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces) |
| |
| { |
| while (*nspaces) { |
| m_map[nspaces[1]] = nspaces[0]; |
| nspaces += 2; |
| } |
| } |
| |
| string |
| XMLNamespaceMap::Get(const string& ns) const |
| { |
| if (ns == "xml") { |
| return ns; |
| } |
| map<string,string>::const_iterator it = m_map.find(ns); |
| if (it == m_map.end()) { |
| return ""; |
| } else { |
| return it->second; |
| } |
| } |
| |
| string |
| XMLNamespaceMap::GetPrefix(const string& ns) const |
| { |
| if (ns == "") { |
| return ""; |
| } |
| map<string,string>::const_iterator it = m_map.find(ns); |
| if (it != m_map.end()) { |
| if (it->second == "") { |
| return ""; |
| } else { |
| return it->second + ":"; |
| } |
| } else { |
| return ":"; // invalid |
| } |
| } |
| |
| void |
| XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const |
| { |
| map<string,string>::const_iterator it; |
| for (it=m_map.begin(); it!=m_map.end(); it++) { |
| if (it->second == "xml") { |
| continue; |
| } |
| XMLAttribute attr; |
| if (it->second == "") { |
| attr.name = "xmlns"; |
| } else { |
| attr.name = "xmlns:"; |
| attr.name += it->second; |
| } |
| attr.value = it->first; |
| attrs->push_back(attr); |
| } |
| } |
| |
| XMLAttribute::XMLAttribute() |
| { |
| } |
| |
| XMLAttribute::XMLAttribute(const XMLAttribute& that) |
| :ns(that.ns), |
| name(that.name), |
| value(that.value) |
| { |
| } |
| |
| XMLAttribute::XMLAttribute(string n, string na, string v) |
| :ns(n), |
| name(na), |
| value(v) |
| { |
| } |
| |
| XMLAttribute::~XMLAttribute() |
| { |
| } |
| |
| int |
| XMLAttribute::Compare(const XMLAttribute& that) const |
| { |
| if (ns != that.ns) { |
| return ns < that.ns ? -1 : 1; |
| } |
| if (name != that.name) { |
| return name < that.name ? -1 : 1; |
| } |
| return 0; |
| } |
| |
| string |
| XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name, |
| const string& def) |
| { |
| const size_t N = list.size(); |
| for (size_t i=0; i<N; i++) { |
| const XMLAttribute& attr = list[i]; |
| if (attr.ns == ns && attr.name == name) { |
| return attr.value; |
| } |
| } |
| return def; |
| } |
| |
| struct xml_handler_data { |
| vector<XMLHandler*> stack; |
| XML_Parser parser; |
| vector<vector<XMLAttribute>*> attributes; |
| string filename; |
| }; |
| |
| XMLNode::XMLNode() |
| { |
| } |
| |
| XMLNode::~XMLNode() |
| { |
| // for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>); |
| } |
| |
| XMLNode* |
| XMLNode::Clone() const |
| { |
| switch (m_type) { |
| case ELEMENT: { |
| XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty); |
| const size_t N = m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| e->m_children.push_back(m_children[i]->Clone()); |
| } |
| return e; |
| } |
| case TEXT: { |
| return XMLNode::NewText(m_pos, m_text, m_pretty); |
| } |
| default: |
| return NULL; |
| } |
| } |
| |
| XMLNode* |
| XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name, |
| const vector<XMLAttribute>& attrs, int pretty) |
| { |
| XMLNode* node = new XMLNode(); |
| node->m_type = ELEMENT; |
| node->m_pretty = pretty; |
| node->m_pos = pos; |
| node->m_ns = ns; |
| node->m_name = name; |
| node->m_attrs = attrs; |
| return node; |
| } |
| |
| XMLNode* |
| XMLNode::NewText(const SourcePos& pos, const string& text, int pretty) |
| { |
| XMLNode* node = new XMLNode(); |
| node->m_type = TEXT; |
| node->m_pretty = pretty; |
| node->m_pos = pos; |
| node->m_text = text; |
| return node; |
| } |
| |
| void |
| XMLNode::SetPrettyRecursive(int value) |
| { |
| m_pretty = value; |
| const size_t N = m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| m_children[i]->SetPrettyRecursive(value); |
| } |
| } |
| |
| string |
| XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const |
| { |
| return contents_to_string(nspaces, ""); |
| } |
| |
| string |
| XMLNode::ToString(const XMLNamespaceMap& nspaces) const |
| { |
| return to_string(nspaces, ""); |
| } |
| |
| string |
| XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const |
| { |
| return open_tag_to_string(nspaces, "", pretty); |
| } |
| |
| string |
| XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const |
| { |
| string result; |
| const size_t N = m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| const XMLNode* child = m_children[i]; |
| switch (child->Type()) { |
| case ELEMENT: |
| if (m_pretty == PRETTY) { |
| result += '\n'; |
| result += indent; |
| } |
| case TEXT: |
| result += child->to_string(nspaces, indent); |
| break; |
| } |
| } |
| return result; |
| } |
| |
// Returns `str` with leading and trailing whitespace (as classified by
// isspace) removed. An empty or all-whitespace input yields an empty string.
//
// Scanning for the start stops at the first NUL character, so a NUL is never
// skipped as leading whitespace.
string
trim_string(const string& str)
{
    const char* p = str.c_str();
    // isspace() requires a value representable as unsigned char; passing a
    // negative plain char (e.g. a UTF-8 byte) is undefined behavior.
    while (*p && isspace(static_cast<unsigned char>(*p))) {
        p++;
    }
    // q is a one-past-the-end cursor, so no pointer before the buffer is ever
    // formed, even for an empty string.
    const char* q = str.c_str() + str.length();
    while (q > p && isspace(static_cast<unsigned char>(q[-1]))) {
        q--;
    }
    return string(p, q-p);
}
| |
| string |
| XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const |
| { |
| if (m_type != ELEMENT) { |
| return ""; |
| } |
| string result = "<"; |
| result += nspaces.GetPrefix(m_ns); |
| result += m_name; |
| |
| vector<XMLAttribute> attrs = m_attrs; |
| |
| sort(attrs.begin(), attrs.end()); |
| |
| const size_t N = attrs.size(); |
| for (size_t i=0; i<N; i++) { |
| const XMLAttribute& attr = attrs[i]; |
| if (i == 0 || m_pretty == EXACT || pretty == EXACT) { |
| result += ' '; |
| } |
| else { |
| result += "\n"; |
| result += indent; |
| result += MORE_INDENT; |
| result += MORE_INDENT; |
| } |
| result += nspaces.GetPrefix(attr.ns); |
| result += attr.name; |
| result += "=\""; |
| result += xml_attr_escape(attr.value); |
| result += '\"'; |
| } |
| |
| if (m_children.size() > 0) { |
| result += '>'; |
| } else { |
| result += " />"; |
| } |
| return result; |
| } |
| |
| string |
| XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const |
| { |
| switch (m_type) |
| { |
| case TEXT: { |
| if (m_pretty == EXACT) { |
| return xml_text_escape(m_text); |
| } else { |
| return xml_text_escape(trim_string(m_text)); |
| } |
| } |
| case ELEMENT: { |
| string result = open_tag_to_string(nspaces, indent, PRETTY); |
| |
| if (m_children.size() > 0) { |
| result += contents_to_string(nspaces, indent + MORE_INDENT); |
| |
| if (m_pretty == PRETTY && m_children.size() > 0) { |
| result += '\n'; |
| result += indent; |
| } |
| |
| result += "</"; |
| result += nspaces.GetPrefix(m_ns); |
| result += m_name; |
| result += '>'; |
| } |
| return result; |
| } |
| default: |
| return ""; |
| } |
| } |
| |
| string |
| XMLNode::CollapseTextContents() const |
| { |
| if (m_type == TEXT) { |
| return m_text; |
| } |
| else if (m_type == ELEMENT) { |
| string result; |
| |
| const size_t N=m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| result += m_children[i]->CollapseTextContents(); |
| } |
| |
| return result; |
| } |
| else { |
| return ""; |
| } |
| } |
| |
| vector<XMLNode*> |
| XMLNode::GetElementsByName(const string& ns, const string& name) const |
| { |
| vector<XMLNode*> result; |
| const size_t N=m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| XMLNode* child = m_children[i]; |
| if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { |
| result.push_back(child); |
| } |
| } |
| return result; |
| } |
| |
| XMLNode* |
| XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const |
| { |
| vector<XMLNode*> result; |
| const size_t N=m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| XMLNode* child = m_children[i]; |
| if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { |
| if (index == 0) { |
| return child; |
| } else { |
| index--; |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| size_t |
| XMLNode::CountElementsByName(const string& ns, const string& name) const |
| { |
| size_t result = 0; |
| const size_t N=m_children.size(); |
| for (size_t i=0; i<N; i++) { |
| XMLNode* child = m_children[i]; |
| if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { |
| result++; |
| } |
| } |
| return result; |
| } |
| |
| string |
| XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const |
| { |
| return XMLAttribute::Find(m_attrs, ns, name, def); |
| } |
| |
| static void |
| parse_namespace(const char* data, string* ns, string* name) |
| { |
| const char* p = strchr(data, NS_SEPARATOR); |
| if (p != NULL) { |
| ns->assign(data, p-data); |
| name->assign(p+1); |
| } else { |
| ns->assign(""); |
| name->assign(data); |
| } |
| } |
| |
| static void |
| convert_attrs(const char** in, vector<XMLAttribute>* out) |
| { |
| while (*in) { |
| XMLAttribute attr; |
| parse_namespace(in[0], &attr.ns, &attr.name); |
| attr.value = in[1]; |
| out->push_back(attr); |
| in += 2; |
| } |
| } |
| |
| static bool |
| list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler) |
| { |
| const size_t N = stack.size(); |
| for (size_t i=0; i<N; i++) { |
| if (stack[i] == handler) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| static void XMLCALL |
| start_element_handler(void *userData, const char *name, const char **attrs) |
| { |
| xml_handler_data* data = (xml_handler_data*)userData; |
| |
| XMLHandler* handler = data->stack[data->stack.size()-1]; |
| |
| SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); |
| string nsString; |
| string nameString; |
| XMLHandler* next = handler; |
| vector<XMLAttribute> attributes; |
| |
| parse_namespace(name, &nsString, &nameString); |
| convert_attrs(attrs, &attributes); |
| |
| handler->OnStartElement(pos, nsString, nameString, attributes, &next); |
| |
| if (next == NULL) { |
| next = handler; |
| } |
| |
| if (next != handler) { |
| next->elementPos = pos; |
| next->elementNamespace = nsString; |
| next->elementName = nameString; |
| next->elementAttributes = attributes; |
| } |
| |
| data->stack.push_back(next); |
| } |
| |
| static void XMLCALL |
| end_element_handler(void *userData, const char *name) |
| { |
| xml_handler_data* data = (xml_handler_data*)userData; |
| |
| XMLHandler* handler = data->stack[data->stack.size()-1]; |
| data->stack.pop_back(); |
| |
| SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); |
| |
| if (!list_contains(data->stack, handler)) { |
| handler->OnDone(pos); |
| if (data->stack.size() > 1) { |
| // not top one |
| delete handler; |
| } |
| } |
| |
| handler = data->stack[data->stack.size()-1]; |
| |
| string nsString; |
| string nameString; |
| |
| parse_namespace(name, &nsString, &nameString); |
| |
| handler->OnEndElement(pos, nsString, nameString); |
| } |
| |
| static void XMLCALL |
| text_handler(void *userData, const XML_Char *s, int len) |
| { |
| xml_handler_data* data = (xml_handler_data*)userData; |
| XMLHandler* handler = data->stack[data->stack.size()-1]; |
| SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); |
| handler->OnText(pos, string(s, len)); |
| } |
| |
| static void XMLCALL |
| comment_handler(void *userData, const char *comment) |
| { |
| xml_handler_data* data = (xml_handler_data*)userData; |
| XMLHandler* handler = data->stack[data->stack.size()-1]; |
| SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); |
| handler->OnComment(pos, string(comment)); |
| } |
| |
| bool |
| XMLHandler::ParseFile(const string& filename, XMLHandler* handler) |
| { |
| char buf[16384]; |
| int fd = open(filename.c_str(), O_RDONLY); |
| if (fd < 0) { |
| SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno)); |
| return false; |
| } |
| |
| XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR); |
| xml_handler_data state; |
| state.stack.push_back(handler); |
| state.parser = parser; |
| state.filename = filename; |
| |
| XML_SetUserData(parser, &state); |
| XML_SetElementHandler(parser, start_element_handler, end_element_handler); |
| XML_SetCharacterDataHandler(parser, text_handler); |
| XML_SetCommentHandler(parser, comment_handler); |
| |
| ssize_t len; |
| bool done; |
| do { |
| len = read(fd, buf, sizeof(buf)); |
| done = len < (ssize_t)sizeof(buf); |
| if (len < 0) { |
| SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno)); |
| close(fd); |
| return false; |
| } |
| if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) { |
| SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error( |
| "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); |
| close(fd); |
| return false; |
| } |
| } while (!done); |
| |
| XML_ParserFree(parser); |
| |
| close(fd); |
| |
| return true; |
| } |
| |
| bool |
| XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler) |
| { |
| XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR); |
| xml_handler_data state; |
| state.stack.push_back(handler); |
| state.parser = parser; |
| state.filename = filename; |
| |
| XML_SetUserData(parser, &state); |
| XML_SetElementHandler(parser, start_element_handler, end_element_handler); |
| XML_SetCharacterDataHandler(parser, text_handler); |
| XML_SetCommentHandler(parser, comment_handler); |
| |
| if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) { |
| SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error( |
| "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); |
| return false; |
| } |
| |
| XML_ParserFree(parser); |
| |
| return true; |
| } |
| |
| XMLHandler::XMLHandler() |
| { |
| } |
| |
| XMLHandler::~XMLHandler() |
| { |
| } |
| |
| int |
| XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, |
| const vector<XMLAttribute>& attrs, XMLHandler** next) |
| { |
| return 0; |
| } |
| |
| int |
| XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) |
| { |
| return 0; |
| } |
| |
| int |
| XMLHandler::OnText(const SourcePos& pos, const string& text) |
| { |
| return 0; |
| } |
| |
| int |
| XMLHandler::OnComment(const SourcePos& pos, const string& text) |
| { |
| return 0; |
| } |
| |
| int |
| XMLHandler::OnDone(const SourcePos& pos) |
| { |
| return 0; |
| } |
| |
| TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next) |
| :m_ns(ns), |
| m_name(name), |
| m_next(next) |
| { |
| } |
| |
| int |
| TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, |
| const vector<XMLAttribute>& attrs, XMLHandler** next) |
| { |
| *next = m_next; |
| return 0; |
| } |
| |
| int |
| TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) |
| { |
| return 0; |
| } |
| |
| int |
| TopElementHandler::OnText(const SourcePos& pos, const string& text) |
| { |
| return 0; |
| } |
| |
| int |
| TopElementHandler::OnDone(const SourcePos& pos) |
| { |
| return 0; |
| } |
| |
| |
| NodeHandler::NodeHandler(XMLNode* root, int pretty) |
| :m_root(root), |
| m_pretty(pretty) |
| { |
| if (root != NULL) { |
| m_nodes.push_back(root); |
| } |
| } |
| |
| NodeHandler::~NodeHandler() |
| { |
| } |
| |
| int |
| NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, |
| const vector<XMLAttribute>& attrs, XMLHandler** next) |
| { |
| int pretty; |
| if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") { |
| pretty = XMLNode::EXACT; |
| } else { |
| if (m_root == NULL) { |
| pretty = m_pretty; |
| } else { |
| pretty = m_nodes[m_nodes.size()-1]->Pretty(); |
| } |
| } |
| XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty); |
| if (m_root == NULL) { |
| m_root = n; |
| } else { |
| m_nodes[m_nodes.size()-1]->EditChildren().push_back(n); |
| } |
| m_nodes.push_back(n); |
| return 0; |
| } |
| |
| int |
| NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) |
| { |
| m_nodes.pop_back(); |
| return 0; |
| } |
| |
| int |
| NodeHandler::OnText(const SourcePos& pos, const string& text) |
| { |
| if (m_root == NULL) { |
| return 1; |
| } |
| XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty()); |
| m_nodes[m_nodes.size()-1]->EditChildren().push_back(n); |
| return 0; |
| } |
| |
| int |
| NodeHandler::OnComment(const SourcePos& pos, const string& text) |
| { |
| return 0; |
| } |
| |
| int |
| NodeHandler::OnDone(const SourcePos& pos) |
| { |
| return 0; |
| } |
| |
| XMLNode* |
| NodeHandler::ParseFile(const string& filename, int pretty) |
| { |
| NodeHandler handler(NULL, pretty); |
| if (!XMLHandler::ParseFile(filename, &handler)) { |
| fprintf(stderr, "error parsing file: %s\n", filename.c_str()); |
| return NULL; |
| } |
| return handler.Root(); |
| } |
| |
| XMLNode* |
| NodeHandler::ParseString(const string& filename, const string& text, int pretty) |
| { |
| NodeHandler handler(NULL, pretty); |
| if (!XMLHandler::ParseString(filename, text, &handler)) { |
| fprintf(stderr, "error parsing file: %s\n", filename.c_str()); |
| return NULL; |
| } |
| return handler.Root(); |
| } |
| |
| |