blob: 3fab211eaf8949e07d69bea636886ab9ea01b898 [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001#include "XMLHandler.h"
2
3#include <algorithm>
4#include <expat.h>
5#include <stdio.h>
6#include <string.h>
7#include <fcntl.h>
8#include <unistd.h>
9#include <errno.h>
10
11#define NS_SEPARATOR 1
12#define MORE_INDENT " "
13
// Returns a copy of s in which '<', '>' and '&' are replaced by the
// entities "&lt;", "&gt;" and "&amp;"; every other character is copied as-is.
static string
xml_text_escape(const string& s)
{
    string escaped;
    for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
        const char ch = *it;
        if (ch == '<') {
            escaped.append("&lt;");
        } else if (ch == '>') {
            escaped.append("&gt;");
        } else if (ch == '&') {
            escaped.append("&amp;");
        } else {
            escaped.push_back(ch);
        }
    }
    return escaped;
}
38
// Returns a copy of s that is safe to place between double quotes as an XML
// attribute value.
//
// Besides the quote character itself, '&' and '<' are escaped: neither may
// appear literally inside an attribute value, so leaving them untouched would
// produce a document that cannot be parsed back.
static string
xml_attr_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        char c = s[i];
        switch (c) {
            case '\"':
                result += "&quot;";
                break;
            case '&':
                result += "&amp;";
                break;
            case '<':
                result += "&lt;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
57
58XMLNamespaceMap::XMLNamespaceMap()
59{
60}
61
62XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
63
64{
65 while (*nspaces) {
66 m_map[nspaces[1]] = nspaces[0];
67 nspaces += 2;
68 }
69}
70
71string
72XMLNamespaceMap::Get(const string& ns) const
73{
74 if (ns == "xml") {
75 return ns;
76 }
77 map<string,string>::const_iterator it = m_map.find(ns);
78 if (it == m_map.end()) {
79 return "";
80 } else {
81 return it->second;
82 }
83}
84
85string
86XMLNamespaceMap::GetPrefix(const string& ns) const
87{
88 if (ns == "") {
89 return "";
90 }
91 map<string,string>::const_iterator it = m_map.find(ns);
92 if (it != m_map.end()) {
93 if (it->second == "") {
94 return "";
95 } else {
96 return it->second + ":";
97 }
98 } else {
99 return ":"; // invalid
100 }
101}
102
103void
104XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
105{
106 map<string,string>::const_iterator it;
107 for (it=m_map.begin(); it!=m_map.end(); it++) {
108 if (it->second == "xml") {
109 continue;
110 }
111 XMLAttribute attr;
112 if (it->second == "") {
113 attr.name = "xmlns";
114 } else {
115 attr.name = "xmlns:";
116 attr.name += it->second;
117 }
118 attr.value = it->first;
119 attrs->push_back(attr);
120 }
121}
122
123XMLAttribute::XMLAttribute()
124{
125}
126
127XMLAttribute::XMLAttribute(const XMLAttribute& that)
128 :ns(that.ns),
129 name(that.name),
130 value(that.value)
131{
132}
133
134XMLAttribute::XMLAttribute(string n, string na, string v)
135 :ns(n),
136 name(na),
137 value(v)
138{
139}
140
141XMLAttribute::~XMLAttribute()
142{
143}
144
145int
146XMLAttribute::Compare(const XMLAttribute& that) const
147{
148 if (ns != that.ns) {
149 return ns < that.ns ? -1 : 1;
150 }
151 if (name != that.name) {
152 return name < that.name ? -1 : 1;
153 }
154 return 0;
155}
156
157string
158XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name,
159 const string& def)
160{
161 const size_t N = list.size();
162 for (size_t i=0; i<N; i++) {
163 const XMLAttribute& attr = list[i];
164 if (attr.ns == ns && attr.name == name) {
165 return attr.value;
166 }
167 }
168 return def;
169}
170
171struct xml_handler_data {
172 vector<XMLHandler*> stack;
173 XML_Parser parser;
174 vector<vector<XMLAttribute>*> attributes;
175 string filename;
176};
177
178XMLNode::XMLNode()
179{
180}
181
182XMLNode::~XMLNode()
183{
184// for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>);
185}
186
187XMLNode*
188XMLNode::Clone() const
189{
190 switch (m_type) {
191 case ELEMENT: {
192 XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty);
193 const size_t N = m_children.size();
194 for (size_t i=0; i<N; i++) {
195 e->m_children.push_back(m_children[i]->Clone());
196 }
197 return e;
198 }
199 case TEXT: {
200 return XMLNode::NewText(m_pos, m_text, m_pretty);
201 }
202 default:
203 return NULL;
204 }
205}
206
207XMLNode*
208XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name,
209 const vector<XMLAttribute>& attrs, int pretty)
210{
211 XMLNode* node = new XMLNode();
212 node->m_type = ELEMENT;
213 node->m_pretty = pretty;
214 node->m_pos = pos;
215 node->m_ns = ns;
216 node->m_name = name;
217 node->m_attrs = attrs;
218 return node;
219}
220
221XMLNode*
222XMLNode::NewText(const SourcePos& pos, const string& text, int pretty)
223{
224 XMLNode* node = new XMLNode();
225 node->m_type = TEXT;
226 node->m_pretty = pretty;
227 node->m_pos = pos;
228 node->m_text = text;
229 return node;
230}
231
232void
233XMLNode::SetPrettyRecursive(int value)
234{
235 m_pretty = value;
236 const size_t N = m_children.size();
237 for (size_t i=0; i<N; i++) {
238 m_children[i]->SetPrettyRecursive(value);
239 }
240}
241
242string
243XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const
244{
245 return contents_to_string(nspaces, "");
246}
247
248string
249XMLNode::ToString(const XMLNamespaceMap& nspaces) const
250{
251 return to_string(nspaces, "");
252}
253
254string
255XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const
256{
257 return open_tag_to_string(nspaces, "", pretty);
258}
259
260string
261XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const
262{
263 string result;
264 const size_t N = m_children.size();
265 for (size_t i=0; i<N; i++) {
266 const XMLNode* child = m_children[i];
267 switch (child->Type()) {
268 case ELEMENT:
269 if (m_pretty == PRETTY) {
270 result += '\n';
271 result += indent;
272 }
273 case TEXT:
274 result += child->to_string(nspaces, indent);
275 break;
276 }
277 }
278 return result;
279}
280
// Returns str with leading and trailing whitespace (as classified by
// isspace()) removed. A string that is empty or all whitespace yields "".
//
// The scan works on indices so that an empty input never forms a pointer
// before the start of the buffer, and each character is converted to
// unsigned char before the isspace() call because passing a negative char
// value to it is undefined.
string
trim_string(const string& str)
{
    size_t first = 0;
    size_t last = str.length();   // one past the final character to keep
    while (first < last && isspace((unsigned char)str[first])) {
        first++;
    }
    while (last > first && isspace((unsigned char)str[last-1])) {
        last--;
    }
    return str.substr(first, last - first);
}
295
296string
297XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const
298{
299 if (m_type != ELEMENT) {
300 return "";
301 }
302 string result = "<";
303 result += nspaces.GetPrefix(m_ns);
304 result += m_name;
305
306 vector<XMLAttribute> attrs = m_attrs;
307
308 sort(attrs.begin(), attrs.end());
309
310 const size_t N = attrs.size();
311 for (size_t i=0; i<N; i++) {
312 const XMLAttribute& attr = attrs[i];
313 if (i == 0 || m_pretty == EXACT || pretty == EXACT) {
314 result += ' ';
315 }
316 else {
317 result += "\n";
318 result += indent;
319 result += MORE_INDENT;
320 result += MORE_INDENT;
321 }
322 result += nspaces.GetPrefix(attr.ns);
323 result += attr.name;
324 result += "=\"";
325 result += xml_attr_escape(attr.value);
326 result += '\"';
327 }
328
329 if (m_children.size() > 0) {
330 result += '>';
331 } else {
332 result += " />";
333 }
334 return result;
335}
336
337string
338XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const
339{
340 switch (m_type)
341 {
342 case TEXT: {
343 if (m_pretty == EXACT) {
344 return xml_text_escape(m_text);
345 } else {
346 return xml_text_escape(trim_string(m_text));
347 }
348 }
349 case ELEMENT: {
350 string result = open_tag_to_string(nspaces, indent, PRETTY);
351
352 if (m_children.size() > 0) {
353 result += contents_to_string(nspaces, indent + MORE_INDENT);
354
355 if (m_pretty == PRETTY && m_children.size() > 0) {
356 result += '\n';
357 result += indent;
358 }
359
360 result += "</";
361 result += nspaces.GetPrefix(m_ns);
362 result += m_name;
363 result += '>';
364 }
365 return result;
366 }
367 default:
368 return "";
369 }
370}
371
372string
373XMLNode::CollapseTextContents() const
374{
375 if (m_type == TEXT) {
376 return m_text;
377 }
378 else if (m_type == ELEMENT) {
379 string result;
380
381 const size_t N=m_children.size();
382 for (size_t i=0; i<N; i++) {
383 result += m_children[i]->CollapseTextContents();
384 }
385
386 return result;
387 }
388 else {
389 return "";
390 }
391}
392
393vector<XMLNode*>
394XMLNode::GetElementsByName(const string& ns, const string& name) const
395{
396 vector<XMLNode*> result;
397 const size_t N=m_children.size();
398 for (size_t i=0; i<N; i++) {
399 XMLNode* child = m_children[i];
400 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
401 result.push_back(child);
402 }
403 }
404 return result;
405}
406
407XMLNode*
408XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const
409{
410 vector<XMLNode*> result;
411 const size_t N=m_children.size();
412 for (size_t i=0; i<N; i++) {
413 XMLNode* child = m_children[i];
414 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
415 if (index == 0) {
416 return child;
417 } else {
418 index--;
419 }
420 }
421 }
422 return NULL;
423}
424
425size_t
426XMLNode::CountElementsByName(const string& ns, const string& name) const
427{
428 size_t result = 0;
429 const size_t N=m_children.size();
430 for (size_t i=0; i<N; i++) {
431 XMLNode* child = m_children[i];
432 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
433 result++;
434 }
435 }
436 return result;
437}
438
439string
440XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const
441{
442 return XMLAttribute::Find(m_attrs, ns, name, def);
443}
444
445static void
446parse_namespace(const char* data, string* ns, string* name)
447{
448 const char* p = strchr(data, NS_SEPARATOR);
449 if (p != NULL) {
450 ns->assign(data, p-data);
451 name->assign(p+1);
452 } else {
453 ns->assign("");
454 name->assign(data);
455 }
456}
457
458static void
459convert_attrs(const char** in, vector<XMLAttribute>* out)
460{
461 while (*in) {
462 XMLAttribute attr;
463 parse_namespace(in[0], &attr.ns, &attr.name);
464 attr.value = in[1];
465 out->push_back(attr);
466 in += 2;
467 }
468}
469
470static bool
471list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler)
472{
473 const size_t N = stack.size();
474 for (size_t i=0; i<N; i++) {
475 if (stack[i] == handler) {
476 return true;
477 }
478 }
479 return false;
480}
481
482static void XMLCALL
483start_element_handler(void *userData, const char *name, const char **attrs)
484{
485 xml_handler_data* data = (xml_handler_data*)userData;
486
487 XMLHandler* handler = data->stack[data->stack.size()-1];
488
489 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
490 string nsString;
491 string nameString;
492 XMLHandler* next = handler;
493 vector<XMLAttribute> attributes;
494
495 parse_namespace(name, &nsString, &nameString);
496 convert_attrs(attrs, &attributes);
497
498 handler->OnStartElement(pos, nsString, nameString, attributes, &next);
499
500 if (next == NULL) {
501 next = handler;
502 }
503
504 if (next != handler) {
505 next->elementPos = pos;
506 next->elementNamespace = nsString;
507 next->elementName = nameString;
508 next->elementAttributes = attributes;
509 }
510
511 data->stack.push_back(next);
512}
513
514static void XMLCALL
515end_element_handler(void *userData, const char *name)
516{
517 xml_handler_data* data = (xml_handler_data*)userData;
518
519 XMLHandler* handler = data->stack[data->stack.size()-1];
520 data->stack.pop_back();
521
522 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
523
524 if (!list_contains(data->stack, handler)) {
525 handler->OnDone(pos);
526 if (data->stack.size() > 1) {
527 // not top one
528 delete handler;
529 }
530 }
531
532 handler = data->stack[data->stack.size()-1];
533
534 string nsString;
535 string nameString;
536
537 parse_namespace(name, &nsString, &nameString);
538
539 handler->OnEndElement(pos, nsString, nameString);
540}
541
542static void XMLCALL
543text_handler(void *userData, const XML_Char *s, int len)
544{
545 xml_handler_data* data = (xml_handler_data*)userData;
546 XMLHandler* handler = data->stack[data->stack.size()-1];
547 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
548 handler->OnText(pos, string(s, len));
549}
550
551static void XMLCALL
552comment_handler(void *userData, const char *comment)
553{
554 xml_handler_data* data = (xml_handler_data*)userData;
555 XMLHandler* handler = data->stack[data->stack.size()-1];
556 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
557 handler->OnComment(pos, string(comment));
558}
559
560bool
561XMLHandler::ParseFile(const string& filename, XMLHandler* handler)
562{
563 char buf[16384];
564 int fd = open(filename.c_str(), O_RDONLY);
565 if (fd < 0) {
566 SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno));
567 return false;
568 }
569
570 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
571 xml_handler_data state;
572 state.stack.push_back(handler);
573 state.parser = parser;
574 state.filename = filename;
575
576 XML_SetUserData(parser, &state);
577 XML_SetElementHandler(parser, start_element_handler, end_element_handler);
578 XML_SetCharacterDataHandler(parser, text_handler);
579 XML_SetCommentHandler(parser, comment_handler);
580
581 ssize_t len;
582 bool done;
583 do {
584 len = read(fd, buf, sizeof(buf));
585 done = len < (ssize_t)sizeof(buf);
586 if (len < 0) {
587 SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno));
588 close(fd);
589 return false;
590 }
591 if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
592 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
593 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
594 close(fd);
595 return false;
596 }
597 } while (!done);
598
599 XML_ParserFree(parser);
600
601 close(fd);
602
603 return true;
604}
605
606bool
607XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler)
608{
609 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
610 xml_handler_data state;
611 state.stack.push_back(handler);
612 state.parser = parser;
613 state.filename = filename;
614
615 XML_SetUserData(parser, &state);
616 XML_SetElementHandler(parser, start_element_handler, end_element_handler);
617 XML_SetCharacterDataHandler(parser, text_handler);
618 XML_SetCommentHandler(parser, comment_handler);
619
620 if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) {
621 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
622 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
623 return false;
624 }
625
626 XML_ParserFree(parser);
627
628 return true;
629}
630
631XMLHandler::XMLHandler()
632{
633}
634
635XMLHandler::~XMLHandler()
636{
637}
638
639int
640XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
641 const vector<XMLAttribute>& attrs, XMLHandler** next)
642{
643 return 0;
644}
645
646int
647XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
648{
649 return 0;
650}
651
652int
653XMLHandler::OnText(const SourcePos& pos, const string& text)
654{
655 return 0;
656}
657
658int
659XMLHandler::OnComment(const SourcePos& pos, const string& text)
660{
661 return 0;
662}
663
664int
665XMLHandler::OnDone(const SourcePos& pos)
666{
667 return 0;
668}
669
670TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next)
671 :m_ns(ns),
672 m_name(name),
673 m_next(next)
674{
675}
676
677int
678TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
679 const vector<XMLAttribute>& attrs, XMLHandler** next)
680{
681 *next = m_next;
682 return 0;
683}
684
685int
686TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
687{
688 return 0;
689}
690
691int
692TopElementHandler::OnText(const SourcePos& pos, const string& text)
693{
694 return 0;
695}
696
697int
698TopElementHandler::OnDone(const SourcePos& pos)
699{
700 return 0;
701}
702
703
704NodeHandler::NodeHandler(XMLNode* root, int pretty)
705 :m_root(root),
706 m_pretty(pretty)
707{
708 if (root != NULL) {
709 m_nodes.push_back(root);
710 }
711}
712
713NodeHandler::~NodeHandler()
714{
715}
716
717int
718NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
719 const vector<XMLAttribute>& attrs, XMLHandler** next)
720{
721 int pretty;
722 if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") {
723 pretty = XMLNode::EXACT;
724 } else {
725 if (m_root == NULL) {
726 pretty = m_pretty;
727 } else {
728 pretty = m_nodes[m_nodes.size()-1]->Pretty();
729 }
730 }
731 XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty);
732 if (m_root == NULL) {
733 m_root = n;
734 } else {
735 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
736 }
737 m_nodes.push_back(n);
738 return 0;
739}
740
741int
742NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
743{
744 m_nodes.pop_back();
745 return 0;
746}
747
748int
749NodeHandler::OnText(const SourcePos& pos, const string& text)
750{
751 if (m_root == NULL) {
752 return 1;
753 }
754 XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty());
755 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
756 return 0;
757}
758
759int
760NodeHandler::OnComment(const SourcePos& pos, const string& text)
761{
762 return 0;
763}
764
765int
766NodeHandler::OnDone(const SourcePos& pos)
767{
768 return 0;
769}
770
771XMLNode*
772NodeHandler::ParseFile(const string& filename, int pretty)
773{
774 NodeHandler handler(NULL, pretty);
775 if (!XMLHandler::ParseFile(filename, &handler)) {
776 fprintf(stderr, "error parsing file: %s\n", filename.c_str());
777 return NULL;
778 }
779 return handler.Root();
780}
781
782XMLNode*
783NodeHandler::ParseString(const string& filename, const string& text, int pretty)
784{
785 NodeHandler handler(NULL, pretty);
786 if (!XMLHandler::ParseString(filename, text, &handler)) {
787 fprintf(stderr, "error parsing file: %s\n", filename.c_str());
788 return NULL;
789 }
790 return handler.Root();
791}
792
793