Revert "Import xml-rs 0.8.3"
Revert submission 1767390-import-serde-xml-rs
Reason for revert: DroidMonitor: Potential culprit for Bug 193808832 - verifying through Forrest before revert submission. This is part of the standard investigation process, and does not mean your CL will be reverted
Reverted Changes:
Ic95f41063:Import xml-rs 0.8.3
Ie6b7a9e17:Import serde-xml-rs 0.4.1
Change-Id: If0f29d300185d6219fbff485f4e21b32427f56b8
diff --git a/Android.bp b/Android.bp
deleted file mode 100644
index 1144ed1..0000000
--- a/Android.bp
+++ /dev/null
@@ -1,146 +0,0 @@
-// This file is generated by cargo2android.py --config cargo2android.json.
-// Do not modify this file as changes will be overridden on upgrade.
-
-
-
-rust_library {
- name: "libxml",
- // has rustc warnings
- host_supported: true,
- crate_name: "xml",
- srcs: ["src/lib.rs"],
- edition: "2015",
- apex_available: [
- "//apex_available:platform",
- "com.android.virt",
- ],
-}
-
-rust_defaults {
- name: "xml-rs_test_defaults",
- crate_name: "xml_analyze",
- // has rustc warnings
- srcs: ["src/analyze.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- edition: "2015",
- rustlibs: [
- "liblazy_static",
- "libxml",
- ],
-}
-
-rust_test_host {
- name: "xml-rs_host_test_src_analyze",
- defaults: ["xml-rs_test_defaults"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "xml-rs_device_test_src_analyze",
- defaults: ["xml-rs_test_defaults"],
-}
-
-rust_defaults {
- name: "xml-rs_test_defaults_xml",
- crate_name: "xml",
- // has rustc warnings
- srcs: ["src/lib.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- edition: "2015",
- rustlibs: [
- "liblazy_static",
- ],
-}
-
-rust_test_host {
- name: "xml-rs_host_test_src_lib",
- defaults: ["xml-rs_test_defaults_xml"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "xml-rs_device_test_src_lib",
- defaults: ["xml-rs_test_defaults_xml"],
-}
-
-rust_defaults {
- name: "xml-rs_test_defaults_xml_rs",
- crate_name: "xml_rs",
- test_suites: ["general-tests"],
- auto_gen_config: true,
- edition: "2015",
- rustlibs: [
- "liblazy_static",
- "libxml",
- ],
-}
-
-rust_test_host {
- name: "xml-rs_host_test_tests_event_reader",
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/event_reader.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "xml-rs_device_test_tests_event_reader",
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/event_reader.rs"],
-}
-
-rust_test_host {
- name: "xml-rs_host_test_tests_event_writer",
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/event_writer.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "xml-rs_device_test_tests_event_writer",
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/event_writer.rs"],
- data: ["tests/documents/*"],
-}
-
-rust_test_host {
- name: "xml-rs_host_test_tests_streaming",
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/streaming.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "xml-rs_device_test_tests_streaming",
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/streaming.rs"],
-}
-
-rust_binary {
- name: "xml_analyze",
- // has rustc warnings
- host_supported: true,
- crate_name: "xml_analyze",
- srcs: ["src/analyze.rs"],
- edition: "2015",
- rustlibs: [
- "libxml",
- ],
-}
diff --git a/Cargo.toml b/Cargo.toml
deleted file mode 100644
index 7e47daf..0000000
--- a/Cargo.toml
+++ /dev/null
@@ -1,23 +0,0 @@
-[package]
-name = "xml-rs"
-version = "0.8.3"
-authors = ["Vladimir Matveev <vmatveev@citrine.cc>"]
-license = "MIT"
-description = "An XML library in pure Rust"
-repository = "https://github.com/netvl/xml-rs"
-documentation = "http://docs.rs/xml-rs/"
-readme = "Readme.md"
-keywords = ["xml", "parsing", "parser"]
-categories = ["parsing"]
-
-[lib]
-name = "xml"
-path = "src/lib.rs"
-
-[[bin]]
-name = "xml-analyze"
-path = "src/analyze.rs"
-
-[dev-dependencies]
-doc-comment = "0.3"
-lazy_static = "1.2.0"
diff --git a/Changelog.md b/Changelog.md
deleted file mode 100644
index b922e22..0000000
--- a/Changelog.md
+++ /dev/null
@@ -1,119 +0,0 @@
-## Version 0.8.3
-
-* Added a new parser option, `ignore_root_level_whitespace`, which makes the parser
- skip emitting whitespace events outside of the root element when set to `true`.
- This helps with certain tasks like canonicalization.
-
-## Version 0.8.2
-
-* Added a new parser option, `replace_unknown_entity_references`, which allows to ignore
- invalid Unicode code points and replace them with a Unicode "replacement character"
- during parsing. This can be helpful to deal with e.g. UTF-16 surrogate pairs.
-* Added a new emitter option, `pad_self_closing`, which determines the style of the self-closing
- elements when they are emitted: `<a />` (`true`) vs `<a/>` (`false`).
-
-## Version 0.8.1
-
-* Fixed various issues with tests introduced by updates in Rust.
-* Adjusted the lexer to ignore contents of the `<!DOCTYPE>` tag.
-* Removed unnecessary unsafety in tests.
-* Added tests for doc comments in the readme file.
-* Switched to GitHub Actions from Travis CI.
-
-## Version 0.8.0
-
-* Same as 0.7.1, with 0.7.1 being yanked because of the incorrect semver bump.
-
-## Version 0.7.1
-
-* Removed dependency on bitflags.
-* Added the `XmlWriter::inner_mut()` method.
-* Fixed some rustdoc warnings.
-
-## Version 0.7.0
-
-* Same as 0.6.2, with 0.6.2 being yanked because of the incompatible bump of minimum required version of rustc.
-
-## Version 0.6.2
-
-* Bumped `bitflags` to 1.0.
-
-## Version 0.6.1
-
-* Fixed the writer to escape some special characters when writing attribute values.
-
-## Version 0.6.0
-
-* Changed the target type of extra entities from `char` to `String`. This is an incompatible
- change.
-
-## Version 0.5.0
-
-* Added support for ignoring EOF errors in order to read documents from streams incrementally.
-* Bumped `bitflags` to 0.9.
-
-## Version 0.4.1
-
-* Added missing `Debug` implementation to `xml::writer::XmlEvent`.
-
-## Version 0.4.0
-
-* Bumped version number, since changes introduced in 0.3.7 break backwards compatibility.
-
-## Version 0.3.8
-
-* Fixed a problem introduced in 0.3.7 with entities in attributes causing parsing errors.
-
-## Version 0.3.7
-
-* Fixed the problem with parsing non-whitespace character entities as whitespace (issue #140).
-* Added support for configuring custom entities in the parser configuration.
-
-## Version 0.3.6
-
-* Added an `Error` implementation for `EmitterError`.
-* Fixed escaping of strings with multi-byte code points.
-
-## Version 0.3.5
-
-* Added `Debug` implementation for `XmlVersion`.
-* Fixed some failing tests.
-
-## Version 0.3.3
-
-* Updated `bitflags` to 0.7.
-
-## Version 0.3.2
-
-* Added `From<io::Error>` for `xml::reader::Error`, which improves usability of working with parsing errors.
-
-## Version 0.3.1
-
-* Bumped `bitflags` dependency to 0.4, some internal warning fixes.
-
-## Version 0.3.0
-
-* Changed error handling in `EventReader` - now I/O errors are properly bubbled up from the lexer.
-
-## Version 0.2.4
-
-* Fixed #112 - incorrect handling of namespace redefinitions when writing a document.
-
-## Version 0.2.3
-
-* Added `into_inner()` methods to `EventReader` and `EventWriter`.
-
-## Version 0.2.2
-
-* Using `join` instead of the deprecated `connect`.
-* Added a simple XML analyzer program which demonstrates library usage and can be used to check XML documents for well-formedness.
-* Fixed incorrect handling of unqualified attribute names (#107).
-* Added this changelog.
-
-## Version 0.2.1
-
-* Fixed #105 - incorrect handling of double dashes.
-
-## Version 0.2.0
-
-* Major update, includes proper document writing support and significant architecture changes.
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 6caa1d3..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2014 Vladimir Matveev
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/METADATA b/METADATA
deleted file mode 100644
index 1cf43c1..0000000
--- a/METADATA
+++ /dev/null
@@ -1,17 +0,0 @@
-name: "xml-rs"
-description:
- "An XML library in pure Rust"
-
-third_party {
- url {
- type: HOMEPAGE
- value: "https://crates.io/crates/xml-rs"
- }
- url {
- type: GIT
- value: "https://github.com/netvl/xml-rs"
- }
- version: "0.8.3"
- last_upgrade_date { year: 2021 month: 6 day: 21 }
- license_type: NOTICE
-}
\ No newline at end of file
diff --git a/MODULE_LICENSE_MIT b/MODULE_LICENSE_MIT
deleted file mode 100644
index e69de29..0000000
--- a/MODULE_LICENSE_MIT
+++ /dev/null
diff --git a/OWNERS b/OWNERS
deleted file mode 100644
index 98a1473..0000000
--- a/OWNERS
+++ /dev/null
@@ -1,2 +0,0 @@
-include platform/prebuilts/rust:/OWNERS
-
diff --git a/Readme.md b/Readme.md
deleted file mode 100644
index 5ab88f8..0000000
--- a/Readme.md
+++ /dev/null
@@ -1,236 +0,0 @@
-xml-rs, an XML library for Rust
-===============================
-
-[![Build Status][build-status-img]](https://github.com/netvl/xml-rs/actions?query=workflow%3ACI)
-[![crates.io][crates-io-img]](https://crates.io/crates/xml-rs)
-[![docs][docs-img]](https://docs.rs/xml-rs/)
-
-[Documentation](https://docs.rs/xml-rs/)
-
- [build-status-img]: https://img.shields.io/github/workflow/status/netvl/xml-rs/CI/master?style=flat-square
- [crates-io-img]: https://img.shields.io/crates/v/xml-rs.svg?style=flat-square
- [docs-img]: https://img.shields.io/badge/docs-latest%20release-6495ed.svg?style=flat-square
-
-xml-rs is an XML library for [Rust](http://www.rust-lang.org/) programming language.
-It is heavily inspired by Java [Streaming API for XML (StAX)][stax].
-
- [stax]: https://en.wikipedia.org/wiki/StAX
-
-This library currently contains pull parser much like [StAX event reader][stax-reader].
-It provides iterator API, so you can leverage Rust's existing iterators library features.
-
- [stax-reader]: http://docs.oracle.com/javase/8/docs/api/javax/xml/stream/XMLEventReader.html
-
-It also provides a streaming document writer much like [StAX event writer][stax-writer].
-This writer consumes its own set of events, but reader events can be converted to
-writer events easily, and so it is possible to write XML transformation chains in a pretty
-clean manner.
-
- [stax-writer]: http://docs.oracle.com/javase/8/docs/api/javax/xml/stream/XMLEventWriter.html
-
-This parser is mostly full-featured, however, there are limitations:
-* no other encodings but UTF-8 are supported yet, because no stream-based encoding library
- is available now; when (or if) one will be available, I'll try to make use of it;
-* DTD validation is not supported, `<!DOCTYPE>` declarations are completely ignored; thus no
- support for custom entities too; internal DTD declarations are likely to cause parsing errors;
-* attribute value normalization is not performed, and end-of-line characters are not normalized too.
-
-Other than that the parser tries to be mostly XML-1.0-compliant.
-
-Writer is also mostly full-featured with the following limitations:
-* no support for encodings other than UTF-8, for the same reason as above;
-* no support for emitting `<!DOCTYPE>` declarations;
-* more validations of input are needed, for example, checking that namespace prefixes are bounded
- or comments are well-formed.
-
-What is planned (highest priority first, approximately):
-
-0. missing features required by XML standard (e.g. aforementioned normalization and
- proper DTD parsing);
-1. miscellaneous features of the writer;
-2. parsing into a DOM tree and its serialization back to XML text;
-3. SAX-like callback-based parser (fairly easy to implement over pull parser);
-4. DTD validation;
-5. (let's dream a bit) XML Schema validation.
-
-Building and using
-------------------
-
-xml-rs uses [Cargo](http://crates.io), so just add a dependency section in your project's manifest:
-
-```toml
-[dependencies]
-xml-rs = "0.8"
-```
-
-The package exposes a single crate called `xml`:
-
-```rust
-extern crate xml;
-```
-
-Reading XML documents
----------------------
-
-`xml::reader::EventReader` requires a `Read` instance to read from. When a proper stream-based encoding
-library is available, it is likely that xml-rs will be switched to use whatever character stream structure
-this library would provide, but currently it is a `Read`.
-
-Using `EventReader` is very straightforward. Just provide a `Read` instance to obtain an iterator
-over events:
-
-```rust,no_run
-extern crate xml;
-
-use std::fs::File;
-use std::io::BufReader;
-
-use xml::reader::{EventReader, XmlEvent};
-
-fn indent(size: usize) -> String {
- const INDENT: &'static str = " ";
- (0..size).map(|_| INDENT)
- .fold(String::with_capacity(size*INDENT.len()), |r, s| r + s)
-}
-
-fn main() {
- let file = File::open("file.xml").unwrap();
- let file = BufReader::new(file);
-
- let parser = EventReader::new(file);
- let mut depth = 0;
- for e in parser {
- match e {
- Ok(XmlEvent::StartElement { name, .. }) => {
- println!("{}+{}", indent(depth), name);
- depth += 1;
- }
- Ok(XmlEvent::EndElement { name }) => {
- depth -= 1;
- println!("{}-{}", indent(depth), name);
- }
- Err(e) => {
- println!("Error: {}", e);
- break;
- }
- _ => {}
- }
- }
-}
-```
-
-`EventReader` implements `IntoIterator` trait, so you can just use it in a `for` loop directly.
-Document parsing can end normally or with an error. Regardless of exact cause, the parsing
-process will be stopped, and iterator will terminate normally.
-
-You can also have finer control over when to pull the next event from the parser using its own
-`next()` method:
-
-```rust,ignore
-match parser.next() {
- ...
-}
-```
-
-Upon the end of the document or an error the parser will remember that last event and will always
-return it in the result of `next()` call afterwards. If iterator is used, then it will yield
-error or end-of-document event once and will produce `None` afterwards.
-
-It is also possible to tweak parsing process a little using `xml::reader::ParserConfig` structure.
-See its documentation for more information and examples.
-
-You can find a more extensive example of using `EventReader` in `src/analyze.rs`, which is a
-small program (BTW, it is built with `cargo build` and can be run after that) which shows various
-statistics about specified XML document. It can also be used to check for well-formedness of
-XML documents - if a document is not well-formed, this program will exit with an error.
-
-Writing XML documents
----------------------
-
-xml-rs also provides a streaming writer much like StAX event writer. With it you can write an
-XML document to any `Write` implementor.
-
-```rust,no_run
-extern crate xml;
-
-use std::fs::File;
-use std::io::{self, Write};
-
-use xml::writer::{EventWriter, EmitterConfig, XmlEvent, Result};
-
-fn handle_event<W: Write>(w: &mut EventWriter<W>, line: String) -> Result<()> {
- let line = line.trim();
- let event: XmlEvent = if line.starts_with("+") && line.len() > 1 {
- XmlEvent::start_element(&line[1..]).into()
- } else if line.starts_with("-") {
- XmlEvent::end_element().into()
- } else {
- XmlEvent::characters(&line).into()
- };
- w.write(event)
-}
-
-fn main() {
- let mut file = File::create("output.xml").unwrap();
-
- let mut input = io::stdin();
- let mut output = io::stdout();
- let mut writer = EmitterConfig::new().perform_indent(true).create_writer(&mut file);
- loop {
- print!("> "); output.flush().unwrap();
- let mut line = String::new();
- match input.read_line(&mut line) {
- Ok(0) => break,
- Ok(_) => match handle_event(&mut writer, line) {
- Ok(_) => {}
- Err(e) => panic!("Write error: {}", e)
- },
- Err(e) => panic!("Input error: {}", e)
- }
- }
-}
-```
-
-The code example above also demonstrates how to create a writer out of its configuration.
-Similar thing also works with `EventReader`.
-
-The library provides an XML event building DSL which helps to construct complex events,
-e.g. ones having namespace definitions. Some examples:
-
-```rust,ignore
-// <a:hello a:param="value" xmlns:a="urn:some:document">
-XmlEvent::start_element("a:hello").attr("a:param", "value").ns("a", "urn:some:document")
-
-// <hello b:config="name" xmlns="urn:default:uri">
-XmlEvent::start_element("hello").attr("b:config", "value").default_ns("urn:defaul:uri")
-
-// <![CDATA[some unescaped text]]>
-XmlEvent::cdata("some unescaped text")
-```
-
-Of course, one can create `XmlEvent` enum variants directly instead of using the builder DSL.
-There are more examples in `xml::writer::XmlEvent` documentation.
-
-The writer has multiple configuration options; see `EmitterConfig` documentation for more
-information.
-
-Other things
-------------
-
-No performance tests or measurements are done. The implementation is rather naive, and no specific
-optimizations are made. Hopefully the library is sufficiently fast to process documents of common size.
-I intend to add benchmarks in future, but not until more important features are added.
-
-Known issues
-------------
-
-All known issues are present on GitHub issue tracker: <http://github.com/netvl/xml-rs/issues>.
-Feel free to post any found problems there.
-
-License
--------
-
-This library is licensed under MIT license.
-
----
-Copyright (C) Vladimir Matveev, 2014-2020
diff --git a/cargo2android.json b/cargo2android.json
deleted file mode 100644
index d63dca0..0000000
--- a/cargo2android.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- "apex-available": [
- "//apex_available:platform",
- "com.android.virt"
- ],
- "dependency-blocklist": [
- "doc_comment"
- ],
- "device": true,
- "run": true,
- "tests": true,
- "patch": "patches/Android.bp.patch"
-}
\ No newline at end of file
diff --git a/design.md b/design.md
deleted file mode 100644
index da67c7b..0000000
--- a/design.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Reader
-
-Basic features:
- * [x] Parsing XML 1.0 documents and returning a stream of events
- - [ ] Support reading embedded DTD schemas
- - [ ] Support for embedded entities
- * [x] Support for namespaces and emitting namespace information in events
- * [ ] \[maybe\] push-based wrapper
- * Missing XML features
- - [ ] Support for different encodings
- - [ ] Attribute values normalization
- - [ ] EOL characters normalization
-
-Advanced features:
- * [ ] DTD schema validation
- * [ ] XSD schema validation
-
-# Writer
-
-Basic features:
- * [x] Writing basic XML 1.0 documents in UTF-8
- * [x] Writing XML 1.0 documents with namespace support
- * [x] Support for writing elements with empty body as empty elements
- * [x] Pretty-printed and compact output
- * [ ] Writing XML document with embedded DTDs and DTD references
- * Misc features:
- - [ ] Support for different encodings
- - [x] Support for writing CDATA as characters
- - [ ] Checking events for invalid characters (e.g. `--` in comments)
- - [ ] Check for namespaces more correctly, i.e. check both for prefix and namespace URI
- - [ ] Support checking namespace prefix presence in the current namespace for events with prefix but without namespace
- - [ ] Support checking namespace prefix for events with both prefix and namespace URI
-
-# Other
-
-DOM-based API:
- * [ ] Basic support for DOM-based API
diff --git a/patches/Android.bp.patch b/patches/Android.bp.patch
deleted file mode 100644
index 816c5db..0000000
--- a/patches/Android.bp.patch
+++ /dev/null
@@ -1,12 +0,0 @@
-diff --git a/Android.bp b/Android.bp
-index b9fe3bd..1144ed1 100644
---- a/Android.bp
-+++ b/Android.bp
-@@ -113,6 +113,7 @@ rust_test {
- defaults: ["xml-rs_test_defaults_xml_rs"],
- // has rustc warnings
- srcs: ["tests/event_writer.rs"],
-+ data: ["tests/documents/*"],
- }
-
- rust_test_host {
diff --git a/src/analyze.rs b/src/analyze.rs
deleted file mode 100644
index d369d2f..0000000
--- a/src/analyze.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-#![forbid(unsafe_code)]
-
-extern crate xml;
-
-use std::cmp;
-use std::env;
-use std::io::{self, Read, Write, BufReader};
-use std::fs::File;
-use std::collections::HashSet;
-
-use xml::ParserConfig;
-use xml::reader::XmlEvent;
-
-macro_rules! abort {
- ($code:expr) => {::std::process::exit($code)};
- ($code:expr, $($args:tt)+) => {{
- writeln!(&mut ::std::io::stderr(), $($args)+).unwrap();
- ::std::process::exit($code);
- }}
-}
-
-fn main() {
- let mut file;
- let mut stdin;
- let source: &mut Read = match env::args().nth(1) {
- Some(file_name) => {
- file = File::open(file_name)
- .unwrap_or_else(|e| abort!(1, "Cannot open input file: {}", e));
- &mut file
- }
- None => {
- stdin = io::stdin();
- &mut stdin
- }
- };
-
- let reader = ParserConfig::new()
- .whitespace_to_characters(true)
- .ignore_comments(false)
- .create_reader(BufReader::new(source));
-
- let mut processing_instructions = 0;
- let mut elements = 0;
- let mut character_blocks = 0;
- let mut cdata_blocks = 0;
- let mut characters = 0;
- let mut comment_blocks = 0;
- let mut comment_characters = 0;
- let mut namespaces = HashSet::new();
- let mut depth = 0;
- let mut max_depth = 0;
-
- for e in reader {
- match e {
- Ok(e) => match e {
- XmlEvent::StartDocument { version, encoding, standalone } =>
- println!(
- "XML document version {}, encoded in {}, {}standalone",
- version, encoding, if standalone.unwrap_or(false) { "" } else { "not " }
- ),
- XmlEvent::EndDocument => println!("Document finished"),
- XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
- XmlEvent::Whitespace(_) => {} // can't happen due to configuration
- XmlEvent::Characters(s) => {
- character_blocks += 1;
- characters += s.len();
- }
- XmlEvent::CData(s) => {
- cdata_blocks += 1;
- characters += s.len();
- }
- XmlEvent::Comment(s) => {
- comment_blocks += 1;
- comment_characters += s.len();
- }
- XmlEvent::StartElement { namespace, .. } => {
- depth += 1;
- max_depth = cmp::max(max_depth, depth);
- elements += 1;
- namespaces.extend(namespace.0.into_iter().map(|(_, ns_uri)| ns_uri));
- }
- XmlEvent::EndElement { .. } => {
- depth -= 1;
- }
- },
- Err(e) => abort!(1, "Error parsing XML document: {}", e)
- }
- }
- namespaces.remove(xml::namespace::NS_EMPTY_URI);
- namespaces.remove(xml::namespace::NS_XMLNS_URI);
- namespaces.remove(xml::namespace::NS_XML_URI);
-
- println!("Elements: {}, maximum depth: {}", elements, max_depth);
- println!("Namespaces (excluding built-in): {}", namespaces.len());
- println!("Characters: {}, characters blocks: {}, CDATA blocks: {}",
- characters, character_blocks, cdata_blocks);
- println!("Comment blocks: {}, comment characters: {}", comment_blocks, comment_characters);
- println!("Processing instructions (excluding built-in): {}", processing_instructions);
-}
diff --git a/src/attribute.rs b/src/attribute.rs
deleted file mode 100644
index 8728f49..0000000
--- a/src/attribute.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-//! Contains XML attributes manipulation types and functions.
-//!
-
-use std::fmt;
-
-use name::{Name, OwnedName};
-use escape::escape_str_attribute;
-
-/// A borrowed version of an XML attribute.
-///
-/// Consists of a borrowed qualified name and a borrowed string value.
-#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
-pub struct Attribute<'a> {
- /// Attribute name.
- pub name: Name<'a>,
-
- /// Attribute value.
- pub value: &'a str
-}
-
-impl<'a> fmt::Display for Attribute<'a> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}=\"{}\"", self.name, escape_str_attribute(self.value))
- }
-}
-
-impl<'a> Attribute<'a> {
- /// Creates an owned attribute out of this borrowed one.
- #[inline]
- pub fn to_owned(&self) -> OwnedAttribute {
- OwnedAttribute {
- name: self.name.into(),
- value: self.value.into(),
- }
- }
-
- /// Creates a borrowed attribute using the provided borrowed name and a borrowed string value.
- #[inline]
- pub fn new(name: Name<'a>, value: &'a str) -> Attribute<'a> {
- Attribute { name, value, }
- }
-}
-
-/// An owned version of an XML attribute.
-///
-/// Consists of an owned qualified name and an owned string value.
-#[derive(Clone, Eq, PartialEq, Hash, Debug)]
-pub struct OwnedAttribute {
- /// Attribute name.
- pub name: OwnedName,
-
- /// Attribute value.
- pub value: String
-}
-
-impl OwnedAttribute {
- /// Returns a borrowed `Attribute` out of this owned one.
- pub fn borrow(&self) -> Attribute {
- Attribute {
- name: self.name.borrow(),
- value: &*self.value,
- }
- }
-
- /// Creates a new owned attribute using the provided owned name and an owned string value.
- #[inline]
- pub fn new<S: Into<String>>(name: OwnedName, value: S) -> OwnedAttribute {
- OwnedAttribute {
- name,
- value: value.into(),
- }
- }
-}
-
-impl fmt::Display for OwnedAttribute {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}=\"{}\"", self.name, escape_str_attribute(&*self.value))
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::{Attribute};
-
- use name::Name;
-
- #[test]
- fn attribute_display() {
- let attr = Attribute::new(
- Name::qualified("attribute", "urn:namespace", Some("n")),
- "its value with > & \" ' < weird symbols"
- );
-
- assert_eq!(
- &*attr.to_string(),
- "{urn:namespace}n:attribute=\"its value with > & " ' < weird symbols\""
- )
- }
-}
diff --git a/src/common.rs b/src/common.rs
deleted file mode 100644
index 029e851..0000000
--- a/src/common.rs
+++ /dev/null
@@ -1,142 +0,0 @@
-//! Contains common types and functions used throughout the library.
-
-use std::fmt;
-
-/// Represents a position inside some textual document.
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct TextPosition {
- /// Row, counting from 0
- pub row: u64,
- /// Column, counting from 0
- pub column: u64,
-}
-
-impl TextPosition {
- /// Creates a new position initialized to the beginning of the document
- #[inline]
- pub fn new() -> TextPosition {
- TextPosition { row: 0, column: 0 }
- }
-
- /// Advances the position in a line
- #[inline]
- pub fn advance(&mut self, count: u8) {
- self.column += count as u64;
- }
-
- /// Advances the position in a line to the next tab position
- #[inline]
- pub fn advance_to_tab(&mut self, width: u8) {
- let width = width as u64;
- self.column += width - self.column % width
- }
-
- /// Advances the position to the beginning of the next line
- #[inline]
- pub fn new_line(&mut self) {
- self.column = 0;
- self.row += 1;
- }
-}
-
-impl fmt::Debug for TextPosition {
- #[inline]
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}:{}", self.row + 1, self.column + 1)
- }
-}
-
-impl fmt::Display for TextPosition {
- #[inline]
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}:{}", self.row + 1, self.column + 1)
- }
-}
-
-/// Get the position in the document corresponding to the object
-///
-/// This trait is implemented by parsers, lexers and errors.
-pub trait Position {
- /// Returns the current position or a position corresponding to the object.
- fn position(&self) -> TextPosition;
-}
-
-impl Position for TextPosition {
- #[inline]
- fn position(&self) -> TextPosition {
- *self
- }
-}
-
-/// XML version enumeration.
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub enum XmlVersion {
- /// XML version 1.0.
- Version10,
-
- /// XML version 1.1.
- Version11
-}
-
-impl fmt::Display for XmlVersion {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- XmlVersion::Version10 => write!(f, "1.0"),
- XmlVersion::Version11 => write!(f, "1.1")
- }
- }
-}
-
-impl fmt::Debug for XmlVersion {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- fmt::Display::fmt(self, f)
- }
-}
-
-/// Checks whether the given character is a white space character (`S`)
-/// as is defined by XML 1.1 specification, [section 2.3][1].
-///
-/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
-pub fn is_whitespace_char(c: char) -> bool {
- match c {
- '\x20' | '\x09' | '\x0d' | '\x0a' => true,
- _ => false
- }
-}
-
-/// Checks whether the given string is compound only by white space
-/// characters (`S`) using the previous is_whitespace_char to check
-/// all characters of this string
-pub fn is_whitespace_str(s: &str) -> bool {
- s.chars().all(is_whitespace_char)
-}
-
-/// Checks whether the given character is a name start character (`NameStartChar`)
-/// as is defined by XML 1.1 specification, [section 2.3][1].
-///
-/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
-pub fn is_name_start_char(c: char) -> bool {
- match c {
- ':' | 'A'...'Z' | '_' | 'a'...'z' |
- '\u{C0}'...'\u{D6}' | '\u{D8}'...'\u{F6}' | '\u{F8}'...'\u{2FF}' |
- '\u{370}'...'\u{37D}' | '\u{37F}'...'\u{1FFF}' |
- '\u{200C}'...'\u{200D}' | '\u{2070}'...'\u{218F}' |
- '\u{2C00}'...'\u{2FEF}' | '\u{3001}'...'\u{D7FF}' |
- '\u{F900}'...'\u{FDCF}' | '\u{FDF0}'...'\u{FFFD}' |
- '\u{10000}'...'\u{EFFFF}' => true,
- _ => false
- }
-}
-
-/// Checks whether the given character is a name character (`NameChar`)
-/// as is defined by XML 1.1 specification, [section 2.3][1].
-///
-/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
-pub fn is_name_char(c: char) -> bool {
- match c {
- _ if is_name_start_char(c) => true,
- '-' | '.' | '0'...'9' | '\u{B7}' |
- '\u{300}'...'\u{36F}' | '\u{203F}'...'\u{2040}' => true,
- _ => false
- }
-}
diff --git a/src/escape.rs b/src/escape.rs
deleted file mode 100644
index 18298b9..0000000
--- a/src/escape.rs
+++ /dev/null
@@ -1,126 +0,0 @@
-//! Contains functions for performing XML special characters escaping.
-
-use std::borrow::Cow;
-
-enum Value {
- Char(char),
- Str(&'static str)
-}
-
-impl Value {
- fn dispatch_for_attribute(c: char) -> Value {
- match c {
- '<' => Value::Str("<"),
- '>' => Value::Str(">"),
- '"' => Value::Str("""),
- '\'' => Value::Str("'"),
- '&' => Value::Str("&"),
- '\n' => Value::Str("
"),
- '\r' => Value::Str("
"),
- _ => Value::Char(c)
- }
- }
-
- fn dispatch_for_pcdata(c: char) -> Value {
- match c {
- '<' => Value::Str("<"),
- '&' => Value::Str("&"),
- _ => Value::Char(c)
- }
- }
-}
-
-enum Process<'a> {
- Borrowed(&'a str),
- Owned(String)
-}
-
-impl<'a> Process<'a> {
- fn process(&mut self, (i, next): (usize, Value)) {
- match next {
- Value::Str(s) => match *self {
- Process::Owned(ref mut o) => o.push_str(s),
- Process::Borrowed(b) => {
- let mut r = String::with_capacity(b.len() + s.len());
- r.push_str(&b[..i]);
- r.push_str(s);
- *self = Process::Owned(r);
- }
- },
- Value::Char(c) => match *self {
- Process::Borrowed(_) => {}
- Process::Owned(ref mut o) => o.push(c)
- }
- }
- }
-
- fn into_result(self) -> Cow<'a, str> {
- match self {
- Process::Borrowed(b) => Cow::Borrowed(b),
- Process::Owned(o) => Cow::Owned(o)
- }
- }
-}
-
-impl<'a> Extend<(usize, Value)> for Process<'a> {
- fn extend<I: IntoIterator<Item=(usize, Value)>>(&mut self, it: I) {
- for v in it.into_iter() {
- self.process(v);
- }
- }
-}
-
-fn escape_str(s: &str, dispatch: fn(char) -> Value) -> Cow<str> {
- let mut p = Process::Borrowed(s);
- p.extend(s.char_indices().map(|(ind, c)| (ind, dispatch(c))));
- p.into_result()
-}
-
-/// Performs escaping of common XML characters inside an attribute value.
-///
-/// This function replaces several important markup characters with their
-/// entity equivalents:
-///
-/// * `<` → `<`
-/// * `>` → `>`
-/// * `"` → `"`
-/// * `'` → `'`
-/// * `&` → `&`
-///
-/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
-///
-/// Does not perform allocations if the given string does not contain escapable characters.
-#[inline]
-pub fn escape_str_attribute(s: &str) -> Cow<str> {
- escape_str(s, Value::dispatch_for_attribute)
-}
-
-/// Performs escaping of common XML characters inside PCDATA.
-///
-/// This function replaces several important markup characters with their
-/// entity equivalents:
-///
-/// * `<` → `<`
-/// * `&` → `&`
-///
-/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
-///
-/// Does not perform allocations if the given string does not contain escapable characters.
-#[inline]
-pub fn escape_str_pcdata(s: &str) -> Cow<str> {
- escape_str(s, Value::dispatch_for_pcdata)
-}
-
-#[cfg(test)]
-mod tests {
- use super::{escape_str_pcdata, escape_str_attribute};
-
- // TODO: add more tests
-
- #[test]
- fn test_escape_multibyte_code_points() {
- assert_eq!(escape_str_attribute("☃<"), "☃<");
- assert_eq!(escape_str_pcdata("☃<"), "☃<");
- }
-}
-
diff --git a/src/lib.rs b/src/lib.rs
deleted file mode 100644
index fb672ef..0000000
--- a/src/lib.rs
+++ /dev/null
@@ -1,29 +0,0 @@
-//#![warn(missing_doc)]
-#![allow(dead_code)]
-#![allow(unused_variables)]
-#![forbid(non_camel_case_types)]
-#![forbid(unsafe_code)]
-
-//! This crate currently provides an almost XML 1.0/1.1-compliant pull parser.
-
-#[cfg(doctest)]
-#[macro_use]
-extern crate doc_comment;
-
-#[cfg(doctest)]
-doctest!("../Readme.md");
-
-pub use reader::EventReader;
-pub use reader::ParserConfig;
-pub use writer::EventWriter;
-pub use writer::EmitterConfig;
-
-pub mod macros;
-pub mod name;
-pub mod attribute;
-pub mod common;
-pub mod escape;
-pub mod namespace;
-pub mod reader;
-pub mod writer;
-mod util;
diff --git a/src/macros.rs b/src/macros.rs
deleted file mode 100644
index 1cce3d6..0000000
--- a/src/macros.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-#![macro_use]
-
-//! Contains several macros used in this crate.
-
-// Generates one builder-style setter method on `$target` for the field `$field`.
-//
-// Two forms are accepted, selected by the keyword after the colon:
-// * `into` --- the setter is generic over `T: Into<$t>` and stores `value.into()`;
-// * `val`  --- the setter takes a `$t` directly and stores it unchanged.
-//
-// In both forms the generated method consumes `self` and returns the updated value.
-macro_rules! gen_setter {
- // `field: into Type` form: accept anything convertible into the field type.
- ($target:ty, $field:ident : into $t:ty) => {
- impl $target {
- /// Sets the field to the provided value and returns updated config object.
- pub fn $field<T: Into<$t>>(mut self, value: T) -> $target {
- self.$field = value.into();
- self
- }
- }
- };
- // `field: val Type` form: accept exactly the field type.
- ($target:ty, $field:ident : val $t:ty) => {
- impl $target {
- /// Sets the field to the provided value and returns updated config object.
- pub fn $field(mut self, value: $t) -> $target {
- self.$field = value;
- self
- }
- }
- }
-}
-
-// Generates setters for a comma-separated list of `field: kind Type` entries
-// by expanding to one `gen_setter!` invocation per entry. `kind` is forwarded
-// as-is, so it must be one of the keywords `gen_setter!` understands.
-macro_rules! gen_setters {
- ($target:ty, $($field:ident : $k:tt $tpe:ty),+) => ($(
- gen_setter! { $target, $field : $k $tpe }
- )+)
-}
diff --git a/src/name.rs b/src/name.rs
deleted file mode 100644
index a20eae2..0000000
--- a/src/name.rs
+++ /dev/null
@@ -1,301 +0,0 @@
-//! Contains XML qualified names manipulation types and functions.
-//!
-
-use std::fmt;
-use std::str::FromStr;
-
-use namespace::NS_NO_PREFIX;
-
-/// Represents a qualified XML name.
-///
-/// A qualified name always consists at least of a local name. It can optionally contain
-/// a prefix; when reading an XML document, if it contains a prefix, it must also contain a
-/// namespace URI, but this is not enforced statically; see below. The name can contain a
-/// namespace without a prefix; in that case a default, empty prefix is assumed.
-///
-/// When writing XML documents, it is possible to omit the namespace URI, leaving only
-/// the prefix. In this case the writer will check that the specified prefix is bound to some
-/// URI in the current namespace context. If both prefix and namespace URI are specified,
-/// it is checked that the current namespace context contains this exact correspondence
-/// between prefix and namespace URI.
-///
-/// # Prefixes and URIs
-///
-/// A qualified name with a prefix must always contain a proper namespace URI --- names with
-/// a prefix but without a namespace associated with that prefix are meaningless. However,
-/// it is impossible to obtain proper namespace URI by a prefix without a context, and such
-/// context is only available when parsing a document (or it can be constructed manually
-/// when writing a document). Tying a name to a context statically seems impractical. This
-/// may change in future, though.
-///
-/// # Conversions
-///
-/// `Name` implements some `From` instances for conversion from strings and tuples. For example:
-///
-/// ```rust
-/// # use xml::name::Name;
-/// let n1: Name = "p:some-name".into();
-/// let n2: Name = ("p", "some-name").into();
-///
-/// assert_eq!(n1, n2);
-/// assert_eq!(n1.local_name, "some-name");
-/// assert_eq!(n1.prefix, Some("p"));
-/// assert!(n1.namespace.is_none());
-/// ```
-///
-/// This is added to support easy specification of XML elements when writing XML documents.
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
-pub struct Name<'a> {
- /// A local name, e.g. `string` in `xsi:string`.
- pub local_name: &'a str,
-
- /// A namespace URI, e.g. `http://www.w3.org/2000/xmlns/`.
- pub namespace: Option<&'a str>,
-
- /// A name prefix, e.g. `xsi` in `xsi:string`.
- pub prefix: Option<&'a str>
-}
-
-/// Builds a `Name` from `"local"` or `"prefix:local"` text.
-impl<'a> From<&'a str> for Name<'a> {
-    /// Splits `s` at the first `:`; the part before it becomes the prefix and
-    /// everything after it the local name. Without a `:` the whole string is
-    /// the local name. No namespace is set in either case.
-    fn from(s: &'a str) -> Name<'a> {
-        match s.find(':') {
-            // ':' is a single byte, so `colon + 1` is always a valid boundary.
-            Some(colon) => Name::prefixed(&s[colon + 1..], &s[..colon]),
-            None => Name::local(s),
-        }
-    }
-}
-
-/// Builds a prefixed `Name` from a `(prefix, local_name)` pair.
-impl<'a> From<(&'a str, &'a str)> for Name<'a> {
-    fn from(pair: (&'a str, &'a str)) -> Name<'a> {
-        let (prefix, local_name) = pair;
-        Name::prefixed(local_name, prefix)
-    }
-}
-
-/// Prints the name as `{namespace}prefix:local_name`, omitting the parts that are absent.
-impl<'a> fmt::Display for Name<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let Name { local_name, namespace, prefix } = *self;
-
-        // Namespace URI, wrapped in literal braces.
-        match namespace {
-            Some(uri) => write!(f, "{{{}}}", uri)?,
-            None => {}
-        }
-
-        // Prefix followed by the separating colon.
-        match prefix {
-            Some(p) => write!(f, "{}:", p)?,
-            None => {}
-        }
-
-        write!(f, "{}", local_name)
-    }
-}
-
-impl<'a> Name<'a> {
-    /// Copies all parts of this name into a new `OwnedName`.
-    pub fn to_owned(&self) -> OwnedName {
-        OwnedName {
-            local_name: String::from(self.local_name),
-            namespace: self.namespace.map(String::from),
-            prefix: self.prefix.map(String::from),
-        }
-    }
-
-    /// Creates a `Name` consisting of a local name only (no prefix, no namespace).
-    #[inline]
-    pub fn local(local_name: &str) -> Name {
-        Name {
-            local_name: local_name,
-            namespace: None,
-            prefix: None,
-        }
-    }
-
-    /// Creates a `Name` with the given local name and prefix but without a namespace.
-    #[inline]
-    pub fn prefixed(local_name: &'a str, prefix: &'a str) -> Name<'a> {
-        Name {
-            local_name: local_name,
-            prefix: Some(prefix),
-            namespace: None,
-        }
-    }
-
-    /// Creates a `Name` with a local name, a namespace URI and an optional prefix.
-    #[inline]
-    pub fn qualified(local_name: &'a str, namespace: &'a str, prefix: Option<&'a str>) -> Name<'a> {
-        Name {
-            local_name: local_name,
-            prefix: prefix,
-            namespace: Some(namespace),
-        }
-    }
-
-    /// Returns the XML representation of this name: `prefix:local_name`, or just
-    /// `local_name` when there is no prefix.
-    ///
-    /// Unlike the `Display`-derived `to_string()`, the namespace URI is not part
-    /// of the result.
-    pub fn to_repr(&self) -> String {
-        format!("{}", self.repr_display())
-    }
-
-    /// Returns a wrapper whose `Display` implementation prints the prefix and the
-    /// local name.
-    ///
-    /// Use it with the `std::fmt` machinery when an intermediate `String` is
-    /// not wanted.
-    #[inline]
-    pub fn repr_display(&self) -> ReprDisplay {
-        ReprDisplay(self)
-    }
-
-    /// Returns the prefix of this name, or `namespace::NS_NO_PREFIX` if it has none.
-    #[inline]
-    pub fn prefix_repr(&self) -> &str {
-        match self.prefix {
-            Some(prefix) => prefix,
-            None => NS_NO_PREFIX,
-        }
-    }
-}
-
-/// Borrowed view of a `Name` that displays it the way it is written in an XML
-/// document: `prefix:local_name`, or `local_name` alone when no prefix is set.
-pub struct ReprDisplay<'a, 'b:'a>(&'a Name<'b>);
-
-impl<'a, 'b:'a> fmt::Display for ReprDisplay<'a, 'b> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let name = self.0;
-        if let Some(prefix) = name.prefix {
-            write!(f, "{}:{}", prefix, name.local_name)
-        } else {
-            write!(f, "{}", name.local_name)
-        }
-    }
-}
-
-/// An owned variant of `Name`.
-///
-/// Everything about `Name` applies to this structure as well. The fields mirror
-/// those of `Name`, holding `String`s instead of borrowed string slices.
-#[derive(Clone, PartialEq, Eq, Hash, Debug)]
-pub struct OwnedName {
- /// A local name, e.g. `string` in `xsi:string`.
- pub local_name: String,
-
- /// A namespace URI, e.g. `http://www.w3.org/2000/xmlns/`.
- pub namespace: Option<String>,
-
- /// A name prefix, e.g. `xsi` in `xsi:string`.
- pub prefix: Option<String>,
-}
-
-/// Displays an `OwnedName` exactly like the borrowed `Name` built from it.
-impl fmt::Display for OwnedName {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let borrowed = self.borrow();
-        fmt::Display::fmt(&borrowed, f)
-    }
-}
-
-impl OwnedName {
-    /// Returns a `Name` whose parts borrow from this owned name.
-    pub fn borrow(&self) -> Name {
-        Name {
-            local_name: self.local_name.as_str(),
-            namespace: self.namespace_ref(),
-            prefix: self.prefix_ref(),
-        }
-    }
-
-    /// Creates an `OwnedName` consisting of a local name only.
-    #[inline]
-    pub fn local<S>(local_name: S) -> OwnedName where S: Into<String> {
-        let local_name: String = local_name.into();
-        OwnedName {
-            local_name: local_name,
-            prefix: None,
-            namespace: None,
-        }
-    }
-
-    /// Creates an `OwnedName` with a local name, a namespace URI and an
-    /// optional prefix.
-    #[inline]
-    pub fn qualified<S1, S2, S3>(local_name: S1, namespace: S2, prefix: Option<S3>) -> OwnedName
-        where S1: Into<String>, S2: Into<String>, S3: Into<String>
-    {
-        OwnedName {
-            local_name: local_name.into(),
-            namespace: Some(namespace.into()),
-            prefix: prefix.map(Into::into),
-        }
-    }
-
-    /// Returns the prefix as a string slice, if any; equivalent to
-    /// `self.borrow().prefix` without building a whole `Name`.
-    #[inline]
-    pub fn prefix_ref(&self) -> Option<&str> {
-        match self.prefix {
-            Some(ref prefix) => Some(prefix.as_str()),
-            None => None,
-        }
-    }
-
-    /// Returns the namespace URI as a string slice, if any; equivalent to
-    /// `self.borrow().namespace` without building a whole `Name`.
-    #[inline]
-    pub fn namespace_ref(&self) -> Option<&str> {
-        match self.namespace {
-            Some(ref namespace) => Some(namespace.as_str()),
-            None => None,
-        }
-    }
-}
-
-/// Converts a borrowed `Name` into an `OwnedName` by copying its parts.
-impl<'a> From<Name<'a>> for OwnedName {
-    #[inline]
-    fn from(n: Name<'a>) -> OwnedName {
-        // Explicit path: this is the inherent `Name::to_owned`, which returns `OwnedName`.
-        Name::to_owned(&n)
-    }
-}
-
-impl FromStr for OwnedName {
-    type Err = ();
-
-    /// Parses `local` or `prefix:local` into a qualified name.
-    ///
-    /// On success the returned name never carries a namespace
-    /// (`name.namespace == None`); it is expected to be filled in later from a
-    /// proper `NamespaceStack`.
-    ///
-    /// The characters of the input are assumed to be valid per the XML
-    /// specification. The only checks made are that there is at most one `:`
-    /// and that no part is empty; any violation yields `Err(())`.
-    fn from_str(s: &str) -> Result<OwnedName, ()> {
-        let mut segments = s.split(':');
-
-        let (prefix, local_name) = match (segments.next(), segments.next()) {
-            (Some(only), None) => (None, only),
-            (Some(prefix), Some(local)) => (Some(prefix), local),
-            (None, _) => return Err(()),
-        };
-
-        // A third segment means the input contained more than one ':'.
-        if segments.next().is_some() {
-            return Err(());
-        }
-
-        if local_name.is_empty() || prefix.map_or(false, str::is_empty) {
-            return Err(());
-        }
-
-        Ok(OwnedName {
-            local_name: String::from(local_name),
-            namespace: None,
-            prefix: prefix.map(String::from),
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::OwnedName;
-
-    /// Parses `text` as an `OwnedName`, pinning down the result type for `assert_eq!`.
-    fn parsed(text: &str) -> Result<OwnedName, ()> {
-        text.parse()
-    }
-
-    #[test]
-    fn test_owned_name_from_str() {
-        // Well-formed inputs: with and without a prefix.
-        let with_prefix = OwnedName {
-            local_name: "name".into(),
-            namespace: None,
-            prefix: Some("prefix".into())
-        };
-        assert_eq!(parsed("prefix:name"), Ok(with_prefix));
-
-        let without_prefix = OwnedName {
-            local_name: "name".into(),
-            namespace: None,
-            prefix: None
-        };
-        assert_eq!(parsed("name"), Ok(without_prefix));
-
-        // Empty parts and more than one colon are rejected.
-        for malformed in &["", ":", ":a", "a:", "a:b:c"] {
-            assert_eq!(parsed(malformed), Err::<OwnedName, ()>(()));
-        }
-    }
-}
diff --git a/src/namespace.rs b/src/namespace.rs
deleted file mode 100644
index 1ab4a5c..0000000
--- a/src/namespace.rs
+++ /dev/null
@@ -1,485 +0,0 @@
-//! Contains namespace manipulation types and functions.
-
-use std::iter::{Map, Rev};
-use std::collections::btree_map::{BTreeMap, Entry};
-use std::collections::btree_map::Iter as Entries;
-use std::collections::HashSet;
-use std::slice::Iter;
-
-/// Designates prefix for namespace definitions.
-///
-/// See [Namespaces in XML][namespace] spec for more information.
-///
-/// [namespace]: http://www.w3.org/TR/xml-names/#ns-decl
-pub const NS_XMLNS_PREFIX: &'static str = "xmlns";
-
-/// Designates the standard URI for `xmlns` prefix.
-///
-/// See [A Namespace Name for xmlns Attributes][1] for more information.
-///
-/// [1]: http://www.w3.org/2000/xmlns/
-pub const NS_XMLNS_URI: &'static str = "http://www.w3.org/2000/xmlns/";
-
-/// Designates prefix for a namespace containing several special predefined attributes.
-///
-/// See [2.10 White Space handling][1], [2.1 Language Identification][2],
-/// [XML Base specification][3] and [xml:id specification][4] for more information.
-///
-/// [1]: http://www.w3.org/TR/REC-xml/#sec-white-space
-/// [2]: http://www.w3.org/TR/REC-xml/#sec-lang-tag
-/// [3]: http://www.w3.org/TR/xmlbase/
-/// [4]: http://www.w3.org/TR/xml-id/
-pub const NS_XML_PREFIX: &'static str = "xml";
-
-/// Designates the standard URI for `xml` prefix.
-///
-/// See `NS_XML_PREFIX` documentation for more information.
-pub const NS_XML_URI: &'static str = "http://www.w3.org/XML/1998/namespace";
-
-/// Designates the absence of prefix in a qualified name.
-///
-/// This constant should be used to define or query default namespace which should be used
-/// for element or attribute names without prefix. For example, if a namespace mapping
-/// at a particular point in the document contains correspondence like
-///
-/// ```none
-/// NS_NO_PREFIX --> urn:some:namespace
-/// ```
-///
-/// then for all names declared without an explicit prefix, `urn:some:namespace` is assumed
-/// as the namespace URI.
-///
-/// By default empty prefix corresponds to absence of namespace, but this can change either
-/// when writing an XML document (manually) or when reading an XML document (based on namespace
-/// declarations).
-pub const NS_NO_PREFIX: &'static str = "";
-
-/// Designates an empty namespace URI, which is equivalent to absence of namespace.
-///
-/// This constant should not usually be used directly; it is used to designate that
-/// empty prefix corresponds to absent namespace in `NamespaceStack` instances created with
-/// `NamespaceStack::default()`. Therefore, it can be used to restore `NS_NO_PREFIX` mapping
-/// in a namespace back to its default value.
-pub const NS_EMPTY_URI: &'static str = "";
-
-/// A single namespace: a map from prefixes to namespace URIs.
-///
-/// The default namespace (no prefix) is stored under the `NS_NO_PREFIX` key.
-#[derive(PartialEq, Eq, Clone, Debug)]
-pub struct Namespace(pub BTreeMap<String, String>);
-
-impl Namespace {
-    /// Creates a namespace without any mappings.
-    #[inline]
-    pub fn empty() -> Namespace {
-        Namespace(BTreeMap::new())
-    }
-
-    /// Returns `true` if this namespace holds no mappings at all.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.0.is_empty()
-    }
-
-    /// Returns `true` if this namespace holds nothing beyond the default
-    /// mappings (empty prefix, `xmlns` and `xml` bound to their standard URIs).
-    pub fn is_essentially_empty(&self) -> bool {
-        // More than three entries cannot all be default mappings.
-        self.0.len() <= 3 && self.0.iter().all(|(prefix, uri)| {
-            let mapping = (prefix.as_str(), uri.as_str());
-            mapping == (NS_NO_PREFIX, NS_EMPTY_URI)
-                || mapping == (NS_XMLNS_PREFIX, NS_XMLNS_URI)
-                || mapping == (NS_XML_PREFIX, NS_XML_URI)
-        })
-    }
-
-    /// Tells whether a mapping for `prefix` exists in this namespace.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix.
-    ///
-    /// # Return value
-    /// `true` if the prefix is mapped, `false` otherwise.
-    #[inline]
-    pub fn contains<P: ?Sized+AsRef<str>>(&self, prefix: &P) -> bool {
-        let key: &str = prefix.as_ref();
-        self.0.contains_key(key)
-    }
-
-    /// Adds a mapping unless the prefix is already mapped.
-    ///
-    /// An existing mapping for `prefix` is left untouched.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix;
-    /// * `uri` --- namespace URI.
-    ///
-    /// # Return value
-    /// `true` if the mapping has been inserted; `false` if `prefix` was
-    /// already present in the namespace.
-    pub fn put<P, U>(&mut self, prefix: P, uri: U) -> bool
-        where P: Into<String>, U: Into<String>
-    {
-        let prefix: String = prefix.into();
-        if self.0.contains_key(&prefix) {
-            return false;
-        }
-        self.0.insert(prefix, uri.into());
-        true
-    }
-
-    /// Adds a mapping, replacing any existing mapping for the same prefix.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix;
-    /// * `uri` --- namespace URI.
-    ///
-    /// # Return value
-    /// `Some(uri)` holding the URI previously assigned to `prefix`, or `None`
-    /// if the prefix was not mapped before.
-    pub fn force_put<P, U>(&mut self, prefix: P, uri: U) -> Option<String>
-        where P: Into<String>, U: Into<String>
-    {
-        let key: String = prefix.into();
-        let value: String = uri.into();
-        self.0.insert(key, value)
-    }
-
-    /// Looks up the namespace URI mapped to `prefix`.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix.
-    ///
-    /// # Return value
-    /// The namespace URI for the prefix, if it is mapped.
-    pub fn get<'a, P: ?Sized+AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
-        self.0.get(prefix.as_ref()).map(String::as_str)
-    }
-}
-
-/// The iterator type yielding the `(prefix, uri)` mappings of a single namespace.
-pub type NamespaceMappings<'a> = Map<
-    Entries<'a, String, String>,
-    for<'b> fn((&'b String, &'b String)) -> UriMapping<'b>
->;
-
-impl<'a> IntoIterator for &'a Namespace {
-    type Item = UriMapping<'a>;
-    type IntoIter = NamespaceMappings<'a>;
-
-    /// Iterates over the underlying map, exposing each entry as a pair of string slices.
-    fn into_iter(self) -> Self::IntoIter {
-        // A plain `fn` (not a closure) so that it coerces to the function
-        // pointer type named in `NamespaceMappings`.
-        fn as_uri_mapping<'m>(entry: (&'m String, &'m String)) -> UriMapping<'m> {
-            (entry.0.as_str(), entry.1.as_str())
-        }
-        self.0.iter().map(as_uri_mapping)
-    }
-}
-
-/// Namespace stack is a sequence of namespaces.
-///
-/// It represents the cumulative namespace obtained by layering the namespaces
-/// of nested elements on top of each other.
-#[derive(Clone, Eq, PartialEq, Debug)]
-pub struct NamespaceStack(pub Vec<Namespace>);
-
-impl NamespaceStack {
-    /// Creates a stack which contains no namespaces.
-    #[inline]
-    pub fn empty() -> NamespaceStack {
-        NamespaceStack(Vec::with_capacity(2))
-    }
-
-    /// Creates a stack with a single namespace holding the default mappings:
-    ///
-    /// * `xml` → `http://www.w3.org/XML/1998/namespace`;
-    /// * `xmlns` → `http://www.w3.org/2000/xmlns/`;
-    /// * the empty prefix (`NS_NO_PREFIX`) → the empty URI (`NS_EMPTY_URI`).
-    #[inline]
-    pub fn default() -> NamespaceStack {
-        let defaults = [
-            (NS_XML_PREFIX, NS_XML_URI),
-            (NS_XMLNS_PREFIX, NS_XMLNS_URI),
-            (NS_NO_PREFIX, NS_EMPTY_URI),
-        ];
-
-        let mut stack = NamespaceStack::empty();
-        stack.push_empty();
-        for &(prefix, uri) in defaults.iter() {
-            stack.put(prefix, uri);
-        }
-        stack
-    }
-
-    /// Pushes a new, empty namespace onto the stack and returns the stack for chaining.
-    #[inline]
-    pub fn push_empty(&mut self) -> &mut NamespaceStack {
-        let fresh = Namespace::empty();
-        self.0.push(fresh);
-        self
-    }
-
-    /// Removes and returns the topmost namespace.
-    ///
-    /// Panics if the stack is empty.
-    #[inline]
-    pub fn pop(&mut self) -> Namespace {
-        self.try_pop().unwrap()
-    }
-
-    /// Removes and returns the topmost namespace, or returns `None` if the
-    /// stack is empty.
-    #[inline]
-    pub fn try_pop(&mut self) -> Option<Namespace> {
-        self.0.pop()
-    }
-
-    /// Gives mutable access to the topmost namespace without removing it.
-    ///
-    /// Panics if the stack is empty.
-    #[inline]
-    pub fn peek_mut(&mut self) -> &mut Namespace {
-        self.0.last_mut().unwrap()
-    }
-
-    /// Gives shared access to the topmost namespace without removing it.
-    ///
-    /// Panics if the stack is empty.
-    #[inline]
-    pub fn peek(&self) -> &Namespace {
-        self.0.last().unwrap()
-    }
-
-    /// Puts a mapping into the topmost namespace unless some namespace in the
-    /// stack already contains exactly this `prefix` → `uri` pair.
-    ///
-    /// Both the prefix and the URI are compared, so `put()` is attempted when
-    /// the prefix is unmapped as well as when it is mapped to a different URI.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix;
-    /// * `uri` --- namespace URI.
-    ///
-    /// # Return value
-    /// `false` if the exact mapping was already present somewhere in the
-    /// stack; `true` otherwise (after `put()` has been called).
-    pub fn put_checked<P, U>(&mut self, prefix: P, uri: U) -> bool
-        where P: Into<String> + AsRef<str>,
-              U: Into<String> + AsRef<str>
-    {
-        let already_mapped = {
-            let wanted_prefix: &str = prefix.as_ref();
-            let wanted_uri: &str = uri.as_ref();
-            self.0.iter().any(|ns| ns.get(wanted_prefix) == Some(wanted_uri))
-        };
-        if already_mapped {
-            return false;
-        }
-        self.put(prefix, uri);
-        true
-    }
-
-    /// Puts a mapping into the topmost namespace of this stack.
-    ///
-    /// A mapping already present in the topmost namespace is not overridden.
-    /// Lower namespaces are not consulted, so a prefix mapped further down
-    /// the stack can still be put here.
-    ///
-    /// Panics if the stack is empty.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix;
-    /// * `uri` --- namespace URI.
-    ///
-    /// # Return value
-    /// `true` if the mapping has been inserted; `false` if `prefix` was
-    /// already present in the topmost namespace.
-    #[inline]
-    pub fn put<P, U>(&mut self, prefix: P, uri: U) -> bool
-        where P: Into<String>, U: Into<String>
-    {
-        self.peek_mut().put(prefix, uri)
-    }
-
-    /// Searches the whole stack for `prefix`.
-    ///
-    /// Namespaces are queried from the top of the stack to the bottom and the
-    /// first URI found is returned; `None` means no namespace maps the prefix.
-    ///
-    /// # Parameters
-    /// * `prefix` --- namespace prefix.
-    #[inline]
-    pub fn get<'a, P: ?Sized+AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
-        let wanted: &str = prefix.as_ref();
-        self.0.iter().rev().filter_map(|ns| ns.get(wanted)).next()
-    }
-
-    /// Flattens this stack into a single namespace.
-    ///
-    /// Namespaces are merged from the bottom of the stack upwards, so a mapping
-    /// in a later (higher) namespace replaces one for the same prefix below it.
-    pub fn squash(&self) -> Namespace {
-        let mut merged = BTreeMap::new();
-        for (prefix, uri) in self.0.iter().flat_map(|ns| ns.0.iter()) {
-            merged.insert(prefix.clone(), uri.clone());
-        }
-        Namespace(merged)
-    }
-
-    /// Returns a wrapper whose `Extend` implementation uses `put_checked()`
-    /// rather than `put()`.
-    ///
-    /// See `CheckedTarget` for more information.
-    #[inline]
-    pub fn checked_target(&mut self) -> CheckedTarget {
-        CheckedTarget(self)
-    }
-
-    /// Returns an iterator over all mappings in this namespace stack.
-    #[inline]
-    pub fn iter(&self) -> NamespaceStackMappings {
-        IntoIterator::into_iter(self)
-    }
-}
-
-/// An iterator over mappings from prefixes to URIs in a namespace stack.
-///
-/// Namespaces are visited from the top of the stack downwards, and a prefix
-/// that has already been yielded is skipped when it shows up again lower down.
-///
-/// # Example
-/// ```
-/// # use xml::namespace::NamespaceStack;
-/// let mut nst = NamespaceStack::empty();
-/// nst.push_empty();
-/// nst.put("a", "urn:A");
-/// nst.put("b", "urn:B");
-/// nst.push_empty();
-/// nst.put("c", "urn:C");
-///
-/// assert_eq!(vec![("c", "urn:C"), ("a", "urn:A"), ("b", "urn:B")], nst.iter().collect::<Vec<_>>());
-/// ```
-pub struct NamespaceStackMappings<'a> {
-    // Namespaces not yet visited, topmost first.
-    namespaces: Rev<Iter<'a, Namespace>>,
-    // Mappings of the namespace currently being drained, if any.
-    current_namespace: Option<NamespaceMappings<'a>>,
-    // Prefixes that have already been yielded.
-    used_keys: HashSet<&'a str>
-}
-
-impl<'a> NamespaceStackMappings<'a> {
-    /// Advances to the next namespace in the stack.
-    ///
-    /// Returns `true` if there was one; otherwise `current_namespace` is left
-    /// as `None` and `false` is returned.
-    fn go_to_next_namespace(&mut self) -> bool {
-        self.current_namespace = match self.namespaces.next() {
-            Some(ns) => Some(ns.into_iter()),
-            None => None,
-        };
-        self.current_namespace.is_some()
-    }
-}
-
-impl<'a> Iterator for NamespaceStackMappings<'a> {
-    type Item = UriMapping<'a>;
-
-    fn next(&mut self) -> Option<UriMapping<'a>> {
-        loop {
-            // Nothing being drained and nothing left to visit: finished.
-            if self.current_namespace.is_none() && !self.go_to_next_namespace() {
-                return None;
-            }
-
-            let candidate = self.current_namespace.as_mut().unwrap().next();
-            match candidate {
-                Some((prefix, uri)) => {
-                    // `insert` reports whether the prefix is new; a prefix seen
-                    // before is shadowed by a higher namespace and is skipped.
-                    if self.used_keys.insert(prefix) {
-                        return Some((prefix, uri));
-                    }
-                }
-                None => {
-                    // Current namespace is exhausted; move on or stop.
-                    if !self.go_to_next_namespace() {
-                        return None;
-                    }
-                }
-            }
-        }
-    }
-}
-
-impl<'a> IntoIterator for &'a NamespaceStack {
-    type Item = UriMapping<'a>;
-    type IntoIter = NamespaceStackMappings<'a>;
-
-    /// Creates an iterator which starts at the top of the stack and has not
-    /// yielded any prefix yet.
-    fn into_iter(self) -> Self::IntoIter {
-        let top_to_bottom = self.0.iter().rev();
-        let seen_prefixes = HashSet::new();
-        NamespaceStackMappings {
-            namespaces: top_to_bottom,
-            used_keys: seen_prefixes,
-            current_namespace: None,
-        }
-    }
-}
-
-/// A type alias for a pair of `(prefix, uri)` values returned by namespace iterators.
-///
-/// Both components are string slices sharing the lifetime `'a`.
-pub type UriMapping<'a> = (&'a str, &'a str);
-
-/// Adds every `(prefix, uri)` pair through `Namespace::put`, so prefixes that
-/// are already mapped keep their existing URI.
-impl<'a> Extend<UriMapping<'a>> for Namespace {
-    fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'a>> {
-        iterable.into_iter().for_each(|(prefix, uri)| {
-            self.put(prefix, uri);
-        });
-    }
-}
-
-/// Adds every `(prefix, uri)` pair through `NamespaceStack::put`, i.e. into
-/// the topmost namespace of the stack.
-impl<'a> Extend<UriMapping<'a>> for NamespaceStack {
-    fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'a>> {
-        iterable.into_iter().for_each(|(prefix, uri)| {
-            self.put(prefix, uri);
-        });
-    }
-}
-
-/// A mutable view of a `NamespaceStack` whose `Extend` implementation goes
-/// through `put_checked()` instead of `put()`.
-///
-/// # Example
-///
-/// ```
-/// # use xml::namespace::NamespaceStack;
-///
-/// let mut nst = NamespaceStack::empty();
-/// nst.push_empty();
-/// nst.put("a", "urn:A");
-/// nst.put("b", "urn:B");
-/// nst.push_empty();
-/// nst.put("c", "urn:C");
-///
-/// nst.checked_target().extend(vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:Y"), ("d", "urn:D")]);
-/// assert_eq!(
-/// vec![("a", "urn:Z"), ("c", "urn:C"), ("d", "urn:D"), ("b", "urn:B")],
-/// nst.iter().collect::<Vec<_>>()
-/// );
-/// ```
-///
-/// Compare:
-///
-/// ```
-/// # use xml::namespace::NamespaceStack;
-/// # let mut nst = NamespaceStack::empty();
-/// # nst.push_empty();
-/// # nst.put("a", "urn:A");
-/// # nst.put("b", "urn:B");
-/// # nst.push_empty();
-/// # nst.put("c", "urn:C");
-///
-/// nst.extend(vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:Y"), ("d", "urn:D")]);
-/// assert_eq!(
-/// vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:C"), ("d", "urn:D")],
-/// nst.iter().collect::<Vec<_>>()
-/// );
-/// ```
-pub struct CheckedTarget<'a>(&'a mut NamespaceStack);
-
-impl<'a, 'b> Extend<UriMapping<'b>> for CheckedTarget<'a> {
-    /// Offers every `(prefix, uri)` pair to `NamespaceStack::put_checked` on
-    /// the wrapped stack.
-    fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'b>> {
-        let stack = &mut *self.0;
-        for mapping in iterable {
-            stack.put_checked(mapping.0, mapping.1);
-        }
-    }
-}
diff --git a/src/reader/config.rs b/src/reader/config.rs
deleted file mode 100644
index 5b4cd32..0000000
--- a/src/reader/config.rs
+++ /dev/null
@@ -1,181 +0,0 @@
-//! Contains parser configuration structure.
-use std::io::Read;
-use std::collections::HashMap;
-
-use reader::EventReader;
-
-/// Parser configuration structure.
-///
-/// This structure contains various configuration options which affect
-/// behavior of the parser.
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct ParserConfig {
- /// Whether or not should whitespace in textual events be removed. Default is false.
- ///
- /// When true, all standalone whitespace will be removed (this means no
- /// `Whitespace` events will be emitted), and leading and trailing whitespace
- /// from `Character` events will be deleted. If after trimming `Characters`
- /// event will be empty, it will also be omitted from output stream. This is
- /// possible, however, only if `whitespace_to_characters` or
- /// `cdata_to_characters` options are set.
- ///
- /// This option does not affect CDATA events, unless `cdata_to_characters`
- /// option is also set. In that case CDATA content will also be trimmed.
- pub trim_whitespace: bool,
-
- /// Whether or not should whitespace be converted to characters.
- /// Default is false.
- ///
- /// If true, instead of `Whitespace` events `Characters` events with the
- /// same content will be emitted. If `trim_whitespace` is also true, these
- /// events will be trimmed to nothing and, consequently, not emitted.
- pub whitespace_to_characters: bool,
-
- /// Whether or not should CDATA be converted to characters.
- /// Default is false.
- ///
- /// If true, instead of `CData` events `Characters` events with the same
- /// content will be emitted. If `trim_whitespace` is also true, these events
- /// will be trimmed. If corresponding CDATA contained nothing but whitespace,
- /// this event will be omitted from the stream.
- pub cdata_to_characters: bool,
-
- /// Whether or not should comments be omitted. Default is true.
- ///
- /// If true, `Comment` events will not be emitted at all.
- pub ignore_comments: bool,
-
- /// Whether or not should sequential `Characters` events be merged.
- /// Default is true.
- ///
- /// If true, multiple sequential `Characters` events will be merged into
- /// a single event, that is, their data will be concatenated.
- ///
- /// Multiple sequential `Characters` events are only possible if either
- /// `cdata_to_characters` or `ignore_comments` are set. Otherwise character
- /// events will always be separated by other events.
- pub coalesce_characters: bool,
-
- /// A map of extra entities recognized by the parser. Default is an empty map.
- ///
- /// By default the XML parser recognizes the entities defined in the XML spec. Sometimes,
- /// however, it is convenient to make the parser recognize additional entities which
- /// are also not available through the DTD definitions (especially given that at the moment
- /// DTD parsing is not supported).
- pub extra_entities: HashMap<String, String>,
-
- /// Whether or not the parser should ignore the end of stream. Default is false.
- ///
- /// By default the parser will either error out when it encounters a premature end of
- /// stream or complete normally if the end of stream was expected. If you want to continue
- /// reading from a stream whose input is supplied progressively, you can set this option to true.
- /// In this case the parser will allow you to invoke the next() method even if a supposed end
- /// of stream has happened.
- ///
- /// Note that support for this functionality is incomplete; for example, the parser will fail if
- /// the premature end of stream happens inside PCDATA. Therefore, use this option at your own risk.
- pub ignore_end_of_stream: bool,
-
- /// Whether or not non-unicode entity references get replaced with the replacement character.
- /// Default is false.
- ///
- /// When true, any decimal or hexadecimal character reference that cannot be converted from a
- /// u32 to a char using [std::char::from_u32](https://doc.rust-lang.org/std/char/fn.from_u32.html)
- /// will be converted into the unicode REPLACEMENT CHARACTER (U+FFFD).
- pub replace_unknown_entity_references: bool,
-
- /// Whether or not whitespace at the root level of the document is ignored. Default is true.
- ///
- /// By default any whitespace that is not enclosed within at least one level of elements will be
- /// ignored. Setting this value to false will cause root level whitespace events to be emitted.
- pub ignore_root_level_whitespace: bool,
-}
-
-impl ParserConfig {
-    /// Creates a configuration holding the default value of every option.
-    ///
-    /// The defaults can then be adjusted in a builder-like fashion:
-    ///
-    /// ```rust
-    /// use xml::reader::ParserConfig;
-    ///
-    /// let config = ParserConfig::new()
-    /// .trim_whitespace(true)
-    /// .ignore_comments(true)
-    /// .coalesce_characters(false);
-    /// ```
-    pub fn new() -> ParserConfig {
-        ParserConfig {
-            // Options which are enabled by default.
-            ignore_comments: true,
-            coalesce_characters: true,
-            ignore_root_level_whitespace: true,
-
-            // Options which are disabled by default.
-            trim_whitespace: false,
-            whitespace_to_characters: false,
-            cdata_to_characters: false,
-            ignore_end_of_stream: false,
-            replace_unknown_entity_references: false,
-
-            // No extra entities are known initially.
-            extra_entities: HashMap::new(),
-        }
-    }
-
-    /// Consumes this configuration and creates an XML reader over `source` with it.
-    ///
-    /// This allows configuring and creating a reader in a single expression:
-    ///
-    /// ```rust
-    /// use xml::reader::ParserConfig;
-    ///
-    /// let mut source: &[u8] = b"...";
-    ///
-    /// let reader = ParserConfig::new()
-    /// .trim_whitespace(true)
-    /// .ignore_comments(true)
-    /// .coalesce_characters(false)
-    /// .create_reader(&mut source);
-    /// ```
-    ///
-    /// This method is exactly equivalent to calling `EventReader::new_with_config()` with
-    /// this configuration object.
-    #[inline]
-    pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> {
-        let config = self;
-        EventReader::new_with_config(source, config)
-    }
-
-    /// Registers an additional entity and returns the updated configuration.
-    ///
-    /// The pair is inserted into `extra_entities`; a value previously stored
-    /// under the same entity name is replaced. An example:
-    ///
-    /// ```rust
-    /// use xml::reader::ParserConfig;
-    ///
-    /// let mut source: &[u8] = b"...";
-    ///
-    /// let reader = ParserConfig::new()
-    /// .add_entity("nbsp", " ")
-    /// .add_entity("copy", "©")
-    /// .add_entity("reg", "®")
-    /// .create_reader(&mut source);
-    /// ```
-    pub fn add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig {
-        let name: String = entity.into();
-        let replacement: String = value.into();
-        self.extra_entities.insert(name, replacement);
-        self
-    }
-}
-
-/// The default configuration is the one produced by `ParserConfig::new()`.
-impl Default for ParserConfig {
-    #[inline]
-    fn default() -> ParserConfig {
-        let defaults = ParserConfig::new();
-        defaults
-    }
-}
-
-// Builder-style setters for the boolean options of `ParserConfig`; each one
-// takes the new value directly (`val`). `extra_entities` is not listed here.
-gen_setters! { ParserConfig,
- trim_whitespace: val bool,
- whitespace_to_characters: val bool,
- cdata_to_characters: val bool,
- ignore_comments: val bool,
- coalesce_characters: val bool,
- ignore_end_of_stream: val bool,
- replace_unknown_entity_references: val bool,
- ignore_root_level_whitespace: val bool
-}
diff --git a/src/reader/error.rs b/src/reader/error.rs
deleted file mode 100644
index 92378e6..0000000
--- a/src/reader/error.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-
-use std::io;
-use std::borrow::Cow;
-use std::fmt;
-use std::error;
-use std::str;
-
-use util;
-use common::{Position, TextPosition};
-
-#[derive(Debug)]
-pub enum ErrorKind {
- Syntax(Cow<'static, str>),
- Io(io::Error),
- Utf8(str::Utf8Error),
- UnexpectedEof,
-}
-
-/// An XML parsing error.
-///
-/// Consists of a 2D position in a document and a textual message describing the error.
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct Error {
- pos: TextPosition,
- kind: ErrorKind,
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{} {}", self.pos, self.msg())
- }
-}
-
-impl Position for Error {
- #[inline]
- fn position(&self) -> TextPosition { self.pos }
-}
-
-impl Error {
- /// Returns a reference to a message which is contained inside this error.
- #[inline]
- pub fn msg(&self) -> &str {
- use self::ErrorKind::*;
- match self.kind {
- UnexpectedEof => &"Unexpected EOF",
- Utf8(ref reason) => error_description(reason),
- Io(ref io_error) => error_description(io_error),
- Syntax(ref msg) => msg.as_ref(),
- }
- }
-
- pub fn kind(&self) -> &ErrorKind { &self.kind }
-}
-
-impl error::Error for Error {
- #[inline]
- fn description(&self) -> &str { self.msg() }
-}
-
-impl<'a, P, M> From<(&'a P, M)> for Error where P: Position, M: Into<Cow<'static, str>> {
- fn from(orig: (&'a P, M)) -> Self {
- Error{
- pos: orig.0.position(),
- kind: ErrorKind::Syntax(orig.1.into())
- }
- }
-}
-
-impl From<util::CharReadError> for Error {
- fn from(e: util::CharReadError) -> Self {
- use util::CharReadError::*;
- Error{
- pos: TextPosition::new(),
- kind: match e {
- UnexpectedEof => ErrorKind::UnexpectedEof,
- Utf8(reason) => ErrorKind::Utf8(reason),
- Io(io_error) => ErrorKind::Io(io_error),
- }
- }
- }
-}
-
-impl From<io::Error> for Error {
- fn from(e: io::Error) -> Self {
- Error {
- pos: TextPosition::new(),
- kind: ErrorKind::Io(e),
- }
- }
-}
-
-impl Clone for ErrorKind {
- fn clone(&self) -> Self {
- use self::ErrorKind::*;
- match *self {
- UnexpectedEof => UnexpectedEof,
- Utf8(ref reason) => Utf8(reason.clone()),
- Io(ref io_error) => Io(io::Error::new(io_error.kind(), error_description(io_error))),
- Syntax(ref msg) => Syntax(msg.clone()),
- }
- }
-}
-impl PartialEq for ErrorKind {
- fn eq(&self, other: &ErrorKind) -> bool {
- use self::ErrorKind::*;
- match (self, other) {
- (&UnexpectedEof, &UnexpectedEof) => true,
- (&Utf8(ref left), &Utf8(ref right)) => left == right,
- (&Io(ref left), &Io(ref right)) =>
- left.kind() == right.kind() &&
- error_description(left) == error_description(right),
- (&Syntax(ref left), &Syntax(ref right)) =>
- left == right,
-
- (_, _) => false,
- }
- }
-}
-impl Eq for ErrorKind {}
-
-fn error_description(e: &error::Error) -> &str { e.description() }
diff --git a/src/reader/events.rs b/src/reader/events.rs
deleted file mode 100644
index 46d7621..0000000
--- a/src/reader/events.rs
+++ /dev/null
@@ -1,219 +0,0 @@
-//! Contains `XmlEvent` datatype, instances of which are emitted by the parser.
-
-use std::fmt;
-use std::borrow::Cow;
-
-use name::OwnedName;
-use attribute::OwnedAttribute;
-use common::XmlVersion;
-use namespace::Namespace;
-
-/// An element of an XML input stream.
-///
-/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
-/// elements of an XML document.
-#[derive(PartialEq, Clone)]
-pub enum XmlEvent {
- /// Corresponds to XML document declaration.
- ///
- /// This event is always emitted before any other event. It is emitted
- /// even if the actual declaration is not present in the document.
- StartDocument {
- /// XML version.
- ///
- /// If XML declaration is not present, defaults to `Version10`.
- version: XmlVersion,
-
- /// XML document encoding.
- ///
- /// If XML declaration is not present or does not contain `encoding` attribute,
- /// defaults to `"UTF-8"`. This field is currently used for no other purpose than
- /// informational.
- encoding: String,
-
- /// XML standalone declaration.
- ///
- /// If XML document is not present or does not contain `standalone` attribute,
- /// defaults to `None`. This field is currently used for no other purpose than
- /// informational.
- standalone: Option<bool>
- },
-
- /// Denotes to the end of the document stream.
- ///
- /// This event is always emitted after any other event (except `Error`). After it
- /// is emitted for the first time, it will always be emitted on next event pull attempts.
- EndDocument,
-
- /// Denotes an XML processing instruction.
- ///
- /// This event contains a processing instruction target (`name`) and opaque `data`. It
- /// is up to the application to process them.
- ProcessingInstruction {
- /// Processing instruction target.
- name: String,
-
- /// Processing instruction content.
- data: Option<String>
- },
-
- /// Denotes a beginning of an XML element.
- ///
- /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
- /// latter case `EndElement` event immediately follows.
- StartElement {
- /// Qualified name of the element.
- name: OwnedName,
-
- /// A list of attributes associated with the element.
- ///
- /// Currently attributes are not checked for duplicates (TODO)
- attributes: Vec<OwnedAttribute>,
-
- /// Contents of the namespace mapping at this point of the document.
- namespace: Namespace,
- },
-
- /// Denotes an end of an XML element.
- ///
- /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
- /// latter case it is emitted immediately after corresponding `StartElement` event.
- EndElement {
- /// Qualified name of the element.
- name: OwnedName
- },
-
- /// Denotes CDATA content.
- ///
- /// This event contains unparsed data. No unescaping will be performed.
- ///
- /// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
- /// `pull::ParserConfiguration` structure for more information.
- CData(String),
-
- /// Denotes a comment.
- ///
- /// It is possible to configure a parser to ignore comments, so this event will never be emitted.
- /// See `pull::ParserConfiguration` structure for more information.
- Comment(String),
-
- /// Denotes character data outside of tags.
- ///
- /// Contents of this event will always be unescaped, so no entities like `<` or `&` or `{`
- /// will appear in it.
- ///
- /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
- /// See `pull::ParserConfiguration` structure for more information.
- Characters(String),
-
- /// Denotes a chunk of whitespace outside of tags.
- ///
- /// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
- /// See `pull::ParserConfiguration` structure for more information. When combined with whitespace
- /// trimming, it will eliminate standalone whitespace from the event stream completely.
- Whitespace(String)
-}
-
-impl fmt::Debug for XmlEvent {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- XmlEvent::StartDocument { ref version, ref encoding, ref standalone } =>
- write!(f, "StartDocument({}, {}, {:?})", version, *encoding, *standalone),
- XmlEvent::EndDocument =>
- write!(f, "EndDocument"),
- XmlEvent::ProcessingInstruction { ref name, ref data } =>
- write!(f, "ProcessingInstruction({}{})", *name, match *data {
- Some(ref data) => format!(", {}", data),
- None => String::new()
- }),
- XmlEvent::StartElement { ref name, ref attributes, namespace: Namespace(ref namespace) } =>
- write!(f, "StartElement({}, {:?}{})", name, namespace, if attributes.is_empty() {
- String::new()
- } else {
- let attributes: Vec<String> = attributes.iter().map(
- |a| format!("{} -> {}", a.name, a.value)
- ).collect();
- format!(", [{}]", attributes.join(", "))
- }),
- XmlEvent::EndElement { ref name } =>
- write!(f, "EndElement({})", name),
- XmlEvent::Comment(ref data) =>
- write!(f, "Comment({})", data),
- XmlEvent::CData(ref data) =>
- write!(f, "CData({})", data),
- XmlEvent::Characters(ref data) =>
- write!(f, "Characters({})", data),
- XmlEvent::Whitespace(ref data) =>
- write!(f, "Whitespace({})", data)
- }
- }
-}
-
-impl XmlEvent {
- /// Obtains a writer event from this reader event.
- ///
- /// This method is useful for streaming processing of XML documents where the output
- /// is also an XML document. With this method it is possible to process some events
- /// while passing other events through to the writer unchanged:
- ///
- /// ```rust
- /// use std::str;
- ///
- /// use xml::{EventReader, EventWriter};
- /// use xml::reader::XmlEvent as ReaderEvent;
- /// use xml::writer::XmlEvent as WriterEvent;
- ///
- /// let mut input: &[u8] = b"<hello>world</hello>";
- /// let mut output: Vec<u8> = Vec::new();
- ///
- /// {
- /// let mut reader = EventReader::new(&mut input);
- /// let mut writer = EventWriter::new(&mut output);
- ///
- /// for e in reader {
- /// match e.unwrap() {
- /// ReaderEvent::Characters(s) =>
- /// writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
- /// e => if let Some(e) = e.as_writer_event() {
- /// writer.write(e).unwrap()
- /// }
- /// }
- /// }
- /// }
- ///
- /// assert_eq!(
- /// str::from_utf8(&output).unwrap(),
- /// r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
- /// );
- /// ```
- ///
- /// Note that this API may change or get additions in future to improve its ergonomics.
- pub fn as_writer_event<'a>(&'a self) -> Option<::writer::events::XmlEvent<'a>> {
- match *self {
- XmlEvent::StartDocument { version, ref encoding, standalone } =>
- Some(::writer::events::XmlEvent::StartDocument {
- version: version,
- encoding: Some(encoding),
- standalone: standalone
- }),
- XmlEvent::ProcessingInstruction { ref name, ref data } =>
- Some(::writer::events::XmlEvent::ProcessingInstruction {
- name: name,
- data: data.as_ref().map(|s| &s[..])
- }),
- XmlEvent::StartElement { ref name, ref attributes, ref namespace } =>
- Some(::writer::events::XmlEvent::StartElement {
- name: name.borrow(),
- attributes: attributes.iter().map(|a| a.borrow()).collect(),
- namespace: Cow::Borrowed(namespace)
- }),
- XmlEvent::EndElement { ref name } =>
- Some(::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }),
- XmlEvent::Comment(ref data) => Some(::writer::events::XmlEvent::Comment(data)),
- XmlEvent::CData(ref data) => Some(::writer::events::XmlEvent::CData(data)),
- XmlEvent::Characters(ref data) => Some(::writer::events::XmlEvent::Characters(data)),
- XmlEvent::Whitespace(ref data) => Some(::writer::events::XmlEvent::Characters(data)),
- _ => None
- }
- }
-}
diff --git a/src/reader/lexer.rs b/src/reader/lexer.rs
deleted file mode 100644
index ffaab57..0000000
--- a/src/reader/lexer.rs
+++ /dev/null
@@ -1,866 +0,0 @@
-//! Contains simple lexer for XML documents.
-//!
-//! This module is for internal use. Use `xml::pull` module to do parsing.
-
-use std::fmt;
-use std::collections::VecDeque;
-use std::io::Read;
-use std::result;
-use std::borrow::Cow;
-
-use common::{Position, TextPosition, is_whitespace_char, is_name_char};
-use reader::Error;
-use util;
-
-/// `Token` represents a single lexeme of an XML document. These lexemes
-/// are used to perform actual parsing.
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
-pub enum Token {
- /// `<?`
- ProcessingInstructionStart,
- /// `?>`
- ProcessingInstructionEnd,
- /// `<!DOCTYPE
- DoctypeStart,
- /// `<`
- OpeningTagStart,
- /// `</`
- ClosingTagStart,
- /// `>`
- TagEnd,
- /// `/>`
- EmptyTagEnd,
- /// `<!--`
- CommentStart,
- /// `-->`
- CommentEnd,
- /// A chunk of characters, used for errors recovery.
- Chunk(&'static str),
- /// Any non-special character except whitespace.
- Character(char),
- /// Whitespace character.
- Whitespace(char),
- /// `=`
- EqualsSign,
- /// `'`
- SingleQuote,
- /// `"`
- DoubleQuote,
- /// `<![CDATA[`
- CDataStart,
- /// `]]>`
- CDataEnd,
- /// `&`
- ReferenceStart,
- /// `;`
- ReferenceEnd,
-}
-
-impl fmt::Display for Token {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- Token::Chunk(s) => write!(f, "{}", s),
- Token::Character(c) | Token::Whitespace(c) => write!(f, "{}", c),
- other => write!(f, "{}", match other {
- Token::OpeningTagStart => "<",
- Token::ProcessingInstructionStart => "<?",
- Token::DoctypeStart => "<!DOCTYPE",
- Token::ClosingTagStart => "</",
- Token::CommentStart => "<!--",
- Token::CDataStart => "<![CDATA[",
- Token::TagEnd => ">",
- Token::EmptyTagEnd => "/>",
- Token::ProcessingInstructionEnd => "?>",
- Token::CommentEnd => "-->",
- Token::CDataEnd => "]]>",
- Token::ReferenceStart => "&",
- Token::ReferenceEnd => ";",
- Token::EqualsSign => "=",
- Token::SingleQuote => "'",
- Token::DoubleQuote => "\"",
- _ => unreachable!()
- })
- }
- }
-}
-
-impl Token {
- pub fn as_static_str(&self) -> Option<&'static str> {
- match *self {
- Token::OpeningTagStart => Some("<"),
- Token::ProcessingInstructionStart => Some("<?"),
- Token::DoctypeStart => Some("<!DOCTYPE"),
- Token::ClosingTagStart => Some("</"),
- Token::CommentStart => Some("<!--"),
- Token::CDataStart => Some("<![CDATA["),
- Token::TagEnd => Some(">"),
- Token::EmptyTagEnd => Some("/>"),
- Token::ProcessingInstructionEnd => Some("?>"),
- Token::CommentEnd => Some("-->"),
- Token::CDataEnd => Some("]]>"),
- Token::ReferenceStart => Some("&"),
- Token::ReferenceEnd => Some(";"),
- Token::EqualsSign => Some("="),
- Token::SingleQuote => Some("'"),
- Token::DoubleQuote => Some("\""),
- Token::Chunk(s) => Some(s),
- _ => None
- }
- }
-
- // using String.push_str(token.to_string()) is simply way too slow
- pub fn push_to_string(&self, target: &mut String) {
- match self.as_static_str() {
- Some(s) => { target.push_str(s); }
- None => {
- match *self {
- Token::Character(c) | Token::Whitespace(c) => target.push(c),
- _ => unreachable!()
- }
- }
- }
- }
-
- /// Returns `true` if this token contains data that can be interpreted
- /// as a part of the text. Surprisingly, this also means '>' and '=' and '"' and "'" and '-->'.
- #[inline]
- pub fn contains_char_data(&self) -> bool {
- match *self {
- Token::Whitespace(_) | Token::Chunk(_) | Token::Character(_) | Token::CommentEnd |
- Token::TagEnd | Token::EqualsSign | Token::DoubleQuote | Token::SingleQuote => true,
- _ => false
- }
- }
-
- /// Returns `true` if this token corresponds to a white space character.
- #[inline]
- pub fn is_whitespace(&self) -> bool {
- match *self {
- Token::Whitespace(_) => true,
- _ => false
- }
- }
-}
-
-enum State {
- /// Triggered on '<'
- TagStarted,
- /// Triggered on '<!'
- CommentOrCDataOrDoctypeStarted,
- /// Triggered on '<!-'
- CommentStarted,
- /// Triggered on '<!D' up to '<!DOCTYPE'
- DoctypeStarted(DoctypeStartedSubstate),
- /// Triggered after DoctypeStarted to handle sub elements
- DoctypeFinishing(u8),
- /// Triggered on '<![' up to '<![CDATA'
- CDataStarted(CDataStartedSubstate),
- /// Triggered on '?'
- ProcessingInstructionClosing,
- /// Triggered on '/'
- EmptyTagClosing,
- /// Triggered on '-' up to '--'
- CommentClosing(ClosingSubstate),
- /// Triggered on ']' up to ']]'
- CDataClosing(ClosingSubstate),
- /// Default state
- Normal
-}
-
-#[derive(Copy, Clone)]
-enum ClosingSubstate {
- First, Second
-}
-
-#[derive(Copy, Clone)]
-enum DoctypeStartedSubstate {
- D, DO, DOC, DOCT, DOCTY, DOCTYP
-}
-
-#[derive(Copy, Clone)]
-enum CDataStartedSubstate {
- E, C, CD, CDA, CDAT, CDATA
-}
-
-/// `Result` represents lexing result. It is either a token or an error message.
-pub type Result = result::Result<Option<Token>, Error>;
-
-/// Helps to set up a dispatch table for lexing large unambigous tokens like
-/// `<![CDATA[` or `<!DOCTYPE `.
-macro_rules! dispatch_on_enum_state(
- ($_self:ident, $s:expr, $c:expr, $is:expr,
- $($st:ident; $stc:expr ; $next_st:ident ; $chunk:expr),+;
- $end_st:ident ; $end_c:expr ; $end_chunk:expr ; $e:expr) => (
- match $s {
- $(
- $st => match $c {
- $stc => $_self.move_to($is($next_st)),
- _ => $_self.handle_error($chunk, $c)
- },
- )+
- $end_st => match $c {
- $end_c => $e,
- _ => $_self.handle_error($end_chunk, $c)
- }
- }
- )
-);
-
-/// `Lexer` is a lexer for XML documents, which implements pull API.
-///
-/// Main method is `next_token` which accepts an `std::io::Read` instance and
-/// tries to read the next lexeme from it.
-///
-/// When `skip_errors` flag is set, invalid lexemes will be returned as `Chunk`s.
-/// When it is not set, errors will be reported as `Err` objects with a string message.
-/// By default this flag is not set. Use `enable_errors` and `disable_errors` methods
-/// to toggle the behavior.
-pub struct Lexer {
- pos: TextPosition,
- head_pos: TextPosition,
- char_queue: VecDeque<char>,
- st: State,
- skip_errors: bool,
- inside_comment: bool,
- inside_token: bool,
- eof_handled: bool
-}
-
-impl Position for Lexer {
- #[inline]
- /// Returns the position of the last token produced by the lexer
- fn position(&self) -> TextPosition { self.pos }
-}
-
-impl Lexer {
- /// Returns a new lexer with default state.
- pub fn new() -> Lexer {
- Lexer {
- pos: TextPosition::new(),
- head_pos: TextPosition::new(),
- char_queue: VecDeque::with_capacity(4), // TODO: check size
- st: State::Normal,
- skip_errors: false,
- inside_comment: false,
- inside_token: false,
- eof_handled: false
- }
- }
-
- /// Enables error handling so `next_token` will return `Some(Err(..))`
- /// upon invalid lexeme.
- #[inline]
- pub fn enable_errors(&mut self) { self.skip_errors = false; }
-
- /// Disables error handling so `next_token` will return `Some(Chunk(..))`
- /// upon invalid lexeme with this lexeme content.
- #[inline]
- pub fn disable_errors(&mut self) { self.skip_errors = true; }
-
- /// Enables special handling of some lexemes which should be done when we're parsing comment
- /// internals.
- #[inline]
- pub fn inside_comment(&mut self) { self.inside_comment = true; }
-
- /// Disables the effect of `inside_comment()` method.
- #[inline]
- pub fn outside_comment(&mut self) { self.inside_comment = false; }
-
- /// Reset the eof handled flag of the lexer.
- #[inline]
- pub fn reset_eof_handled(&mut self) { self.eof_handled = false; }
-
- /// Tries to read the next token from the buffer.
- ///
- /// It is possible to pass different instaces of `BufReader` each time
- /// this method is called, but the resulting behavior is undefined in this case.
- ///
- /// Return value:
- /// * `Err(reason) where reason: reader::Error` - when an error occurs;
- /// * `Ok(None)` - upon end of stream is reached;
- /// * `Ok(Some(token)) where token: Token` - in case a complete-token has been read from the stream.
- pub fn next_token<B: Read>(&mut self, b: &mut B) -> Result {
- // Already reached end of buffer
- if self.eof_handled {
- return Ok(None);
- }
-
- if !self.inside_token {
- self.pos = self.head_pos;
- self.inside_token = true;
- }
-
- // Check if we have saved a char or two for ourselves
- while let Some(c) = self.char_queue.pop_front() {
- match try!(self.read_next_token(c)) {
- Some(t) => {
- self.inside_token = false;
- return Ok(Some(t));
- }
- None => {} // continue
- }
- }
-
- loop {
- // TODO: this should handle multiple encodings
- let c = match try!(util::next_char_from(b)) {
- Some(c) => c, // got next char
- None => break, // nothing to read left
- };
-
- match try!(self.read_next_token(c)) {
- Some(t) => {
- self.inside_token = false;
- return Ok(Some(t));
- }
- None => {
- // continue
- }
- }
- }
-
- // Handle end of stream
- self.eof_handled = true;
- self.pos = self.head_pos;
- match self.st {
- State::TagStarted | State::CommentOrCDataOrDoctypeStarted |
- State::CommentStarted | State::CDataStarted(_)| State::DoctypeStarted(_) |
- State::CommentClosing(ClosingSubstate::Second) |
- State::DoctypeFinishing(_) =>
- Err(self.error("Unexpected end of stream")),
- State::ProcessingInstructionClosing =>
- Ok(Some(Token::Character('?'))),
- State::EmptyTagClosing =>
- Ok(Some(Token::Character('/'))),
- State::CommentClosing(ClosingSubstate::First) =>
- Ok(Some(Token::Character('-'))),
- State::CDataClosing(ClosingSubstate::First) =>
- Ok(Some(Token::Character(']'))),
- State::CDataClosing(ClosingSubstate::Second) =>
- Ok(Some(Token::Chunk("]]"))),
- State::Normal =>
- Ok(None)
- }
- }
-
- #[inline]
- fn error<M: Into<Cow<'static, str>>>(&self, msg: M) -> Error {
- (self, msg).into()
- }
-
- #[inline]
- fn read_next_token(&mut self, c: char) -> Result {
- let res = self.dispatch_char(c);
- if self.char_queue.is_empty() {
- if c == '\n' {
- self.head_pos.new_line();
- } else {
- self.head_pos.advance(1);
- }
- }
- res
- }
-
- fn dispatch_char(&mut self, c: char) -> Result {
- match self.st {
- State::Normal => self.normal(c),
- State::TagStarted => self.tag_opened(c),
- State::CommentOrCDataOrDoctypeStarted => self.comment_or_cdata_or_doctype_started(c),
- State::CommentStarted => self.comment_started(c),
- State::CDataStarted(s) => self.cdata_started(c, s),
- State::DoctypeStarted(s) => self.doctype_started(c, s),
- State::DoctypeFinishing(d) => self.doctype_finishing(c, d),
- State::ProcessingInstructionClosing => self.processing_instruction_closing(c),
- State::EmptyTagClosing => self.empty_element_closing(c),
- State::CommentClosing(s) => self.comment_closing(c, s),
- State::CDataClosing(s) => self.cdata_closing(c, s)
- }
- }
-
- #[inline]
- fn move_to(&mut self, st: State) -> Result {
- self.st = st;
- Ok(None)
- }
-
- #[inline]
- fn move_to_with(&mut self, st: State, token: Token) -> Result {
- self.st = st;
- Ok(Some(token))
- }
-
- #[inline]
- fn move_to_with_unread(&mut self, st: State, cs: &[char], token: Token) -> Result {
- self.char_queue.extend(cs.iter().cloned());
- self.move_to_with(st, token)
- }
-
- fn handle_error(&mut self, chunk: &'static str, c: char) -> Result {
- self.char_queue.push_back(c);
- if self.skip_errors || (self.inside_comment && chunk != "--") { // FIXME: looks hacky
- self.move_to_with(State::Normal, Token::Chunk(chunk))
- } else {
- Err(self.error(format!("Unexpected token '{}' before '{}'", chunk, c)))
- }
- }
-
- /// Encountered a char
- fn normal(&mut self, c: char) -> Result {
- match c {
- '<' => self.move_to(State::TagStarted),
- '>' => Ok(Some(Token::TagEnd)),
- '/' => self.move_to(State::EmptyTagClosing),
- '=' => Ok(Some(Token::EqualsSign)),
- '"' => Ok(Some(Token::DoubleQuote)),
- '\'' => Ok(Some(Token::SingleQuote)),
- '?' => self.move_to(State::ProcessingInstructionClosing),
- '-' => self.move_to(State::CommentClosing(ClosingSubstate::First)),
- ']' => self.move_to(State::CDataClosing(ClosingSubstate::First)),
- '&' => Ok(Some(Token::ReferenceStart)),
- ';' => Ok(Some(Token::ReferenceEnd)),
- _ if is_whitespace_char(c) => Ok(Some(Token::Whitespace(c))),
- _ => Ok(Some(Token::Character(c)))
- }
- }
-
- /// Encountered '<'
- fn tag_opened(&mut self, c: char) -> Result {
- match c {
- '?' => self.move_to_with(State::Normal, Token::ProcessingInstructionStart),
- '/' => self.move_to_with(State::Normal, Token::ClosingTagStart),
- '!' => self.move_to(State::CommentOrCDataOrDoctypeStarted),
- _ if is_whitespace_char(c) => self.move_to_with_unread(State::Normal, &[c], Token::OpeningTagStart),
- _ if is_name_char(c) => self.move_to_with_unread(State::Normal, &[c], Token::OpeningTagStart),
- _ => self.handle_error("<", c)
- }
- }
-
- /// Encountered '<!'
- fn comment_or_cdata_or_doctype_started(&mut self, c: char) -> Result {
- match c {
- '-' => self.move_to(State::CommentStarted),
- '[' => self.move_to(State::CDataStarted(CDataStartedSubstate::E)),
- 'D' => self.move_to(State::DoctypeStarted(DoctypeStartedSubstate::D)),
- _ => self.handle_error("<!", c)
- }
- }
-
- /// Encountered '<!-'
- fn comment_started(&mut self, c: char) -> Result {
- match c {
- '-' => self.move_to_with(State::Normal, Token::CommentStart),
- _ => self.handle_error("<!-", c)
- }
- }
-
- /// Encountered '<!['
- fn cdata_started(&mut self, c: char, s: CDataStartedSubstate) -> Result {
- use self::CDataStartedSubstate::{E, C, CD, CDA, CDAT, CDATA};
- dispatch_on_enum_state!(self, s, c, State::CDataStarted,
- E ; 'C' ; C ; "<![",
- C ; 'D' ; CD ; "<![C",
- CD ; 'A' ; CDA ; "<![CD",
- CDA ; 'T' ; CDAT ; "<![CDA",
- CDAT ; 'A' ; CDATA ; "<![CDAT";
- CDATA ; '[' ; "<![CDATA" ; self.move_to_with(State::Normal, Token::CDataStart)
- )
- }
-
- /// Encountered '<!D'
- fn doctype_started(&mut self, c: char, s: DoctypeStartedSubstate) -> Result {
- use self::DoctypeStartedSubstate::{D, DO, DOC, DOCT, DOCTY, DOCTYP};
- dispatch_on_enum_state!(self, s, c, State::DoctypeStarted,
- D ; 'O' ; DO ; "<!D",
- DO ; 'C' ; DOC ; "<!DO",
- DOC ; 'T' ; DOCT ; "<!DOC",
- DOCT ; 'Y' ; DOCTY ; "<!DOCT",
- DOCTY ; 'P' ; DOCTYP ; "<!DOCTY";
- DOCTYP ; 'E' ; "<!DOCTYP" ; self.move_to_with(State::DoctypeFinishing(1), Token::DoctypeStart)
- )
- }
-
- /// State used while awaiting the closing bracket for the <!DOCTYPE tag
- fn doctype_finishing(&mut self, c: char, d: u8) -> Result {
- match c {
- '<' => self.move_to(State::DoctypeFinishing(d + 1)),
- '>' if d == 1 => self.move_to_with(State::Normal, Token::TagEnd),
- '>' => self.move_to(State::DoctypeFinishing(d - 1)),
- _ => Ok(None),
- }
- }
-
- /// Encountered '?'
- fn processing_instruction_closing(&mut self, c: char) -> Result {
- match c {
- '>' => self.move_to_with(State::Normal, Token::ProcessingInstructionEnd),
- _ => self.move_to_with_unread(State::Normal, &[c], Token::Character('?')),
- }
- }
-
- /// Encountered '/'
- fn empty_element_closing(&mut self, c: char) -> Result {
- match c {
- '>' => self.move_to_with(State::Normal, Token::EmptyTagEnd),
- _ => self.move_to_with_unread(State::Normal, &[c], Token::Character('/')),
- }
- }
-
- /// Encountered '-'
- fn comment_closing(&mut self, c: char, s: ClosingSubstate) -> Result {
- match s {
- ClosingSubstate::First => match c {
- '-' => self.move_to(State::CommentClosing(ClosingSubstate::Second)),
- _ => self.move_to_with_unread(State::Normal, &[c], Token::Character('-'))
- },
- ClosingSubstate::Second => match c {
- '>' => self.move_to_with(State::Normal, Token::CommentEnd),
- // double dash not followed by a greater-than is a hard error inside comment
- _ if self.inside_comment => self.handle_error("--", c),
- // nothing else except comment closing starts with a double dash, and comment
- // closing can never be after another dash, and also we're outside of a comment,
- // therefore it is safe to push only the last read character to the list of unread
- // characters and pass the double dash directly to the output
- _ => self.move_to_with_unread(State::Normal, &[c], Token::Chunk("--"))
- }
- }
- }
-
- /// Encountered ']'
- fn cdata_closing(&mut self, c: char, s: ClosingSubstate) -> Result {
- match s {
- ClosingSubstate::First => match c {
- ']' => self.move_to(State::CDataClosing(ClosingSubstate::Second)),
- _ => self.move_to_with_unread(State::Normal, &[c], Token::Character(']'))
- },
- ClosingSubstate::Second => match c {
- '>' => self.move_to_with(State::Normal, Token::CDataEnd),
- _ => self.move_to_with_unread(State::Normal, &[']', c], Token::Character(']'))
- }
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use common::{Position};
- use std::io::{BufReader, Cursor};
-
- use super::{Lexer, Token};
-
- macro_rules! assert_oks(
- (for $lex:ident and $buf:ident ; $($e:expr)+) => ({
- $(
- assert_eq!(Ok(Some($e)), $lex.next_token(&mut $buf));
- )+
- })
- );
-
- macro_rules! assert_err(
- (for $lex:ident and $buf:ident expect row $r:expr ; $c:expr, $s:expr) => ({
- let err = $lex.next_token(&mut $buf);
- assert!(err.is_err());
- let err = err.unwrap_err();
- assert_eq!($r as u64, err.position().row);
- assert_eq!($c as u64, err.position().column);
- assert_eq!($s, err.msg());
- })
- );
-
- macro_rules! assert_none(
- (for $lex:ident and $buf:ident) => (
- assert_eq!(Ok(None), $lex.next_token(&mut $buf));
- )
- );
-
- fn make_lex_and_buf(s: &str) -> (Lexer, BufReader<Cursor<Vec<u8>>>) {
- (Lexer::new(), BufReader::new(Cursor::new(s.to_owned().into_bytes())))
- }
-
- #[test]
- fn simple_lexer_test() {
- let (mut lex, mut buf) = make_lex_and_buf(
- r#"<a p='q'> x<b z="y">d </b></a><p/> <?nm ?> <!-- a c --> "#
- );
-
- assert_oks!(for lex and buf ;
- Token::OpeningTagStart
- Token::Character('a')
- Token::Whitespace(' ')
- Token::Character('p')
- Token::EqualsSign
- Token::SingleQuote
- Token::Character('q')
- Token::SingleQuote
- Token::TagEnd
- Token::Whitespace(' ')
- Token::Character('x')
- Token::OpeningTagStart
- Token::Character('b')
- Token::Whitespace(' ')
- Token::Character('z')
- Token::EqualsSign
- Token::DoubleQuote
- Token::Character('y')
- Token::DoubleQuote
- Token::TagEnd
- Token::Character('d')
- Token::Whitespace('\t')
- Token::ClosingTagStart
- Token::Character('b')
- Token::TagEnd
- Token::ClosingTagStart
- Token::Character('a')
- Token::TagEnd
- Token::OpeningTagStart
- Token::Character('p')
- Token::EmptyTagEnd
- Token::Whitespace(' ')
- Token::ProcessingInstructionStart
- Token::Character('n')
- Token::Character('m')
- Token::Whitespace(' ')
- Token::ProcessingInstructionEnd
- Token::Whitespace(' ')
- Token::CommentStart
- Token::Whitespace(' ')
- Token::Character('a')
- Token::Whitespace(' ')
- Token::Character('c')
- Token::Whitespace(' ')
- Token::CommentEnd
- Token::Whitespace(' ')
- Token::ReferenceStart
- Token::Character('n')
- Token::Character('b')
- Token::Character('s')
- Token::Character('p')
- Token::ReferenceEnd
- );
- assert_none!(for lex and buf);
- }
-
- #[test]
- fn special_chars_test() {
- let (mut lex, mut buf) = make_lex_and_buf(
- r#"?x!+ // -| ]z]]"#
- );
-
- assert_oks!(for lex and buf ;
- Token::Character('?')
- Token::Character('x')
- Token::Character('!')
- Token::Character('+')
- Token::Whitespace(' ')
- Token::Character('/')
- Token::Character('/')
- Token::Whitespace(' ')
- Token::Character('-')
- Token::Character('|')
- Token::Whitespace(' ')
- Token::Character(']')
- Token::Character('z')
- Token::Chunk("]]")
- );
- assert_none!(for lex and buf);
- }
-
- #[test]
- fn cdata_test() {
- let (mut lex, mut buf) = make_lex_and_buf(
- r#"<a><![CDATA[x y ?]]> </a>"#
- );
-
- assert_oks!(for lex and buf ;
- Token::OpeningTagStart
- Token::Character('a')
- Token::TagEnd
- Token::CDataStart
- Token::Character('x')
- Token::Whitespace(' ')
- Token::Character('y')
- Token::Whitespace(' ')
- Token::Character('?')
- Token::CDataEnd
- Token::Whitespace(' ')
- Token::ClosingTagStart
- Token::Character('a')
- Token::TagEnd
- );
- assert_none!(for lex and buf);
- }
-
- #[test]
- fn doctype_test() {
- let (mut lex, mut buf) = make_lex_and_buf(
- r#"<a><!DOCTYPE ab xx z> "#
- );
- assert_oks!(for lex and buf ;
- Token::OpeningTagStart
- Token::Character('a')
- Token::TagEnd
- Token::DoctypeStart
- Token::TagEnd
- Token::Whitespace(' ')
- );
- assert_none!(for lex and buf)
- }
-
- #[test]
- fn doctype_with_internal_subset_test() {
- let (mut lex, mut buf) = make_lex_and_buf(
- r#"<a><!DOCTYPE ab[<!ELEMENT ba> ]> "#
- );
- assert_oks!(for lex and buf ;
- Token::OpeningTagStart
- Token::Character('a')
- Token::TagEnd
- Token::DoctypeStart
- Token::TagEnd
- Token::Whitespace(' ')
- );
- assert_none!(for lex and buf)
- }
-
- #[test]
- fn end_of_stream_handling_ok() {
- macro_rules! eof_check(
- ($data:expr ; $token:expr) => ({
- let (mut lex, mut buf) = make_lex_and_buf($data);
- assert_oks!(for lex and buf ; $token);
- assert_none!(for lex and buf);
- })
- );
- eof_check!("?" ; Token::Character('?'));
- eof_check!("/" ; Token::Character('/'));
- eof_check!("-" ; Token::Character('-'));
- eof_check!("]" ; Token::Character(']'));
- eof_check!("]]" ; Token::Chunk("]]"));
- }
-
- #[test]
- fn end_of_stream_handling_error() {
- macro_rules! eof_check(
- ($data:expr; $r:expr, $c:expr) => ({
- let (mut lex, mut buf) = make_lex_and_buf($data);
- assert_err!(for lex and buf expect row $r ; $c, "Unexpected end of stream");
- assert_none!(for lex and buf);
- })
- );
- eof_check!("<" ; 0, 1);
- eof_check!("<!" ; 0, 2);
- eof_check!("<!-" ; 0, 3);
- eof_check!("<![" ; 0, 3);
- eof_check!("<![C" ; 0, 4);
- eof_check!("<![CD" ; 0, 5);
- eof_check!("<![CDA" ; 0, 6);
- eof_check!("<![CDAT" ; 0, 7);
- eof_check!("<![CDATA" ; 0, 8);
- eof_check!("--" ; 0, 2);
- }
-
- #[test]
- fn error_in_comment_or_cdata_prefix() {
- let (mut lex, mut buf) = make_lex_and_buf("<!x");
- assert_err!(for lex and buf expect row 0 ; 0,
- "Unexpected token '<!' before 'x'"
- );
-
- let (mut lex, mut buf) = make_lex_and_buf("<!x");
- lex.disable_errors();
- assert_oks!(for lex and buf ;
- Token::Chunk("<!")
- Token::Character('x')
- );
- assert_none!(for lex and buf);
- }
-
- #[test]
- fn error_in_comment_started() {
- let (mut lex, mut buf) = make_lex_and_buf("<!-\t");
- assert_err!(for lex and buf expect row 0 ; 0,
- "Unexpected token '<!-' before '\t'"
- );
-
- let (mut lex, mut buf) = make_lex_and_buf("<!-\t");
- lex.disable_errors();
- assert_oks!(for lex and buf ;
- Token::Chunk("<!-")
- Token::Whitespace('\t')
- );
- assert_none!(for lex and buf);
- }
-
- #[test]
- fn error_in_comment_two_dashes_not_at_end() {
- let (mut lex, mut buf) = make_lex_and_buf("--x");
- lex.inside_comment();
- assert_err!(for lex and buf expect row 0; 0,
- "Unexpected token '--' before 'x'"
- );
-
- let (mut lex, mut buf) = make_lex_and_buf("--x");
- assert_oks!(for lex and buf ;
- Token::Chunk("--")
- Token::Character('x')
- );
- }
-
- macro_rules! check_case(
- ($chunk:expr, $app:expr; $data:expr; $r:expr, $c:expr, $s:expr) => ({
- let (mut lex, mut buf) = make_lex_and_buf($data);
- assert_err!(for lex and buf expect row $r ; $c, $s);
-
- let (mut lex, mut buf) = make_lex_and_buf($data);
- lex.disable_errors();
- assert_oks!(for lex and buf ;
- Token::Chunk($chunk)
- Token::Character($app)
- );
- assert_none!(for lex and buf);
- })
- );
-
- #[test]
- fn error_in_cdata_started() {
- check_case!("<![", '['; "<![[" ; 0, 0, "Unexpected token '<![' before '['");
- check_case!("<![C", '['; "<![C[" ; 0, 0, "Unexpected token '<![C' before '['");
- check_case!("<![CD", '['; "<![CD[" ; 0, 0, "Unexpected token '<![CD' before '['");
- check_case!("<![CDA", '['; "<![CDA[" ; 0, 0, "Unexpected token '<![CDA' before '['");
- check_case!("<![CDAT", '['; "<![CDAT[" ; 0, 0, "Unexpected token '<![CDAT' before '['");
- check_case!("<![CDATA", '|'; "<![CDATA|" ; 0, 0, "Unexpected token '<![CDATA' before '|'");
- }
-
- #[test]
- fn error_in_doctype_started() {
- check_case!("<!D", 'a'; "<!Da" ; 0, 0, "Unexpected token '<!D' before 'a'");
- check_case!("<!DO", 'b'; "<!DOb" ; 0, 0, "Unexpected token '<!DO' before 'b'");
- check_case!("<!DOC", 'c'; "<!DOCc" ; 0, 0, "Unexpected token '<!DOC' before 'c'");
- check_case!("<!DOCT", 'd'; "<!DOCTd" ; 0, 0, "Unexpected token '<!DOCT' before 'd'");
- check_case!("<!DOCTY", 'e'; "<!DOCTYe" ; 0, 0, "Unexpected token '<!DOCTY' before 'e'");
- check_case!("<!DOCTYP", 'f'; "<!DOCTYPf" ; 0, 0, "Unexpected token '<!DOCTYP' before 'f'");
- }
-
-
-
- #[test]
- fn issue_98_cdata_ending_with_right_bracket() {
- let (mut lex, mut buf) = make_lex_and_buf(
- r#"<![CDATA[Foo [Bar]]]>"#
- );
-
- assert_oks!(for lex and buf ;
- Token::CDataStart
- Token::Character('F')
- Token::Character('o')
- Token::Character('o')
- Token::Whitespace(' ')
- Token::Character('[')
- Token::Character('B')
- Token::Character('a')
- Token::Character('r')
- Token::Character(']')
- Token::CDataEnd
- );
- assert_none!(for lex and buf);
- }
-}
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
deleted file mode 100644
index 90f5b52..0000000
--- a/src/reader/mod.rs
+++ /dev/null
@@ -1,129 +0,0 @@
-//! Contains high-level interface for a pull-based XML parser.
-//!
-//! The most important type in this module is `EventReader`, which provides an iterator
-//! view for events in XML document.
-
-use std::io::{Read};
-use std::result;
-
-use common::{Position, TextPosition};
-
-pub use self::config::ParserConfig;
-pub use self::events::XmlEvent;
-
-use self::parser::PullParser;
-
-mod lexer;
-mod parser;
-mod config;
-mod events;
-
-mod error;
-pub use self::error::{Error, ErrorKind};
-
-/// A result type yielded by `XmlReader`.
-pub type Result<T> = result::Result<T, Error>;
-
-/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
-pub struct EventReader<R: Read> {
- source: R,
- parser: PullParser
-}
-
-impl<R: Read> EventReader<R> {
- /// Creates a new reader, consuming the given stream.
- #[inline]
- pub fn new(source: R) -> EventReader<R> {
- EventReader::new_with_config(source, ParserConfig::new())
- }
-
- /// Creates a new reader with the provded configuration, consuming the given stream.
- #[inline]
- pub fn new_with_config(source: R, config: ParserConfig) -> EventReader<R> {
- EventReader { source: source, parser: PullParser::new(config) }
- }
-
- /// Pulls and returns next XML event from the stream.
- ///
- /// If returned event is `XmlEvent::Error` or `XmlEvent::EndDocument`, then
- /// further calls to this method will return this event again.
- #[inline]
- pub fn next(&mut self) -> Result<XmlEvent> {
- self.parser.next(&mut self.source)
- }
-
- pub fn source(&self) -> &R { &self.source }
- pub fn source_mut(&mut self) -> &mut R { &mut self.source }
-
- /// Unwraps this `EventReader`, returning the underlying reader.
- ///
- /// Note that this operation is destructive; unwrapping the reader and wrapping it
- /// again with `EventReader::new()` will create a fresh reader which will attempt
- /// to parse an XML document from the beginning.
- pub fn into_inner(self) -> R {
- self.source
- }
-}
-
-impl<B: Read> Position for EventReader<B> {
- /// Returns the position of the last event produced by the reader.
- #[inline]
- fn position(&self) -> TextPosition {
- self.parser.position()
- }
-}
-
-impl<R: Read> IntoIterator for EventReader<R> {
- type Item = Result<XmlEvent>;
- type IntoIter = Events<R>;
-
- fn into_iter(self) -> Events<R> {
- Events { reader: self, finished: false }
- }
-}
-
-/// An iterator over XML events created from some type implementing `Read`.
-///
-/// When the next event is `xml::event::Error` or `xml::event::EndDocument`, then
-/// it will be returned by the iterator once, and then it will stop producing events.
-pub struct Events<R: Read> {
- reader: EventReader<R>,
- finished: bool
-}
-
-impl<R: Read> Events<R> {
- /// Unwraps the iterator, returning the internal `EventReader`.
- #[inline]
- pub fn into_inner(self) -> EventReader<R> {
- self.reader
- }
-
- pub fn source(&self) -> &R { &self.reader.source }
- pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source }
-
-}
-
-impl<R: Read> Iterator for Events<R> {
- type Item = Result<XmlEvent>;
-
- #[inline]
- fn next(&mut self) -> Option<Result<XmlEvent>> {
- if self.finished && !self.reader.parser.is_ignoring_end_of_stream() { None }
- else {
- let ev = self.reader.next();
- match ev {
- Ok(XmlEvent::EndDocument) | Err(_) => self.finished = true,
- _ => {}
- }
- Some(ev)
- }
- }
-}
-
-impl<'r> EventReader<&'r [u8]> {
- /// A convenience method to create an `XmlReader` from a string slice.
- #[inline]
- pub fn from_str(source: &'r str) -> EventReader<&'r [u8]> {
- EventReader::new(source.as_bytes())
- }
-}
diff --git a/src/reader/parser/inside_cdata.rs b/src/reader/parser/inside_cdata.rs
deleted file mode 100644
index 3269fb4..0000000
--- a/src/reader/parser/inside_cdata.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-use reader::events::XmlEvent;
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State};
-
-impl PullParser {
- pub fn inside_cdata(&mut self, t: Token) -> Option<Result> {
- match t {
- Token::CDataEnd => {
- self.lexer.enable_errors();
- let event = if self.config.cdata_to_characters {
- None
- } else {
- let data = self.take_buf();
- Some(Ok(XmlEvent::CData(data)))
- };
- self.into_state(State::OutsideTag, event)
- }
-
- Token::Whitespace(_) => {
- t.push_to_string(&mut self.buf);
- None
- }
-
- _ => {
- self.inside_whitespace = false;
- t.push_to_string(&mut self.buf);
- None
- }
- }
- }
-}
diff --git a/src/reader/parser/inside_closing_tag_name.rs b/src/reader/parser/inside_closing_tag_name.rs
deleted file mode 100644
index 1d8074a..0000000
--- a/src/reader/parser/inside_closing_tag_name.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-use namespace;
-
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State, QualifiedNameTarget, ClosingTagSubstate};
-
-impl PullParser {
- pub fn inside_closing_tag_name(&mut self, t: Token, s: ClosingTagSubstate) -> Option<Result> {
- match s {
- ClosingTagSubstate::CTInsideName => self.read_qualified_name(t, QualifiedNameTarget::ClosingTagNameTarget, |this, token, name| {
- match name.prefix_ref() {
- Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
- prefix == namespace::NS_XMLNS_PREFIX =>
- // TODO: {:?} is bad, need something better
- Some(self_error!(this; "'{:?}' cannot be an element name prefix", name.prefix)),
- _ => {
- this.data.element_name = Some(name.clone());
- match token {
- Token::Whitespace(_) => this.into_state_continue(State::InsideClosingTag(ClosingTagSubstate::CTAfterName)),
- Token::TagEnd => this.emit_end_element(),
- _ => Some(self_error!(this; "Unexpected token inside closing tag: {}", token))
- }
- }
- }
- }),
- ClosingTagSubstate::CTAfterName => match t {
- Token::Whitespace(_) => None, // Skip whitespace
- Token::TagEnd => self.emit_end_element(),
- _ => Some(self_error!(self; "Unexpected token inside closing tag: {}", t))
- }
- }
- }
-
-}
diff --git a/src/reader/parser/inside_comment.rs b/src/reader/parser/inside_comment.rs
deleted file mode 100644
index fc98320..0000000
--- a/src/reader/parser/inside_comment.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-use reader::events::XmlEvent;
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State};
-
-impl PullParser {
- pub fn inside_comment(&mut self, t: Token) -> Option<Result> {
- match t {
- // Double dash is illegal inside a comment
- Token::Chunk(ref s) if &s[..] == "--" => Some(self_error!(self; "Unexpected token inside a comment: --")),
-
- Token::CommentEnd if self.config.ignore_comments => {
- self.lexer.outside_comment();
- self.into_state_continue(State::OutsideTag)
- }
-
- Token::CommentEnd => {
- self.lexer.outside_comment();
- let data = self.take_buf();
- self.into_state_emit(State::OutsideTag, Ok(XmlEvent::Comment(data)))
- }
-
- _ if self.config.ignore_comments => None, // Do not modify buffer if ignoring the comment
-
- _ => {
- t.push_to_string(&mut self.buf);
- None
- }
- }
- }
-
-}
diff --git a/src/reader/parser/inside_declaration.rs b/src/reader/parser/inside_declaration.rs
deleted file mode 100644
index af39d10..0000000
--- a/src/reader/parser/inside_declaration.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-
-use common::XmlVersion;
-
-use reader::events::XmlEvent;
-use reader::lexer::Token;
-
-use super::{
- Result, PullParser, State, DeclarationSubstate, QualifiedNameTarget,
- DEFAULT_VERSION, DEFAULT_ENCODING
-};
-
-impl PullParser {
- // TODO: remove redundancy via macros or extra methods
- pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
- macro_rules! unexpected_token(
- ($this:expr; $t:expr) => (Some($this.error(format!("Unexpected token inside XML declaration: {}", $t))));
- ($t:expr) => (unexpected_token!(self; $t));
- );
-
- #[inline]
- fn emit_start_document(this: &mut PullParser) -> Option<Result> {
- this.parsed_declaration = true;
- let version = this.data.take_version();
- let encoding = this.data.take_encoding();
- let standalone = this.data.take_standalone();
- this.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
- version: version.unwrap_or(DEFAULT_VERSION),
- encoding: encoding.unwrap_or(DEFAULT_ENCODING.into()),
- standalone: standalone
- }))
- }
-
- match s {
- DeclarationSubstate::BeforeVersion => match t {
- Token::Whitespace(_) => None, // continue
- Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
- _ => unexpected_token!(t)
- },
-
- DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- match &name.local_name[..] {
- "ersion" if name.namespace.is_none() =>
- this.into_state_continue(State::InsideDeclaration(
- if token == Token::EqualsSign {
- DeclarationSubstate::InsideVersionValue
- } else {
- DeclarationSubstate::AfterVersion
- }
- )),
- _ => unexpected_token!(this; name)
- }
- }),
-
- DeclarationSubstate::AfterVersion => match t {
- Token::Whitespace(_) => None,
- Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
- _ => unexpected_token!(t)
- },
-
- DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
- this.data.version = match &value[..] {
- "1.0" => Some(XmlVersion::Version10),
- "1.1" => Some(XmlVersion::Version11),
- _ => None
- };
- if this.data.version.is_some() {
- this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
- } else {
- Some(self_error!(this; "Unexpected XML version value: {}", value))
- }
- }),
-
- DeclarationSubstate::AfterVersionValue => match t {
- Token::Whitespace(_) => None, // skip whitespace
- Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
- Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
- Token::ProcessingInstructionEnd => emit_start_document(self),
- _ => unexpected_token!(t)
- },
-
- DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- match &name.local_name[..] {
- "ncoding" if name.namespace.is_none() =>
- this.into_state_continue(State::InsideDeclaration(
- if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
- )),
- _ => unexpected_token!(this; name)
- }
- }),
-
- DeclarationSubstate::AfterEncoding => match t {
- Token::Whitespace(_) => None,
- Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
- _ => unexpected_token!(t)
- },
-
- DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
- this.data.encoding = Some(value);
- this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl))
- }),
-
- DeclarationSubstate::BeforeStandaloneDecl => match t {
- Token::Whitespace(_) => None, // skip whitespace
- Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
- Token::ProcessingInstructionEnd => emit_start_document(self),
- _ => unexpected_token!(t)
- },
-
- DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- match &name.local_name[..] {
- "tandalone" if name.namespace.is_none() =>
- this.into_state_continue(State::InsideDeclaration(
- if token == Token::EqualsSign {
- DeclarationSubstate::InsideStandaloneDeclValue
- } else {
- DeclarationSubstate::AfterStandaloneDecl
- }
- )),
- _ => unexpected_token!(this; name)
- }
- }),
-
- DeclarationSubstate::AfterStandaloneDecl => match t {
- Token::Whitespace(_) => None,
- Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
- _ => unexpected_token!(t)
- },
-
- DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
- let standalone = match &value[..] {
- "yes" => Some(true),
- "no" => Some(false),
- _ => None
- };
- if standalone.is_some() {
- this.data.standalone = standalone;
- this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
- } else {
- Some(self_error!(this; "Invalid standalone declaration value: {}", value))
- }
- }),
-
- DeclarationSubstate::AfterStandaloneDeclValue => match t {
- Token::Whitespace(_) => None, // skip whitespace
- Token::ProcessingInstructionEnd => emit_start_document(self),
- _ => unexpected_token!(t)
- }
- }
- }
-
-}
diff --git a/src/reader/parser/inside_doctype.rs b/src/reader/parser/inside_doctype.rs
deleted file mode 100644
index 8dcf367..0000000
--- a/src/reader/parser/inside_doctype.rs
+++ /dev/null
@@ -1,16 +0,0 @@
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State};
-
-impl PullParser {
- pub fn inside_doctype(&mut self, t: Token) -> Option<Result> {
- match t {
- Token::TagEnd => {
- self.lexer.enable_errors();
- self.into_state_continue(State::OutsideTag)
- }
-
- _ => None
- }
- }
-}
diff --git a/src/reader/parser/inside_opening_tag.rs b/src/reader/parser/inside_opening_tag.rs
deleted file mode 100644
index 533874f..0000000
--- a/src/reader/parser/inside_opening_tag.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-use common::is_name_start_char;
-use attribute::OwnedAttribute;
-use namespace;
-
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State, OpeningTagSubstate, QualifiedNameTarget};
-
-impl PullParser {
- pub fn inside_opening_tag(&mut self, t: Token, s: OpeningTagSubstate) -> Option<Result> {
- macro_rules! unexpected_token(($t:expr) => (Some(self_error!(self; "Unexpected token inside opening tag: {}", $t))));
- match s {
- OpeningTagSubstate::InsideName => self.read_qualified_name(t, QualifiedNameTarget::OpeningTagNameTarget, |this, token, name| {
- match name.prefix_ref() {
- Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
- prefix == namespace::NS_XMLNS_PREFIX =>
- Some(self_error!(this; "'{:?}' cannot be an element name prefix", name.prefix)),
- _ => {
- this.data.element_name = Some(name.clone());
- match token {
- Token::TagEnd => this.emit_start_element(false),
- Token::EmptyTagEnd => this.emit_start_element(true),
- Token::Whitespace(_) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
- _ => unreachable!()
- }
- }
- }
- }),
-
- OpeningTagSubstate::InsideTag => match t {
- Token::Whitespace(_) => None, // skip whitespace
- Token::Character(c) if is_name_start_char(c) => {
- self.buf.push(c);
- self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeName))
- }
- Token::TagEnd => self.emit_start_element(false),
- Token::EmptyTagEnd => self.emit_start_element(true),
- _ => unexpected_token!(t)
- },
-
- OpeningTagSubstate::InsideAttributeName => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- this.data.attr_name = Some(name);
- match token {
- Token::Whitespace(_) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeName)),
- Token::EqualsSign => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
- _ => unreachable!()
- }
- }),
-
- OpeningTagSubstate::AfterAttributeName => match t {
- Token::Whitespace(_) => None,
- Token::EqualsSign => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
- _ => unexpected_token!(t)
- },
-
- OpeningTagSubstate::InsideAttributeValue => self.read_attribute_value(t, |this, value| {
- let name = this.data.take_attr_name().unwrap(); // unwrap() will always succeed here
-
- // check that no attribute with such name is already present
- // if there is one, XML is not well-formed
- if this.data.attributes.iter().find(|a| a.name == name).is_some() { // TODO: looks bad
- // TODO: ideally this error should point to the beginning of the attribute,
- // TODO: not the end of its value
- Some(self_error!(this; "Attribute '{}' is redefined", name))
- } else {
- match name.prefix_ref() {
- // declaring a new prefix; it is sufficient to check prefix only
- // because "xmlns" prefix is reserved
- Some(namespace::NS_XMLNS_PREFIX) => {
- let ln = &name.local_name[..];
- if ln == namespace::NS_XMLNS_PREFIX {
- Some(self_error!(this; "Cannot redefine prefix '{}'", namespace::NS_XMLNS_PREFIX))
- } else if ln == namespace::NS_XML_PREFIX && &value[..] != namespace::NS_XML_URI {
- Some(self_error!(this; "Prefix '{}' cannot be rebound to another value", namespace::NS_XML_PREFIX))
- } else if value.is_empty() {
- Some(self_error!(this; "Cannot undefine prefix '{}'", ln))
- } else {
- this.nst.put(name.local_name.clone(), value);
- this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
- }
- }
-
- // declaring default namespace
- None if &name.local_name[..] == namespace::NS_XMLNS_PREFIX =>
- match &value[..] {
- namespace::NS_XMLNS_PREFIX | namespace::NS_XML_PREFIX =>
- Some(self_error!(this; "Namespace '{}' cannot be default", value)),
- _ => {
- this.nst.put(namespace::NS_NO_PREFIX, value.clone());
- this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
- }
- },
-
- // regular attribute
- _ => {
- this.data.attributes.push(OwnedAttribute {
- name: name.clone(),
- value: value
- });
- this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
- }
- }
- }
- })
- }
- }
-
-}
diff --git a/src/reader/parser/inside_processing_instruction.rs b/src/reader/parser/inside_processing_instruction.rs
deleted file mode 100644
index 8ddf6b8..0000000
--- a/src/reader/parser/inside_processing_instruction.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-use common::{
- is_name_start_char, is_name_char,
-};
-
-use reader::events::XmlEvent;
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State, ProcessingInstructionSubstate, DeclarationSubstate};
-
-impl PullParser {
- pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
- match s {
- ProcessingInstructionSubstate::PIInsideName => match t {
- Token::Character(c) if !self.buf_has_data() && is_name_start_char(c) ||
- self.buf_has_data() && is_name_char(c) => self.append_char_continue(c),
-
- Token::ProcessingInstructionEnd => {
- // self.buf contains PI name
- let name = self.take_buf();
-
- // Don't need to check for declaration because it has mandatory attributes
- // but there is none
- match &name[..] {
- // Name is empty, it is an error
- "" => Some(self_error!(self; "Encountered processing instruction without name")),
-
- // Found <?xml-like PI not at the beginning of a document,
- // it is an error - see section 2.6 of XML 1.1 spec
- "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML" =>
- Some(self_error!(self; "Invalid processing instruction: <?{}", name)),
-
- // All is ok, emitting event
- _ => {
- self.into_state_emit(
- State::OutsideTag,
- Ok(XmlEvent::ProcessingInstruction {
- name: name,
- data: None
- })
- )
- }
- }
- }
-
- Token::Whitespace(_) => {
- // self.buf contains PI name
- let name = self.take_buf();
-
- match &name[..] {
- // We have not ever encountered an element and have not parsed XML declaration
- "xml" if !self.encountered_element && !self.parsed_declaration =>
- self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),
-
- // Found <?xml-like PI after the beginning of a document,
- // it is an error - see section 2.6 of XML 1.1 spec
- "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML"
- if self.encountered_element || self.parsed_declaration =>
- Some(self_error!(self; "Invalid processing instruction: <?{}", name)),
-
- // All is ok, starting parsing PI data
- _ => {
- self.lexer.disable_errors(); // data is arbitrary, so disable errors
- self.data.name = name;
- self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData))
- }
-
- }
- }
-
- _ => Some(self_error!(self; "Unexpected token: <?{}{}", self.buf, t))
- },
-
- ProcessingInstructionSubstate::PIInsideData => match t {
- Token::ProcessingInstructionEnd => {
- self.lexer.enable_errors();
- let name = self.data.take_name();
- let data = self.take_buf();
- self.into_state_emit(
- State::OutsideTag,
- Ok(XmlEvent::ProcessingInstruction {
- name: name,
- data: Some(data)
- })
- )
- },
-
- // Any other token should be treated as plain characters
- _ => {
- t.push_to_string(&mut self.buf);
- None
- }
- },
- }
- }
-
-}
diff --git a/src/reader/parser/inside_reference.rs b/src/reader/parser/inside_reference.rs
deleted file mode 100644
index 60026d5..0000000
--- a/src/reader/parser/inside_reference.rs
+++ /dev/null
@@ -1,89 +0,0 @@
-use std::char;
-
-use common::{is_name_start_char, is_name_char, is_whitespace_str};
-
-use reader::lexer::Token;
-
-use super::{Result, PullParser, State};
-
-impl PullParser {
- pub fn inside_reference(&mut self, t: Token, prev_st: State) -> Option<Result> {
- match t {
- Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) ||
- self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => {
- self.data.ref_data.push(c);
- None
- }
-
- Token::ReferenceEnd => {
- // TODO: check for unicode correctness
- let name = self.data.take_ref_data();
- let name_len = name.len(); // compute once
- let c = match &name[..] {
- "lt" => Ok('<'.to_string()),
- "gt" => Ok('>'.to_string()),
- "amp" => Ok('&'.to_string()),
- "apos" => Ok('\''.to_string()),
- "quot" => Ok('"'.to_string()),
- "" => Err(self_error!(self; "Encountered empty entity")),
- _ if name_len > 2 && name.starts_with("#x") => {
- let num_str = &name[2..name_len];
- if num_str == "0" {
- Err(self_error!(self; "Null character entity is not allowed"))
- } else {
- if self.config.replace_unknown_entity_references {
- match u32::from_str_radix(num_str, 16).ok().map(|i| char::from_u32(i).unwrap_or('\u{fffd}')) {
- Some(c) => Ok(c.to_string()),
- None => Err(self_error!(self; "Invalid hexadecimal character number in an entity: {}", name))
- }
- } else {
- match u32::from_str_radix(num_str, 16).ok().and_then(char::from_u32) {
- Some(c) => Ok(c.to_string()),
- None => Err(self_error!(self; "Invalid hexadecimal character number in an entity: {}", name))
- }
- }
- }
- }
- _ if name_len > 1 && name.starts_with('#') => {
- let num_str = &name[1..name_len];
- if num_str == "0" {
- Err(self_error!(self; "Null character entity is not allowed"))
- } else {
- if self.config.replace_unknown_entity_references {
- match u32::from_str_radix(num_str, 10).ok().map(|i| char::from_u32(i).unwrap_or('\u{fffd}')) {
- Some(c) => Ok(c.to_string()),
- None => Err(self_error!(self; "Invalid decimal character number in an entity: {}", name))
- }
- }
- else {
- match u32::from_str_radix(num_str, 10).ok().and_then(char::from_u32) {
- Some(c) => Ok(c.to_string()),
- None => Err(self_error!(self; "Invalid decimal character number in an entity: {}", name))
- }
- }
- }
- },
- _ => {
- if let Some(v) = self.config.extra_entities.get(&name) {
- Ok(v.clone())
- } else {
- Err(self_error!(self; "Unexpected entity: {}", name))
- }
- }
- };
- match c {
- Ok(c) => {
- self.buf.push_str(&c);
- if prev_st == State::OutsideTag && !is_whitespace_str(&c) {
- self.inside_whitespace = false;
- }
- self.into_state_continue(prev_st)
- }
- Err(e) => Some(e)
- }
- }
-
- _ => Some(self_error!(self; "Unexpected token inside an entity: {}", t))
- }
- }
-}
diff --git a/src/reader/parser/mod.rs b/src/reader/parser/mod.rs
deleted file mode 100644
index 58ca3a6..0000000
--- a/src/reader/parser/mod.rs
+++ /dev/null
@@ -1,622 +0,0 @@
-//! Contains an implementation of pull-based XML parser.
-
-use std::mem;
-use std::borrow::Cow;
-use std::io::prelude::*;
-
-use common::{
- self,
- XmlVersion, Position, TextPosition,
- is_name_start_char, is_name_char,
-};
-use name::OwnedName;
-use attribute::OwnedAttribute;
-use namespace::NamespaceStack;
-
-use reader::events::XmlEvent;
-use reader::config::ParserConfig;
-use reader::lexer::{Lexer, Token};
-
-macro_rules! gen_takes(
- ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => (
- $(
- impl MarkupData {
- #[inline]
- fn $method(&mut self) -> $t {
- mem::replace(&mut self.$field, $def)
- }
- }
- )+
- )
-);
-
-gen_takes!(
- name -> take_name, String, String::new();
- ref_data -> take_ref_data, String, String::new();
-
- version -> take_version, Option<common::XmlVersion>, None;
- encoding -> take_encoding, Option<String>, None;
- standalone -> take_standalone, Option<bool>, None;
-
- element_name -> take_element_name, Option<OwnedName>, None;
-
- attr_name -> take_attr_name, Option<OwnedName>, None;
- attributes -> take_attributes, Vec<OwnedAttribute>, vec!()
-);
-
-macro_rules! self_error(
- ($this:ident; $msg:expr) => ($this.error($msg));
- ($this:ident; $fmt:expr, $($arg:expr),+) => ($this.error(format!($fmt, $($arg),+)))
-);
-
-mod outside_tag;
-mod inside_processing_instruction;
-mod inside_declaration;
-mod inside_doctype;
-mod inside_opening_tag;
-mod inside_closing_tag_name;
-mod inside_comment;
-mod inside_cdata;
-mod inside_reference;
-
-static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
-static DEFAULT_ENCODING: &'static str = "UTF-8";
-static DEFAULT_STANDALONE: Option<bool> = None;
-
-type ElementStack = Vec<OwnedName>;
-pub type Result = super::Result<XmlEvent>;
-
-/// Pull-based XML parser.
-pub struct PullParser {
- config: ParserConfig,
- lexer: Lexer,
- st: State,
- buf: String,
- nst: NamespaceStack,
-
- data: MarkupData,
- final_result: Option<Result>,
- next_event: Option<Result>,
- est: ElementStack,
- pos: Vec<TextPosition>,
-
- encountered_element: bool,
- parsed_declaration: bool,
- inside_whitespace: bool,
- read_prefix_separator: bool,
- pop_namespace: bool
-}
-
-impl PullParser {
- /// Returns a new parser using the given config.
- pub fn new(config: ParserConfig) -> PullParser {
- PullParser {
- config: config,
- lexer: Lexer::new(),
- st: State::OutsideTag,
- buf: String::new(),
- nst: NamespaceStack::default(),
-
- data: MarkupData {
- name: String::new(),
- version: None,
- encoding: None,
- standalone: None,
- ref_data: String::new(),
- element_name: None,
- quote: None,
- attr_name: None,
- attributes: Vec::new()
- },
- final_result: None,
- next_event: None,
- est: Vec::new(),
- pos: vec![TextPosition::new()],
-
- encountered_element: false,
- parsed_declaration: false,
- inside_whitespace: true,
- read_prefix_separator: false,
- pop_namespace: false
- }
- }
-
- /// Checks if this parser ignores the end of stream errors.
- pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.ignore_end_of_stream }
-}
-
-impl Position for PullParser {
- /// Returns the position of the last event produced by the parser
- #[inline]
- fn position(&self) -> TextPosition {
- self.pos[0]
- }
-}
-
-#[derive(Clone, PartialEq)]
-pub enum State {
- OutsideTag,
- InsideOpeningTag(OpeningTagSubstate),
- InsideClosingTag(ClosingTagSubstate),
- InsideProcessingInstruction(ProcessingInstructionSubstate),
- InsideComment,
- InsideCData,
- InsideDeclaration(DeclarationSubstate),
- InsideDoctype,
- InsideReference(Box<State>)
-}
-
-#[derive(Clone, PartialEq)]
-pub enum OpeningTagSubstate {
- InsideName,
-
- InsideTag,
-
- InsideAttributeName,
- AfterAttributeName,
-
- InsideAttributeValue,
-}
-
-#[derive(Clone, PartialEq)]
-pub enum ClosingTagSubstate {
- CTInsideName,
- CTAfterName
-}
-
-#[derive(Clone, PartialEq)]
-pub enum ProcessingInstructionSubstate {
- PIInsideName,
- PIInsideData
-}
-
-#[derive(Clone, PartialEq)]
-pub enum DeclarationSubstate {
- BeforeVersion,
- InsideVersion,
- AfterVersion,
-
- InsideVersionValue,
- AfterVersionValue,
-
- InsideEncoding,
- AfterEncoding,
-
- InsideEncodingValue,
-
- BeforeStandaloneDecl,
- InsideStandaloneDecl,
- AfterStandaloneDecl,
-
- InsideStandaloneDeclValue,
- AfterStandaloneDeclValue
-}
-
-#[derive(PartialEq)]
-enum QualifiedNameTarget {
- AttributeNameTarget,
- OpeningTagNameTarget,
- ClosingTagNameTarget
-}
-
-#[derive(Copy, Clone, PartialEq, Eq)]
-enum QuoteToken {
- SingleQuoteToken,
- DoubleQuoteToken
-}
-
-impl QuoteToken {
- fn from_token(t: &Token) -> QuoteToken {
- match *t {
- Token::SingleQuote => QuoteToken::SingleQuoteToken,
- Token::DoubleQuote => QuoteToken::DoubleQuoteToken,
- _ => panic!("Unexpected token: {}", t)
- }
- }
-
- fn as_token(self) -> Token {
- match self {
- QuoteToken::SingleQuoteToken => Token::SingleQuote,
- QuoteToken::DoubleQuoteToken => Token::DoubleQuote
- }
- }
-}
-
-struct MarkupData {
- name: String, // used for processing instruction name
- ref_data: String, // used for reference content
-
- version: Option<common::XmlVersion>, // used for XML declaration version
- encoding: Option<String>, // used for XML declaration encoding
- standalone: Option<bool>, // used for XML declaration standalone parameter
-
- element_name: Option<OwnedName>, // used for element name
-
- quote: Option<QuoteToken>, // used to hold opening quote for attribute value
- attr_name: Option<OwnedName>, // used to hold attribute name
- attributes: Vec<OwnedAttribute> // used to hold all accumulated attributes
-}
-
-impl PullParser {
- /// Returns next event read from the given buffer.
- ///
- /// This method should be always called with the same buffer. If you call it
- /// providing different buffers each time, the result will be undefined.
- pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
- if let Some(ref ev) = self.final_result {
- return ev.clone();
- }
-
- if let Some(ev) = self.next_event.take() {
- return ev;
- }
-
- if self.pop_namespace {
- self.pop_namespace = false;
- self.nst.pop();
- }
-
- loop {
- // While lexer gives us Ok(maybe_token) -- we loop.
- // Upon having a complete XML-event -- we return from the whole function.
- match self.lexer.next_token(r) {
- Ok(maybe_token) =>
- match maybe_token {
- None => break,
- Some(token) =>
- match self.dispatch_token(token) {
- None => {} // continue
- Some(Ok(XmlEvent::EndDocument)) =>
- return {
- self.next_pos();
- self.set_final_result(Ok(XmlEvent::EndDocument))
- },
- Some(Ok(xml_event)) =>
- return {
- self.next_pos();
- Ok(xml_event)
- },
- Some(Err(xml_error)) =>
- return {
- self.next_pos();
- self.set_final_result(Err(xml_error))
- },
- }
- },
- Err(lexer_error) =>
- return self.set_final_result(Err(lexer_error)),
- }
- }
-
- // Handle end of stream
- // Forward pos to the lexer head
- self.next_pos();
- let ev = if self.depth() == 0 {
- if self.encountered_element && self.st == State::OutsideTag { // all is ok
- Ok(XmlEvent::EndDocument)
- } else if !self.encountered_element {
- self_error!(self; "Unexpected end of stream: no root element found")
- } else { // self.st != State::OutsideTag
- self_error!(self; "Unexpected end of stream") // TODO: add expected hint?
- }
- } else {
- if self.config.ignore_end_of_stream {
- self.final_result = None;
- self.lexer.reset_eof_handled();
- return self_error!(self; "Unexpected end of stream: still inside the root element");
- } else {
- self_error!(self; "Unexpected end of stream: still inside the root element")
- }
- };
- self.set_final_result(ev)
- }
-
- // This function is to be called when a terminal event is reached.
- // The function sets up the `self.final_result` into `Some(result)` and return `result`.
- fn set_final_result(&mut self, result: Result) -> Result {
- self.final_result = Some(result.clone());
- result
- }
-
- #[inline]
- fn error<M: Into<Cow<'static, str>>>(&self, msg: M) -> Result {
- Err((&self.lexer, msg).into())
- }
-
- #[inline]
- fn next_pos(&mut self) {
- if self.pos.len() > 1 {
- self.pos.remove(0);
- } else {
- self.pos[0] = self.lexer.position();
- }
- }
-
- #[inline]
- fn push_pos(&mut self) {
- self.pos.push(self.lexer.position());
- }
-
- fn dispatch_token(&mut self, t: Token) -> Option<Result> {
- match self.st.clone() {
- State::OutsideTag => self.outside_tag(t),
- State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
- State::InsideDeclaration(s) => self.inside_declaration(t, s),
- State::InsideDoctype => self.inside_doctype(t),
- State::InsideOpeningTag(s) => self.inside_opening_tag(t, s),
- State::InsideClosingTag(s) => self.inside_closing_tag_name(t, s),
- State::InsideComment => self.inside_comment(t),
- State::InsideCData => self.inside_cdata(t),
- State::InsideReference(s) => self.inside_reference(t, *s)
- }
- }
-
- #[inline]
- fn depth(&self) -> usize {
- self.est.len()
- }
-
- #[inline]
- fn buf_has_data(&self) -> bool {
- self.buf.len() > 0
- }
-
- #[inline]
- fn take_buf(&mut self) -> String {
- mem::replace(&mut self.buf, String::new())
- }
-
- #[inline]
- fn append_char_continue(&mut self, c: char) -> Option<Result> {
- self.buf.push(c);
- None
- }
-
- #[inline]
- fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
- self.st = st;
- ev
- }
-
- #[inline]
- fn into_state_continue(&mut self, st: State) -> Option<Result> {
- self.into_state(st, None)
- }
-
- #[inline]
- fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
- self.into_state(st, Some(ev))
- }
-
- /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
- /// an error is returned.
- ///
- /// # Parameters
- /// * `t` --- next token;
- /// * `on_name` --- a callback which is executed when whitespace is encountered.
- fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
- where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> {
- // We can get here for the first time only when self.data.name contains zero or one character,
- // but first character cannot be a colon anyway
- if self.buf.len() <= 1 {
- self.read_prefix_separator = false;
- }
-
- let invoke_callback = |this: &mut PullParser, t| {
- let name = this.take_buf();
- match name.parse() {
- Ok(name) => on_name(this, t, name),
- Err(_) => Some(self_error!(this; "Qualified name is invalid: {}", name))
- }
- };
-
- match t {
- // There can be only one colon, and not as the first character
- Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
- self.buf.push(':');
- self.read_prefix_separator = true;
- None
- }
-
- Token::Character(c) if c != ':' && (!self.buf_has_data() && is_name_start_char(c) ||
- self.buf_has_data() && is_name_char(c)) =>
- self.append_char_continue(c),
-
- Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t),
-
- Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t),
-
- Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
- target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t),
-
- Token::Whitespace(_) => invoke_callback(self, t),
-
- _ => Some(self_error!(self; "Unexpected token inside qualified name: {}", t))
- }
- }
-
- /// Dispatches tokens in order to process attribute value.
- ///
- /// # Parameters
- /// * `t` --- next token;
- /// * `on_value` --- a callback which is called when terminating quote is encountered.
- fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
- where F: Fn(&mut PullParser, String) -> Option<Result> {
- match t {
- Token::Whitespace(_) if self.data.quote.is_none() => None, // skip leading whitespace
-
- Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
- None => { // Entered attribute value
- self.data.quote = Some(QuoteToken::from_token(&t));
- None
- }
- Some(q) if q.as_token() == t => {
- self.data.quote = None;
- let value = self.take_buf();
- on_value(self, value)
- }
- _ => {
- t.push_to_string(&mut self.buf);
- None
- }
- },
-
- Token::ReferenceStart => {
- let st = Box::new(self.st.clone());
- self.into_state_continue(State::InsideReference(st))
- }
-
- Token::OpeningTagStart =>
- Some(self_error!(self; "Unexpected token inside attribute value: <")),
-
- // Every character except " and ' and < is okay
- _ => {
- t.push_to_string(&mut self.buf);
- None
- }
- }
- }
-
- fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
- let mut name = self.data.take_element_name().unwrap();
- let mut attributes = self.data.take_attributes();
-
- // check whether the name prefix is bound and fix its namespace
- match self.nst.get(name.borrow().prefix_repr()) {
- Some("") => name.namespace = None, // default namespace
- Some(ns) => name.namespace = Some(ns.into()),
- None => return Some(self_error!(self; "Element {} prefix is unbound", name))
- }
-
- // check and fix accumulated attributes prefixes
- for attr in attributes.iter_mut() {
- if let Some(ref pfx) = attr.name.prefix {
- let new_ns = match self.nst.get(pfx) {
- Some("") => None, // default namespace
- Some(ns) => Some(ns.into()),
- None => return Some(self_error!(self; "Attribute {} prefix is unbound", attr.name))
- };
- attr.name.namespace = new_ns;
- }
- }
-
- if emit_end_element {
- self.pop_namespace = true;
- self.next_event = Some(Ok(XmlEvent::EndElement {
- name: name.clone()
- }));
- } else {
- self.est.push(name.clone());
- }
- let namespace = self.nst.squash();
- self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
- name: name,
- attributes: attributes,
- namespace: namespace
- }))
- }
-
- fn emit_end_element(&mut self) -> Option<Result> {
- let mut name = self.data.take_element_name().unwrap();
-
- // check whether the name prefix is bound and fix its namespace
- match self.nst.get(name.borrow().prefix_repr()) {
- Some("") => name.namespace = None, // default namespace
- Some(ns) => name.namespace = Some(ns.into()),
- None => return Some(self_error!(self; "Element {} prefix is unbound", name))
- }
-
- let op_name = self.est.pop().unwrap();
-
- if name == op_name {
- self.pop_namespace = true;
- self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name: name }))
- } else {
- Some(self_error!(self; "Unexpected closing tag: {}, expected {}", name, op_name))
- }
- }
-
-}
-
-#[cfg(test)]
-mod tests {
- use std::io::BufReader;
-
- use common::{Position, TextPosition};
- use name::OwnedName;
- use attribute::OwnedAttribute;
- use reader::parser::PullParser;
- use reader::ParserConfig;
- use reader::events::XmlEvent;
-
- fn new_parser() -> PullParser {
- PullParser::new(ParserConfig::new())
- }
-
- macro_rules! expect_event(
- ($r:expr, $p:expr, $t:pat) => (
- match $p.next(&mut $r) {
- $t => {}
- e => panic!("Unexpected event: {:?}", e)
- }
- );
- ($r:expr, $p:expr, $t:pat => $c:expr ) => (
- match $p.next(&mut $r) {
- $t if $c => {}
- e => panic!("Unexpected event: {:?}", e)
- }
- )
- );
-
- macro_rules! test_data(
- ($d:expr) => ({
- static DATA: &'static str = $d;
- let r = BufReader::new(DATA.as_bytes());
- let p = new_parser();
- (r, p)
- })
- );
-
- #[test]
- fn issue_3_semicolon_in_attribute_value() {
- let (mut r, mut p) = test_data!(r#"
- <a attr="zzz;zzz" />
- "#);
-
- expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
- expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
- *name == OwnedName::local("a") &&
- attributes.len() == 1 &&
- attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
- namespace.is_essentially_empty()
- );
- expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
- expect_event!(r, p, Ok(XmlEvent::EndDocument));
- }
-
- #[test]
- fn issue_140_entity_reference_inside_tag() {
- let (mut r, mut p) = test_data!(r#"
- <bla>♫</bla>
- "#);
-
- expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
- expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
- expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
- expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
- expect_event!(r, p, Ok(XmlEvent::EndDocument));
- }
-
- #[test]
- fn opening_tag_in_attribute_value() {
- let (mut r, mut p) = test_data!(r#"
- <a attr="zzz<zzz" />
- "#);
-
- expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
- expect_event!(r, p, Err(ref e) =>
- e.msg() == "Unexpected token inside attribute value: <" &&
- e.position() == TextPosition { row: 1, column: 24 }
- );
- }
-}
diff --git a/src/reader/parser/outside_tag.rs b/src/reader/parser/outside_tag.rs
deleted file mode 100644
index d3f7598..0000000
--- a/src/reader/parser/outside_tag.rs
+++ /dev/null
@@ -1,130 +0,0 @@
-use common::is_whitespace_char;
-
-use reader::events::XmlEvent;
-use reader::lexer::Token;
-
-use super::{
- Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate,
- ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE
-};
-
-impl PullParser {
- pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
- match t {
- Token::ReferenceStart =>
- self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))),
-
- Token::Whitespace(_) if self.depth() == 0 && self.config.ignore_root_level_whitespace => None, // skip whitespace outside of the root element
-
- Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None,
-
- Token::Whitespace(c) => {
- if !self.buf_has_data() {
- self.push_pos();
- }
- self.append_char_continue(c)
- }
-
- _ if t.contains_char_data() && self.depth() == 0 =>
- Some(self_error!(self; "Unexpected characters outside the root element: {}", t)),
-
- _ if t.contains_char_data() => { // Non-whitespace char data
- if !self.buf_has_data() {
- self.push_pos();
- }
- self.inside_whitespace = false;
- t.push_to_string(&mut self.buf);
- None
- }
-
- Token::ReferenceEnd => { // Semi-colon in a text outside an entity
- self.inside_whitespace = false;
- Token::ReferenceEnd.push_to_string(&mut self.buf);
- None
- }
-
- Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => {
- // We need to switch the lexer into a comment mode inside comments
- self.lexer.inside_comment();
- self.into_state_continue(State::InsideComment)
- }
-
- Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => {
- if !self.buf_has_data() {
- self.push_pos();
- }
- // We need to disable lexing errors inside CDATA
- self.lexer.disable_errors();
- self.into_state_continue(State::InsideCData)
- }
-
- _ => {
- // Encountered some markup event, flush the buffer as characters
- // or a whitespace
- let mut next_event = if self.buf_has_data() {
- let buf = self.take_buf();
- if self.inside_whitespace && self.config.trim_whitespace {
- None
- } else if self.inside_whitespace && !self.config.whitespace_to_characters {
- Some(Ok(XmlEvent::Whitespace(buf)))
- } else if self.config.trim_whitespace {
- Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
- } else {
- Some(Ok(XmlEvent::Characters(buf)))
- }
- } else { None };
- self.inside_whitespace = true; // Reset inside_whitespace flag
- self.push_pos();
- match t {
- Token::ProcessingInstructionStart =>
- self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
-
- Token::DoctypeStart if !self.encountered_element => {
- // We don't have a doctype event so skip this position
- // FIXME: update when we have a doctype event
- self.next_pos();
- self.lexer.disable_errors();
- self.into_state(State::InsideDoctype, next_event)
- }
-
- Token::OpeningTagStart => {
- // If declaration was not parsed and we have encountered an element,
- // emit this declaration as the next event.
- if !self.parsed_declaration {
- self.parsed_declaration = true;
- let sd_event = XmlEvent::StartDocument {
- version: DEFAULT_VERSION,
- encoding: DEFAULT_ENCODING.into(),
- standalone: DEFAULT_STANDALONE
- };
- // next_event is always none here because we're outside of
- // the root element
- next_event = Some(Ok(sd_event));
- self.push_pos();
- }
- self.encountered_element = true;
- self.nst.push_empty();
- self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
- }
-
- Token::ClosingTagStart if self.depth() > 0 =>
- self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
-
- Token::CommentStart => {
- // We need to switch the lexer into a comment mode inside comments
- self.lexer.inside_comment();
- self.into_state(State::InsideComment, next_event)
- }
-
- Token::CDataStart => {
- // We need to disable lexing errors inside CDATA
- self.lexer.disable_errors();
- self.into_state(State::InsideCData, next_event)
- }
-
- _ => Some(self_error!(self; "Unexpected token: {}", t))
- }
- }
- }
- }
-}
diff --git a/src/util.rs b/src/util.rs
deleted file mode 100644
index 23fee04..0000000
--- a/src/util.rs
+++ /dev/null
@@ -1,107 +0,0 @@
-use std::io::{self, Read};
-use std::str;
-use std::fmt;
-
-#[derive(Debug)]
-pub enum CharReadError {
- UnexpectedEof,
- Utf8(str::Utf8Error),
- Io(io::Error)
-}
-
-impl From<str::Utf8Error> for CharReadError {
- fn from(e: str::Utf8Error) -> CharReadError {
- CharReadError::Utf8(e)
- }
-}
-
-impl From<io::Error> for CharReadError {
- fn from(e: io::Error) -> CharReadError {
- CharReadError::Io(e)
- }
-}
-
-impl fmt::Display for CharReadError {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- use self::CharReadError::*;
- match *self {
- UnexpectedEof => write!(f, "unexpected end of stream"),
- Utf8(ref e) => write!(f, "UTF-8 decoding error: {}", e),
- Io(ref e) => write!(f, "I/O error: {}", e)
- }
- }
-}
-
-pub fn next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError> {
- const MAX_CODEPOINT_LEN: usize = 4;
-
- let mut bytes = source.bytes();
- let mut buf = [0u8; MAX_CODEPOINT_LEN];
- let mut pos = 0;
-
- loop {
- let next = match bytes.next() {
- Some(Ok(b)) => b,
- Some(Err(e)) => return Err(e.into()),
- None if pos == 0 => return Ok(None),
- None => return Err(CharReadError::UnexpectedEof)
- };
- buf[pos] = next;
- pos += 1;
-
- match str::from_utf8(&buf[..pos]) {
- Ok(s) => return Ok(s.chars().next()), // always Some(..)
- Err(_) if pos < MAX_CODEPOINT_LEN => {},
- Err(e) => return Err(e.into())
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- #[test]
- fn test_next_char_from() {
- use std::io;
- use std::error::Error;
-
- let mut bytes: &[u8] = "correct".as_bytes(); // correct ASCII
- assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('c'));
-
- let mut bytes: &[u8] = "правильно".as_bytes(); // correct BMP
- assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('п'));
-
- let mut bytes: &[u8] = "😊".as_bytes(); // correct non-BMP
- assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('😊'));
-
- let mut bytes: &[u8] = b""; // empty
- assert_eq!(super::next_char_from(&mut bytes).unwrap(), None);
-
- let mut bytes: &[u8] = b"\xf0\x9f\x98"; // incomplete code point
- match super::next_char_from(&mut bytes).unwrap_err() {
- super::CharReadError::UnexpectedEof => {},
- e => panic!("Unexpected result: {:?}", e)
- };
-
- let mut bytes: &[u8] = b"\xff\x9f\x98\x32"; // invalid code point
- match super::next_char_from(&mut bytes).unwrap_err() {
- super::CharReadError::Utf8(_) => {},
- e => panic!("Unexpected result: {:?}", e)
- };
-
-
- // error during read
- struct ErrorReader;
- impl io::Read for ErrorReader {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- Err(io::Error::new(io::ErrorKind::Other, "test error"))
- }
- }
-
- let mut r = ErrorReader;
- match super::next_char_from(&mut r).unwrap_err() {
- super::CharReadError::Io(ref e) if e.kind() == io::ErrorKind::Other &&
- e.description() == "test error" => {},
- e => panic!("Unexpected result: {:?}", e)
- }
- }
-}
diff --git a/src/writer/config.rs b/src/writer/config.rs
deleted file mode 100644
index ebabf18..0000000
--- a/src/writer/config.rs
+++ /dev/null
@@ -1,157 +0,0 @@
-//! Contains emitter configuration structure.
-
-use std::io::Write;
-use std::borrow::Cow;
-
-use writer::EventWriter;
-
-/// Emitter configuration structure.
-///
-/// This structure contains various options which control XML document emitter behavior.
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct EmitterConfig {
- /// Line separator used to separate lines in formatted output. Default is `"\n"`.
- pub line_separator: Cow<'static, str>,
-
- /// A string which will be used for a single level of indentation. Default is `" "`
- /// (two spaces).
- pub indent_string: Cow<'static, str>,
-
- /// Whether or not the emitted document should be indented. Default is false.
- ///
- /// The emitter is capable to perform automatic indentation of the emitted XML document.
- /// It is done in stream-like fashion and does not require the knowledge of the whole
- /// document in advance.
- ///
- /// Sometimes, however, automatic indentation is undesirable, e.g. when you want to keep
- /// existing layout when processing an existing XML document. Also the indentiation algorithm
- /// is not thoroughly tested. Hence by default it is disabled.
- pub perform_indent: bool,
-
- /// Whether or not characters in output events will be escaped. Default is true.
- ///
- /// The emitter can automatically escape characters which can't appear in PCDATA sections
- /// or element attributes of an XML document, like `<` or `"` (in attributes). This may
- /// introduce some overhead because then every corresponding piece of character data
- /// should be scanned for invalid characters.
- ///
- /// If this option is disabled, the XML writer may produce non-well-formed documents, so
- /// use `false` value for this option with care.
- pub perform_escaping: bool,
-
- /// Whether or not to write XML document declaration at the beginning of a document.
- /// Default is true.
- ///
- /// This option controls whether the document declaration should be emitted automatically
- /// before a root element is written if it was not emitted explicitly by the user.
- pub write_document_declaration: bool,
-
- /// Whether or not to convert elements with empty content to empty elements. Default is true.
- ///
- /// This option allows turning elements like `<a></a>` (an element with empty content)
- /// into `<a />` (an empty element).
- pub normalize_empty_elements: bool,
-
- /// Whether or not to emit CDATA events as plain characters. Default is false.
- ///
- /// This option forces the emitter to convert CDATA events into regular character events,
- /// performing all the necessary escaping beforehand. This may be occasionally useful
- /// for feeding the document into incorrect parsers which do not support CDATA.
- pub cdata_to_characters: bool,
-
- /// Whether or not to keep element names to support `EndElement` events without explicit names.
- /// Default is true.
- ///
- /// This option makes the emitter to keep names of written elements in order to allow
- /// omitting names when writing closing element tags. This could incur some memory overhead.
- pub keep_element_names_stack: bool,
-
- /// Whether or not to automatically insert leading and trailing spaces in emitted comments,
- /// if necessary. Default is true.
- ///
- /// This is a convenience option in order for the user not to append spaces before and after
- /// comments text in order to get more pretty comments: `<!-- something -->` instead of
- /// `<!--something-->`.
- pub autopad_comments: bool,
-
- /// Whether or not to automatically insert spaces before the trailing `/>` in self-closing
- /// elements. Default is true.
- ///
- /// This option is only meaningful if `normalize_empty_elements` is true. For example, the
- /// element `<a></a>` would be unaffected. When `normalize_empty_elements` is true, then when
- /// this option is also true, the same element would appear `<a />`. If this option is false,
- /// then the same element would appear `<a/>`.
- pub pad_self_closing: bool,
-}
-
-impl EmitterConfig {
- /// Creates an emitter configuration with default values.
- ///
- /// You can tweak default options with builder-like pattern:
- ///
- /// ```rust
- /// use xml::writer::EmitterConfig;
- ///
- /// let config = EmitterConfig::new()
- /// .line_separator("\r\n")
- /// .perform_indent(true)
- /// .normalize_empty_elements(false);
- /// ```
- #[inline]
- pub fn new() -> EmitterConfig {
- EmitterConfig {
- line_separator: "\n".into(),
- indent_string: " ".into(), // two spaces
- perform_indent: false,
- perform_escaping: true,
- write_document_declaration: true,
- normalize_empty_elements: true,
- cdata_to_characters: false,
- keep_element_names_stack: true,
- autopad_comments: true,
- pad_self_closing: true
- }
- }
-
- /// Creates an XML writer with this configuration.
- ///
- /// This is a convenience method for configuring and creating a writer at the same time:
- ///
- /// ```rust
- /// use xml::writer::EmitterConfig;
- ///
- /// let mut target: Vec<u8> = Vec::new();
- ///
- /// let writer = EmitterConfig::new()
- /// .line_separator("\r\n")
- /// .perform_indent(true)
- /// .normalize_empty_elements(false)
- /// .create_writer(&mut target);
- /// ```
- ///
- /// This method is exactly equivalent to calling `EventWriter::new_with_config()` with
- /// this configuration object.
- #[inline]
- pub fn create_writer<W: Write>(self, sink: W) -> EventWriter<W> {
- EventWriter::new_with_config(sink, self)
- }
-}
-
-impl Default for EmitterConfig {
- #[inline]
- fn default() -> EmitterConfig {
- EmitterConfig::new()
- }
-}
-
-gen_setters!(EmitterConfig,
- line_separator: into Cow<'static, str>,
- indent_string: into Cow<'static, str>,
- perform_indent: val bool,
- write_document_declaration: val bool,
- normalize_empty_elements: val bool,
- cdata_to_characters: val bool,
- keep_element_names_stack: val bool,
- autopad_comments: val bool,
- pad_self_closing: val bool
-);
diff --git a/src/writer/emitter.rs b/src/writer/emitter.rs
deleted file mode 100644
index bfd9205..0000000
--- a/src/writer/emitter.rs
+++ /dev/null
@@ -1,446 +0,0 @@
-use std::io;
-use std::io::prelude::*;
-use std::fmt;
-use std::result;
-use std::borrow::Cow;
-use std::error::Error;
-
-use common;
-use name::{Name, OwnedName};
-use attribute::Attribute;
-use escape::{escape_str_attribute, escape_str_pcdata};
-use common::XmlVersion;
-use namespace::{NamespaceStack, NS_NO_PREFIX, NS_EMPTY_URI, NS_XMLNS_PREFIX, NS_XML_PREFIX};
-
-use writer::config::EmitterConfig;
-
-/// An error which may be returned by `XmlWriter` when writing XML events.
-#[derive(Debug)]
-pub enum EmitterError {
- /// An I/O error occured in the underlying `Write` instance.
- Io(io::Error),
-
- /// Document declaration has already been written to the output stream.
- DocumentStartAlreadyEmitted,
-
- /// The name of the last opening element is not available.
- LastElementNameNotAvailable,
-
- /// The name of the last opening element is not equal to the name of the provided
- /// closing element.
- EndElementNameIsNotEqualToLastStartElementName,
-
- /// End element name is not specified when it is needed, for example, when automatic
- /// closing is not enabled in configuration.
- EndElementNameIsNotSpecified
-}
-
-impl From<io::Error> for EmitterError {
- fn from(err: io::Error) -> EmitterError {
- EmitterError::Io(err)
- }
-}
-
-impl fmt::Display for EmitterError {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-
- write!(f, "emitter error: ")?;
- match *self {
- EmitterError::Io(ref e) =>
- write!(f, "I/O error: {}", e),
- ref other =>
- write!(f, "{}", other.description()),
- }
- }
-}
-
-impl Error for EmitterError {
- fn description(&self) -> &str {
- match *self {
- EmitterError::Io(_) =>
- "I/O error",
- EmitterError::DocumentStartAlreadyEmitted =>
- "document start event has already been emitted",
- EmitterError::LastElementNameNotAvailable =>
- "last element name is not available",
- EmitterError::EndElementNameIsNotEqualToLastStartElementName =>
- "end element name is not equal to last start element name",
- EmitterError::EndElementNameIsNotSpecified =>
- "end element name is not specified and can't be inferred",
- }
- }
-}
-
-/// A result type yielded by `XmlWriter`.
-pub type Result<T> = result::Result<T, EmitterError>;
-
-// TODO: split into a low-level fast writer without any checks and formatting logic and a
-// high-level indenting validating writer
-pub struct Emitter {
- config: EmitterConfig,
-
- nst: NamespaceStack,
-
- indent_level: usize,
- indent_stack: Vec<IndentFlags>,
-
- element_names: Vec<OwnedName>,
-
- start_document_emitted: bool,
- just_wrote_start_element: bool
-}
-
-impl Emitter {
- pub fn new(config: EmitterConfig) -> Emitter {
- Emitter {
- config,
-
- nst: NamespaceStack::empty(),
-
- indent_level: 0,
- indent_stack: vec![IndentFlags::WroteNothing],
-
- element_names: Vec::new(),
-
- start_document_emitted: false,
- just_wrote_start_element: false
- }
- }
-}
-
-#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-enum IndentFlags {
- WroteNothing,
- WroteMarkup,
- WroteText,
-}
-
-impl Emitter {
- /// Returns the current state of namespaces.
- #[inline]
- pub fn namespace_stack_mut(&mut self) -> &mut NamespaceStack {
- &mut self.nst
- }
-
- #[inline]
- fn wrote_text(&self) -> bool {
- *self.indent_stack.last().unwrap() == IndentFlags::WroteText
- }
-
- #[inline]
- fn wrote_markup(&self) -> bool {
- *self.indent_stack.last().unwrap() == IndentFlags::WroteMarkup
- }
-
- #[inline]
- fn set_wrote_text(&mut self) {
- *self.indent_stack.last_mut().unwrap() = IndentFlags::WroteText;
- }
-
- #[inline]
- fn set_wrote_markup(&mut self) {
- *self.indent_stack.last_mut().unwrap() = IndentFlags::WroteMarkup;
- }
-
- #[inline]
- fn reset_state(&mut self) {
- *self.indent_stack.last_mut().unwrap() = IndentFlags::WroteNothing;
- }
-
- fn write_newline<W: Write>(&mut self, target: &mut W, level: usize) -> Result<()> {
- target.write(self.config.line_separator.as_bytes())?;
- for _ in 0..level {
- target.write(self.config.indent_string.as_bytes())?;
- }
- Ok(())
- }
-
- fn before_markup<W: Write>(&mut self, target: &mut W) -> Result<()> {
- if self.config.perform_indent && !self.wrote_text() &&
- (self.indent_level > 0 || self.wrote_markup()) {
- let indent_level = self.indent_level;
- self.write_newline(target, indent_level)?;
- if self.indent_level > 0 && self.config.indent_string.len() > 0 {
- self.after_markup();
- }
- }
- Ok(())
- }
-
- fn after_markup(&mut self) {
- self.set_wrote_markup();
- }
-
- fn before_start_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
- self.before_markup(target)?;
- self.indent_stack.push(IndentFlags::WroteNothing);
- Ok(())
- }
-
- fn after_start_element(&mut self) {
- self.after_markup();
- self.indent_level += 1;
- }
-
- fn before_end_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
- if self.config.perform_indent && self.indent_level > 0 && self.wrote_markup() &&
- !self.wrote_text() {
- let indent_level = self.indent_level;
- self.write_newline(target, indent_level - 1)
- } else {
- Ok(())
- }
- }
-
- fn after_end_element(&mut self) {
- if self.indent_level > 0 {
- self.indent_level -= 1;
- self.indent_stack.pop();
- }
- self.set_wrote_markup();
- }
-
- fn after_text(&mut self) {
- self.set_wrote_text();
- }
-
- pub fn emit_start_document<W: Write>(&mut self, target: &mut W,
- version: XmlVersion,
- encoding: &str,
- standalone: Option<bool>) -> Result<()> {
- if self.start_document_emitted {
- return Err(EmitterError::DocumentStartAlreadyEmitted);
- }
- self.start_document_emitted = true;
-
- self.before_markup(target)?;
- let result = {
- let mut write = move || {
- write!(target, "<?xml version=\"{}\" encoding=\"{}\"", version, encoding)?;
-
- if let Some(standalone) = standalone {
- write!(target, " standalone=\"{}\"", if standalone { "yes" } else { "no" })?;
- }
-
- write!(target, "?>")?;
-
- Ok(())
- };
- write()
- };
- self.after_markup();
-
- result
- }
-
- fn check_document_started<W: Write>(&mut self, target: &mut W) -> Result<()> {
- if !self.start_document_emitted && self.config.write_document_declaration {
- self.emit_start_document(target, common::XmlVersion::Version10, "utf-8", None)
- } else {
- Ok(())
- }
- }
-
- fn fix_non_empty_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
- if self.config.normalize_empty_elements && self.just_wrote_start_element {
- self.just_wrote_start_element = false;
- target.write(b">").map(|_| ()).map_err(From::from)
- } else {
- Ok(())
- }
- }
-
- pub fn emit_processing_instruction<W: Write>(&mut self,
- target: &mut W,
- name: &str,
- data: Option<&str>) -> Result<()> {
- self.check_document_started(target)?;
- self.fix_non_empty_element(target)?;
-
- self.before_markup(target)?;
-
- let result = {
- let mut write = || {
- write!(target, "<?{}", name)?;
-
- if let Some(data) = data {
- write!(target, " {}", data)?;
- }
-
- write!(target, "?>")?;
-
- Ok(())
- };
- write()
- };
-
- self.after_markup();
-
- result
- }
-
- fn emit_start_element_initial<W>(&mut self, target: &mut W,
- name: Name,
- attributes: &[Attribute]) -> Result<()>
- where W: Write
- {
- self.check_document_started(target)?;
- self.fix_non_empty_element(target)?;
- self.before_start_element(target)?;
- write!(target, "<{}", name.repr_display())?;
- self.emit_current_namespace_attributes(target)?;
- self.emit_attributes(target, attributes)?;
- self.after_start_element();
- Ok(())
- }
-
- pub fn emit_start_element<W>(&mut self, target: &mut W,
- name: Name,
- attributes: &[Attribute]) -> Result<()>
- where W: Write
- {
- if self.config.keep_element_names_stack {
- self.element_names.push(name.to_owned());
- }
-
- self.emit_start_element_initial(target, name, attributes)?;
- self.just_wrote_start_element = true;
-
- if !self.config.normalize_empty_elements {
- write!(target, ">")?;
- }
-
- Ok(())
- }
-
- pub fn emit_current_namespace_attributes<W>(&mut self, target: &mut W) -> Result<()>
- where W: Write
- {
- for (prefix, uri) in self.nst.peek() {
- match prefix {
- // internal namespaces are not emitted
- NS_XMLNS_PREFIX | NS_XML_PREFIX => Ok(()),
- //// there is already a namespace binding with this prefix in scope
- //prefix if self.nst.get(prefix) == Some(uri) => Ok(()),
- // emit xmlns only if it is overridden
- NS_NO_PREFIX => if uri != NS_EMPTY_URI {
- write!(target, " xmlns=\"{}\"", uri)
- } else { Ok(()) },
- // everything else
- prefix => write!(target, " xmlns:{}=\"{}\"", prefix, uri)
- }?;
- }
- Ok(())
- }
-
- pub fn emit_attributes<W: Write>(&mut self, target: &mut W,
- attributes: &[Attribute]) -> Result<()> {
- for attr in attributes.iter() {
- write!(
- target, " {}=\"{}\"",
- attr.name.repr_display(),
- if self.config.perform_escaping { escape_str_attribute(attr.value) } else { Cow::Borrowed(attr.value) }
- )?
- }
- Ok(())
- }
-
- pub fn emit_end_element<W: Write>(&mut self, target: &mut W,
- name: Option<Name>) -> Result<()> {
- let owned_name = if self.config.keep_element_names_stack {
- Some(self.element_names.pop().ok_or(EmitterError::LastElementNameNotAvailable)?)
- } else {
- None
- };
-
- // Check that last started element name equals to the provided name, if there are both
- if let Some(ref last_name) = owned_name {
- if let Some(ref name) = name {
- if last_name.borrow() != *name {
- return Err(EmitterError::EndElementNameIsNotEqualToLastStartElementName);
- }
- }
- }
-
- if let Some(name) = owned_name.as_ref().map(|n| n.borrow()).or(name) {
- if self.config.normalize_empty_elements && self.just_wrote_start_element {
- self.just_wrote_start_element = false;
- let termination = if self.config.pad_self_closing { " />" } else { "/>" };
- let result = target.write(termination.as_bytes()).map_err(From::from);
- self.after_end_element();
- result.map(|_| ())
- } else {
- self.just_wrote_start_element = false;
-
- self.before_end_element(target)?;
- let result = write!(target, "</{}>", name.repr_display()).map_err(From::from);
- self.after_end_element();
-
- result
- }
- } else {
- Err(EmitterError::EndElementNameIsNotSpecified)
- }
- }
-
- pub fn emit_cdata<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
- self.fix_non_empty_element(target)?;
- if self.config.cdata_to_characters {
- self.emit_characters(target, content)
- } else {
- // TODO: escape ']]>' characters in CDATA as two adjacent CDATA blocks
- target.write(b"<![CDATA[")?;
- target.write(content.as_bytes())?;
- target.write(b"]]>")?;
-
- self.after_text();
-
- Ok(())
- }
- }
-
- pub fn emit_characters<W: Write>(&mut self, target: &mut W,
- content: &str) -> Result<()> {
- self.fix_non_empty_element(target)?;
- target.write(
- (if self.config.perform_escaping {
- escape_str_pcdata(content)
- } else {
- Cow::Borrowed(content)
- }).as_bytes()
- )?;
- self.after_text();
- Ok(())
- }
-
- pub fn emit_comment<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
- self.fix_non_empty_element(target)?;
-
- // TODO: add escaping dashes at the end of the comment
-
- let autopad_comments = self.config.autopad_comments;
- let write = |target: &mut W| -> Result<()> {
- target.write(b"<!--")?;
-
- if autopad_comments && !content.starts_with(char::is_whitespace) {
- target.write(b" ")?;
- }
-
- target.write(content.as_bytes())?;
-
- if autopad_comments && !content.ends_with(char::is_whitespace) {
- target.write(b" ")?;
- }
-
- target.write(b"-->")?;
-
- Ok(())
- };
-
- self.before_markup(target)?;
- let result = write(target);
- self.after_markup();
-
- result
- }
-}
diff --git a/src/writer/events.rs b/src/writer/events.rs
deleted file mode 100644
index 1f7040f..0000000
--- a/src/writer/events.rs
+++ /dev/null
@@ -1,241 +0,0 @@
-//! Contains `XmlEvent` datatype, instances of which are consumed by the writer.
-
-use std::borrow::Cow;
-
-use name::Name;
-use attribute::Attribute;
-use common::XmlVersion;
-use namespace::{Namespace, NS_NO_PREFIX};
-
-/// A part of an XML output stream.
-///
-/// Objects of this enum are consumed by `EventWriter`. They correspond to different parts of
-/// an XML document.
-#[derive(Debug)]
-pub enum XmlEvent<'a> {
- /// Corresponds to XML document declaration.
- ///
- /// This event should always be written before any other event. If it is not written
- /// at all, a default XML declaration will be outputted if the corresponding option
- /// is set in the configuration. Otherwise an error will be returned.
- StartDocument {
- /// XML version.
- ///
- /// Defaults to `XmlVersion::Version10`.
- version: XmlVersion,
-
- /// XML document encoding.
- ///
- /// Defaults to `Some("UTF-8")`.
- encoding: Option<&'a str>,
-
- /// XML standalone declaration.
- ///
- /// Defaults to `None`.
- standalone: Option<bool>
- },
-
- /// Denotes an XML processing instruction.
- ProcessingInstruction {
- /// Processing instruction target.
- name: &'a str,
-
- /// Processing instruction content.
- data: Option<&'a str>
- },
-
- /// Denotes a beginning of an XML element.
- StartElement {
- /// Qualified name of the element.
- name: Name<'a>,
-
- /// A list of attributes associated with the element.
- ///
- /// Currently attributes are not checked for duplicates (TODO). Attribute values
- /// will be escaped, and all characters invalid for attribute values like `"` or `<`
- /// will be changed into character entities.
- attributes: Cow<'a, [Attribute<'a>]>,
-
- /// Contents of the namespace mapping at this point of the document.
- ///
- /// This mapping will be inspected for "new" entries, and if at this point of the document
- /// a particular pair of prefix and namespace URI is already defined, no namespace
- /// attributes will be emitted.
- namespace: Cow<'a, Namespace>,
- },
-
- /// Denotes an end of an XML element.
- EndElement {
- /// Optional qualified name of the element.
- ///
- /// If `None`, then it is assumed that the element name should be the last valid one.
- /// If `Some` and element names tracking is enabled, then the writer will check it for
- /// correctness.
- name: Option<Name<'a>>
- },
-
- /// Denotes CDATA content.
- ///
- /// This event contains unparsed data, and no escaping will be performed when writing it
- /// to the output stream.
- CData(&'a str),
-
- /// Denotes a comment.
- ///
- /// The string will be checked for invalid sequences and error will be returned by the
- /// write operation
- Comment(&'a str),
-
- /// Denotes character data outside of tags.
- ///
- /// Contents of this event will be escaped if `perform_escaping` option is enabled,
- /// that is, every character invalid for PCDATA will appear as a character entity.
- Characters(&'a str)
-}
-
-impl<'a> XmlEvent<'a> {
- /// Returns an writer event for a processing instruction.
- #[inline]
- pub fn processing_instruction(name: &'a str, data: Option<&'a str>) -> XmlEvent<'a> {
- XmlEvent::ProcessingInstruction { name: name, data: data }
- }
-
- /// Returns a builder for a starting element.
- ///
- /// This builder can then be used to tweak attributes and namespace starting at
- /// this element.
- #[inline]
- pub fn start_element<S>(name: S) -> StartElementBuilder<'a> where S: Into<Name<'a>> {
- StartElementBuilder {
- name: name.into(),
- attributes: Vec::new(),
- namespace: Namespace::empty().into()
- }
- }
-
- /// Returns a builder for an closing element.
- ///
- /// This method, unline `start_element()`, does not accept a name because by default
- /// the writer is able to determine it automatically. However, when this functionality
- /// is disabled, it is possible to specify the name with `name()` method on the builder.
- #[inline]
- pub fn end_element() -> EndElementBuilder<'a> {
- EndElementBuilder { name: None }
- }
-
- /// Returns a CDATA event.
- ///
- /// Naturally, the provided string won't be escaped, except for closing CDATA token `]]>`
- /// (depending on the configuration).
- #[inline]
- pub fn cdata(data: &'a str) -> XmlEvent<'a> { XmlEvent::CData(data) }
-
- /// Returns a regular characters (PCDATA) event.
- ///
- /// All offending symbols, in particular, `&` and `<`, will be escaped by the writer.
- #[inline]
- pub fn characters(data: &'a str) -> XmlEvent<'a> { XmlEvent::Characters(data) }
-
- /// Returns a comment event.
- #[inline]
- pub fn comment(data: &'a str) -> XmlEvent<'a> { XmlEvent::Comment(data) }
-}
-
-impl<'a> From<&'a str> for XmlEvent<'a> {
- #[inline]
- fn from(s: &'a str) -> XmlEvent<'a> { XmlEvent::Characters(s) }
-}
-
-pub struct EndElementBuilder<'a> {
- name: Option<Name<'a>>
-}
-
-/// A builder for a closing element event.
-impl<'a> EndElementBuilder<'a> {
- /// Sets the name of this closing element.
- ///
- /// Usually the writer is able to determine closing element names automatically. If
- /// this functionality is enabled (by default it is), then this name is checked for correctness.
- /// It is possible, however, to disable such behavior; then the user must ensure that
- /// closing element name is correct manually.
- #[inline]
- pub fn name<N>(mut self, name: N) -> EndElementBuilder<'a> where N: Into<Name<'a>> {
- self.name = Some(name.into());
- self
- }
-}
-
-impl<'a> From<EndElementBuilder<'a>> for XmlEvent<'a> {
- fn from(b: EndElementBuilder<'a>) -> XmlEvent<'a> {
- XmlEvent::EndElement { name: b.name }
- }
-}
-
-/// A builder for a starting element event.
-pub struct StartElementBuilder<'a> {
- name: Name<'a>,
- attributes: Vec<Attribute<'a>>,
- namespace: Namespace
-}
-
-impl<'a> StartElementBuilder<'a> {
- /// Sets an attribute value of this element to the given string.
- ///
- /// This method can be used to add attributes to the starting element. Name is a qualified
- /// name; its namespace is ignored, but its prefix is checked for correctness, that is,
- /// it is checked that the prefix is bound to some namespace in the current context.
- ///
- /// Currently attributes are not checked for duplicates. Note that duplicate attributes
- /// are a violation of XML document well-formedness.
- ///
- /// The writer checks that you don't specify reserved prefix names, for example `xmlns`.
- #[inline]
- pub fn attr<N>(mut self, name: N, value: &'a str) -> StartElementBuilder<'a>
- where N: Into<Name<'a>>
- {
- self.attributes.push(Attribute::new(name.into(), value));
- self
- }
-
- /// Adds a namespace to the current namespace context.
- ///
- /// If no namespace URI was bound to the provided prefix at this point of the document,
- /// then the mapping from the prefix to the provided namespace URI will be written as
- /// a part of this element attribute set.
- ///
- /// If the same namespace URI was bound to the provided prefix at this point of the document,
- /// then no namespace attributes will be emitted.
- ///
- /// If some other namespace URI was bound to the provided prefix at this point of the document,
- /// then another binding will be added as a part of this element attribute set, shadowing
- /// the outer binding.
- #[inline]
- pub fn ns<S1, S2>(mut self, prefix: S1, uri: S2) -> StartElementBuilder<'a>
- where S1: Into<String>, S2: Into<String>
- {
- self.namespace.put(prefix, uri);
- self
- }
-
- /// Adds a default namespace mapping to the current namespace context.
- ///
- /// Same rules as for `ns()` are also valid for the default namespace mapping.
- #[inline]
- pub fn default_ns<S>(mut self, uri: S) -> StartElementBuilder<'a>
- where S: Into<String>
- {
- self.namespace.put(NS_NO_PREFIX, uri);
- self
- }
-}
-
-impl<'a> From<StartElementBuilder<'a>> for XmlEvent<'a> {
- #[inline]
- fn from(b: StartElementBuilder<'a>) -> XmlEvent<'a> {
- XmlEvent::StartElement {
- name: b.name,
- attributes: Cow::Owned(b.attributes),
- namespace: Cow::Owned(b.namespace)
- }
- }
-}
diff --git a/src/writer/mod.rs b/src/writer/mod.rs
deleted file mode 100644
index ea1b242..0000000
--- a/src/writer/mod.rs
+++ /dev/null
@@ -1,93 +0,0 @@
-//! Contains high-level interface for an events-based XML emitter.
-//!
-//! The most important type in this module is `EventWriter` which allows writing an XML document
-//! to some output stream.
-
-pub use self::emitter::Result;
-pub use self::emitter::EmitterError as Error;
-pub use self::config::EmitterConfig;
-pub use self::events::XmlEvent;
-
-use self::emitter::Emitter;
-
-use std::io::prelude::*;
-
-mod emitter;
-mod config;
-pub mod events;
-
-/// A wrapper around an `std::io::Write` instance which emits XML document according to provided
-/// events.
-pub struct EventWriter<W> {
- sink: W,
- emitter: Emitter
-}
-
-impl<W: Write> EventWriter<W> {
- /// Creates a new `EventWriter` out of an `std::io::Write` instance using the default
- /// configuration.
- #[inline]
- pub fn new(sink: W) -> EventWriter<W> {
- EventWriter::new_with_config(sink, EmitterConfig::new())
- }
-
- /// Creates a new `EventWriter` out of an `std::io::Write` instance using the provided
- /// configuration.
- #[inline]
- pub fn new_with_config(sink: W, config: EmitterConfig) -> EventWriter<W> {
- EventWriter {
- sink,
- emitter: Emitter::new(config)
- }
- }
-
- /// Writes the next piece of XML document according to the provided event.
- ///
- /// Note that output data may not exactly correspond to the written event because
- /// of various configuration options. For example, `XmlEvent::EndElement` may
- /// correspond to a separate closing element or it may cause writing an empty element.
- /// Another example is that `XmlEvent::CData` may be represented as characters in
- /// the output stream.
- pub fn write<'a, E>(&mut self, event: E) -> Result<()> where E: Into<XmlEvent<'a>> {
- match event.into() {
- XmlEvent::StartDocument { version, encoding, standalone } =>
- self.emitter.emit_start_document(&mut self.sink, version, encoding.unwrap_or("UTF-8"), standalone),
- XmlEvent::ProcessingInstruction { name, data } =>
- self.emitter.emit_processing_instruction(&mut self.sink, name, data),
- XmlEvent::StartElement { name, attributes, namespace } => {
- self.emitter.namespace_stack_mut().push_empty().checked_target().extend(namespace.as_ref());
- self.emitter.emit_start_element(&mut self.sink, name, &attributes)
- }
- XmlEvent::EndElement { name } => {
- let r = self.emitter.emit_end_element(&mut self.sink, name);
- self.emitter.namespace_stack_mut().try_pop();
- r
- }
- XmlEvent::Comment(content) =>
- self.emitter.emit_comment(&mut self.sink, content),
- XmlEvent::CData(content) =>
- self.emitter.emit_cdata(&mut self.sink, content),
- XmlEvent::Characters(content) =>
- self.emitter.emit_characters(&mut self.sink, content)
- }
- }
-
- /// Returns a mutable reference to the underlying `Writer`.
- ///
- /// Note that having a reference to the underlying sink makes it very easy to emit invalid XML
- /// documents. Use this method with care. Valid use cases for this method include accessing
- /// methods like `Write::flush`, which do not emit new data but rather change the state
- /// of the stream itself.
- pub fn inner_mut(&mut self) -> &mut W {
- &mut self.sink
- }
-
- /// Unwraps this `EventWriter`, returning the underlying writer.
- ///
- /// Note that this is a destructive operation: unwrapping a writer and then wrapping
- /// it again with `EventWriter::new()` will create a fresh writer whose state will be
- /// blank; for example, accumulated namespaces will be reset.
- pub fn into_inner(self) -> W {
- self.sink
- }
-}
diff --git a/tests/documents/sample_1.xml b/tests/documents/sample_1.xml
deleted file mode 100644
index 4d1cbc0..0000000
--- a/tests/documents/sample_1.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-<?xml version="1.0" encoding="utf-8" standalone="yes"?>
-<project name="project-name">
- <libraries>
- <library groupId="org.example" artifactId="<name>" version="0.1"/>
- <library groupId="com.example" artifactId=""cool-lib&" version="999"/>
- </libraries>
- <module name="module-1">
- <files>
- <file name="somefile.java" type="java">
- Some <java> class
- </file>
- <file name="another_file.java" type="java">
- Another "java" class
- </file>
- <file name="config.xml" type="xml">
- Weird 'XML' config
- </file>
- </files>
- <libraries>
- <library groupId="junit" artifactId="junit" version="1.9.5"/>
- </libraries>
- </module>
- <module name="module-2">
- <files>
- <file name="program.js" type="javascript">
- JavaScript & program
- </file>
- <file name="style.css" type="css">
- Cascading style sheet: © - ҉
- </file>
- </files>
- </module>
-</project>
-
diff --git a/tests/documents/sample_1_full.txt b/tests/documents/sample_1_full.txt
deleted file mode 100644
index a8d64d0..0000000
--- a/tests/documents/sample_1_full.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement(project [name="project-name"])
-Whitespace("\n ")
-StartElement(libraries)
-Whitespace("\n ")
-StartElement(library [groupId="org.example", artifactId="<name>", version="0.1"])
-EndElement(library)
-Whitespace("\n ")
-StartElement(library [groupId="com.example", artifactId="\"cool-lib&", version="999"])
-EndElement(library)
-Whitespace("\n ")
-EndElement(libraries)
-Whitespace("\n ")
-StartElement(module [name="module-1"])
-Whitespace("\n ")
-StartElement(files)
-Whitespace("\n ")
-StartElement(file [name="somefile.java", type="java"])
-Characters("\n Some <java> class\n ")
-EndElement(file)
-Whitespace("\n ")
-StartElement(file [name="another_file.java", type="java"])
-Characters("\n Another \"java\" class\n ")
-EndElement(file)
-Whitespace("\n ")
-StartElement(file [name="config.xml", type="xml"])
-Characters("\n Weird \'XML\' config\n ")
-EndElement(file)
-Whitespace("\n ")
-EndElement(files)
-Whitespace("\n ")
-StartElement(libraries)
-Whitespace("\n ")
-StartElement(library [groupId="junit", artifactId="junit", version="1.9.5"])
-EndElement(library)
-Whitespace("\n ")
-EndElement(libraries)
-Whitespace("\n ")
-EndElement(module)
-Whitespace("\n ")
-StartElement(module [name="module-2"])
-Whitespace("\n ")
-StartElement(files)
-Whitespace("\n ")
-StartElement(file [name="program.js", type="javascript"])
-Characters("\n JavaScript & program\n ")
-EndElement(file)
-Whitespace("\n ")
-StartElement(file [name="style.css", type="css"])
-Characters("\n Cascading style sheet: © - ҉\n ")
-EndElement(file)
-Whitespace("\n ")
-EndElement(files)
-Whitespace("\n ")
-EndElement(module)
-Whitespace("\n")
-EndElement(project)
-EndDocument
diff --git a/tests/documents/sample_1_short.txt b/tests/documents/sample_1_short.txt
deleted file mode 100644
index 4dbe285..0000000
--- a/tests/documents/sample_1_short.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement(project [name="project-name"])
-StartElement(libraries)
-StartElement(library [groupId="org.example", artifactId="<name>", version="0.1"])
-EndElement(library)
-StartElement(library [groupId="com.example", artifactId="\"cool-lib&", version="999"])
-EndElement(library)
-EndElement(libraries)
-StartElement(module [name="module-1"])
-StartElement(files)
-StartElement(file [name="somefile.java", type="java"])
-Characters("Some <java> class")
-EndElement(file)
-StartElement(file [name="another_file.java", type="java"])
-Characters("Another \"java\" class")
-EndElement(file)
-StartElement(file [name="config.xml", type="xml"])
-Characters("Weird \'XML\' config")
-EndElement(file)
-EndElement(files)
-StartElement(libraries)
-StartElement(library [groupId="junit", artifactId="junit", version="1.9.5"])
-EndElement(library)
-EndElement(libraries)
-EndElement(module)
-StartElement(module [name="module-2"])
-StartElement(files)
-StartElement(file [name="program.js", type="javascript"])
-Characters("JavaScript & program")
-EndElement(file)
-StartElement(file [name="style.css", type="css"])
-Characters("Cascading style sheet: © - ҉")
-EndElement(file)
-EndElement(files)
-EndElement(module)
-EndElement(project)
-EndDocument
diff --git a/tests/documents/sample_2.xml b/tests/documents/sample_2.xml
deleted file mode 100644
index f9543ac..0000000
--- a/tests/documents/sample_2.xml
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<p:data xmlns:d="urn:example:double" xmlns:h="urn:example:header" xmlns:p="urn:example:namespace">
- <p:datum id="34">
- <p:name>Name</p:name>
- <d:name>Another name</d:name>
- <d:arg>0.3</d:arg>
- <d:arg>0.2</d:arg>
- <p:arg>0.1</p:arg>
- <p:arg>0.01</p:arg>
- <h:header name="Header-1">header 1 value</h:header>
- <h:header name="Header-2">
- Some bigger value
- </h:header>
- </p:datum>
-</p:data>
diff --git a/tests/documents/sample_2_full.txt b/tests/documents/sample_2_full.txt
deleted file mode 100644
index 75075cd..0000000
--- a/tests/documents/sample_2_full.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement({urn:example:namespace}p:data)
-Whitespace("\n ")
-StartElement({urn:example:namespace}p:datum [id="34"])
-Whitespace("\n ")
-StartElement({urn:example:namespace}p:name)
-Characters("Name")
-EndElement({urn:example:namespace}p:name)
-Whitespace("\n ")
-StartElement({urn:example:double}d:name)
-Characters("Another name")
-EndElement({urn:example:double}d:name)
-Whitespace("\n ")
-StartElement({urn:example:double}d:arg)
-Characters("0.3")
-EndElement({urn:example:double}d:arg)
-Whitespace("\n ")
-StartElement({urn:example:double}d:arg)
-Characters("0.2")
-EndElement({urn:example:double}d:arg)
-Whitespace("\n ")
-StartElement({urn:example:namespace}p:arg)
-Characters("0.1")
-EndElement({urn:example:namespace}p:arg)
-Whitespace("\n ")
-StartElement({urn:example:namespace}p:arg)
-Characters("0.01")
-EndElement({urn:example:namespace}p:arg)
-Whitespace("\n ")
-StartElement({urn:example:header}h:header [name="Header-1"])
-Characters("header 1 value")
-EndElement({urn:example:header}h:header)
-Whitespace("\n ")
-StartElement({urn:example:header}h:header [name="Header-2"])
-Characters("\n Some bigger value\n ")
-EndElement({urn:example:header}h:header)
-Whitespace("\n ")
-EndElement({urn:example:namespace}p:datum)
-Whitespace("\n")
-EndElement({urn:example:namespace}p:data)
-EndDocument
diff --git a/tests/documents/sample_2_short.txt b/tests/documents/sample_2_short.txt
deleted file mode 100644
index 2368025..0000000
--- a/tests/documents/sample_2_short.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement({urn:example:namespace}p:data)
-StartElement({urn:example:namespace}p:datum [id="34"])
-StartElement({urn:example:namespace}p:name)
-Characters("Name")
-EndElement({urn:example:namespace}p:name)
-StartElement({urn:example:double}d:name)
-Characters("Another name")
-EndElement({urn:example:double}d:name)
-StartElement({urn:example:double}d:arg)
-Characters("0.3")
-EndElement({urn:example:double}d:arg)
-StartElement({urn:example:double}d:arg)
-Characters("0.2")
-EndElement({urn:example:double}d:arg)
-StartElement({urn:example:namespace}p:arg)
-Characters("0.1")
-EndElement({urn:example:namespace}p:arg)
-StartElement({urn:example:namespace}p:arg)
-Characters("0.01")
-EndElement({urn:example:namespace}p:arg)
-StartElement({urn:example:header}h:header [name="Header-1"])
-Characters("header 1 value")
-EndElement({urn:example:header}h:header)
-StartElement({urn:example:header}h:header [name="Header-2"])
-Characters("Some bigger value")
-EndElement({urn:example:header}h:header)
-EndElement({urn:example:namespace}p:datum)
-EndElement({urn:example:namespace}p:data)
-EndDocument
diff --git a/tests/documents/sample_3.xml b/tests/documents/sample_3.xml
deleted file mode 100644
index 657e37d..0000000
--- a/tests/documents/sample_3.xml
+++ /dev/null
@@ -1,13 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<p:data xmlns:p="urn:x" z=">">
- <!-- abcd < > & -->
- <a>test</a>
- <b>kkss" = ddd' ></b>
- <![CDATA[
- <a>ddddd</b>!e3--><!-- ddckx
- ]]>
- <c/>
- <![CDATA[
- <![CDATA[zzzz]]]]><![CDATA[>]]>
-</p:data>
-
diff --git a/tests/documents/sample_3_full.txt b/tests/documents/sample_3_full.txt
deleted file mode 100644
index e9a0f7e..0000000
--- a/tests/documents/sample_3_full.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-1:1 StartDocument(1.0, utf-8)
-2:1 StartElement({urn:x}p:data [z=">"])
-2:31 Whitespace("\n ")
-3:5 Comment(" abcd < > & ")
-3:34 Whitespace("\n ")
-4:5 StartElement(a)
-4:8 Characters("test")
-4:12 EndElement(a)
-4:16 Whitespace("\n ")
-5:5 StartElement(b)
-5:8 Characters("kkss\" = ddd\' >")
-5:22 EndElement(b)
-5:26 Whitespace("\n ")
-6:5 CData("\n <a>ddddd</b>!e3--><!-- ddckx\n ")
-8:8 Characters("\n ")
-9:5 StartElement(c)
-9:5 EndElement(c)
-9:9 Whitespace("\n ")
-10:5 CData("\n <![CDATA[zzzz]]")
-11:23 CData(">")
-11:36 Characters("\n")
-12:1 EndElement({urn:x}p:data)
-14:1 EndDocument
diff --git a/tests/documents/sample_3_short.txt b/tests/documents/sample_3_short.txt
deleted file mode 100644
index 2582f33..0000000
--- a/tests/documents/sample_3_short.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-1:1 StartDocument(1.0, utf-8)
-2:1 StartElement({urn:x}p:data [z=">"])
-4:5 StartElement(a)
-4:8 Characters("test")
-4:12 EndElement(a)
-5:5 StartElement(b)
-5:8 Characters("kkss\" = ddd\' >")
-5:22 EndElement(b)
-6:5 Characters("<a>ddddd</b>!e3--><!-- ddckx")
-9:5 StartElement(c)
-9:5 EndElement(c)
-10:5 Characters("<![CDATA[zzzz]]>")
-12:1 EndElement({urn:x}p:data)
-14:1 EndDocument
diff --git a/tests/documents/sample_4.xml b/tests/documents/sample_4.xml
deleted file mode 100644
index fb915ff..0000000
--- a/tests/documents/sample_4.xml
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE data SYSTEM "abcd.dtd">
-<p:data xmlns:p="urn:x" z=">">
- <!-- abcd < > & -->
- <a>test</a>
- <b>kkss" = ddd' ></b>
- <![CDATA[
- <a>ddddd</b>!e3--><!-- ddckx
- ]]>
- <c/>
- <![CDATA[
- <![CDATA[zzzz]]]]><![CDATA[>]]>
-</p:data>
-
-
diff --git a/tests/documents/sample_4_full.txt b/tests/documents/sample_4_full.txt
deleted file mode 100644
index 4bdadfb..0000000
--- a/tests/documents/sample_4_full.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement({urn:x}p:data [z=">"])
-Whitespace("\n ")
-Comment(" abcd < > & ")
-Whitespace("\n ")
-StartElement(a)
-Characters("test")
-EndElement(a)
-Whitespace("\n ")
-StartElement(b)
-Characters("kkss\" = ddd\' >")
-EndElement(b)
-Whitespace("\n ")
-CData("\n <a>ddddd</b>!e3--><!-- ddckx\n ")
-Characters("\n ")
-StartElement(c)
-EndElement(c)
-Whitespace("\n ")
-CData("\n <![CDATA[zzzz]]")
-CData(">")
-Characters("\n")
-EndElement({urn:x}p:data)
-EndDocument
diff --git a/tests/documents/sample_4_short.txt b/tests/documents/sample_4_short.txt
deleted file mode 100644
index 52e4b83..0000000
--- a/tests/documents/sample_4_short.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement({urn:x}p:data [z=">"])
-StartElement(a)
-Characters("test")
-EndElement(a)
-StartElement(b)
-Characters("kkss\" = ddd\' >")
-EndElement(b)
-Characters("<a>ddddd</b>!e3--><!-- ddckx")
-StartElement(c)
-EndElement(c)
-Characters("<![CDATA[zzzz]]>")
-EndElement({urn:x}p:data)
-EndDocument
diff --git a/tests/documents/sample_5.xml b/tests/documents/sample_5.xml
deleted file mode 100644
index 92aa31d..0000000
--- a/tests/documents/sample_5.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE data SYSTEM "abcd.dtd">
-<p>
- <a>test ©≂̸</a>
-</p>
-
-
diff --git a/tests/documents/sample_5_short.txt b/tests/documents/sample_5_short.txt
deleted file mode 100644
index 3079811..0000000
--- a/tests/documents/sample_5_short.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-StartDocument(1.0, utf-8)
-StartElement(p)
-StartElement(a)
-Characters("test ©≂̸")
-EndElement(a)
-EndElement(p)
-EndDocument
diff --git a/tests/documents/sample_6.xml b/tests/documents/sample_6.xml
deleted file mode 100644
index 943c02d..0000000
--- a/tests/documents/sample_6.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet href="doc.xsl"?>
-
-<doc>Hello</doc>
diff --git a/tests/documents/sample_6_full.txt b/tests/documents/sample_6_full.txt
deleted file mode 100644
index debb366..0000000
--- a/tests/documents/sample_6_full.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-StartDocument(1.0, UTF-8)
-Whitespace("\n")
-ProcessingInstruction(xml-stylesheet="href=\"doc.xsl\"")
-Whitespace("\n\n")
-StartElement(doc)
-Characters("Hello")
-EndElement(doc)
-EndDocument
diff --git a/tests/event_reader.rs b/tests/event_reader.rs
deleted file mode 100644
index 80ed331..0000000
--- a/tests/event_reader.rs
+++ /dev/null
@@ -1,539 +0,0 @@
-#![forbid(unsafe_code)]
-
-extern crate xml;
-#[macro_use]
-extern crate lazy_static;
-
-use std::env;
-use std::fmt;
-use std::fs::File;
-use std::io::{BufRead, BufReader, Write, stderr};
-use std::path::Path;
-
-use xml::name::OwnedName;
-use xml::common::Position;
-use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};
-
-/// Dummy function that opens a file, parses it, and returns a `Result`.
-/// There can be IO errors (from `File::open`) and XML errors (from the parser).
-/// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
-/// do this without defining their own error type.
-#[allow(dead_code)]
-fn count_event_in_file(name: &Path) -> Result<usize> {
- let mut event_count = 0;
- for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
- try!(event);
- event_count += 1;
- }
- Ok(event_count)
-}
-
-#[test]
-fn sample_1_short() {
- test(
- include_bytes!("documents/sample_1.xml"),
- include_bytes!("documents/sample_1_short.txt"),
- ParserConfig::new()
- .ignore_comments(true)
- .whitespace_to_characters(true)
- .cdata_to_characters(true)
- .trim_whitespace(true)
- .coalesce_characters(true),
- false
- );
-}
-
-#[test]
-fn sample_1_full() {
- test(
- include_bytes!("documents/sample_1.xml"),
- include_bytes!("documents/sample_1_full.txt"),
- ParserConfig::new()
- .ignore_comments(false)
- .whitespace_to_characters(false)
- .cdata_to_characters(false)
- .trim_whitespace(false)
- .coalesce_characters(false),
- false
- );
-}
-
-#[test]
-fn sample_2_short() {
- test(
- include_bytes!("documents/sample_2.xml"),
- include_bytes!("documents/sample_2_short.txt"),
- ParserConfig::new()
- .ignore_comments(true)
- .whitespace_to_characters(true)
- .cdata_to_characters(true)
- .trim_whitespace(true)
- .coalesce_characters(true),
- false
- );
-}
-
-#[test]
-fn sample_2_full() {
- test(
- include_bytes!("documents/sample_2.xml"),
- include_bytes!("documents/sample_2_full.txt"),
- ParserConfig::new()
- .ignore_comments(false)
- .whitespace_to_characters(false)
- .cdata_to_characters(false)
- .trim_whitespace(false)
- .coalesce_characters(false),
- false
- );
-}
-
-#[test]
-fn sample_3_short() {
- test(
- include_bytes!("documents/sample_3.xml"),
- include_bytes!("documents/sample_3_short.txt"),
- ParserConfig::new()
- .ignore_comments(true)
- .whitespace_to_characters(true)
- .cdata_to_characters(true)
- .trim_whitespace(true)
- .coalesce_characters(true),
- true
- );
-}
-
-#[test]
-fn sample_3_full() {
- test(
- include_bytes!("documents/sample_3.xml"),
- include_bytes!("documents/sample_3_full.txt"),
- ParserConfig::new()
- .ignore_comments(false)
- .whitespace_to_characters(false)
- .cdata_to_characters(false)
- .trim_whitespace(false)
- .coalesce_characters(false),
- true
- );
-}
-
-#[test]
-fn sample_4_short() {
- test(
- include_bytes!("documents/sample_4.xml"),
- include_bytes!("documents/sample_4_short.txt"),
- ParserConfig::new()
- .ignore_comments(true)
- .whitespace_to_characters(true)
- .cdata_to_characters(true)
- .trim_whitespace(true)
- .coalesce_characters(true),
- false
- );
-}
-
-#[test]
-fn sample_4_full() {
- test(
- include_bytes!("documents/sample_4.xml"),
- include_bytes!("documents/sample_4_full.txt"),
- ParserConfig::new()
- .ignore_comments(false)
- .whitespace_to_characters(false)
- .cdata_to_characters(false)
- .trim_whitespace(false)
- .coalesce_characters(false),
- false
- );
-
-}
-
-#[test]
-fn sample_5_short() {
- test(
- include_bytes!("documents/sample_5.xml"),
- include_bytes!("documents/sample_5_short.txt"),
- ParserConfig::new()
- .ignore_comments(true)
- .whitespace_to_characters(true)
- .cdata_to_characters(true)
- .trim_whitespace(true)
- .coalesce_characters(true)
- .add_entity("nbsp", " ")
- .add_entity("copy", "©")
- .add_entity("NotEqualTilde", "≂̸"),
- false
- );
-}
-
-#[test]
-fn sample_6_full() {
- test(
- include_bytes!("documents/sample_6.xml"),
- include_bytes!("documents/sample_6_full.txt"),
- ParserConfig::new()
- .ignore_root_level_whitespace(false)
- .ignore_comments(false)
- .whitespace_to_characters(false)
- .cdata_to_characters(false)
- .trim_whitespace(false)
- .coalesce_characters(false),
- false
- );
-}
-
-#[test]
-fn eof_1() {
- test(
- br#"<?xml"#,
- br#"1:6 Unexpected end of stream: no root element found"#,
- ParserConfig::new(),
- false
- );
-}
-
-#[test]
-fn bad_1() {
- test(
- br#"<?xml&.,"#,
- br#"1:6 Unexpected token: <?xml&"#,
- ParserConfig::new(),
- false
- );
-}
-
-#[test]
-fn dashes_in_comments() {
- test(
- br#"<!-- comment -- --><hello/>"#,
- br#"
- |1:14 Unexpected token '--' before ' '
- "#,
- ParserConfig::new(),
- false
- );
-
- test(
- br#"<!-- comment ---><hello/>"#,
- br#"
- |1:14 Unexpected token '--' before '-'
- "#,
- ParserConfig::new(),
- false
- );
-}
-
-#[test]
-fn tabs_1() {
- test(
- b"\t<a>\t<b/></a>",
- br#"
- |1:2 StartDocument(1.0, UTF-8)
- |1:2 StartElement(a)
- |1:6 StartElement(b)
- |1:6 EndElement(b)
- |1:10 EndElement(a)
- |1:14 EndDocument
- "#,
- ParserConfig::new()
- .trim_whitespace(true),
- true
- );
-}
-
-#[test]
-fn issue_83_duplicate_attributes() {
- test(
- br#"<hello><some-tag a='10' a="20"></hello>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |1:30 Attribute 'a' is redefined
- "#,
- ParserConfig::new(),
- false
- );
-}
-
-#[test]
-fn issue_93_large_characters_in_entity_references() {
- test(
- r#"<hello>&𤶼;</hello>"#.as_bytes(),
- r#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |1:10 Unexpected entity: 𤶼
- "#.as_bytes(), // FIXME: it shouldn't be 10, looks like indices are off slightly
- ParserConfig::new(),
- false
- )
-}
-
-#[test]
-fn issue_98_cdata_ending_with_right_bracket() {
- test(
- br#"<hello><![CDATA[Foo [Bar]]]></hello>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |CData("Foo [Bar]")
- |EndElement(hello)
- |EndDocument
- "#,
- ParserConfig::new(),
- false
- )
-}
-
-#[test]
-fn issue_105_unexpected_double_dash() {
- test(
- br#"<hello>-- </hello>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |Characters("-- ")
- |EndElement(hello)
- |EndDocument
- "#,
- ParserConfig::new(),
- false
- );
-
- test(
- br#"<hello>--</hello>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |Characters("--")
- |EndElement(hello)
- |EndDocument
- "#,
- ParserConfig::new(),
- false
- );
-
- test(
- br#"<hello>--></hello>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |Characters("-->")
- |EndElement(hello)
- |EndDocument
- "#,
- ParserConfig::new(),
- false
- );
-
- test(
- br#"<hello><![CDATA[--]]></hello>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(hello)
- |CData("--")
- |EndElement(hello)
- |EndDocument
- "#,
- ParserConfig::new(),
- false
- );
-}
-
-#[test]
-fn issue_attribues_have_no_default_namespace () {
- test(
- br#"<hello xmlns="urn:foo" x="y"/>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement({urn:foo}hello [x="y"])
- |EndElement({urn:foo}hello)
- |EndDocument
- "#,
- ParserConfig::new(),
- false
- );
-}
-
-#[test]
-fn issue_replacement_character_entity_reference() {
- test(
- br#"<doc>��</doc>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(doc)
- |1:13 Invalid decimal character number in an entity: #55357
- "#,
- ParserConfig::new(),
- false,
- );
-
- test(
- br#"<doc>��</doc>"#,
- br#"
- |StartDocument(1.0, UTF-8)
- |StartElement(doc)
- |1:13 Invalid hexadecimal character number in an entity: #xd83d
- "#,
- ParserConfig::new(),
- false,
- );
-
- test(
- br#"<doc>��</doc>"#,
- format!(
- r#"
- |StartDocument(1.0, UTF-8)
- |StartElement(doc)
- |Characters("{replacement_character}{replacement_character}")
- |EndElement(doc)
- |EndDocument
- "#,
- replacement_character = "\u{fffd}"
- )
- .as_bytes(),
- ParserConfig::new()
- .replace_unknown_entity_references(true),
- false,
- );
-
- test(
- br#"<doc>&#xd83d;&#xdd34;</doc>"#,
- format!(
- r#"
- |StartDocument(1.0, UTF-8)
- |StartElement(doc)
- |Characters("{replacement_character}{replacement_character}")
- |EndElement(doc)
- |EndDocument
- "#,
- replacement_character = "\u{fffd}"
- )
- .as_bytes(),
- ParserConfig::new()
- .replace_unknown_entity_references(true),
- false,
- );
-}
-
-lazy_static! {
- // If PRINT_SPEC env variable is set, print the lines
- // to stderr instead of comparing with the output
- // it can be used like this:
- // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
- static ref PRINT: bool = {
- for (key, value) in env::vars() {
- if key == "PRINT_SPEC" && value == "1" {
- return true;
- }
- }
- false
- };
-}
-
-// clones a lot but that's fine
-fn trim_until_bar(s: String) -> String {
- match s.trim() {
- ts if ts.starts_with('|') => return ts[1..].to_owned(),
- _ => {}
- }
- s
-}
-
-fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
- let mut reader = config.create_reader(input);
- let mut spec_lines = BufReader::new(output).lines()
- .map(|line| line.unwrap())
- .enumerate()
- .map(|(i, line)| (i, trim_until_bar(line)))
- .filter(|&(_, ref line)| !line.trim().is_empty());
-
- loop {
- let e = reader.next();
- let line =
- if test_position {
- format!("{} {}", reader.position(), Event(&e))
- } else {
- format!("{}", Event(&e))
- };
-
- if *PRINT {
- writeln!(&mut stderr(), "{}", line).unwrap();
- } else {
- if let Some((n, spec)) = spec_lines.next() {
- if line != spec {
- const SPLITTER: &'static str = "-------------------";
- panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound: {}\n{}\n",
- SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap());
- }
- } else {
- panic!("Unexpected event: {}", line);
- }
- }
-
- match e {
- Ok(XmlEvent::EndDocument) | Err(_) => break,
- _ => {},
- }
- }
-}
-
-// Here we define our own string representation of events so we don't depend
-// on the specifics of Display implementation for XmlEvent and OwnedName.
-
-struct Name<'a>(&'a OwnedName);
-
-impl <'a> fmt::Display for Name<'a> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- if let Some(ref namespace) = self.0.namespace {
- try! { write!(f, "{{{}}}", namespace) }
- }
-
- if let Some(ref prefix) = self.0.prefix {
- try! { write!(f, "{}:", prefix) }
- }
-
- write!(f, "{}", self.0.local_name)
- }
-}
-
-struct Event<'a>(&'a Result<XmlEvent>);
-
-impl<'a> fmt::Display for Event<'a> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let empty = String::new();
- match *self.0 {
- Ok(ref e) => match *e {
- XmlEvent::StartDocument { ref version, ref encoding, .. } =>
- write!(f, "StartDocument({}, {})", version, encoding),
- XmlEvent::EndDocument =>
- write!(f, "EndDocument"),
- XmlEvent::ProcessingInstruction { ref name, ref data } =>
- write!(f, "ProcessingInstruction({}={:?})", name,
- data.as_ref().unwrap_or(&empty)),
- XmlEvent::StartElement { ref name, ref attributes, .. } => {
- if attributes.is_empty() {
- write!(f, "StartElement({})", Name(name))
- }
- else {
- let attrs: Vec<_> = attributes.iter()
- .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
- write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
- }
- },
- XmlEvent::EndElement { ref name } =>
- write!(f, "EndElement({})", Name(name)),
- XmlEvent::Comment(ref data) =>
- write!(f, r#"Comment("{}")"#, data.escape_debug()),
- XmlEvent::CData(ref data) =>
- write!(f, r#"CData("{}")"#, data.escape_debug()),
- XmlEvent::Characters(ref data) =>
- write!(f, r#"Characters("{}")"#, data.escape_debug()),
- XmlEvent::Whitespace(ref data) =>
- write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
- },
- Err(ref e) => e.fmt(f),
- }
- }
-}
diff --git a/tests/event_writer.rs b/tests/event_writer.rs
deleted file mode 100644
index dd64a43..0000000
--- a/tests/event_writer.rs
+++ /dev/null
@@ -1,269 +0,0 @@
-#![forbid(unsafe_code)]
-
-extern crate xml;
-
-use std::io::{BufReader, SeekFrom};
-use std::io::prelude::*;
-use std::fs::File;
-use std::str;
-
-use xml::reader::EventReader;
-use xml::writer::EmitterConfig;
-
-macro_rules! unwrap_all {
- ($($e:expr);+) => {{
- $($e.unwrap();)+
- }}
-}
-
-#[test]
-fn reading_writing_equal_with_namespaces() {
- let mut f = File::open("tests/documents/sample_2.xml").unwrap();
- let mut b = Vec::new();
-
- {
- let r = EventReader::new(BufReader::new(&mut f));
- let mut w = EmitterConfig::default().perform_indent(true).create_writer(&mut b);
-
- for e in r {
- match e {
- Ok(e) => if let Some(e) = e.as_writer_event() {
- match w.write(e) {
- Ok(_) => {},
- Err(e) => panic!("Writer error: {:?}", e)
- }
- },
- Err(e) => panic!("Error: {}", e)
- }
- }
- }
-
- f.seek(SeekFrom::Start(0)).unwrap();
- let mut fs = String::new();
- f.read_to_string(&mut fs).unwrap();
-
- let bs = String::from_utf8(b).unwrap();
-
- assert_eq!(fs.trim(), bs.trim());
-}
-
-#[test]
-fn writing_simple() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new().write_document_declaration(false).create_writer(&mut b);
-
- w.write(XmlEvent::start_element("h:hello").ns("h", "urn:hello-world")).unwrap();
- w.write("hello world").unwrap();
- w.write(XmlEvent::end_element()).unwrap();
- }
-
- assert_eq!(
- str::from_utf8(&b).unwrap(),
- r#"<h:hello xmlns:h="urn:hello-world">hello world</h:hello>"#
- );
-}
-
-#[test]
-fn writing_empty_elements_with_normalizing() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new().write_document_declaration(false).create_writer(&mut b);
-
- unwrap_all! {
- w.write(XmlEvent::start_element("hello"));
- w.write(XmlEvent::start_element("world"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::end_element())
- }
- }
-
- assert_eq!(str::from_utf8(&b).unwrap(), r#"<hello><world /></hello>"#);
-}
-
-#[test]
-fn writing_empty_elements_without_normalizing() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new()
- .write_document_declaration(false)
- .normalize_empty_elements(false)
- .create_writer(&mut b);
-
- unwrap_all! {
- w.write(XmlEvent::start_element("hello"));
- w.write(XmlEvent::start_element("world"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::end_element())
- }
- }
-
- assert_eq!(str::from_utf8(&b).unwrap(), r#"<hello><world></world></hello>"#);
-}
-
-#[test]
-fn writing_empty_elements_without_pad_self_closing() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new()
- .write_document_declaration(false)
- .pad_self_closing(false)
- .create_writer(&mut b);
-
- unwrap_all! {
- w.write(XmlEvent::start_element("hello"));
- w.write(XmlEvent::start_element("world"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::end_element())
- }
- }
-
- assert_eq!(str::from_utf8(&b).unwrap(), r#"<hello><world/></hello>"#);
-}
-#[test]
-fn writing_empty_elements_pad_self_closing_explicit() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new()
- .write_document_declaration(false)
- .pad_self_closing(true)
- .create_writer(&mut b);
-
- unwrap_all! {
- w.write(XmlEvent::start_element("hello"));
- w.write(XmlEvent::start_element("world"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::end_element())
- }
- }
-
- assert_eq!(str::from_utf8(&b).unwrap(), r#"<hello><world /></hello>"#);
-}
-
-#[test]
-fn writing_comments_with_indentation() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new()
- .write_document_declaration(false)
- .perform_indent(true)
- .create_writer(&mut b);
-
- unwrap_all! {
- w.write(XmlEvent::start_element("hello"));
- w.write(XmlEvent::start_element("world"));
- w.write(XmlEvent::comment(" this is a manually padded comment\t"));
- w.write(XmlEvent::comment("this is an unpadded comment"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::end_element())
- }
- }
-
- assert_eq!(
- str::from_utf8(&b).unwrap(),
- "<hello>
- <world>
- <!-- this is a manually padded comment\t-->
- <!-- this is an unpadded comment -->
- </world>
-</hello>");
-}
-
-#[test]
-fn issue_112_overriding_namepace_prefix() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new()
- .write_document_declaration(false)
- .create_writer(&mut b);
-
- unwrap_all! {
- w.write(XmlEvent::start_element("iq").ns("", "jabber:client").ns("a", "urn:A"));
- w.write(XmlEvent::start_element("bind").ns("", "urn:ietf:params:xml:ns:xmpp-bind"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::start_element("whatever").ns("a", "urn:X"));
- w.write(XmlEvent::end_element());
- w.write(XmlEvent::end_element())
- }
- }
-
- assert_eq!(
- str::from_utf8(&b).unwrap(),
- r#"<iq xmlns="jabber:client" xmlns:a="urn:A"><bind xmlns="urn:ietf:params:xml:ns:xmpp-bind" /><whatever xmlns:a="urn:X" /></iq>"#
- )
-}
-
-#[test]
-fn attribute_escaping() {
- use xml::writer::XmlEvent;
-
- let mut b = Vec::new();
-
- {
- let mut w = EmitterConfig::new()
- .write_document_declaration(false)
- .perform_indent(true)
- .create_writer(&mut b);
-
- unwrap_all! {
- w.write(
- XmlEvent::start_element("hello")
- .attr("testLt", "<")
- .attr("testGt", ">")
- );
- w.write(XmlEvent::end_element());
- w.write(
- XmlEvent::start_element("hello")
- .attr("testQuot", "\"")
- .attr("testApos", "\'")
- );
- w.write(XmlEvent::end_element());
- w.write(
- XmlEvent::start_element("hello")
- .attr("testAmp", "&")
- );
- w.write(XmlEvent::end_element());
- w.write(
- XmlEvent::start_element("hello")
- .attr("testNl", "\n")
- .attr("testCr", "\r")
- );
- w.write(XmlEvent::end_element());
- w.write(
- XmlEvent::start_element("hello")
- .attr("testNl", "\\n")
- .attr("testCr", "\\r")
- );
- w.write(XmlEvent::end_element())
- }
- }
- assert_eq!(
- str::from_utf8(&b).unwrap(),
- "<hello testLt=\"&lt;\" testGt=\"&gt;\" />
-<hello testQuot=\"&quot;\" testApos=\"&apos;\" />
-<hello testAmp=\"&amp;\" />
-<hello testNl=\"&#xA;\" testCr=\"&#xD;\" />
-<hello testNl=\"\\n\" testCr=\"\\r\" />"
- );
-}
\ No newline at end of file
diff --git a/tests/streaming.rs b/tests/streaming.rs
deleted file mode 100644
index a577a00..0000000
--- a/tests/streaming.rs
+++ /dev/null
@@ -1,103 +0,0 @@
-#![forbid(unsafe_code)]
-
-extern crate xml;
-
-use std::io::{Cursor, Write};
-
-use xml::EventReader;
-use xml::reader::ParserConfig;
-use xml::reader::XmlEvent;
-
-macro_rules! assert_match {
- ($actual:expr, $expected:pat) => {
- match $actual {
- $expected => {},
- _ => panic!("assertion failed: `(left matches right)` \
- (left: `{:?}`, right: `{}`", $actual, stringify!($expected))
- }
- };
- ($actual:expr, $expected:pat if $guard:expr) => {
- match $actual {
- $expected if $guard => {},
- _ => panic!("assertion failed: `(left matches right)` \
- (left: `{:?}`, right: `{} if {}`",
- $actual, stringify!($expected), stringify!($guard))
- }
- }
-}
-
-fn write_and_reset_position<W>(c: &mut Cursor<W>, data: &[u8]) where Cursor<W>: Write {
- let p = c.position();
- c.write_all(data).unwrap();
- c.set_position(p);
-}
-
-#[test]
-fn reading_streamed_content() {
- let buf = Cursor::new(b"<root>".to_vec());
- let reader = EventReader::new(buf);
-
- let mut it = reader.into_iter();
-
- assert_match!(it.next(), Some(Ok(XmlEvent::StartDocument { .. })));
- assert_match!(it.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "root");
-
- write_and_reset_position(it.source_mut(), b"<child-1>content</child-1>");
- assert_match!(it.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-1");
- assert_match!(it.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "content");
- assert_match!(it.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-1");
-
- write_and_reset_position(it.source_mut(), b"<child-2/>");
- assert_match!(it.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-2");
- assert_match!(it.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-2");
-
- write_and_reset_position(it.source_mut(), b"<child-3/>");
- assert_match!(it.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-3");
- assert_match!(it.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-3");
- // doesn't seem to work because of how tags parsing is done
-// write_and_reset_position(it.source_mut(), b"some text");
- // assert_match!(it.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "some text");
-
- write_and_reset_position(it.source_mut(), b"</root>");
- assert_match!(it.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "root");
- assert_match!(it.next(), Some(Ok(XmlEvent::EndDocument)));
- assert_match!(it.next(), None);
-}
-
-#[test]
-fn reading_streamed_content2() {
- let buf = Cursor::new(b"<root>".to_vec());
- let mut config = ParserConfig::new();
- config.ignore_end_of_stream = true;
- let readerb = EventReader::new_with_config(buf, config);
-
- let mut reader = readerb.into_iter();
-
- assert_match!(reader.next(), Some(Ok(XmlEvent::StartDocument { .. })));
- assert_match!(reader.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "root");
-
- write_and_reset_position(reader.source_mut(), b"<child-1>content</child-1>");
- assert_match!(reader.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-1");
- assert_match!(reader.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "content");
- assert_match!(reader.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-1");
-
- write_and_reset_position(reader.source_mut(), b"<child-2>content</child-2>");
-
- assert_match!(reader.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-2");
- assert_match!(reader.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "content");
- assert_match!(reader.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-2");
- assert_match!(reader.next(), Some(Err(_)));
- write_and_reset_position(reader.source_mut(), b"<child-3></child-3>");
- assert_match!(reader.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-3");
- write_and_reset_position(reader.source_mut(), b"<child-4 type='get'");
- match reader.next() {
- None |
- Some(Ok(_)) => {
- panic!("At this point, parser must not detect something.");
- },
- Some(Err(_)) => {}
- };
- write_and_reset_position(reader.source_mut(), b" />");
- assert_match!(reader.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-4");
-}
-