Jooyung Han | a7cd238 | 2021-07-16 01:22:24 +0000 | [diff] [blame] | 1 | #![forbid(unsafe_code)] |
| 2 | |
| 3 | extern crate xml; |
| 4 | |
| 5 | use std::cmp; |
| 6 | use std::env; |
| 7 | use std::io::{self, Read, Write, BufReader}; |
| 8 | use std::fs::File; |
| 9 | use std::collections::HashSet; |
| 10 | |
| 11 | use xml::ParserConfig; |
| 12 | use xml::reader::XmlEvent; |
| 13 | |
/// Terminates the process with the given exit code, optionally printing a
/// formatted message to standard error first.
///
/// * `abort!(code)` exits immediately with `code`.
/// * `abort!(code, fmt, args...)` writes the formatted message followed by a
///   newline to stderr, then exits with `code`.
///
/// Both forms diverge, so the macro can be used wherever a value of any type
/// is expected (for example inside `unwrap_or_else`).
macro_rules! abort {
    ($code:expr) => {::std::process::exit($code)};
    ($code:expr, $($args:tt)+) => {{
        // `eprintln!` does not need `std::io::Write` to be imported at the
        // call site, unlike `writeln!` on a stderr handle.
        eprintln!($($args)+);
        ::std::process::exit($code)
    }}
}
| 21 | |
| 22 | fn main() { |
| 23 | let mut file; |
| 24 | let mut stdin; |
| 25 | let source: &mut Read = match env::args().nth(1) { |
| 26 | Some(file_name) => { |
| 27 | file = File::open(file_name) |
| 28 | .unwrap_or_else(|e| abort!(1, "Cannot open input file: {}", e)); |
| 29 | &mut file |
| 30 | } |
| 31 | None => { |
| 32 | stdin = io::stdin(); |
| 33 | &mut stdin |
| 34 | } |
| 35 | }; |
| 36 | |
| 37 | let reader = ParserConfig::new() |
| 38 | .whitespace_to_characters(true) |
| 39 | .ignore_comments(false) |
| 40 | .create_reader(BufReader::new(source)); |
| 41 | |
| 42 | let mut processing_instructions = 0; |
| 43 | let mut elements = 0; |
| 44 | let mut character_blocks = 0; |
| 45 | let mut cdata_blocks = 0; |
| 46 | let mut characters = 0; |
| 47 | let mut comment_blocks = 0; |
| 48 | let mut comment_characters = 0; |
| 49 | let mut namespaces = HashSet::new(); |
| 50 | let mut depth = 0; |
| 51 | let mut max_depth = 0; |
| 52 | |
| 53 | for e in reader { |
| 54 | match e { |
| 55 | Ok(e) => match e { |
| 56 | XmlEvent::StartDocument { version, encoding, standalone } => |
| 57 | println!( |
| 58 | "XML document version {}, encoded in {}, {}standalone", |
| 59 | version, encoding, if standalone.unwrap_or(false) { "" } else { "not " } |
| 60 | ), |
| 61 | XmlEvent::EndDocument => println!("Document finished"), |
| 62 | XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1, |
| 63 | XmlEvent::Whitespace(_) => {} // can't happen due to configuration |
| 64 | XmlEvent::Characters(s) => { |
| 65 | character_blocks += 1; |
| 66 | characters += s.len(); |
| 67 | } |
| 68 | XmlEvent::CData(s) => { |
| 69 | cdata_blocks += 1; |
| 70 | characters += s.len(); |
| 71 | } |
| 72 | XmlEvent::Comment(s) => { |
| 73 | comment_blocks += 1; |
| 74 | comment_characters += s.len(); |
| 75 | } |
| 76 | XmlEvent::StartElement { namespace, .. } => { |
| 77 | depth += 1; |
| 78 | max_depth = cmp::max(max_depth, depth); |
| 79 | elements += 1; |
| 80 | namespaces.extend(namespace.0.into_iter().map(|(_, ns_uri)| ns_uri)); |
| 81 | } |
| 82 | XmlEvent::EndElement { .. } => { |
| 83 | depth -= 1; |
| 84 | } |
| 85 | }, |
| 86 | Err(e) => abort!(1, "Error parsing XML document: {}", e) |
| 87 | } |
| 88 | } |
| 89 | namespaces.remove(xml::namespace::NS_EMPTY_URI); |
| 90 | namespaces.remove(xml::namespace::NS_XMLNS_URI); |
| 91 | namespaces.remove(xml::namespace::NS_XML_URI); |
| 92 | |
| 93 | println!("Elements: {}, maximum depth: {}", elements, max_depth); |
| 94 | println!("Namespaces (excluding built-in): {}", namespaces.len()); |
| 95 | println!("Characters: {}, characters blocks: {}, CDATA blocks: {}", |
| 96 | characters, character_blocks, cdata_blocks); |
| 97 | println!("Comment blocks: {}, comment characters: {}", comment_blocks, comment_characters); |
| 98 | println!("Processing instructions (excluding built-in): {}", processing_instructions); |
| 99 | } |