Blame - src/lib.rs - platform/external/rust/crates/csv

blob: 3c771c95f1edba1bf40878fb86537ef6bd1d9358 [file] [log] [blame]

Jakub Kotur	c72d720	2020-12-21 17:28:15 +0100	[diff] [blame]	1	/*!
				2	The `csv` crate provides a fast and flexible CSV reader and writer, with
				3	support for Serde.
				4
				5	The [tutorial](tutorial/index.html) is a good place to start if you're new to
				6	Rust.
				7
				8	The [cookbook](cookbook/index.html) will give you a variety of complete Rust
				9	programs that do CSV reading and writing.
				10
				11	# Brief overview
				12
				13	If you're new to Rust, you might find the
				14	[tutorial](tutorial/index.html)
				15	to be a good place to start.
				16
				17	The primary types in this crate are
				18	[`Reader`](struct.Reader.html)
				19	and
				20	[`Writer`](struct.Writer.html),
				21	for reading and writing CSV data respectively.
				22	Correspondingly, to support CSV data with custom field or record delimiters
				23	(among many other things), you should use either a
				24	[`ReaderBuilder`](struct.ReaderBuilder.html)
				25	or a
				26	[`WriterBuilder`](struct.WriterBuilder.html),
				27	depending on whether you're reading or writing CSV data.
				28
				29	Unless you're using Serde, the standard CSV record types are
				30	[`StringRecord`](struct.StringRecord.html)
				31	and
				32	[`ByteRecord`](struct.ByteRecord.html).
				33	`StringRecord` should be used when you know your data to be valid UTF-8.
				34	For data that may be invalid UTF-8, `ByteRecord` is suitable.
				35
				36	Finally, the set of errors is described by the
				37	[`Error`](struct.Error.html)
				38	type.
				39
				40	The rest of the types in this crate mostly correspond to more detailed errors,
				41	position information, configuration knobs or iterator types.
				42
				43	# Setup
				44
				45	Add this to your `Cargo.toml`:
				46
				47	```toml
				48	[dependencies]
				49	csv = "1.1"
				50	```
				51
				52	If you want to use Serde's custom derive functionality on your custom structs,
				53	then add this to your `[dependencies]` section of `Cargo.toml`:
				54
				55	```toml
				56	[dependencies]
				57	serde = { version = "1", features = ["derive"] }
				58	```
				59
				60	# Example
				61
				62	This example shows how to read CSV data from stdin and print each record to
				63	stdout.
				64
				65	There are more examples in the [cookbook](cookbook/index.html).
				66
				67	```no_run
				68	use std::error::Error;
				69	use std::io;
				70	use std::process;
				71
				72	fn example() -> Result<(), Box<dyn Error>> {
				73	// Build the CSV reader and iterate over each record.
				74	let mut rdr = csv::Reader::from_reader(io::stdin());
				75	for result in rdr.records() {
				76	// The iterator yields Result<StringRecord, Error>, so we check the
				77	// error here.
				78	let record = result?;
				79	println!("{:?}", record);
				80	}
				81	Ok(())
				82	}
				83
				84	fn main() {
				85	if let Err(err) = example() {
				86	println!("error running example: {}", err);
				87	process::exit(1);
				88	}
				89	}
				90	```
				91
				92	The above example can be run like so:
				93
				94	```ignore
				95	$ git clone https://github.com/BurntSushi/rust-csv
				96	$ cd rust-csv
				97	$ cargo run --example cookbook-read-basic < examples/data/smallpop.csv
				98	```
				99
				100	# Example with Serde
				101
				102	This example shows how to read CSV data from stdin into your own custom struct.
				103	By default, the member names of the struct are matched with the values in the
				104	header record of your CSV data.
				105
				106	```no_run
				107	use std::error::Error;
				108	use std::io;
				109	use std::process;
				110
				111	use serde::Deserialize;
				112
				113	#[derive(Debug, Deserialize)]
				114	struct Record {
				115	city: String,
				116	region: String,
				117	country: String,
				118	population: Option<u64>,
				119	}
				120
				121	fn example() -> Result<(), Box<dyn Error>> {
				122	let mut rdr = csv::Reader::from_reader(io::stdin());
				123	for result in rdr.deserialize() {
				124	// Notice that we need to provide a type hint for automatic
				125	// deserialization.
				126	let record: Record = result?;
				127	println!("{:?}", record);
				128	}
				129	Ok(())
				130	}
				131
				132	fn main() {
				133	if let Err(err) = example() {
				134	println!("error running example: {}", err);
				135	process::exit(1);
				136	}
				137	}
				138	```
				139
				140	The above example can be run like so:
				141
				142	```ignore
				143	$ git clone https://github.com/BurntSushi/rust-csv
				144	$ cd rust-csv
				145	$ cargo run --example cookbook-read-serde < examples/data/smallpop.csv
				146	```
				147
				148	*/
				149
				150	#![deny(missing_docs)]
				151
				152	use std::result;
				153
				154	use serde::{Deserialize, Deserializer};
				155
				156	pub use crate::byte_record::{ByteRecord, ByteRecordIter, Position};
				157	pub use crate::deserializer::{DeserializeError, DeserializeErrorKind};
				158	pub use crate::error::{
				159	Error, ErrorKind, FromUtf8Error, IntoInnerError, Result, Utf8Error,
				160	};
				161	pub use crate::reader::{
				162	ByteRecordsIntoIter, ByteRecordsIter, DeserializeRecordsIntoIter,
				163	DeserializeRecordsIter, Reader, ReaderBuilder, StringRecordsIntoIter,
				164	StringRecordsIter,
				165	};
				166	pub use crate::string_record::{StringRecord, StringRecordIter};
				167	pub use crate::writer::{Writer, WriterBuilder};
				168
				169	mod byte_record;
				170	pub mod cookbook;
				171	mod deserializer;
				172	mod error;
				173	mod reader;
				174	mod serializer;
				175	mod string_record;
				176	pub mod tutorial;
				177	mod writer;
				178
				179	/// The quoting style to use when writing CSV data.
				180	#[derive(Clone, Copy, Debug)]
				181	pub enum QuoteStyle {
				182	/// This puts quotes around every field. Always.
				183	Always,
				184	/// This puts quotes around fields only when necessary.
				185	///
				186	/// They are necessary when fields contain a quote, delimiter or record
				187	/// terminator. Quotes are also necessary when writing an empty record
				188	/// (which is indistinguishable from a record with one empty field).
				189	///
				190	/// This is the default.
				191	Necessary,
				192	/// This puts quotes around all fields that are non-numeric. Namely, when
				193	/// writing a field that does not parse as a valid float or integer, then
				194	/// quotes will be used even if they aren't strictly necessary.
				195	NonNumeric,
				196	/// This never writes quotes, even if it would produce invalid CSV data.
				197	Never,
				198	/// Hints that destructuring should not be exhaustive.
				199	///
				200	/// This enum may grow additional variants, so this makes sure clients
				201	/// don't count on exhaustive matching. (Otherwise, adding a new variant
				202	/// could break existing code.)
				203	#[doc(hidden)]
				204	__Nonexhaustive,
				205	}
				206
				207	impl QuoteStyle {
				208	fn to_core(self) -> csv_core::QuoteStyle {
				209	match self {
				210	QuoteStyle::Always => csv_core::QuoteStyle::Always,
				211	QuoteStyle::Necessary => csv_core::QuoteStyle::Necessary,
				212	QuoteStyle::NonNumeric => csv_core::QuoteStyle::NonNumeric,
				213	QuoteStyle::Never => csv_core::QuoteStyle::Never,
				214	_ => unreachable!(),
				215	}
				216	}
				217	}
				218
				219	impl Default for QuoteStyle {
				220	fn default() -> QuoteStyle {
				221	QuoteStyle::Necessary
				222	}
				223	}
				224
				225	/// A record terminator.
				226	///
				227	/// Use this to specify the record terminator while parsing CSV. The default is
				228	/// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator.
				229	#[derive(Clone, Copy, Debug)]
				230	pub enum Terminator {
				231	/// Parses `\r`, `\n` or `\r\n` as a single record terminator.
				232	CRLF,
				233	/// Parses the byte given as a record terminator.
				234	Any(u8),
				235	/// Hints that destructuring should not be exhaustive.
				236	///
				237	/// This enum may grow additional variants, so this makes sure clients
				238	/// don't count on exhaustive matching. (Otherwise, adding a new variant
				239	/// could break existing code.)
				240	#[doc(hidden)]
				241	__Nonexhaustive,
				242	}
				243
				244	impl Terminator {
				245	/// Convert this to the csv_core type of the same name.
				246	fn to_core(self) -> csv_core::Terminator {
				247	match self {
				248	Terminator::CRLF => csv_core::Terminator::CRLF,
				249	Terminator::Any(b) => csv_core::Terminator::Any(b),
				250	_ => unreachable!(),
				251	}
				252	}
				253	}
				254
				255	impl Default for Terminator {
				256	fn default() -> Terminator {
				257	Terminator::CRLF
				258	}
				259	}
				260
				261	/// The whitespace preservation behaviour when reading CSV data.
				262	#[derive(Clone, Copy, Debug, PartialEq)]
				263	pub enum Trim {
				264	/// Preserves fields and headers. This is the default.
				265	None,
				266	/// Trim whitespace from headers.
				267	Headers,
				268	/// Trim whitespace from fields, but not headers.
				269	Fields,
				270	/// Trim whitespace from fields and headers.
				271	All,
				272	/// Hints that destructuring should not be exhaustive.
				273	///
				274	/// This enum may grow additional variants, so this makes sure clients
				275	/// don't count on exhaustive matching. (Otherwise, adding a new variant
				276	/// could break existing code.)
				277	#[doc(hidden)]
				278	__Nonexhaustive,
				279	}
				280
				281	impl Trim {
				282	fn should_trim_fields(&self) -> bool {
				283	self == &Trim::Fields \|\| self == &Trim::All
				284	}
				285
				286	fn should_trim_headers(&self) -> bool {
				287	self == &Trim::Headers \|\| self == &Trim::All
				288	}
				289	}
				290
				291	impl Default for Trim {
				292	fn default() -> Trim {
				293	Trim::None
				294	}
				295	}
				296
				297	/// A custom Serde deserializer for possibly invalid `Option<T>` fields.
				298	///
				299	/// When deserializing CSV data, it is sometimes desirable to simply ignore
				300	/// fields with invalid data. For example, there might be a field that is
				301	/// usually a number, but will occasionally contain garbage data that causes
				302	/// number parsing to fail.
				303	///
				304	/// You might be inclined to use, say, `Option<i32>` for fields such at this.
				305	/// By default, however, `Option<i32>` will either capture empty fields with
				306	/// `None` or valid numeric fields with `Some(the_number)`. If the field is
				307	/// non-empty and not a valid number, then deserialization will return an error
				308	/// instead of using `None`.
				309	///
				310	/// This function allows you to override this default behavior. Namely, if
				311	/// `Option<T>` is deserialized with non-empty but invalid data, then the value
				312	/// will be `None` and the error will be ignored.
				313	///
				314	/// # Example
				315	///
				316	/// This example shows how to parse CSV records with numerical data, even if
				317	/// some numerical data is absent or invalid. Without the
				318	/// `serde(deserialize_with = "...")` annotations, this example would return
				319	/// an error.
				320	///
				321	/// ```
				322	/// use std::error::Error;
				323	///
				324	/// use csv::Reader;
				325	/// use serde::Deserialize;
				326	///
				327	/// #[derive(Debug, Deserialize, Eq, PartialEq)]
				328	/// struct Row {
				329	/// #[serde(deserialize_with = "csv::invalid_option")]
				330	/// a: Option<i32>,
				331	/// #[serde(deserialize_with = "csv::invalid_option")]
				332	/// b: Option<i32>,
				333	/// #[serde(deserialize_with = "csv::invalid_option")]
				334	/// c: Option<i32>,
				335	/// }
				336	///
				337	/// # fn main() { example().unwrap(); }
				338	/// fn example() -> Result<(), Box<dyn Error>> {
				339	/// let data = "\
				340	/// a,b,c
				341	/// 5,\"\",xyz
				342	/// ";
				343	/// let mut rdr = Reader::from_reader(data.as_bytes());
				344	/// if let Some(result) = rdr.deserialize().next() {
				345	/// let record: Row = result?;
				346	/// assert_eq!(record, Row { a: Some(5), b: None, c: None });
				347	/// Ok(())
				348	/// } else {
				349	/// Err(From::from("expected at least one record but got none"))
				350	/// }
				351	/// }
				352	/// ```
				353	pub fn invalid_option<'de, D, T>(de: D) -> result::Result<Option<T>, D::Error>
				354	where
				355	D: Deserializer<'de>,
				356	Option<T>: Deserialize<'de>,
				357	{
				358	Option::<T>::deserialize(de).or_else(\|_\| Ok(None))
				359	}