Initial import of csv-1.1.5.
Bug: 155309706
Change-Id: Id08b46b0989d3b869c4a014d10ea98193b7c3b32
diff --git a/src/byte_record.rs b/src/byte_record.rs
new file mode 100644
index 0000000..777526e
--- /dev/null
+++ b/src/byte_record.rs
@@ -0,0 +1,1150 @@
+use std::cmp;
+use std::fmt;
+use std::iter::FromIterator;
+use std::ops::{self, Range};
+use std::result;
+
+use bstr::{BString, ByteSlice};
+use serde::de::Deserialize;
+
+use crate::deserializer::deserialize_byte_record;
+use crate::error::{new_utf8_error, Result, Utf8Error};
+use crate::string_record::StringRecord;
+
+/// A single CSV record stored as raw bytes.
+///
+/// A byte record permits reading or writing CSV rows that are not UTF-8.
+/// In general, you should prefer using a
+/// [`StringRecord`](struct.StringRecord.html)
+/// since it is more ergonomic, but a `ByteRecord` is provided in case you need
+/// it.
+///
+/// If you are using the Serde (de)serialization APIs, then you probably never
+/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
+/// are some circumstances in which you might need to use a raw record type
+/// while still using Serde. For example, if you need to deserialize possibly
+/// invalid UTF-8 fields, then you'll need to first read your record into a
+/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
+/// reason for using the raw record deserialization APIs is if you're using
+/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
+///
+/// Two `ByteRecord`s are compared on the basis of their field data. Any
+/// position information associated with the records is ignored.
+#[derive(Clone, Eq)]
+pub struct ByteRecord(Box<ByteRecordInner>);
+
+impl PartialEq for ByteRecord {
+ fn eq(&self, other: &ByteRecord) -> bool {
+ if self.len() != other.len() {
+ return false;
+ }
+ self.iter().zip(other.iter()).all(|e| e.0 == e.1)
+ }
+}
+
+impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
+ fn eq(&self, other: &Vec<T>) -> bool {
+ self.iter_eq(other)
+ }
+}
+
+impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
+ fn eq(&self, other: &Vec<T>) -> bool {
+ self.iter_eq(other)
+ }
+}
+
+impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
+ fn eq(&self, other: &[T]) -> bool {
+ self.iter_eq(other)
+ }
+}
+
+impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
+ fn eq(&self, other: &[T]) -> bool {
+ self.iter_eq(other)
+ }
+}
+
+impl fmt::Debug for ByteRecord {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let mut fields = vec![];
+ for field in self {
+ fields.push(BString::from(field.to_vec()));
+ }
+ write!(f, "ByteRecord({:?})", fields)
+ }
+}
+
+/// The inner portion of a byte record.
+///
+/// We use this memory layout so that moving a `ByteRecord` only requires
+/// moving a single pointer. The optimization is dubious at best, but does
+/// seem to result in slightly better numbers in microbenchmarks. This
+/// likely depends heavily on the underlying allocator.
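+///
+/// As a rough illustration: because `ByteRecord` boxes this struct, moving
+/// a record moves a single thin pointer, e.g.:
+///
+/// ```ignore
+/// assert_eq!(
+///     std::mem::size_of::<csv::ByteRecord>(),
+///     std::mem::size_of::<usize>(),
+/// );
+/// ```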
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct ByteRecordInner {
+ /// The position of this byte record.
+ pos: Option<Position>,
+ /// All fields in this record, stored contiguously.
+ fields: Vec<u8>,
+ /// The number of and location of each field in this record.
+ bounds: Bounds,
+}
+
+impl Default for ByteRecord {
+ #[inline]
+ fn default() -> ByteRecord {
+ ByteRecord::new()
+ }
+}
+
+impl ByteRecord {
+ /// Create a new empty `ByteRecord`.
+ ///
+    /// Note that you may find the `ByteRecord::from` constructor more
+    /// convenient. It is provided by an impl of the `From` trait.
+ ///
+ /// # Example: create an empty record
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::new();
+ /// assert_eq!(record.len(), 0);
+ /// ```
+ ///
+ /// # Example: initialize a record from a `Vec`
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// ```
+ #[inline]
+ pub fn new() -> ByteRecord {
+ ByteRecord::with_capacity(0, 0)
+ }
+
+ /// Create a new empty `ByteRecord` with the given capacity settings.
+ ///
+ /// `buffer` refers to the capacity of the buffer used to store the
+ /// actual row contents. `fields` refers to the number of fields one
+ /// might expect to store.
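+    ///
+    /// # Example
+    ///
+    /// A small sketch: these capacities only preallocate space; pushing
+    /// more data than expected still works.
+    ///
+    /// ```
+    /// use csv::ByteRecord;
+    ///
+    /// let mut record = ByteRecord::with_capacity(16, 4);
+    /// assert_eq!(record.len(), 0);
+    /// record.push_field(b"foo");
+    /// assert_eq!(record.len(), 1);
+    /// ```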
+ #[inline]
+ pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
+ ByteRecord(Box::new(ByteRecordInner {
+ pos: None,
+ fields: vec![0; buffer],
+ bounds: Bounds::with_capacity(fields),
+ }))
+ }
+
+ /// Deserialize this record.
+ ///
+ /// The `D` type parameter refers to the type that this record should be
+ /// deserialized into. The `'de` lifetime refers to the lifetime of the
+ /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
+ /// that borrow field data from this record.
+ ///
+ /// An optional `headers` parameter permits deserializing into a struct
+ /// based on its field names (corresponding to header values) rather than
+ /// the order in which the fields are defined.
+ ///
+ /// # Example: without headers
+ ///
+ /// This shows how to deserialize a single row into a struct based on the
+ /// order in which fields occur. This example also shows how to borrow
+ /// fields from the `ByteRecord`, which results in zero allocation
+ /// deserialization.
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::ByteRecord;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Deserialize)]
+ /// struct Row<'a> {
+ /// city: &'a str,
+ /// country: &'a str,
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap() }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let record = ByteRecord::from(vec![
+ /// "Boston", "United States", "4628910",
+ /// ]);
+ ///
+ /// let row: Row = record.deserialize(None)?;
+ /// assert_eq!(row.city, "Boston");
+ /// assert_eq!(row.country, "United States");
+ /// assert_eq!(row.population, 4628910);
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Example: with headers
+ ///
+ /// This example is like the previous one, but shows how to deserialize
+ /// into a struct based on the struct's field names. For this to work,
+ /// you must provide a header row.
+ ///
+ /// This example also shows that you can deserialize into owned data
+ /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::ByteRecord;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Deserialize)]
+ /// struct Row {
+ /// city: String,
+ /// country: String,
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap() }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// // Notice that the fields are not in the same order
+ /// // as the fields in the struct!
+ /// let header = ByteRecord::from(vec![
+ /// "country", "city", "population",
+ /// ]);
+ /// let record = ByteRecord::from(vec![
+ /// "United States", "Boston", "4628910",
+ /// ]);
+ ///
+ /// let row: Row = record.deserialize(Some(&header))?;
+ /// assert_eq!(row.city, "Boston");
+ /// assert_eq!(row.country, "United States");
+ /// assert_eq!(row.population, 4628910);
+ /// Ok(())
+ /// }
+ /// ```
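+    ///
+    /// # Example: invalid UTF-8
+    ///
+    /// As described above, a `ByteRecord` is useful when fields may not be
+    /// valid UTF-8. This sketch (with an illustrative `Row` type) borrows
+    /// such a field as raw bytes:
+    ///
+    /// ```
+    /// use std::error::Error;
+    ///
+    /// use csv::ByteRecord;
+    /// use serde::Deserialize;
+    ///
+    /// #[derive(Deserialize)]
+    /// struct Row<'a> {
+    ///     label: &'a str,
+    ///     data: &'a [u8],
+    /// }
+    ///
+    /// # fn main() { example().unwrap() }
+    /// fn example() -> Result<(), Box<dyn Error>> {
+    ///     let mut record = ByteRecord::new();
+    ///     record.push_field(b"foo");
+    ///     record.push_field(b"\xFF"); // not valid UTF-8
+    ///
+    ///     let row: Row = record.deserialize(None)?;
+    ///     assert_eq!(row.label, "foo");
+    ///     assert_eq!(row.data, b"\xFF");
+    ///     Ok(())
+    /// }
+    /// ```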
+ pub fn deserialize<'de, D: Deserialize<'de>>(
+ &'de self,
+ headers: Option<&'de ByteRecord>,
+ ) -> Result<D> {
+ deserialize_byte_record(self, headers)
+ }
+
+ /// Returns an iterator over all fields in this record.
+ ///
+ /// # Example
+ ///
+ /// This example shows how to iterate over each field in a `ByteRecord`.
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// for field in record.iter() {
+ /// assert!(field == b"a" || field == b"b" || field == b"c");
+ /// }
+ /// ```
+ #[inline]
+ pub fn iter(&self) -> ByteRecordIter {
+ self.into_iter()
+ }
+
+ /// Return the field at index `i`.
+ ///
+ /// If no field at index `i` exists, then this returns `None`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.get(1), Some(&b"b"[..]));
+ /// assert_eq!(record.get(3), None);
+ /// ```
+ #[inline]
+ pub fn get(&self, i: usize) -> Option<&[u8]> {
+ self.0.bounds.get(i).map(|range| &self.0.fields[range])
+ }
+
+ /// Returns true if and only if this record is empty.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// assert!(ByteRecord::new().is_empty());
+ /// ```
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Returns the number of fields in this record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// ```
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.0.bounds.len()
+ }
+
+ /// Truncate this record to `n` fields.
+ ///
+ /// If `n` is greater than the number of fields in this record, then this
+ /// has no effect.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// record.truncate(1);
+ /// assert_eq!(record.len(), 1);
+ /// assert_eq!(record, vec!["a"]);
+ /// ```
+ #[inline]
+ pub fn truncate(&mut self, n: usize) {
+ if n <= self.len() {
+ self.0.bounds.len = n;
+ }
+ }
+
+ /// Clear this record so that it has zero fields.
+ ///
+ /// This is equivalent to calling `truncate(0)`.
+ ///
+ /// Note that it is not necessary to clear the record to reuse it with
+ /// the CSV reader.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// record.clear();
+ /// assert_eq!(record.len(), 0);
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ self.truncate(0);
+ }
+
+ /// Trim the fields of this record so that leading and trailing whitespace
+ /// is removed.
+ ///
+ /// This method uses the ASCII definition of whitespace. That is, only
+ /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let mut record = ByteRecord::from(vec![
+ /// " ", "\tfoo", "bar ", "b a z",
+ /// ]);
+ /// record.trim();
+ /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
+ /// ```
+ pub fn trim(&mut self) {
+ let length = self.len();
+ if length == 0 {
+ return;
+ }
+ // TODO: We could likely do this in place, but for now, we allocate.
+ let mut trimmed =
+ ByteRecord::with_capacity(self.as_slice().len(), self.len());
+ trimmed.set_position(self.position().cloned());
+ for field in &*self {
+ trimmed.push_field(field.trim());
+ }
+ *self = trimmed;
+ }
+
+ /// Add a new field to this record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let mut record = ByteRecord::new();
+ /// record.push_field(b"foo");
+ /// assert_eq!(&record[0], b"foo");
+ /// ```
+ #[inline]
+ pub fn push_field(&mut self, field: &[u8]) {
+ let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
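+        // Grow the underlying buffer (doubling each time) until the new
+        // field fits, then copy it in and record its end position.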
+ while e > self.0.fields.len() {
+ self.expand_fields();
+ }
+ self.0.fields[s..e].copy_from_slice(field);
+ self.0.bounds.add(e);
+ }
+
+ /// Return the position of this record, if available.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::{ByteRecord, ReaderBuilder};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut record = ByteRecord::new();
+ /// let mut rdr = ReaderBuilder::new()
+ /// .has_headers(false)
+ /// .from_reader("a,b,c\nx,y,z".as_bytes());
+ ///
+ /// assert!(rdr.read_byte_record(&mut record)?);
+ /// {
+ /// let pos = record.position().expect("a record position");
+ /// assert_eq!(pos.byte(), 0);
+ /// assert_eq!(pos.line(), 1);
+ /// assert_eq!(pos.record(), 0);
+ /// }
+ ///
+ /// assert!(rdr.read_byte_record(&mut record)?);
+ /// {
+ /// let pos = record.position().expect("a record position");
+ /// assert_eq!(pos.byte(), 6);
+ /// assert_eq!(pos.line(), 2);
+ /// assert_eq!(pos.record(), 1);
+ /// }
+ ///
+ /// // Finish the CSV reader for good measure.
+ /// assert!(!rdr.read_byte_record(&mut record)?);
+ /// Ok(())
+ /// }
+ /// ```
+ #[inline]
+ pub fn position(&self) -> Option<&Position> {
+ self.0.pos.as_ref()
+ }
+
+ /// Set the position of this record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::{ByteRecord, Position};
+ ///
+ /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// let mut pos = Position::new();
+ /// pos.set_byte(100);
+ /// pos.set_line(4);
+ /// pos.set_record(2);
+ ///
+ /// record.set_position(Some(pos.clone()));
+ /// assert_eq!(record.position(), Some(&pos));
+ /// ```
+ #[inline]
+ pub fn set_position(&mut self, pos: Option<Position>) {
+ self.0.pos = pos;
+ }
+
+ /// Return the start and end position of a field in this record.
+ ///
+ /// If no such field exists at the given index, then return `None`.
+ ///
+ /// The range returned can be used with the slice returned by `as_slice`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
+ /// let range = record.range(1).expect("a record range");
+ /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
+ /// ```
+ #[inline]
+ pub fn range(&self, i: usize) -> Option<Range<usize>> {
+ self.0.bounds.get(i)
+ }
+
+ /// Return the entire row as a single byte slice. The slice returned stores
+ /// all fields contiguously. The boundaries of each field can be determined
+ /// via the `range` method.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::ByteRecord;
+ ///
+ /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
+ /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
+ /// ```
+ #[inline]
+ pub fn as_slice(&self) -> &[u8] {
+ &self.0.fields[..self.0.bounds.end()]
+ }
+
+ /// Retrieve the underlying parts of a byte record.
+ #[inline]
+ pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
+ let inner = &mut *self.0;
+ (&mut inner.fields, &mut inner.bounds.ends)
+ }
+
+    /// Set the number of fields in this record.
+ #[inline]
+ pub(crate) fn set_len(&mut self, len: usize) {
+ self.0.bounds.len = len;
+ }
+
+ /// Expand the capacity for storing fields.
+ #[inline]
+ pub(crate) fn expand_fields(&mut self) {
+ let new_len = self.0.fields.len().checked_mul(2).unwrap();
+ self.0.fields.resize(cmp::max(4, new_len), 0);
+ }
+
+ /// Expand the capacity for storing field ending positions.
+ #[inline]
+ pub(crate) fn expand_ends(&mut self) {
+ self.0.bounds.expand();
+ }
+
+ /// Validate the given record as UTF-8.
+ ///
+ /// If it's not UTF-8, return an error.
+ #[inline]
+ pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
+ // If the entire buffer is ASCII, then we have nothing to fear.
+ if self.0.fields[..self.0.bounds.end()].is_ascii() {
+ return Ok(());
+ }
+ // Otherwise, we must check each field individually to ensure that
+ // it's valid UTF-8.
+ for (i, field) in self.iter().enumerate() {
+ if let Err(err) = field.to_str() {
+ return Err(new_utf8_error(i, err.valid_up_to()));
+ }
+ }
+ Ok(())
+ }
+
+ /// Compare the given byte record with the iterator of fields for equality.
+ pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
+ where
+ I: IntoIterator<Item = T>,
+ T: AsRef<[u8]>,
+ {
+ let mut it_record = self.iter();
+ let mut it_other = other.into_iter();
+ loop {
+ match (it_record.next(), it_other.next()) {
+ (None, None) => return true,
+ (None, Some(_)) | (Some(_), None) => return false,
+ (Some(x), Some(y)) => {
+ if x != y.as_ref() {
+ return false;
+ }
+ }
+ }
+ }
+ }
+}
+
+/// A position in CSV data.
+///
+/// A position is used to report errors in CSV data. All positions include the
+/// byte offset, line number and record index at which the error occurred.
+///
+/// Byte offsets and record indices start at `0`. Line numbers start at `1`.
+///
+/// A CSV reader will automatically assign the position of each record.
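+///
+/// # Example
+///
+/// A brief sketch of building a position by hand with the builder-style
+/// setters:
+///
+/// ```
+/// use csv::Position;
+///
+/// let mut pos = Position::new();
+/// pos.set_byte(6).set_line(2).set_record(1);
+/// assert_eq!(pos.byte(), 6);
+/// assert_eq!(pos.line(), 2);
+/// assert_eq!(pos.record(), 1);
+/// ```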
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Position {
+ byte: u64,
+ line: u64,
+ record: u64,
+}
+
+impl Position {
+ /// Returns a new position initialized to the start value.
+ #[inline]
+ pub fn new() -> Position {
+ Position { byte: 0, line: 1, record: 0 }
+ }
+
+ /// The byte offset, starting at `0`, of this position.
+ #[inline]
+ pub fn byte(&self) -> u64 {
+ self.byte
+    }
+
+    /// The line number, starting at `1`, of this position.
+ #[inline]
+ pub fn line(&self) -> u64 {
+ self.line
+    }
+
+    /// The record index, starting with the first record at `0`.
+ #[inline]
+ pub fn record(&self) -> u64 {
+ self.record
+ }
+
+ /// Set the byte offset of this position.
+ #[inline]
+ pub fn set_byte(&mut self, byte: u64) -> &mut Position {
+ self.byte = byte;
+ self
+ }
+
+ /// Set the line number of this position.
+ ///
+ /// If the line number is less than `1`, then this method panics.
+ #[inline]
+ pub fn set_line(&mut self, line: u64) -> &mut Position {
+ assert!(line > 0);
+ self.line = line;
+ self
+ }
+
+ /// Set the record index of this position.
+ #[inline]
+ pub fn set_record(&mut self, record: u64) -> &mut Position {
+ self.record = record;
+ self
+ }
+}
+
+/// The bounds of fields in a single record.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct Bounds {
+ /// The ending index of each field.
+ ends: Vec<usize>,
+ /// The number of fields in this record.
+ ///
+ /// Technically, we could drop this field and maintain an invariant that
+ /// `ends.len()` is always the number of fields, but doing that efficiently
+ /// requires attention to safety. We play it safe at essentially no cost.
+ len: usize,
+}
+
+impl Default for Bounds {
+ #[inline]
+ fn default() -> Bounds {
+ Bounds::with_capacity(0)
+ }
+}
+
+impl Bounds {
+ /// Create a new set of bounds with the given capacity for storing the
+ /// ends of fields.
+ #[inline]
+ fn with_capacity(capacity: usize) -> Bounds {
+ Bounds { ends: vec![0; capacity], len: 0 }
+ }
+
+ /// Returns the bounds of field `i`.
+ #[inline]
+ fn get(&self, i: usize) -> Option<Range<usize>> {
+ if i >= self.len {
+ return None;
+ }
+ let end = match self.ends.get(i) {
+ None => return None,
+ Some(&end) => end,
+ };
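+        // The start of field `i` is the end of field `i - 1`, or 0 when
+        // `i` is the first field.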
+ let start = match i.checked_sub(1).and_then(|i| self.ends.get(i)) {
+ None => 0,
+ Some(&start) => start,
+ };
+ Some(ops::Range { start: start, end: end })
+ }
+
+ /// Returns a slice of ending positions of all fields.
+ #[inline]
+ fn ends(&self) -> &[usize] {
+ &self.ends[..self.len]
+ }
+
+    /// Return the ending position of the last field.
+ ///
+ /// If there are no fields, this returns `0`.
+ #[inline]
+ fn end(&self) -> usize {
+ self.ends().last().map(|&i| i).unwrap_or(0)
+ }
+
+ /// Returns the number of fields in these bounds.
+ #[inline]
+ fn len(&self) -> usize {
+ self.len
+ }
+
+ /// Expand the capacity for storing field ending positions.
+ #[inline]
+ fn expand(&mut self) {
+ let new_len = self.ends.len().checked_mul(2).unwrap();
+ self.ends.resize(cmp::max(4, new_len), 0);
+ }
+
+ /// Add a new field with the given ending position.
+ #[inline]
+ fn add(&mut self, pos: usize) {
+ if self.len >= self.ends.len() {
+ self.expand();
+ }
+ self.ends[self.len] = pos;
+ self.len += 1;
+ }
+}
+
+impl ops::Index<usize> for ByteRecord {
+ type Output = [u8];
+ #[inline]
+ fn index(&self, i: usize) -> &[u8] {
+ self.get(i).unwrap()
+ }
+}
+
+impl From<StringRecord> for ByteRecord {
+ #[inline]
+ fn from(record: StringRecord) -> ByteRecord {
+ record.into_byte_record()
+ }
+}
+
+impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
+ #[inline]
+ fn from(xs: Vec<T>) -> ByteRecord {
+ ByteRecord::from_iter(&xs)
+ }
+}
+
+impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
+ #[inline]
+ fn from(xs: &'a [T]) -> ByteRecord {
+ ByteRecord::from_iter(xs)
+ }
+}
+
+impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
+ #[inline]
+ fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
+ let mut record = ByteRecord::new();
+ record.extend(iter);
+ record
+ }
+}
+
+impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
+ #[inline]
+ fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
+ for x in iter {
+ self.push_field(x.as_ref());
+ }
+ }
+}
+
+/// A double-ended iterator over the fields in a byte record.
+///
+/// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
+/// is being iterated over.
+#[derive(Clone)]
+pub struct ByteRecordIter<'r> {
+ /// The record we are iterating over.
+ r: &'r ByteRecord,
+ /// The starting index of the previous field. (For reverse iteration.)
+ last_start: usize,
+ /// The ending index of the previous field. (For forward iteration.)
+ last_end: usize,
+ /// The index of forward iteration.
+ i_forward: usize,
+ /// The index of reverse iteration.
+ i_reverse: usize,
+}
+
+impl<'r> IntoIterator for &'r ByteRecord {
+ type IntoIter = ByteRecordIter<'r>;
+ type Item = &'r [u8];
+
+ #[inline]
+ fn into_iter(self) -> ByteRecordIter<'r> {
+ ByteRecordIter {
+ r: self,
+ last_start: self.as_slice().len(),
+ last_end: 0,
+ i_forward: 0,
+ i_reverse: self.len(),
+ }
+ }
+}
+
+impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
+
+impl<'r> Iterator for ByteRecordIter<'r> {
+ type Item = &'r [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'r [u8]> {
+ if self.i_forward == self.i_reverse {
+ None
+ } else {
+ let start = self.last_end;
+ let end = self.r.0.bounds.ends()[self.i_forward];
+ self.i_forward += 1;
+ self.last_end = end;
+ Some(&self.r.0.fields[start..end])
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let x = self.i_reverse - self.i_forward;
+ (x, Some(x))
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ self.len()
+ }
+}
+
+impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'r [u8]> {
+ if self.i_forward == self.i_reverse {
+ None
+ } else {
+ self.i_reverse -= 1;
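+            // This field starts where field `i_reverse - 1` ends, or at 0
+            // for the first field.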
+ let start = self
+ .i_reverse
+ .checked_sub(1)
+ .map(|i| self.r.0.bounds.ends()[i])
+ .unwrap_or(0);
+ let end = self.last_start;
+ self.last_start = start;
+ Some(&self.r.0.fields[start..end])
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::string_record::StringRecord;
+
+ use super::ByteRecord;
+
+ fn b(s: &str) -> &[u8] {
+ s.as_bytes()
+ }
+
+ #[test]
+ fn record_1() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"foo");
+
+ assert_eq!(rec.len(), 1);
+ assert_eq!(rec.get(0), Some(b("foo")));
+ assert_eq!(rec.get(1), None);
+ assert_eq!(rec.get(2), None);
+ }
+
+ #[test]
+ fn record_2() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"foo");
+ rec.push_field(b"quux");
+
+ assert_eq!(rec.len(), 2);
+ assert_eq!(rec.get(0), Some(b("foo")));
+ assert_eq!(rec.get(1), Some(b("quux")));
+ assert_eq!(rec.get(2), None);
+ assert_eq!(rec.get(3), None);
+ }
+
+ #[test]
+ fn empty_record() {
+ let rec = ByteRecord::new();
+
+ assert_eq!(rec.len(), 0);
+ assert_eq!(rec.get(0), None);
+ assert_eq!(rec.get(1), None);
+ }
+
+ #[test]
+ fn trim_whitespace_only() {
+ let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("")));
+ }
+
+ #[test]
+ fn trim_front() {
+ let mut rec = ByteRecord::from(vec![b" abc"]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("abc")));
+
+ let mut rec = ByteRecord::from(vec![b(" abc"), b(" xyz")]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("abc")));
+ assert_eq!(rec.get(1), Some(b("xyz")));
+ }
+
+ #[test]
+ fn trim_back() {
+ let mut rec = ByteRecord::from(vec![b"abc "]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("abc")));
+
+ let mut rec = ByteRecord::from(vec![b("abc "), b("xyz ")]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("abc")));
+ assert_eq!(rec.get(1), Some(b("xyz")));
+ }
+
+ #[test]
+ fn trim_both() {
+ let mut rec = ByteRecord::from(vec![b" abc "]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("abc")));
+
+ let mut rec = ByteRecord::from(vec![b(" abc "), b(" xyz ")]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("abc")));
+ assert_eq!(rec.get(1), Some(b("xyz")));
+ }
+
+ #[test]
+ fn trim_does_not_panic_on_empty_records_1() {
+ let mut rec = ByteRecord::from(vec![b""]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("")));
+ }
+
+ #[test]
+ fn trim_does_not_panic_on_empty_records_2() {
+ let mut rec = ByteRecord::from(vec![b"", b""]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(b("")));
+ assert_eq!(rec.get(1), Some(b("")));
+ }
+
+ #[test]
+ fn trim_does_not_panic_on_empty_records_3() {
+ let mut rec = ByteRecord::new();
+ rec.trim();
+ assert_eq!(rec.as_slice().len(), 0);
+ }
+
+ #[test]
+ fn empty_field_1() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"");
+
+ assert_eq!(rec.len(), 1);
+ assert_eq!(rec.get(0), Some(b("")));
+ assert_eq!(rec.get(1), None);
+ assert_eq!(rec.get(2), None);
+ }
+
+ #[test]
+ fn empty_field_2() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"");
+ rec.push_field(b"");
+
+ assert_eq!(rec.len(), 2);
+ assert_eq!(rec.get(0), Some(b("")));
+ assert_eq!(rec.get(1), Some(b("")));
+ assert_eq!(rec.get(2), None);
+ assert_eq!(rec.get(3), None);
+ }
+
+ #[test]
+ fn empty_surround_1() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"foo");
+ rec.push_field(b"");
+ rec.push_field(b"quux");
+
+ assert_eq!(rec.len(), 3);
+ assert_eq!(rec.get(0), Some(b("foo")));
+ assert_eq!(rec.get(1), Some(b("")));
+ assert_eq!(rec.get(2), Some(b("quux")));
+ assert_eq!(rec.get(3), None);
+ assert_eq!(rec.get(4), None);
+ }
+
+ #[test]
+ fn empty_surround_2() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"foo");
+ rec.push_field(b"");
+ rec.push_field(b"quux");
+ rec.push_field(b"");
+
+ assert_eq!(rec.len(), 4);
+ assert_eq!(rec.get(0), Some(b("foo")));
+ assert_eq!(rec.get(1), Some(b("")));
+ assert_eq!(rec.get(2), Some(b("quux")));
+ assert_eq!(rec.get(3), Some(b("")));
+ assert_eq!(rec.get(4), None);
+ assert_eq!(rec.get(5), None);
+ }
+
+ #[test]
+ fn utf8_error_1() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"foo");
+ rec.push_field(b"b\xFFar");
+
+ let err = StringRecord::from_byte_record(rec).unwrap_err();
+ assert_eq!(err.utf8_error().field(), 1);
+ assert_eq!(err.utf8_error().valid_up_to(), 1);
+ }
+
+ #[test]
+ fn utf8_error_2() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"\xFF");
+
+ let err = StringRecord::from_byte_record(rec).unwrap_err();
+ assert_eq!(err.utf8_error().field(), 0);
+ assert_eq!(err.utf8_error().valid_up_to(), 0);
+ }
+
+ #[test]
+ fn utf8_error_3() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"a\xFF");
+
+ let err = StringRecord::from_byte_record(rec).unwrap_err();
+ assert_eq!(err.utf8_error().field(), 0);
+ assert_eq!(err.utf8_error().valid_up_to(), 1);
+ }
+
+ #[test]
+ fn utf8_error_4() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"a");
+ rec.push_field(b"b");
+ rec.push_field(b"c");
+ rec.push_field(b"d");
+ rec.push_field(b"xyz\xFF");
+
+ let err = StringRecord::from_byte_record(rec).unwrap_err();
+ assert_eq!(err.utf8_error().field(), 4);
+ assert_eq!(err.utf8_error().valid_up_to(), 3);
+ }
+
+ #[test]
+ fn utf8_error_5() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"a");
+ rec.push_field(b"b");
+ rec.push_field(b"c");
+ rec.push_field(b"d");
+ rec.push_field(b"\xFFxyz");
+
+ let err = StringRecord::from_byte_record(rec).unwrap_err();
+ assert_eq!(err.utf8_error().field(), 4);
+ assert_eq!(err.utf8_error().valid_up_to(), 0);
+ }
+
+ // This tests a tricky case where a single field on its own isn't valid
+ // UTF-8, but the concatenation of all fields is.
+ #[test]
+ fn utf8_error_6() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"a\xc9");
+ rec.push_field(b"\x91b");
+
+ let err = StringRecord::from_byte_record(rec).unwrap_err();
+ assert_eq!(err.utf8_error().field(), 0);
+ assert_eq!(err.utf8_error().valid_up_to(), 1);
+ }
+
+ // This tests that we can always clear a `ByteRecord` and get a guaranteed
+ // successful conversion to UTF-8. This permits reusing the allocation.
+ #[test]
+ fn utf8_clear_ok() {
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"\xFF");
+ assert!(StringRecord::from_byte_record(rec).is_err());
+
+ let mut rec = ByteRecord::new();
+ rec.push_field(b"\xFF");
+ rec.clear();
+ assert!(StringRecord::from_byte_record(rec).is_ok());
+ }
+
+ #[test]
+ fn iter() {
+ let data = vec!["foo", "bar", "baz", "quux", "wat"];
+ let rec = ByteRecord::from(&*data);
+ let got: Vec<&str> =
+ rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
+ assert_eq!(data, got);
+ }
+
+ #[test]
+ fn iter_reverse() {
+ let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
+ let rec = ByteRecord::from(&*data);
+ let got: Vec<&str> = rec
+ .iter()
+ .rev()
+ .map(|x| ::std::str::from_utf8(x).unwrap())
+ .collect();
+ data.reverse();
+ assert_eq!(data, got);
+ }
+
+ #[test]
+ fn iter_forward_and_reverse() {
+ let data = vec!["foo", "bar", "baz", "quux", "wat"];
+ let rec = ByteRecord::from(data);
+ let mut it = rec.iter();
+
+ assert_eq!(it.next_back(), Some(b("wat")));
+ assert_eq!(it.next(), Some(b("foo")));
+ assert_eq!(it.next(), Some(b("bar")));
+ assert_eq!(it.next_back(), Some(b("quux")));
+ assert_eq!(it.next(), Some(b("baz")));
+ assert_eq!(it.next_back(), None);
+ assert_eq!(it.next(), None);
+ }
+
+ // Check that record equality respects field boundaries.
+ //
+ // Regression test for #138.
+ #[test]
+ fn eq_field_boundaries() {
+ let test1 = ByteRecord::from(vec!["12", "34"]);
+ let test2 = ByteRecord::from(vec!["123", "4"]);
+
+ assert_ne!(test1, test2);
+ }
+
+ // Check that record equality respects number of fields.
+ //
+ // Regression test for #138.
+ #[test]
+ fn eq_record_len() {
+ let test1 = ByteRecord::from(vec!["12", "34", "56"]);
+ let test2 = ByteRecord::from(vec!["12", "34"]);
+ assert_ne!(test1, test2);
+ }
+}
diff --git a/src/cookbook.rs b/src/cookbook.rs
new file mode 100644
index 0000000..a28dc72
--- /dev/null
+++ b/src/cookbook.rs
@@ -0,0 +1,294 @@
+/*!
+A cookbook of examples for CSV reading and writing.
+
+# List of examples
+
+This is a list of examples that follow. Each of them can be found in the
+`examples` directory of the
+[`rust-csv`](https://github.com/BurntSushi/rust-csv)
+repository.
+
+For **reading** CSV:
+
+1. [Basic](#reading-basic)
+2. [With Serde](#reading-with-serde)
+3. [Setting a different delimiter](#reading-setting-a-different-delimiter)
+4. [Without headers](#reading-without-headers)
+
+For **writing** CSV:
+
+5. [Basic](#writing-basic)
+6. [With Serde](#writing-with-serde)
+
+Please
+[submit a pull request](https://github.com/BurntSushi/rust-csv/pulls)
+if you're interested in adding an example to this list!
+
+# Reading: basic
+
+This example shows how to read CSV data from stdin and print each record to
+stdout.
+
+```no_run
+# //cookbook-read-basic.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn example() -> Result<(), Box<dyn Error>> {
+ // Build the CSV reader and iterate over each record.
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ // The iterator yields Result<StringRecord, Error>, so we check the
+        // error here.
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-read-basic < examples/data/smallpop.csv
+```
+
+# Reading: with Serde
+
+This is like the previous example, except it shows how to deserialize each
+record into a struct type that you define.
+
+For more examples and details on how Serde deserialization works, see the
+[`Reader::deserialize`](../struct.Reader.html#method.deserialize)
+method.
+
+```no_run
+# //cookbook-read-serde.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+use serde::Deserialize;
+
+// By default, struct field names are deserialized based on the position of
+// a corresponding field in the CSV data's header record.
+#[derive(Debug, Deserialize)]
+struct Record {
+ city: String,
+ region: String,
+ country: String,
+ population: Option<u64>,
+}
+
+fn example() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.deserialize() {
+ // Notice that we need to provide a type hint for automatic
+ // deserialization.
+ let record: Record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-read-serde < examples/data/smallpop.csv
+```
+
+# Reading: setting a different delimiter
+
+This example shows how to read CSV data from stdin where fields are separated
+by `:` instead of `,`.
+
+```no_run
+# //cookbook-read-colon.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn example() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::ReaderBuilder::new()
+ .delimiter(b':')
+ .from_reader(io::stdin());
+ for result in rdr.records() {
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-read-colon < examples/data/smallpop-colon.csv
+```
+
+# Reading: without headers
+
+The CSV reader in this crate assumes that CSV data has a header record by
+default, but the setting can be toggled. When enabled, the first record in
+CSV data is interpreted as the header record and is skipped. When disabled, the
+first record is not skipped. This example shows how to disable that setting.
+
+```no_run
+# //cookbook-read-no-headers.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn example() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::ReaderBuilder::new()
+ .has_headers(false)
+ .from_reader(io::stdin());
+ for result in rdr.records() {
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-read-no-headers < examples/data/smallpop-no-headers.csv
+```
+
+# Writing: basic
+
+This example shows how to write CSV data to stdout.
+
+```no_run
+# //cookbook-write-basic.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn example() -> Result<(), Box<dyn Error>> {
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ // When writing records without Serde, the header record is written just
+ // like any other record.
+ wtr.write_record(&["city", "region", "country", "population"])?;
+ wtr.write_record(&["Southborough", "MA", "United States", "9686"])?;
+ wtr.write_record(&["Northbridge", "MA", "United States", "14061"])?;
+ wtr.flush()?;
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-write-basic > /tmp/simplepop.csv
+```
+
+# Writing: with Serde
+
+This example shows how to write CSV data to stdout with Serde. Namely, we
+represent each record using a custom struct that we define. In this example,
+headers are written automatically.
+
+```no_run
+# //cookbook-write-serde.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+use serde::Serialize;
+
+#[derive(Debug, Serialize)]
+struct Record {
+ city: String,
+ region: String,
+ country: String,
+ population: Option<u64>,
+}
+
+fn example() -> Result<(), Box<dyn Error>> {
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ // When writing records with Serde using structs, the header row is written
+ // automatically.
+ wtr.serialize(Record {
+ city: "Southborough".to_string(),
+ region: "MA".to_string(),
+ country: "United States".to_string(),
+ population: Some(9686),
+ })?;
+ wtr.serialize(Record {
+ city: "Northbridge".to_string(),
+ region: "MA".to_string(),
+ country: "United States".to_string(),
+ population: Some(14061),
+ })?;
+ wtr.flush()?;
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-write-serde > /tmp/simplepop.csv
+```
+*/
diff --git a/src/deserializer.rs b/src/deserializer.rs
new file mode 100644
index 0000000..4ee1a67
--- /dev/null
+++ b/src/deserializer.rs
@@ -0,0 +1,1253 @@
+use std::error::Error as StdError;
+use std::fmt;
+use std::iter;
+use std::num;
+use std::str;
+
+use serde::de::value::BorrowedBytesDeserializer;
+use serde::de::{
+ Deserialize, DeserializeSeed, Deserializer, EnumAccess,
+ Error as SerdeError, IntoDeserializer, MapAccess, SeqAccess, Unexpected,
+ VariantAccess, Visitor,
+};
+use serde::serde_if_integer128;
+
+use crate::byte_record::{ByteRecord, ByteRecordIter};
+use crate::error::{Error, ErrorKind};
+use crate::string_record::{StringRecord, StringRecordIter};
+
+use self::DeserializeErrorKind as DEK;
+
+pub fn deserialize_string_record<'de, D: Deserialize<'de>>(
+ record: &'de StringRecord,
+ headers: Option<&'de StringRecord>,
+) -> Result<D, Error> {
+ let mut deser = DeRecordWrap(DeStringRecord {
+ it: record.iter().peekable(),
+ headers: headers.map(|r| r.iter()),
+ field: 0,
+ });
+ D::deserialize(&mut deser).map_err(|err| {
+ Error::new(ErrorKind::Deserialize {
+ pos: record.position().map(Clone::clone),
+ err: err,
+ })
+ })
+}
+
+pub fn deserialize_byte_record<'de, D: Deserialize<'de>>(
+ record: &'de ByteRecord,
+ headers: Option<&'de ByteRecord>,
+) -> Result<D, Error> {
+ let mut deser = DeRecordWrap(DeByteRecord {
+ it: record.iter().peekable(),
+ headers: headers.map(|r| r.iter()),
+ field: 0,
+ });
+ D::deserialize(&mut deser).map_err(|err| {
+ Error::new(ErrorKind::Deserialize {
+ pos: record.position().map(Clone::clone),
+ err: err,
+ })
+ })
+}
+
+/// An over-engineered internal trait that permits writing a single Serde
+/// deserializer that works on both ByteRecord and StringRecord.
+///
+/// We *could* implement a single deserializer on `ByteRecord` and simply
+/// convert `StringRecord`s to `ByteRecord`s, but then the implementation
+/// would be required to redo UTF-8 validation checks in certain places.
+///
+/// How does this work? We create `DeStringRecord` and `DeByteRecord`
+/// adapters over the two record types and implement `DeRecord` for both
+/// (and for any `DeRecordWrap<T>` where `T: DeRecord`). Finally, we impl
+/// `serde::Deserializer` for `&mut DeRecordWrap<T>` where `T: DeRecord`.
+/// That is, the `DeRecord` trait captures the differences between
+/// deserializing out of a `ByteRecord` and deserializing out of a
+/// `StringRecord`.
+///
+/// The lifetime `'r` refers to the lifetime of the underlying record.
+trait DeRecord<'r> {
+    /// Returns true if and only if this deserializer has access to headers.
+ fn has_headers(&self) -> bool;
+
+ /// Extracts the next string header value from the underlying record.
+ fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError>;
+
+ /// Extracts the next raw byte header value from the underlying record.
+ fn next_header_bytes(
+ &mut self,
+ ) -> Result<Option<&'r [u8]>, DeserializeError>;
+
+ /// Extracts the next string field from the underlying record.
+ fn next_field(&mut self) -> Result<&'r str, DeserializeError>;
+
+ /// Extracts the next raw byte field from the underlying record.
+ fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError>;
+
+ /// Peeks at the next field from the underlying record.
+ fn peek_field(&mut self) -> Option<&'r [u8]>;
+
+ /// Returns an error corresponding to the most recently extracted field.
+ fn error(&self, kind: DeserializeErrorKind) -> DeserializeError;
+
+ /// Infer the type of the next field and deserialize it.
+ fn infer_deserialize<'de, V: Visitor<'de>>(
+ &mut self,
+ visitor: V,
+ ) -> Result<V::Value, DeserializeError>;
+}
+
+struct DeRecordWrap<T>(T);
+
+impl<'r, T: DeRecord<'r>> DeRecord<'r> for DeRecordWrap<T> {
+ #[inline]
+ fn has_headers(&self) -> bool {
+ self.0.has_headers()
+ }
+
+ #[inline]
+ fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError> {
+ self.0.next_header()
+ }
+
+ #[inline]
+ fn next_header_bytes(
+ &mut self,
+ ) -> Result<Option<&'r [u8]>, DeserializeError> {
+ self.0.next_header_bytes()
+ }
+
+ #[inline]
+ fn next_field(&mut self) -> Result<&'r str, DeserializeError> {
+ self.0.next_field()
+ }
+
+ #[inline]
+ fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError> {
+ self.0.next_field_bytes()
+ }
+
+ #[inline]
+ fn peek_field(&mut self) -> Option<&'r [u8]> {
+ self.0.peek_field()
+ }
+
+ #[inline]
+ fn error(&self, kind: DeserializeErrorKind) -> DeserializeError {
+ self.0.error(kind)
+ }
+
+ #[inline]
+ fn infer_deserialize<'de, V: Visitor<'de>>(
+ &mut self,
+ visitor: V,
+ ) -> Result<V::Value, DeserializeError> {
+ self.0.infer_deserialize(visitor)
+ }
+}
+
+struct DeStringRecord<'r> {
+ it: iter::Peekable<StringRecordIter<'r>>,
+ headers: Option<StringRecordIter<'r>>,
+ field: u64,
+}
+
+impl<'r> DeRecord<'r> for DeStringRecord<'r> {
+ #[inline]
+ fn has_headers(&self) -> bool {
+ self.headers.is_some()
+ }
+
+ #[inline]
+ fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError> {
+ Ok(self.headers.as_mut().and_then(|it| it.next()))
+ }
+
+ #[inline]
+ fn next_header_bytes(
+ &mut self,
+ ) -> Result<Option<&'r [u8]>, DeserializeError> {
+ Ok(self.next_header()?.map(|s| s.as_bytes()))
+ }
+
+ #[inline]
+ fn next_field(&mut self) -> Result<&'r str, DeserializeError> {
+ match self.it.next() {
+ Some(field) => {
+ self.field += 1;
+ Ok(field)
+ }
+ None => Err(DeserializeError {
+ field: None,
+ kind: DEK::UnexpectedEndOfRow,
+ }),
+ }
+ }
+
+ #[inline]
+ fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError> {
+ self.next_field().map(|s| s.as_bytes())
+ }
+
+ #[inline]
+ fn peek_field(&mut self) -> Option<&'r [u8]> {
+ self.it.peek().map(|s| s.as_bytes())
+ }
+
+ fn error(&self, kind: DeserializeErrorKind) -> DeserializeError {
+ DeserializeError {
+ field: Some(self.field.saturating_sub(1)),
+ kind: kind,
+ }
+ }
+
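+    // Inference order: bool, then unsigned/signed 64-bit integers (and
+    // 128-bit integers when available), then floats, falling back to a
+    // string.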
+ fn infer_deserialize<'de, V: Visitor<'de>>(
+ &mut self,
+ visitor: V,
+ ) -> Result<V::Value, DeserializeError> {
+ let x = self.next_field()?;
+ if x == "true" {
+ return visitor.visit_bool(true);
+ } else if x == "false" {
+ return visitor.visit_bool(false);
+ } else if let Some(n) = try_positive_integer64(x) {
+ return visitor.visit_u64(n);
+ } else if let Some(n) = try_negative_integer64(x) {
+ return visitor.visit_i64(n);
+ }
+ serde_if_integer128! {
+ if let Some(n) = try_positive_integer128(x) {
+ return visitor.visit_u128(n);
+ } else if let Some(n) = try_negative_integer128(x) {
+ return visitor.visit_i128(n);
+ }
+ }
+ if let Some(n) = try_float(x) {
+ visitor.visit_f64(n)
+ } else {
+ visitor.visit_str(x)
+ }
+ }
+}
+
+struct DeByteRecord<'r> {
+ it: iter::Peekable<ByteRecordIter<'r>>,
+ headers: Option<ByteRecordIter<'r>>,
+ field: u64,
+}
+
+impl<'r> DeRecord<'r> for DeByteRecord<'r> {
+ #[inline]
+ fn has_headers(&self) -> bool {
+ self.headers.is_some()
+ }
+
+ #[inline]
+ fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError> {
+ match self.next_header_bytes() {
+ Ok(Some(field)) => Ok(Some(
+ str::from_utf8(field)
+ .map_err(|err| self.error(DEK::InvalidUtf8(err)))?,
+ )),
+ Ok(None) => Ok(None),
+ Err(err) => Err(err),
+ }
+ }
+
+ #[inline]
+ fn next_header_bytes(
+ &mut self,
+ ) -> Result<Option<&'r [u8]>, DeserializeError> {
+ Ok(self.headers.as_mut().and_then(|it| it.next()))
+ }
+
+ #[inline]
+ fn next_field(&mut self) -> Result<&'r str, DeserializeError> {
+ self.next_field_bytes().and_then(|field| {
+ str::from_utf8(field)
+ .map_err(|err| self.error(DEK::InvalidUtf8(err)))
+ })
+ }
+
+ #[inline]
+ fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError> {
+ match self.it.next() {
+ Some(field) => {
+ self.field += 1;
+ Ok(field)
+ }
+ None => Err(DeserializeError {
+ field: None,
+ kind: DEK::UnexpectedEndOfRow,
+ }),
+ }
+ }
+
+ #[inline]
+ fn peek_field(&mut self) -> Option<&'r [u8]> {
+ self.it.peek().map(|s| *s)
+ }
+
+ fn error(&self, kind: DeserializeErrorKind) -> DeserializeError {
+ DeserializeError {
+ field: Some(self.field.saturating_sub(1)),
+ kind: kind,
+ }
+ }
+
+ fn infer_deserialize<'de, V: Visitor<'de>>(
+ &mut self,
+ visitor: V,
+ ) -> Result<V::Value, DeserializeError> {
+ let x = self.next_field_bytes()?;
+ if x == b"true" {
+ return visitor.visit_bool(true);
+ } else if x == b"false" {
+ return visitor.visit_bool(false);
+ } else if let Some(n) = try_positive_integer64_bytes(x) {
+ return visitor.visit_u64(n);
+ } else if let Some(n) = try_negative_integer64_bytes(x) {
+ return visitor.visit_i64(n);
+ }
+ serde_if_integer128! {
+ if let Some(n) = try_positive_integer128_bytes(x) {
+ return visitor.visit_u128(n);
+ } else if let Some(n) = try_negative_integer128_bytes(x) {
+ return visitor.visit_i128(n);
+ }
+ }
+ if let Some(n) = try_float_bytes(x) {
+ visitor.visit_f64(n)
+ } else if let Ok(s) = str::from_utf8(x) {
+ visitor.visit_str(s)
+ } else {
+ visitor.visit_bytes(x)
+ }
+ }
+}
+
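+// Generates a `deserialize_*` method for one integer type. In addition to
+// plain decimal, fields with a leading `0x` are parsed as hexadecimal.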
+macro_rules! deserialize_int {
+ ($method:ident, $visit:ident, $inttype:ty) => {
+ fn $method<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ let field = self.next_field()?;
+ let num = if field.starts_with("0x") {
+ <$inttype>::from_str_radix(&field[2..], 16)
+ } else {
+ field.parse()
+ };
+ visitor.$visit(num.map_err(|err| self.error(DEK::ParseInt(err)))?)
+ }
+ };
+}
+
+impl<'a, 'de: 'a, T: DeRecord<'de>> Deserializer<'de>
+ for &'a mut DeRecordWrap<T>
+{
+ type Error = DeserializeError;
+
+ fn deserialize_any<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ self.infer_deserialize(visitor)
+ }
+
+ fn deserialize_bool<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_bool(
+ self.next_field()?
+ .parse()
+ .map_err(|err| self.error(DEK::ParseBool(err)))?,
+ )
+ }
+
+ deserialize_int!(deserialize_u8, visit_u8, u8);
+ deserialize_int!(deserialize_u16, visit_u16, u16);
+ deserialize_int!(deserialize_u32, visit_u32, u32);
+ deserialize_int!(deserialize_u64, visit_u64, u64);
+ serde_if_integer128! {
+ deserialize_int!(deserialize_u128, visit_u128, u128);
+ }
+ deserialize_int!(deserialize_i8, visit_i8, i8);
+ deserialize_int!(deserialize_i16, visit_i16, i16);
+ deserialize_int!(deserialize_i32, visit_i32, i32);
+ deserialize_int!(deserialize_i64, visit_i64, i64);
+ serde_if_integer128! {
+ deserialize_int!(deserialize_i128, visit_i128, i128);
+ }
+
+ fn deserialize_f32<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_f32(
+ self.next_field()?
+ .parse()
+ .map_err(|err| self.error(DEK::ParseFloat(err)))?,
+ )
+ }
+
+ fn deserialize_f64<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_f64(
+ self.next_field()?
+ .parse()
+ .map_err(|err| self.error(DEK::ParseFloat(err)))?,
+ )
+ }
+
+ fn deserialize_char<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ let field = self.next_field()?;
+ let len = field.chars().count();
+ if len != 1 {
+ return Err(self.error(DEK::Message(format!(
+ "expected single character but got {} characters in '{}'",
+ len, field
+ ))));
+ }
+ visitor.visit_char(field.chars().next().unwrap())
+ }
+
+ fn deserialize_str<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ self.next_field().and_then(|f| visitor.visit_borrowed_str(f))
+ }
+
+ fn deserialize_string<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ self.next_field().and_then(|f| visitor.visit_str(f.into()))
+ }
+
+ fn deserialize_bytes<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ self.next_field_bytes().and_then(|f| visitor.visit_borrowed_bytes(f))
+ }
+
+ fn deserialize_byte_buf<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ self.next_field_bytes()
+ .and_then(|f| visitor.visit_byte_buf(f.to_vec()))
+ }
+
+ fn deserialize_option<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
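+        // A missing or empty field deserializes to `None`; any other field
+        // is deserialized as `Some`.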
+ match self.peek_field() {
+ None => visitor.visit_none(),
+ Some(f) if f.is_empty() => {
+ self.next_field().expect("empty field");
+ visitor.visit_none()
+ }
+ Some(_) => visitor.visit_some(self),
+ }
+ }
+
+ fn deserialize_unit<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_unit()
+ }
+
+ fn deserialize_unit_struct<V: Visitor<'de>>(
+ self,
+ _name: &'static str,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_unit()
+ }
+
+ fn deserialize_newtype_struct<V: Visitor<'de>>(
+ self,
+ _name: &'static str,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_newtype_struct(self)
+ }
+
+ fn deserialize_seq<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_seq(self)
+ }
+
+ fn deserialize_tuple<V: Visitor<'de>>(
+ self,
+ _len: usize,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_seq(self)
+ }
+
+ fn deserialize_tuple_struct<V: Visitor<'de>>(
+ self,
+ _name: &'static str,
+ _len: usize,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_seq(self)
+ }
+
+ fn deserialize_map<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ if !self.has_headers() {
+ visitor.visit_seq(self)
+ } else {
+ visitor.visit_map(self)
+ }
+ }
+
+ fn deserialize_struct<V: Visitor<'de>>(
+ self,
+ _name: &'static str,
+ _fields: &'static [&'static str],
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ if !self.has_headers() {
+ visitor.visit_seq(self)
+ } else {
+ visitor.visit_map(self)
+ }
+ }
+
+ fn deserialize_identifier<V: Visitor<'de>>(
+ self,
+ _visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ Err(self.error(DEK::Unsupported("deserialize_identifier".into())))
+ }
+
+ fn deserialize_enum<V: Visitor<'de>>(
+ self,
+ _name: &'static str,
+ _variants: &'static [&'static str],
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ visitor.visit_enum(self)
+ }
+
+ fn deserialize_ignored_any<V: Visitor<'de>>(
+ self,
+ visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ // Read and drop the next field.
+ // This code is reached, e.g., when trying to deserialize a header
+ // that doesn't exist in the destination struct.
+ let _ = self.next_field_bytes()?;
+ visitor.visit_unit()
+ }
+}
+
+impl<'a, 'de: 'a, T: DeRecord<'de>> EnumAccess<'de>
+ for &'a mut DeRecordWrap<T>
+{
+ type Error = DeserializeError;
+ type Variant = Self;
+
+ fn variant_seed<V: DeserializeSeed<'de>>(
+ self,
+ seed: V,
+ ) -> Result<(V::Value, Self::Variant), Self::Error> {
+ let variant_name = self.next_field()?;
+ seed.deserialize(variant_name.into_deserializer()).map(|v| (v, self))
+ }
+}
+
+impl<'a, 'de: 'a, T: DeRecord<'de>> VariantAccess<'de>
+ for &'a mut DeRecordWrap<T>
+{
+ type Error = DeserializeError;
+
+ fn unit_variant(self) -> Result<(), Self::Error> {
+ Ok(())
+ }
+
+ fn newtype_variant_seed<U: DeserializeSeed<'de>>(
+ self,
+ _seed: U,
+ ) -> Result<U::Value, Self::Error> {
+ let unexp = Unexpected::UnitVariant;
+ Err(DeserializeError::invalid_type(unexp, &"newtype variant"))
+ }
+
+ fn tuple_variant<V: Visitor<'de>>(
+ self,
+ _len: usize,
+ _visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ let unexp = Unexpected::UnitVariant;
+ Err(DeserializeError::invalid_type(unexp, &"tuple variant"))
+ }
+
+ fn struct_variant<V: Visitor<'de>>(
+ self,
+ _fields: &'static [&'static str],
+ _visitor: V,
+ ) -> Result<V::Value, Self::Error> {
+ let unexp = Unexpected::UnitVariant;
+ Err(DeserializeError::invalid_type(unexp, &"struct variant"))
+ }
+}
+
+impl<'a, 'de: 'a, T: DeRecord<'de>> SeqAccess<'de>
+ for &'a mut DeRecordWrap<T>
+{
+ type Error = DeserializeError;
+
+ fn next_element_seed<U: DeserializeSeed<'de>>(
+ &mut self,
+ seed: U,
+ ) -> Result<Option<U::Value>, Self::Error> {
+ if self.peek_field().is_none() {
+ Ok(None)
+ } else {
+ seed.deserialize(&mut **self).map(Some)
+ }
+ }
+}
+
+impl<'a, 'de: 'a, T: DeRecord<'de>> MapAccess<'de>
+ for &'a mut DeRecordWrap<T>
+{
+ type Error = DeserializeError;
+
+ fn next_key_seed<K: DeserializeSeed<'de>>(
+ &mut self,
+ seed: K,
+ ) -> Result<Option<K::Value>, Self::Error> {
+ assert!(self.has_headers());
+ let field = match self.next_header_bytes()? {
+ None => return Ok(None),
+ Some(field) => field,
+ };
+ seed.deserialize(BorrowedBytesDeserializer::new(field)).map(Some)
+ }
+
+ fn next_value_seed<K: DeserializeSeed<'de>>(
+ &mut self,
+ seed: K,
+ ) -> Result<K::Value, Self::Error> {
+ seed.deserialize(&mut **self)
+ }
+}
+
+/// A Serde deserialization error.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct DeserializeError {
+ field: Option<u64>,
+ kind: DeserializeErrorKind,
+}
+
+/// The type of a Serde deserialization error.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum DeserializeErrorKind {
+ /// A generic Serde deserialization error.
+ Message(String),
+ /// A generic Serde unsupported error.
+ Unsupported(String),
+ /// This error occurs when a Rust type expects to decode another field
+ /// from a row, but no more fields exist.
+ UnexpectedEndOfRow,
+ /// This error occurs when UTF-8 validation on a field fails. UTF-8
+ /// validation is only performed when the Rust type requires it (e.g.,
+ /// a `String` or `&str` type).
+ InvalidUtf8(str::Utf8Error),
+ /// This error occurs when a boolean value fails to parse.
+ ParseBool(str::ParseBoolError),
+ /// This error occurs when an integer value fails to parse.
+ ParseInt(num::ParseIntError),
+ /// This error occurs when a float value fails to parse.
+ ParseFloat(num::ParseFloatError),
+}
+
+impl SerdeError for DeserializeError {
+ fn custom<T: fmt::Display>(msg: T) -> DeserializeError {
+ DeserializeError { field: None, kind: DEK::Message(msg.to_string()) }
+ }
+}
+
+impl StdError for DeserializeError {
+ fn description(&self) -> &str {
+ self.kind.description()
+ }
+}
+
+impl fmt::Display for DeserializeError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ if let Some(field) = self.field {
+ write!(f, "field {}: {}", field, self.kind)
+ } else {
+ write!(f, "{}", self.kind)
+ }
+ }
+}
+
+impl fmt::Display for DeserializeErrorKind {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use self::DeserializeErrorKind::*;
+
+ match *self {
+ Message(ref msg) => write!(f, "{}", msg),
+ Unsupported(ref which) => {
+ write!(f, "unsupported deserializer method: {}", which)
+ }
+ UnexpectedEndOfRow => write!(f, "{}", self.description()),
+ InvalidUtf8(ref err) => err.fmt(f),
+ ParseBool(ref err) => err.fmt(f),
+ ParseInt(ref err) => err.fmt(f),
+ ParseFloat(ref err) => err.fmt(f),
+ }
+ }
+}
+
+impl DeserializeError {
+ /// Return the field index (starting at 0) of this error, if available.
+ pub fn field(&self) -> Option<u64> {
+ self.field
+ }
+
+ /// Return the underlying error kind.
+ pub fn kind(&self) -> &DeserializeErrorKind {
+ &self.kind
+ }
+}
+
+impl DeserializeErrorKind {
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ use self::DeserializeErrorKind::*;
+
+ match *self {
+ Message(_) => "deserialization error",
+ Unsupported(_) => "unsupported deserializer method",
+ UnexpectedEndOfRow => "expected field, but got end of row",
+ InvalidUtf8(ref err) => err.description(),
+ ParseBool(ref err) => err.description(),
+ ParseInt(ref err) => err.description(),
+ ParseFloat(ref err) => err.description(),
+ }
+ }
+}
+
+serde_if_integer128! {
+ fn try_positive_integer128(s: &str) -> Option<u128> {
+ s.parse().ok()
+ }
+
+ fn try_negative_integer128(s: &str) -> Option<i128> {
+ s.parse().ok()
+ }
+}
+
+fn try_positive_integer64(s: &str) -> Option<u64> {
+ s.parse().ok()
+}
+
+fn try_negative_integer64(s: &str) -> Option<i64> {
+ s.parse().ok()
+}
+
+fn try_float(s: &str) -> Option<f64> {
+ s.parse().ok()
+}
+
+fn try_positive_integer64_bytes(s: &[u8]) -> Option<u64> {
+ str::from_utf8(s).ok().and_then(|s| s.parse().ok())
+}
+
+fn try_negative_integer64_bytes(s: &[u8]) -> Option<i64> {
+ str::from_utf8(s).ok().and_then(|s| s.parse().ok())
+}
+
+serde_if_integer128! {
+ fn try_positive_integer128_bytes(s: &[u8]) -> Option<u128> {
+ str::from_utf8(s).ok().and_then(|s| s.parse().ok())
+ }
+
+ fn try_negative_integer128_bytes(s: &[u8]) -> Option<i128> {
+ str::from_utf8(s).ok().and_then(|s| s.parse().ok())
+ }
+}
+
+fn try_float_bytes(s: &[u8]) -> Option<f64> {
+ str::from_utf8(s).ok().and_then(|s| s.parse().ok())
+}
+
+#[cfg(test)]
+mod tests {
+ use std::collections::HashMap;
+
+ use bstr::BString;
+ use serde::{de::DeserializeOwned, serde_if_integer128, Deserialize};
+
+ use super::{deserialize_byte_record, deserialize_string_record};
+ use crate::byte_record::ByteRecord;
+ use crate::error::Error;
+ use crate::string_record::StringRecord;
+
+ fn de<D: DeserializeOwned>(fields: &[&str]) -> Result<D, Error> {
+ let record = StringRecord::from(fields);
+ deserialize_string_record(&record, None)
+ }
+
+ fn de_headers<D: DeserializeOwned>(
+ headers: &[&str],
+ fields: &[&str],
+ ) -> Result<D, Error> {
+ let headers = StringRecord::from(headers);
+ let record = StringRecord::from(fields);
+ deserialize_string_record(&record, Some(&headers))
+ }
+
+ fn b<'a, T: AsRef<[u8]> + ?Sized>(bytes: &'a T) -> &'a [u8] {
+ bytes.as_ref()
+ }
+
+ #[test]
+ fn with_header() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: String,
+ }
+
+ let got: Foo =
+ de_headers(&["x", "y", "z"], &["hi", "42", "1.3"]).unwrap();
+ assert_eq!(got, Foo { x: "hi".into(), y: 42, z: 1.3 });
+ }
+
+ #[test]
+ fn with_header_unknown() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ #[serde(deny_unknown_fields)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: String,
+ }
+ assert!(de_headers::<Foo>(
+ &["a", "x", "y", "z"],
+ &["foo", "hi", "42", "1.3"],
+ )
+ .is_err());
+ }
+
+ #[test]
+ fn with_header_missing() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: String,
+ }
+ assert!(de_headers::<Foo>(&["y", "z"], &["42", "1.3"],).is_err());
+ }
+
+ #[test]
+ fn with_header_missing_ok() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: Option<String>,
+ }
+
+ let got: Foo = de_headers(&["y", "z"], &["42", "1.3"]).unwrap();
+ assert_eq!(got, Foo { x: None, y: 42, z: 1.3 });
+ }
+
+ #[test]
+ fn with_header_no_fields() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: Option<String>,
+ }
+
+ let got = de_headers::<Foo>(&["y", "z"], &[]);
+ assert!(got.is_err());
+ }
+
+ #[test]
+ fn with_header_empty() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: Option<String>,
+ }
+
+ let got = de_headers::<Foo>(&[], &[]);
+ assert!(got.is_err());
+ }
+
+ #[test]
+ fn with_header_empty_ok() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo;
+
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Bar {};
+
+ let got = de_headers::<Foo>(&[], &[]);
+ assert_eq!(got.unwrap(), Foo);
+
+ let got = de_headers::<Bar>(&[], &[]);
+ assert_eq!(got.unwrap(), Bar {});
+
+ let got = de_headers::<()>(&[], &[]);
+ assert_eq!(got.unwrap(), ());
+ }
+
+ #[test]
+ fn without_header() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ z: f64,
+ y: i32,
+ x: String,
+ }
+
+ let got: Foo = de(&["1.3", "42", "hi"]).unwrap();
+ assert_eq!(got, Foo { x: "hi".into(), y: 42, z: 1.3 });
+ }
+
+ #[test]
+ fn no_fields() {
+ assert!(de::<String>(&[]).is_err());
+ }
+
+ #[test]
+ fn one_field() {
+ let got: i32 = de(&["42"]).unwrap();
+ assert_eq!(got, 42);
+ }
+
+ serde_if_integer128! {
+ #[test]
+ fn one_field_128() {
+ let got: i128 = de(&["2010223372036854775808"]).unwrap();
+ assert_eq!(got, 2010223372036854775808);
+ }
+ }
+
+ #[test]
+ fn two_fields() {
+ let got: (i32, bool) = de(&["42", "true"]).unwrap();
+ assert_eq!(got, (42, true));
+
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo(i32, bool);
+
+ let got: Foo = de(&["42", "true"]).unwrap();
+ assert_eq!(got, Foo(42, true));
+ }
+
+ #[test]
+ fn two_fields_too_many() {
+ let got: (i32, bool) = de(&["42", "true", "z", "z"]).unwrap();
+ assert_eq!(got, (42, true));
+ }
+
+ #[test]
+ fn two_fields_too_few() {
+ assert!(de::<(i32, bool)>(&["42"]).is_err());
+ }
+
+ #[test]
+ fn one_char() {
+ let got: char = de(&["a"]).unwrap();
+ assert_eq!(got, 'a');
+ }
+
+ #[test]
+ fn no_chars() {
+ assert!(de::<char>(&[""]).is_err());
+ }
+
+ #[test]
+ fn too_many_chars() {
+ assert!(de::<char>(&["ab"]).is_err());
+ }
+
+ #[test]
+ fn simple_seq() {
+ let got: Vec<i32> = de(&["1", "5", "10"]).unwrap();
+ assert_eq!(got, vec![1, 5, 10]);
+ }
+
+ #[test]
+ fn simple_hex_seq() {
+ let got: Vec<i32> = de(&["0x7F", "0xA9", "0x10"]).unwrap();
+ assert_eq!(got, vec![0x7F, 0xA9, 0x10]);
+ }
+
+ #[test]
+ fn mixed_hex_seq() {
+ let got: Vec<i32> = de(&["0x7F", "0xA9", "10"]).unwrap();
+ assert_eq!(got, vec![0x7F, 0xA9, 10]);
+ }
+
+ #[test]
+ fn bad_hex_seq() {
+ assert!(de::<Vec<u8>>(&["7F", "0xA9", "10"]).is_err());
+ }
+
+ #[test]
+ fn seq_in_struct() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ xs: Vec<i32>,
+ }
+ let got: Foo = de(&["1", "5", "10"]).unwrap();
+ assert_eq!(got, Foo { xs: vec![1, 5, 10] });
+ }
+
+ #[test]
+ fn seq_in_struct_tail() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ label: String,
+ xs: Vec<i32>,
+ }
+ let got: Foo = de(&["foo", "1", "5", "10"]).unwrap();
+ assert_eq!(got, Foo { label: "foo".into(), xs: vec![1, 5, 10] });
+ }
+
+ #[test]
+ fn map_headers() {
+ let got: HashMap<String, i32> =
+ de_headers(&["a", "b", "c"], &["1", "5", "10"]).unwrap();
+ assert_eq!(got.len(), 3);
+ assert_eq!(got["a"], 1);
+ assert_eq!(got["b"], 5);
+ assert_eq!(got["c"], 10);
+ }
+
+ #[test]
+ fn map_no_headers() {
+ let got = de::<HashMap<String, i32>>(&["1", "5", "10"]);
+ assert!(got.is_err());
+ }
+
+ #[test]
+ fn bytes() {
+ let got: Vec<u8> = de::<BString>(&["foobar"]).unwrap().into();
+ assert_eq!(got, b"foobar".to_vec());
+ }
+
+ #[test]
+ fn adjacent_fixed_arrays() {
+ let got: ([u32; 2], [u32; 2]) = de(&["1", "5", "10", "15"]).unwrap();
+ assert_eq!(got, ([1, 5], [10, 15]));
+ }
+
+ #[test]
+ fn enum_label_simple_tagged() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Row {
+ label: Label,
+ x: f64,
+ }
+
+ #[derive(Deserialize, Debug, PartialEq)]
+ #[serde(rename_all = "snake_case")]
+ enum Label {
+ Foo,
+ Bar,
+ Baz,
+ }
+
+ let got: Row = de_headers(&["label", "x"], &["bar", "5"]).unwrap();
+ assert_eq!(got, Row { label: Label::Bar, x: 5.0 });
+ }
+
+ #[test]
+ fn enum_untagged() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Row {
+ x: Boolish,
+ y: Boolish,
+ z: Boolish,
+ }
+
+ #[derive(Deserialize, Debug, PartialEq)]
+ #[serde(rename_all = "snake_case")]
+ #[serde(untagged)]
+ enum Boolish {
+ Bool(bool),
+ Number(i64),
+ String(String),
+ }
+
+ let got: Row =
+ de_headers(&["x", "y", "z"], &["true", "null", "1"]).unwrap();
+ assert_eq!(
+ got,
+ Row {
+ x: Boolish::Bool(true),
+ y: Boolish::String("null".into()),
+ z: Boolish::Number(1),
+ }
+ );
+ }
+
+ #[test]
+ fn option_empty_field() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ a: Option<i32>,
+ b: String,
+ c: Option<i32>,
+ }
+
+ let got: Foo =
+ de_headers(&["a", "b", "c"], &["", "foo", "5"]).unwrap();
+ assert_eq!(got, Foo { a: None, b: "foo".into(), c: Some(5) });
+ }
+
+ #[test]
+ fn option_invalid_field() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo {
+ #[serde(deserialize_with = "crate::invalid_option")]
+ a: Option<i32>,
+ #[serde(deserialize_with = "crate::invalid_option")]
+ b: Option<i32>,
+ #[serde(deserialize_with = "crate::invalid_option")]
+ c: Option<i32>,
+ }
+
+ let got: Foo =
+ de_headers(&["a", "b", "c"], &["xyz", "", "5"]).unwrap();
+ assert_eq!(got, Foo { a: None, b: None, c: Some(5) });
+ }
+
+ #[test]
+ fn borrowed() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Foo<'a, 'c> {
+ a: &'a str,
+ b: i32,
+ c: &'c str,
+ }
+
+ let headers = StringRecord::from(vec!["a", "b", "c"]);
+ let record = StringRecord::from(vec!["foo", "5", "bar"]);
+ let got: Foo =
+ deserialize_string_record(&record, Some(&headers)).unwrap();
+ assert_eq!(got, Foo { a: "foo", b: 5, c: "bar" });
+ }
+
+ #[test]
+ fn borrowed_map() {
+ use std::collections::HashMap;
+
+ let headers = StringRecord::from(vec!["a", "b", "c"]);
+ let record = StringRecord::from(vec!["aardvark", "bee", "cat"]);
+ let got: HashMap<&str, &str> =
+ deserialize_string_record(&record, Some(&headers)).unwrap();
+
+ let expected: HashMap<&str, &str> =
+ headers.iter().zip(&record).collect();
+ assert_eq!(got, expected);
+ }
+
+ #[test]
+ fn borrowed_map_bytes() {
+ use std::collections::HashMap;
+
+ let headers = ByteRecord::from(vec![b"a", b"\xFF", b"c"]);
+ let record = ByteRecord::from(vec!["aardvark", "bee", "cat"]);
+ let got: HashMap<&[u8], &[u8]> =
+ deserialize_byte_record(&record, Some(&headers)).unwrap();
+
+ let expected: HashMap<&[u8], &[u8]> =
+ headers.iter().zip(&record).collect();
+ assert_eq!(got, expected);
+ }
+
+ #[test]
+ fn flatten() {
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Input {
+ x: f64,
+ y: f64,
+ }
+
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Properties {
+ prop1: f64,
+ prop2: f64,
+ }
+
+ #[derive(Deserialize, Debug, PartialEq)]
+ struct Row {
+ #[serde(flatten)]
+ input: Input,
+ #[serde(flatten)]
+ properties: Properties,
+ }
+
+ let header = StringRecord::from(vec!["x", "y", "prop1", "prop2"]);
+ let record = StringRecord::from(vec!["1", "2", "3", "4"]);
+ let got: Row = record.deserialize(Some(&header)).unwrap();
+ assert_eq!(
+ got,
+ Row {
+ input: Input { x: 1.0, y: 2.0 },
+ properties: Properties { prop1: 3.0, prop2: 4.0 },
+ }
+ );
+ }
+
+ #[test]
+ fn partially_invalid_utf8() {
+ #[derive(Debug, Deserialize, PartialEq)]
+ struct Row {
+ h1: String,
+ h2: BString,
+ h3: String,
+ }
+
+ let headers = ByteRecord::from(vec![b"h1", b"h2", b"h3"]);
+ let record =
+ ByteRecord::from(vec![b(b"baz"), b(b"foo\xFFbar"), b(b"quux")]);
+ let got: Row =
+ deserialize_byte_record(&record, Some(&headers)).unwrap();
+ assert_eq!(
+ got,
+ Row {
+ h1: "baz".to_string(),
+ h2: BString::from(b"foo\xFFbar".to_vec()),
+ h3: "quux".to_string(),
+ }
+ );
+ }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..793ad3e
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,353 @@
+use std::error::Error as StdError;
+use std::fmt;
+use std::io;
+use std::result;
+
+use crate::byte_record::{ByteRecord, Position};
+use crate::deserializer::DeserializeError;
+
+/// A type alias for `Result<T, csv::Error>`.
+pub type Result<T> = result::Result<T, Error>;
+
+/// An error that can occur when processing CSV data.
+///
+/// This error can happen when writing or reading CSV data.
+///
+/// There are some important scenarios in which an error cannot occur.
+/// For example, if a CSV reader is used on an in-memory buffer with the
+/// `flexible` option enabled and one is reading records as raw byte strings,
+/// then no error can occur.
+#[derive(Debug)]
+pub struct Error(Box<ErrorKind>);
+
+impl Error {
+ /// A crate private constructor for `Error`.
+ pub(crate) fn new(kind: ErrorKind) -> Error {
+ Error(Box::new(kind))
+ }
+
+ /// Return the specific type of this error.
+ pub fn kind(&self) -> &ErrorKind {
+ &self.0
+ }
+
+ /// Unwrap this error into its underlying type.
+ pub fn into_kind(self) -> ErrorKind {
+ *self.0
+ }
+
+ /// Returns true if this is an I/O error.
+ ///
+ /// If this is true, the underlying `ErrorKind` is guaranteed to be
+ /// `ErrorKind::Io`.
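+    ///
+    /// # Example
+    ///
+    /// A small sketch using this crate's `From<io::Error>` impl:
+    ///
+    /// ```
+    /// use std::io;
+    ///
+    /// use csv::Error;
+    ///
+    /// let err = Error::from(io::Error::new(io::ErrorKind::Other, "boom"));
+    /// assert!(err.is_io_error());
+    /// ```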
+ pub fn is_io_error(&self) -> bool {
+ match *self.0 {
+ ErrorKind::Io(_) => true,
+ _ => false,
+ }
+ }
+
+ /// Return the position for this error, if one exists.
+ ///
+ /// This is a convenience function that permits callers to easily access
+ /// the position on an error without doing case analysis on `ErrorKind`.
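+    ///
+    /// # Example
+    ///
+    /// A small sketch: a record with the wrong number of fields yields an
+    /// error whose position points at the offending record.
+    ///
+    /// ```
+    /// use csv::Reader;
+    ///
+    /// let data = "a,b\n1";
+    /// let mut rdr = Reader::from_reader(data.as_bytes());
+    /// let err = rdr.records().next().unwrap().unwrap_err();
+    /// let pos = err.position().expect("position is available");
+    /// assert_eq!(pos.line(), 2);
+    /// ```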
+ pub fn position(&self) -> Option<&Position> {
+ self.0.position()
+ }
+}
+
+/// The specific type of an error.
+#[derive(Debug)]
+pub enum ErrorKind {
+ /// An I/O error that occurred while reading CSV data.
+ Io(io::Error),
+    /// A UTF-8 decoding error that occurred while reading CSV data into Rust
+ /// `String`s.
+ Utf8 {
+ /// The position of the record in which this error occurred, if
+ /// available.
+ pos: Option<Position>,
+ /// The corresponding UTF-8 error.
+ err: Utf8Error,
+ },
+ /// This error occurs when two records with an unequal number of fields
+ /// are found. This error only occurs when the `flexible` option in a
+ /// CSV reader/writer is disabled.
+ UnequalLengths {
+ /// The position of the first record with an unequal number of fields
+ /// to the previous record, if available.
+ pos: Option<Position>,
+ /// The expected number of fields in a record. This is the number of
+ /// fields in the record read prior to the record indicated by
+ /// `pos`.
+ expected_len: u64,
+ /// The number of fields in the bad record.
+ len: u64,
+ },
+ /// This error occurs when either the `byte_headers` or `headers` methods
+ /// are called on a CSV reader that was asked to `seek` before it parsed
+ /// the first record.
+ Seek,
+ /// An error of this kind occurs only when using the Serde serializer.
+ Serialize(String),
+ /// An error of this kind occurs only when performing automatic
+ /// deserialization with serde.
+ Deserialize {
+ /// The position of this error, if available.
+ pos: Option<Position>,
+ /// The deserialization error.
+ err: DeserializeError,
+ },
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl ErrorKind {
+ /// Return the position for this error, if one exists.
+ ///
+ /// This is a convenience function that permits callers to easily access
+ /// the position on an error without doing case analysis on `ErrorKind`.
+ pub fn position(&self) -> Option<&Position> {
+ match *self {
+ ErrorKind::Utf8 { ref pos, .. } => pos.as_ref(),
+ ErrorKind::UnequalLengths { ref pos, .. } => pos.as_ref(),
+ ErrorKind::Deserialize { ref pos, .. } => pos.as_ref(),
+ _ => None,
+ }
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(err: io::Error) -> Error {
+ Error::new(ErrorKind::Io(err))
+ }
+}
+
+impl From<Error> for io::Error {
+ fn from(err: Error) -> io::Error {
+ io::Error::new(io::ErrorKind::Other, err)
+ }
+}
+
+impl StdError for Error {
+ fn source(&self) -> Option<&(dyn StdError + 'static)> {
+ match *self.0 {
+ ErrorKind::Io(ref err) => Some(err),
+ ErrorKind::Utf8 { ref err, .. } => Some(err),
+ ErrorKind::UnequalLengths { .. } => None,
+ ErrorKind::Seek => None,
+ ErrorKind::Serialize(_) => None,
+ ErrorKind::Deserialize { ref err, .. } => Some(err),
+ _ => unreachable!(),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self.0 {
+ ErrorKind::Io(ref err) => err.fmt(f),
+ ErrorKind::Utf8 { pos: None, ref err } => {
+ write!(f, "CSV parse error: field {}: {}", err.field(), err)
+ }
+ ErrorKind::Utf8 { pos: Some(ref pos), ref err } => write!(
+ f,
+ "CSV parse error: record {} \
+ (line {}, field: {}, byte: {}): {}",
+ pos.record(),
+ pos.line(),
+ err.field(),
+ pos.byte(),
+ err
+ ),
+ ErrorKind::UnequalLengths { pos: None, expected_len, len } => {
+ write!(
+ f,
+ "CSV error: \
+ found record with {} fields, but the previous record \
+ has {} fields",
+ len, expected_len
+ )
+ }
+ ErrorKind::UnequalLengths {
+ pos: Some(ref pos),
+ expected_len,
+ len,
+ } => write!(
+ f,
+ "CSV error: record {} (line: {}, byte: {}): \
+ found record with {} fields, but the previous record \
+ has {} fields",
+ pos.record(),
+ pos.line(),
+ pos.byte(),
+ len,
+ expected_len
+ ),
+ ErrorKind::Seek => write!(
+ f,
+ "CSV error: cannot access headers of CSV data \
+ when the parser was seeked before the first record \
+ could be read"
+ ),
+ ErrorKind::Serialize(ref err) => {
+ write!(f, "CSV write error: {}", err)
+ }
+ ErrorKind::Deserialize { pos: None, ref err } => {
+ write!(f, "CSV deserialize error: {}", err)
+ }
+ ErrorKind::Deserialize { pos: Some(ref pos), ref err } => write!(
+ f,
+ "CSV deserialize error: record {} \
+ (line: {}, byte: {}): {}",
+ pos.record(),
+ pos.line(),
+ pos.byte(),
+ err
+ ),
+ _ => unreachable!(),
+ }
+ }
+}
+
+/// A UTF-8 validation error during record conversion.
+///
+/// This occurs when attempting to convert a `ByteRecord` into a
+/// `StringRecord`.
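+///
+/// # Example
+///
+/// A minimal sketch of recovering the original record after a failed
+/// conversion:
+///
+/// ```
+/// use csv::{ByteRecord, StringRecord};
+///
+/// let record = ByteRecord::from(vec![&b"foo"[..], &b"b\xFFr"[..]]);
+/// let err = StringRecord::from_byte_record(record).unwrap_err();
+/// // The second field (index 1) is where validation failed...
+/// assert_eq!(err.utf8_error().field(), 1);
+/// // ...and the original bytes can be recovered for further processing.
+/// let record = err.into_byte_record();
+/// assert_eq!(&record[1], &b"b\xFFr"[..]);
+/// ```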
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FromUtf8Error {
+ record: ByteRecord,
+ err: Utf8Error,
+}
+
+impl FromUtf8Error {
+ /// Create a new FromUtf8Error.
+ pub(crate) fn new(rec: ByteRecord, err: Utf8Error) -> FromUtf8Error {
+ FromUtf8Error { record: rec, err: err }
+ }
+
+ /// Access the underlying `ByteRecord` that failed UTF-8 validation.
+ pub fn into_byte_record(self) -> ByteRecord {
+ self.record
+ }
+
+ /// Access the underlying UTF-8 validation error.
+ pub fn utf8_error(&self) -> &Utf8Error {
+ &self.err
+ }
+}
+
+impl fmt::Display for FromUtf8Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.err.fmt(f)
+ }
+}
+
+impl StdError for FromUtf8Error {
+ fn source(&self) -> Option<&(dyn StdError + 'static)> {
+ Some(&self.err)
+ }
+}
+
+/// A UTF-8 validation error.
+///
+/// This occurs when attempting to convert a `ByteRecord` into a
+/// `StringRecord`.
+///
+/// The error includes the index of the field that failed validation, and the
+/// last byte at which valid UTF-8 was verified.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Utf8Error {
+ /// The field index of a byte record in which UTF-8 validation failed.
+ field: usize,
+ /// The index into the given field up to which valid UTF-8 was verified.
+ valid_up_to: usize,
+}
+
+/// Create a new UTF-8 error.
+pub fn new_utf8_error(field: usize, valid_up_to: usize) -> Utf8Error {
+ Utf8Error { field: field, valid_up_to: valid_up_to }
+}
+
+impl Utf8Error {
+ /// The field index of a byte record in which UTF-8 validation failed.
+ pub fn field(&self) -> usize {
+ self.field
+ }
+ /// The index into the given field up to which valid UTF-8 was verified.
+ pub fn valid_up_to(&self) -> usize {
+ self.valid_up_to
+ }
+}
+
+impl StdError for Utf8Error {}
+
+impl fmt::Display for Utf8Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ f,
+ "invalid utf-8: invalid UTF-8 in field {} near byte index {}",
+ self.field, self.valid_up_to
+ )
+ }
+}
+
+/// `IntoInnerError` occurs when consuming a `Writer` fails.
+///
+/// Consuming the `Writer` causes a flush to happen. If the flush fails, then
+/// this error is returned, which contains both the original `Writer` and
+/// the error that occurred.
+///
+/// The type parameter `W` is the unconsumed writer.
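+///
+/// # Example
+///
+/// A small sketch of handling a failed flush when consuming a `Writer`.
+/// (Writing to a `Vec<u8>` cannot actually fail, so the error arm here is
+/// purely illustrative.)
+///
+/// ```
+/// use csv::Writer;
+///
+/// let mut wtr = Writer::from_writer(vec![]);
+/// wtr.write_record(&["a", "b"]).unwrap();
+/// match wtr.into_inner() {
+///     Ok(buf) => assert_eq!(buf, b"a,b\n".to_vec()),
+///     Err(err) => {
+///         // Inspect the I/O error and recover the unconsumed writer.
+///         eprintln!("flush failed: {}", err.error());
+///         let _wtr = err.into_inner();
+///     }
+/// }
+/// ```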
+pub struct IntoInnerError<W> {
+ wtr: W,
+ err: io::Error,
+}
+
+impl<W> IntoInnerError<W> {
+ /// Creates a new `IntoInnerError`.
+ ///
+    /// (This is a visibility hack. It's visible throughout the crate, but
+    /// not outside of it.)
+ pub(crate) fn new(wtr: W, err: io::Error) -> IntoInnerError<W> {
+ IntoInnerError { wtr: wtr, err: err }
+ }
+
+ /// Returns the error which caused the call to `into_inner` to fail.
+ ///
+ /// This error was returned when attempting to flush the internal buffer.
+ pub fn error(&self) -> &io::Error {
+ &self.err
+ }
+
+ /// Returns the underlying writer which generated the error.
+ ///
+ /// The returned value can be used for error recovery, such as
+ /// re-inspecting the buffer.
+ pub fn into_inner(self) -> W {
+ self.wtr
+ }
+}
+
+impl<W: std::any::Any> StdError for IntoInnerError<W> {
+ fn source(&self) -> Option<&(dyn StdError + 'static)> {
+ self.err.source()
+ }
+}
+
+impl<W> fmt::Display for IntoInnerError<W> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.err.fmt(f)
+ }
+}
+
+impl<W> fmt::Debug for IntoInnerError<W> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.err.fmt(f)
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..3c771c9
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,359 @@
+/*!
+The `csv` crate provides a fast and flexible CSV reader and writer, with
+support for Serde.
+
+The [tutorial](tutorial/index.html) is a good place to start if you're new to
+Rust.
+
+The [cookbook](cookbook/index.html) will give you a variety of complete Rust
+programs that do CSV reading and writing.
+
+# Brief overview
+
+**If you're new to Rust**, you might find the
+[tutorial](tutorial/index.html)
+to be a good place to start.
+
+The primary types in this crate are
+[`Reader`](struct.Reader.html)
+and
+[`Writer`](struct.Writer.html),
+for reading and writing CSV data respectively.
+Correspondingly, to support CSV data with custom field or record delimiters
+(among many other things), you should use either a
+[`ReaderBuilder`](struct.ReaderBuilder.html)
+or a
+[`WriterBuilder`](struct.WriterBuilder.html),
+depending on whether you're reading or writing CSV data.
+
+Unless you're using Serde, the standard CSV record types are
+[`StringRecord`](struct.StringRecord.html)
+and
+[`ByteRecord`](struct.ByteRecord.html).
+`StringRecord` should be used when you know your data to be valid UTF-8.
+For data that may be invalid UTF-8, `ByteRecord` is suitable.
+
+Finally, the set of errors is described by the
+[`Error`](struct.Error.html)
+type.
+
+The rest of the types in this crate mostly correspond to more detailed errors,
+position information, configuration knobs or iterator types.
+
+# Setup
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+csv = "1.1"
+```
+
+If you want to use Serde's custom derive functionality on your custom structs,
+then add this to your `[dependencies]` section of `Cargo.toml`:
+
+```toml
+[dependencies]
+serde = { version = "1", features = ["derive"] }
+```
+
+# Example
+
+This example shows how to read CSV data from stdin and print each record to
+stdout.
+
+There are more examples in the [cookbook](cookbook/index.html).
+
+```no_run
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn example() -> Result<(), Box<dyn Error>> {
+ // Build the CSV reader and iterate over each record.
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ // The iterator yields Result<StringRecord, Error>, so we check the
+ // error here.
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-read-basic < examples/data/smallpop.csv
+```
+
+# Example with Serde
+
+This example shows how to read CSV data from stdin into your own custom struct.
+By default, the member names of the struct are matched with the values in the
+header record of your CSV data.
+
+```no_run
+use std::error::Error;
+use std::io;
+use std::process;
+
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+struct Record {
+ city: String,
+ region: String,
+ country: String,
+ population: Option<u64>,
+}
+
+fn example() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.deserialize() {
+ // Notice that we need to provide a type hint for automatic
+ // deserialization.
+ let record: Record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = example() {
+ println!("error running example: {}", err);
+ process::exit(1);
+ }
+}
+```
+
+The above example can be run like so:
+
+```ignore
+$ git clone git://github.com/BurntSushi/rust-csv
+$ cd rust-csv
+$ cargo run --example cookbook-read-serde < examples/data/smallpop.csv
+```
+
+*/
+
+#![deny(missing_docs)]
+
+use std::result;
+
+use serde::{Deserialize, Deserializer};
+
+pub use crate::byte_record::{ByteRecord, ByteRecordIter, Position};
+pub use crate::deserializer::{DeserializeError, DeserializeErrorKind};
+pub use crate::error::{
+ Error, ErrorKind, FromUtf8Error, IntoInnerError, Result, Utf8Error,
+};
+pub use crate::reader::{
+ ByteRecordsIntoIter, ByteRecordsIter, DeserializeRecordsIntoIter,
+ DeserializeRecordsIter, Reader, ReaderBuilder, StringRecordsIntoIter,
+ StringRecordsIter,
+};
+pub use crate::string_record::{StringRecord, StringRecordIter};
+pub use crate::writer::{Writer, WriterBuilder};
+
+mod byte_record;
+pub mod cookbook;
+mod deserializer;
+mod error;
+mod reader;
+mod serializer;
+mod string_record;
+pub mod tutorial;
+mod writer;
+
+/// The quoting style to use when writing CSV data.
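+///
+/// # Example
+///
+/// A short sketch of forcing quotes around every field when writing:
+///
+/// ```
+/// use csv::{QuoteStyle, WriterBuilder};
+///
+/// let mut wtr = WriterBuilder::new()
+///     .quote_style(QuoteStyle::Always)
+///     .from_writer(vec![]);
+/// wtr.write_record(&["a", "b"]).unwrap();
+/// let data = String::from_utf8(wtr.into_inner().unwrap()).unwrap();
+/// assert_eq!(data, "\"a\",\"b\"\n");
+/// ```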
+#[derive(Clone, Copy, Debug)]
+pub enum QuoteStyle {
+ /// This puts quotes around every field. Always.
+ Always,
+ /// This puts quotes around fields only when necessary.
+ ///
+ /// They are necessary when fields contain a quote, delimiter or record
+ /// terminator. Quotes are also necessary when writing an empty record
+ /// (which is indistinguishable from a record with one empty field).
+ ///
+ /// This is the default.
+ Necessary,
+ /// This puts quotes around all fields that are non-numeric. Namely, when
+ /// writing a field that does not parse as a valid float or integer, then
+ /// quotes will be used even if they aren't strictly necessary.
+ NonNumeric,
+ /// This *never* writes quotes, even if it would produce invalid CSV data.
+ Never,
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl QuoteStyle {
+ fn to_core(self) -> csv_core::QuoteStyle {
+ match self {
+ QuoteStyle::Always => csv_core::QuoteStyle::Always,
+ QuoteStyle::Necessary => csv_core::QuoteStyle::Necessary,
+ QuoteStyle::NonNumeric => csv_core::QuoteStyle::NonNumeric,
+ QuoteStyle::Never => csv_core::QuoteStyle::Never,
+ _ => unreachable!(),
+ }
+ }
+}
+
+impl Default for QuoteStyle {
+ fn default() -> QuoteStyle {
+ QuoteStyle::Necessary
+ }
+}
+
+/// A record terminator.
+///
+/// Use this to specify the record terminator while parsing CSV. The default is
+/// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator.
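+///
+/// # Example
+///
+/// A short sketch of the writer side, which accepts the same type via
+/// `WriterBuilder::terminator`:
+///
+/// ```
+/// use csv::{Terminator, WriterBuilder};
+///
+/// let mut wtr = WriterBuilder::new()
+///     .terminator(Terminator::Any(b';'))
+///     .from_writer(vec![]);
+/// wtr.write_record(&["a", "b"]).unwrap();
+/// wtr.write_record(&["x", "y"]).unwrap();
+/// let data = String::from_utf8(wtr.into_inner().unwrap()).unwrap();
+/// assert_eq!(data, "a,b;x,y;");
+/// ```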
+#[derive(Clone, Copy, Debug)]
+pub enum Terminator {
+ /// Parses `\r`, `\n` or `\r\n` as a single record terminator.
+ CRLF,
+ /// Parses the byte given as a record terminator.
+ Any(u8),
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl Terminator {
+ /// Convert this to the csv_core type of the same name.
+ fn to_core(self) -> csv_core::Terminator {
+ match self {
+ Terminator::CRLF => csv_core::Terminator::CRLF,
+ Terminator::Any(b) => csv_core::Terminator::Any(b),
+ _ => unreachable!(),
+ }
+ }
+}
+
+impl Default for Terminator {
+ fn default() -> Terminator {
+ Terminator::CRLF
+ }
+}
+
+/// The whitespace preservation behaviour when reading CSV data.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum Trim {
+ /// Preserves fields and headers. This is the default.
+ None,
+ /// Trim whitespace from headers.
+ Headers,
+ /// Trim whitespace from fields, but not headers.
+ Fields,
+ /// Trim whitespace from fields and headers.
+ All,
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl Trim {
+ fn should_trim_fields(&self) -> bool {
+ self == &Trim::Fields || self == &Trim::All
+ }
+
+ fn should_trim_headers(&self) -> bool {
+ self == &Trim::Headers || self == &Trim::All
+ }
+}
+
+impl Default for Trim {
+ fn default() -> Trim {
+ Trim::None
+ }
+}
+
+/// A custom Serde deserializer for possibly invalid `Option<T>` fields.
+///
+/// When deserializing CSV data, it is sometimes desirable to simply ignore
+/// fields with invalid data. For example, there might be a field that is
+/// usually a number, but will occasionally contain garbage data that causes
+/// number parsing to fail.
+///
+/// You might be inclined to use, say, `Option<i32>` for fields such as this.
+/// By default, however, `Option<i32>` will either capture *empty* fields with
+/// `None` or valid numeric fields with `Some(the_number)`. If the field is
+/// non-empty and not a valid number, then deserialization will return an error
+/// instead of using `None`.
+///
+/// This function allows you to override this default behavior. Namely, if
+/// `Option<T>` is deserialized with non-empty but invalid data, then the value
+/// will be `None` and the error will be ignored.
+///
+/// # Example
+///
+/// This example shows how to parse CSV records with numerical data, even if
+/// some numerical data is absent or invalid. Without the
+/// `serde(deserialize_with = "...")` annotations, this example would return
+/// an error.
+///
+/// ```
+/// use std::error::Error;
+///
+/// use csv::Reader;
+/// use serde::Deserialize;
+///
+/// #[derive(Debug, Deserialize, Eq, PartialEq)]
+/// struct Row {
+/// #[serde(deserialize_with = "csv::invalid_option")]
+/// a: Option<i32>,
+/// #[serde(deserialize_with = "csv::invalid_option")]
+/// b: Option<i32>,
+/// #[serde(deserialize_with = "csv::invalid_option")]
+/// c: Option<i32>,
+/// }
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<(), Box<dyn Error>> {
+/// let data = "\
+/// a,b,c
+/// 5,\"\",xyz
+/// ";
+/// let mut rdr = Reader::from_reader(data.as_bytes());
+/// if let Some(result) = rdr.deserialize().next() {
+/// let record: Row = result?;
+/// assert_eq!(record, Row { a: Some(5), b: None, c: None });
+/// Ok(())
+/// } else {
+/// Err(From::from("expected at least one record but got none"))
+/// }
+/// }
+/// ```
+pub fn invalid_option<'de, D, T>(de: D) -> result::Result<Option<T>, D::Error>
+where
+ D: Deserializer<'de>,
+ Option<T>: Deserialize<'de>,
+{
+ Option::<T>::deserialize(de).or_else(|_| Ok(None))
+}
diff --git a/src/reader.rs b/src/reader.rs
new file mode 100644
index 0000000..da53265
--- /dev/null
+++ b/src/reader.rs
@@ -0,0 +1,2633 @@
+use std::fs::File;
+use std::io::{self, BufRead, Seek};
+use std::marker::PhantomData;
+use std::path::Path;
+use std::result;
+
+use csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder};
+use serde::de::DeserializeOwned;
+
+use crate::byte_record::{ByteRecord, Position};
+use crate::error::{Error, ErrorKind, Result, Utf8Error};
+use crate::string_record::StringRecord;
+use crate::{Terminator, Trim};
+
+/// Builds a CSV reader with various configuration knobs.
+///
+/// This builder can be used to tweak the field delimiter, record terminator
+/// and more. Once a CSV `Reader` is built, its configuration cannot be
+/// changed.
+#[derive(Debug)]
+pub struct ReaderBuilder {
+ capacity: usize,
+ flexible: bool,
+ has_headers: bool,
+ trim: Trim,
+ /// The underlying CSV parser builder.
+ ///
+ /// We explicitly put this on the heap because CoreReaderBuilder embeds an
+ /// entire DFA transition table, which along with other things, tallies up
+ /// to almost 500 bytes on the stack.
+ builder: Box<CoreReaderBuilder>,
+}
+
+impl Default for ReaderBuilder {
+ fn default() -> ReaderBuilder {
+ ReaderBuilder {
+ capacity: 8 * (1 << 10),
+ flexible: false,
+ has_headers: true,
+ trim: Trim::default(),
+ builder: Box::new(CoreReaderBuilder::default()),
+ }
+ }
+}
+
+impl ReaderBuilder {
+ /// Create a new builder for configuring CSV parsing.
+ ///
+ /// To convert a builder into a reader, call one of the methods starting
+ /// with `from_`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ReaderBuilder, StringRecord};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ";
+ /// let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
+ ///
+ /// let records = rdr
+ /// .records()
+ /// .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
+ /// assert_eq!(records, vec![
+ /// vec!["Boston", "United States", "4628910"],
+ /// vec!["Concord", "United States", "42695"],
+ /// ]);
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn new() -> ReaderBuilder {
+ ReaderBuilder::default()
+ }
+
+ /// Build a CSV parser from this configuration that reads data from the
+ /// given file path.
+ ///
+ /// If there was a problem opening the file at the given path, then this
+ /// returns the corresponding error.
+ ///
+ /// # Example
+ ///
+ /// ```no_run
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
+ /// for result in rdr.records() {
+ /// let record = result?;
+ /// println!("{:?}", record);
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
+ Ok(Reader::new(self, File::open(path)?))
+ }
+
+ /// Build a CSV parser from this configuration that reads data from `rdr`.
+ ///
+ /// Note that the CSV reader is buffered automatically, so you should not
+ /// wrap `rdr` in a buffered reader like `io::BufReader`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ";
+ /// let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
+ /// for result in rdr.records() {
+ /// let record = result?;
+ /// println!("{:?}", record);
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
+ Reader::new(self, rdr)
+ }
+
+ /// The field delimiter to use when parsing CSV.
+ ///
+ /// The default is `b','`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city;country;pop
+ /// Boston;United States;4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .delimiter(b';')
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
+ self.builder.delimiter(delimiter);
+ self
+ }
+
+ /// Whether to treat the first row as a special header row.
+ ///
+ /// By default, the first row is treated as a special header row, which
+ /// means the header is never returned by any of the record reading methods
+ /// or iterators. When this is disabled (`yes` set to `false`), the first
+ /// row is not treated specially.
+ ///
+ /// Note that the `headers` and `byte_headers` methods are unaffected by
+ /// whether this is set. Those methods always return the first record.
+ ///
+ /// # Example
+ ///
+ /// This example shows what happens when `has_headers` is disabled.
+ /// Namely, the first row is treated just like any other row.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .has_headers(false)
+ /// .from_reader(data.as_bytes());
+ /// let mut iter = rdr.records();
+ ///
+ /// // Read the first record.
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["city", "country", "pop"]);
+ /// } else {
+ /// return Err(From::from(
+ /// "expected at least two records but got none"));
+ /// }
+ ///
+ /// // Read the second record.
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// } else {
+ /// return Err(From::from(
+ /// "expected at least two records but got one"))
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
+ self.has_headers = yes;
+ self
+ }
+
+ /// Whether the number of fields in records is allowed to change or not.
+ ///
+ /// When disabled (which is the default), parsing CSV data will return an
+ /// error if a record is found with a number of fields different from the
+ /// number of fields in a previous record.
+ ///
+ /// When enabled, this error checking is turned off.
+ ///
+ /// # Example: flexible records enabled
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// // Notice that the first row is missing the population count.
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .flexible(true)
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ ///
+ /// # Example: flexible records disabled
+ ///
+ /// This shows the error that appears when records of unequal length
+ /// are found and flexible records have been disabled (which is the
+ /// default).
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ErrorKind, ReaderBuilder};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// // Notice that the first row is missing the population count.
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .flexible(false)
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(Err(err)) = rdr.records().next() {
+ /// match *err.kind() {
+ /// ErrorKind::UnequalLengths { expected_len, len, .. } => {
+ /// // The header row has 3 fields...
+ /// assert_eq!(expected_len, 3);
+ /// // ... but the first row has only 2 fields.
+ /// assert_eq!(len, 2);
+ /// Ok(())
+ /// }
+ /// ref wrong => {
+ /// Err(From::from(format!(
+ /// "expected UnequalLengths error but got {:?}",
+ /// wrong)))
+ /// }
+ /// }
+ /// } else {
+ /// Err(From::from(
+ /// "expected at least one errored record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
+ self.flexible = yes;
+ self
+ }
+
+ /// Whether fields are trimmed of leading and trailing whitespace or not.
+ ///
+ /// By default, no trimming is performed. This method permits one to
+ /// override that behavior and choose one of the following options:
+ ///
+ /// 1. `Trim::Headers` trims only header values.
+ /// 2. `Trim::Fields` trims only non-header or "field" values.
+ /// 3. `Trim::All` trims both header and non-header values.
+ ///
+ /// A value is only interpreted as a header value if this CSV reader is
+ /// configured to read a header record (which is the default).
+ ///
+ /// When reading string records, characters meeting the definition of
+ /// Unicode whitespace are trimmed. When reading byte records, characters
+ /// meeting the definition of ASCII whitespace are trimmed. ASCII
+ /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
+ ///
+ /// # Example
+ ///
+ /// This example shows what happens when all values are trimmed.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ReaderBuilder, StringRecord, Trim};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city , country , pop
+ /// Boston,\"
+ /// United States\",4628910
+ /// Concord, United States ,42695
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .trim(Trim::All)
+ /// .from_reader(data.as_bytes());
+ /// let records = rdr
+ /// .records()
+ /// .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
+ /// assert_eq!(records, vec![
+ /// vec!["Boston", "United States", "4628910"],
+ /// vec!["Concord", "United States", "42695"],
+ /// ]);
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
+ self.trim = trim;
+ self
+ }
+
+ /// The record terminator to use when parsing CSV.
+ ///
+ /// A record terminator can be any single byte. The default is a special
+ /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
+ /// or `\r\n` as a single record terminator.
+ ///
+ /// # Example: `$` as a record terminator
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ReaderBuilder, Terminator};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "city,country,pop$Boston,United States,4628910";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .terminator(Terminator::Any(b'$'))
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
+ self.builder.terminator(term.to_core());
+ self
+ }
+
+ /// The quote character to use when parsing CSV.
+ ///
+ /// The default is `b'"'`.
+ ///
+ /// # Example: single quotes instead of double quotes
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,'United States',4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .quote(b'\'')
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
+ self.builder.quote(quote);
+ self
+ }
+
+ /// The escape character to use when parsing CSV.
+ ///
+ /// In some variants of CSV, quotes are escaped using a special escape
+ /// character like `\` (instead of escaping quotes by doubling them).
+ ///
+ /// By default, recognizing these idiosyncratic escapes is disabled.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,\"The \\\"United\\\" States\",4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .escape(Some(b'\\'))
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec![
+ /// "Boston", "The \"United\" States", "4628910",
+ /// ]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
+ self.builder.escape(escape);
+ self
+ }
+
+ /// Enable double quote escapes.
+ ///
+ /// This is enabled by default, but it may be disabled. When disabled,
+ /// doubled quotes are not interpreted as escapes.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,\"The \"\"United\"\" States\",4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .double_quote(false)
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec![
+ /// "Boston", "The \"United\"\" States\"", "4628910",
+ /// ]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
+ self.builder.double_quote(yes);
+ self
+ }
+
+ /// Enable or disable quoting.
+ ///
+ /// This is enabled by default, but it may be disabled. When disabled,
+ /// quotes are not treated specially.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,\"The United States,4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .quoting(false)
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec![
+ /// "Boston", "\"The United States", "4628910",
+ /// ]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
+ self.builder.quoting(yes);
+ self
+ }
+
+ /// The comment character to use when parsing CSV.
+ ///
+ /// If the start of a record begins with the byte given here, then that
+ /// line is ignored by the CSV parser.
+ ///
+ /// This is disabled by default.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// #Concord,United States,42695
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .comment(Some(b'#'))
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
+ self.builder.comment(comment);
+ self
+ }
+
+ /// A convenience method for specifying a configuration to read ASCII
+ /// delimited text.
+ ///
+ /// This sets the delimiter and record terminator to the ASCII unit
+ /// separator (`\x1F`) and record separator (`\x1E`), respectively.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::ReaderBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .ascii()
+ /// .from_reader(data.as_bytes());
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn ascii(&mut self) -> &mut ReaderBuilder {
+ self.builder.ascii();
+ self
+ }
+
+ /// Set the capacity (in bytes) of the buffer used in the CSV reader.
+    /// This defaults to 8 KiB.
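+    ///
+    /// # Example
+    ///
+    /// A small sketch of bumping the buffer size, which can help with
+    /// inputs containing very long records:
+    ///
+    /// ```
+    /// use csv::ReaderBuilder;
+    ///
+    /// let data = "city,country,pop";
+    /// let mut rdr = ReaderBuilder::new()
+    ///     .buffer_capacity(1 << 20) // 1 MiB
+    ///     .from_reader(data.as_bytes());
+    /// assert!(rdr.headers().is_ok());
+    /// ```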
+ pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
+ self.capacity = capacity;
+ self
+ }
+
+ /// Enable or disable the NFA for parsing CSV.
+ ///
+ /// This is intended to be a debug option. The NFA is always slower than
+ /// the DFA.
+ #[doc(hidden)]
+ pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
+ self.builder.nfa(yes);
+ self
+ }
+}
+
+/// An already configured CSV reader.
+///
+/// A CSV reader takes as input CSV data and transforms that into standard Rust
+/// values. The most flexible way to read CSV data is as a sequence of records,
+/// where a record is a sequence of fields and each field is a string. However,
+/// a reader can also deserialize CSV data into Rust types like `i64` or
+/// `(String, f64, f64, f64)` or even a custom struct automatically using
+/// Serde.
+///
+/// # Configuration
+///
+/// A CSV reader has a couple convenient constructor methods like `from_path`
+/// and `from_reader`. However, if you want to configure the CSV reader to use
+/// a different delimiter or quote character (among many other things), then
+/// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
+/// a `Reader`. For example, to change the field delimiter:
+///
+/// ```
+/// use std::error::Error;
+/// use csv::ReaderBuilder;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<(), Box<dyn Error>> {
+/// let data = "\
+/// city;country;pop
+/// Boston;United States;4628910
+/// ";
+/// let mut rdr = ReaderBuilder::new()
+/// .delimiter(b';')
+/// .from_reader(data.as_bytes());
+///
+/// if let Some(result) = rdr.records().next() {
+/// let record = result?;
+/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+/// Ok(())
+/// } else {
+/// Err(From::from("expected at least one record but got none"))
+/// }
+/// }
+/// ```
+///
+/// # Error handling
+///
+/// In general, CSV *parsing* does not ever return an error. That is, there is
+/// no such thing as malformed CSV data. Instead, this reader will prioritize
+/// finding a parse over rejecting CSV data that it does not understand. This
+/// choice was inspired by other popular CSV parsers, but also because it is
+/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
+/// it might still be possible to work with the data. In the land of CSV, there
+/// is no "right" or "wrong," only "right" and "less right."
+///
+/// With that said, a number of errors can occur while reading CSV data:
+///
+/// * By default, all records in CSV data must have the same number of fields.
+/// If a record is found with a different number of fields than a prior
+/// record, then an error is returned. This behavior can be disabled by
+/// enabling flexible parsing via the `flexible` method on
+/// [`ReaderBuilder`](struct.ReaderBuilder.html).
+/// * When reading CSV data from a resource (like a file), it is possible for
+/// reading from the underlying resource to fail. This will return an error.
+///   After encountering such an error (unless `seek` is used), subsequent
+///   calls to the `Reader` will behave as if end of file had been reached,
+///   in order to avoid an infinite loop of repeatedly attempting to read
+///   the next record after one has errored.
+/// * When reading CSV data into `String` or `&str` fields (e.g., via a
+/// [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
+/// enforced. If CSV data is invalid UTF-8, then an error is returned. If
+/// you want to read invalid UTF-8, then you should use the byte oriented
+/// APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
+/// support for another encoding entirely, then you'll need to use another
+/// crate to transcode your CSV data to UTF-8 before parsing it.
+/// * When using Serde to deserialize CSV data into Rust types, it is possible
+/// for a number of additional errors to occur. For example, deserializing
+/// a field `xyz` into an `i32` field will result in an error.
+///
+/// For more details on the precise semantics of errors, see the
+/// [`Error`](enum.Error.html) type.
+#[derive(Debug)]
+pub struct Reader<R> {
+ /// The underlying CSV parser.
+ ///
+ /// We explicitly put this on the heap because CoreReader embeds an entire
+ /// DFA transition table, which along with other things, tallies up to
+ /// almost 500 bytes on the stack.
+ core: Box<CoreReader>,
+ /// The underlying reader.
+ rdr: io::BufReader<R>,
+ /// Various state tracking.
+ ///
+ /// There is more state embedded in the `CoreReader`.
+ state: ReaderState,
+}
+
+#[derive(Debug)]
+struct ReaderState {
+ /// When set, this contains the first row of any parsed CSV data.
+ ///
+ /// This is always populated, regardless of whether `has_headers` is set.
+ headers: Option<Headers>,
+ /// When set, the first row of parsed CSV data is excluded from things
+ /// that read records, like iterators and `read_record`.
+ has_headers: bool,
+ /// When set, there is no restriction on the length of records. When not
+ /// set, every record must have the same number of fields, or else an error
+ /// is reported.
+ flexible: bool,
+ trim: Trim,
+ /// The number of fields in the first record parsed.
+ first_field_count: Option<u64>,
+ /// The current position of the parser.
+ ///
+ /// Note that this position is only observable by callers at the start
+ /// of a record. More granular positions are not supported.
+ cur_pos: Position,
+ /// Whether the first record has been read or not.
+ first: bool,
+ /// Whether the reader has been seeked or not.
+ seeked: bool,
+ /// Whether EOF of the underlying reader has been reached or not.
+ ///
+ /// IO errors on the underlying reader will be considered as an EOF for
+ /// subsequent read attempts, as it would be incorrect to keep on trying
+ /// to read when the underlying reader has broken.
+ ///
+ /// For clarity, having the best `Debug` impl and in case they need to be
+ /// treated differently at some point, we store whether the `EOF` is
+ /// considered because an actual EOF happened, or because we encoundered
+ /// an IO error.
+ /// This has no additional runtime cost.
+ eof: ReaderEofState,
+}
+
+/// Whether EOF of the underlying reader has been reached or not.
+///
+/// IO errors on the underlying reader will be considered as an EOF for
+/// subsequent read attempts, as it would be incorrect to keep on trying
+/// to read when the underlying reader has broken.
+///
+/// For clarity, for the sake of a more useful `Debug` impl, and in case the
+/// two cases ever need to be treated differently at some point, we record
+/// whether the EOF state arose from an actual end of file or from an I/O
+/// error.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ReaderEofState {
+ NotEof,
+ Eof,
+ IOError,
+}
+
+/// Headers encapsulates any data associated with the headers of CSV data.
+///
+/// The headers always correspond to the first row.
+#[derive(Debug)]
+struct Headers {
+ /// The header, as raw bytes.
+ byte_record: ByteRecord,
+ /// The header, as valid UTF-8 (or a UTF-8 error).
+ string_record: result::Result<StringRecord, Utf8Error>,
+}
+
+impl Reader<File> {
+ /// Create a new CSV parser with a default configuration for the given
+ /// file path.
+ ///
+ /// To customize CSV parsing, use a `ReaderBuilder`.
+ ///
+ /// # Example
+ ///
+ /// ```no_run
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut rdr = Reader::from_path("foo.csv")?;
+ /// for result in rdr.records() {
+ /// let record = result?;
+ /// println!("{:?}", record);
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
+ ReaderBuilder::new().from_path(path)
+ }
+}
+
+impl<R: io::Read> Reader<R> {
+ /// Create a new CSV reader given a builder and a source of underlying
+ /// bytes.
+ fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
+ Reader {
+ core: Box::new(builder.builder.build()),
+ rdr: io::BufReader::with_capacity(builder.capacity, rdr),
+ state: ReaderState {
+ headers: None,
+ has_headers: builder.has_headers,
+ flexible: builder.flexible,
+ trim: builder.trim,
+ first_field_count: None,
+ cur_pos: Position::new(),
+ first: false,
+ seeked: false,
+ eof: ReaderEofState::NotEof,
+ },
+ }
+ }
+
+ /// Create a new CSV parser with a default configuration for the given
+ /// reader.
+ ///
+ /// To customize CSV parsing, use a `ReaderBuilder`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// for result in rdr.records() {
+ /// let record = result?;
+ /// println!("{:?}", record);
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_reader(rdr: R) -> Reader<R> {
+ ReaderBuilder::new().from_reader(rdr)
+ }
+
+ /// Returns a borrowed iterator over deserialized records.
+ ///
+ /// Each item yielded by this iterator is a `Result<D, Error>`.
+ /// Therefore, in order to access the record, callers must handle the
+ /// possibility of error (typically with `try!` or `?`).
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this does not include the first record. Additionally,
+ /// if `has_headers` is enabled, then deserializing into a struct will
+ /// automatically align the values in each row to the fields of a struct
+ /// based on the header row.
+ ///
+ /// # Example
+ ///
+ /// This shows how to deserialize CSV data into normal Rust structs. The
+ /// fields of the header row are used to match up the values in each row
+ /// to the fields of the struct.
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::Reader;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Debug, Deserialize, Eq, PartialEq)]
+ /// struct Row {
+ /// city: String,
+ /// country: String,
+ /// #[serde(rename = "popcount")]
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.deserialize();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record: Row = result?;
+ /// assert_eq!(record, Row {
+ /// city: "Boston".to_string(),
+ /// country: "United States".to_string(),
+ /// population: 4628910,
+ /// });
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ ///
+ /// # Rules
+ ///
+ /// For the most part, any Rust type that maps straightforwardly to a CSV
+ /// record is supported. This includes maps, structs, tuples and tuple
+ /// structs. Other Rust types, such as `Vec`s, arrays, and enums, have
+ /// a more complicated story. In general, when working with CSV data, one
+ /// should avoid *nested sequences* as much as possible.
+ ///
+ /// Maps, structs, tuples and tuple structs map to CSV records in a simple
+ /// way. Tuples and tuple structs decode their fields in the order that
+ /// they are defined. Structs will do the same only if `has_headers` has
+ /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
+ /// otherwise, structs and maps are deserialized based on the fields
+ /// defined in the header row. (If there is no header row, then
+ /// deserializing into a map will result in an error.)
+ ///
+ /// Nested sequences are supported in a limited capacity. Namely, they
+ /// are flattened. As a result, it's often useful to use a `Vec` to capture
+ /// a "tail" of fields in a record:
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::ReaderBuilder;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Debug, Deserialize, Eq, PartialEq)]
+ /// struct Row {
+ /// label: String,
+ /// values: Vec<i32>,
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "foo,1,2,3";
+ /// let mut rdr = ReaderBuilder::new()
+ /// .has_headers(false)
+ /// .from_reader(data.as_bytes());
+ /// let mut iter = rdr.deserialize();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record: Row = result?;
+ /// assert_eq!(record, Row {
+ /// label: "foo".to_string(),
+ /// values: vec![1, 2, 3],
+ /// });
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ ///
+ /// In the above example, adding another field to the `Row` struct after
+ /// the `values` field will result in a deserialization error. This is
+ /// because the deserializer doesn't know when to stop reading fields
+ /// into the `values` vector, so it will consume the rest of the fields in
+ /// the record leaving none left over for the additional field.
+ ///
+ /// Finally, simple enums in Rust can be deserialized as well. Namely,
+ /// an enum's variants must either have no arguments or exactly one
+ /// argument. Variants with no arguments are deserialized based on
+ /// which variant name the field matches. Variants with a single argument
+ /// are deserialized based on which variant can store the data. The latter
+ /// is only supported when using "untagged" enum deserialization. The
+ /// following example shows both forms in action:
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::Reader;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Debug, Deserialize, PartialEq)]
+ /// struct Row {
+ /// label: Label,
+ /// value: Number,
+ /// }
+ ///
+ /// #[derive(Debug, Deserialize, PartialEq)]
+ /// #[serde(rename_all = "lowercase")]
+ /// enum Label {
+ /// Celsius,
+ /// Fahrenheit,
+ /// }
+ ///
+ /// #[derive(Debug, Deserialize, PartialEq)]
+ /// #[serde(untagged)]
+ /// enum Number {
+ /// Integer(i64),
+ /// Float(f64),
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// label,value
+ /// celsius,22.2222
+ /// fahrenheit,72
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.deserialize();
+ ///
+ /// // Read the first record.
+ /// if let Some(result) = iter.next() {
+ /// let record: Row = result?;
+ /// assert_eq!(record, Row {
+ /// label: Label::Celsius,
+ /// value: Number::Float(22.2222),
+ /// });
+ /// } else {
+ /// return Err(From::from(
+ /// "expected at least two records but got none"));
+ /// }
+ ///
+ /// // Read the second record.
+ /// if let Some(result) = iter.next() {
+ /// let record: Row = result?;
+ /// assert_eq!(record, Row {
+ /// label: Label::Fahrenheit,
+ /// value: Number::Integer(72),
+ /// });
+ /// Ok(())
+ /// } else {
+ /// Err(From::from(
+ /// "expected at least two records but got only one"))
+ /// }
+ /// }
+ /// ```
+ pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
+ where
+ D: DeserializeOwned,
+ {
+ DeserializeRecordsIter::new(self)
+ }
+
+ /// Returns an owned iterator over deserialized records.
+ ///
+ /// Each item yielded by this iterator is a `Result<D, Error>`.
+ /// Therefore, in order to access the record, callers must handle the
+ /// possibility of error (typically with `try!` or `?`).
+ ///
+ /// This is mostly useful when you want to return a CSV iterator or store
+ /// it somewhere.
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this does not include the first record. Additionally,
+ /// if `has_headers` is enabled, then deserializing into a struct will
+ /// automatically align the values in each row to the fields of a struct
+ /// based on the header row.
+ ///
+ /// For more detailed deserialization rules, see the documentation on the
+ /// `deserialize` method.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::Reader;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Debug, Deserialize, Eq, PartialEq)]
+ /// struct Row {
+ /// city: String,
+ /// country: String,
+ /// #[serde(rename = "popcount")]
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// ";
+ /// let rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.into_deserialize();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record: Row = result?;
+ /// assert_eq!(record, Row {
+ /// city: "Boston".to_string(),
+ /// country: "United States".to_string(),
+ /// population: 4628910,
+ /// });
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
+ where
+ D: DeserializeOwned,
+ {
+ DeserializeRecordsIntoIter::new(self)
+ }
+
+ /// Returns a borrowed iterator over all records as strings.
+ ///
+ /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
+ /// Therefore, in order to access the record, callers must handle the
+ /// possibility of error (typically with `try!` or `?`).
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this does not include the first record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.records();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn records(&mut self) -> StringRecordsIter<R> {
+ StringRecordsIter::new(self)
+ }
+
+ /// Returns an owned iterator over all records as strings.
+ ///
+ /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
+ /// Therefore, in order to access the record, callers must handle the
+ /// possibility of error (typically with `try!` or `?`).
+ ///
+ /// This is mostly useful when you want to return a CSV iterator or store
+ /// it somewhere.
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this does not include the first record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.into_records();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn into_records(self) -> StringRecordsIntoIter<R> {
+ StringRecordsIntoIter::new(self)
+ }
+
+ /// Returns a borrowed iterator over all records as raw bytes.
+ ///
+ /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
+ /// Therefore, in order to access the record, callers must handle the
+ /// possibility of error (typically with `try!` or `?`).
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this does not include the first record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.byte_records();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
+ ByteRecordsIter::new(self)
+ }
+
+ /// Returns an owned iterator over all records as raw bytes.
+ ///
+ /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
+ /// Therefore, in order to access the record, callers must handle the
+ /// possibility of error (typically with `try!` or `?`).
+ ///
+ /// This is mostly useful when you want to return a CSV iterator or store
+ /// it somewhere.
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this does not include the first record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let rdr = Reader::from_reader(data.as_bytes());
+ /// let mut iter = rdr.into_byte_records();
+ ///
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
+ ByteRecordsIntoIter::new(self)
+ }
+
+ /// Returns a reference to the first row read by this parser.
+ ///
+ /// If no row has been read yet, then this will force parsing of the first
+ /// row.
+ ///
+ /// If there was a problem parsing the row or if it wasn't valid UTF-8,
+ /// then this returns an error.
+ ///
+ /// If the underlying reader emits EOF before any data, then this returns
+ /// an empty record.
+ ///
+ /// Note that this method may be used regardless of whether `has_headers`
+ /// was enabled (but it is enabled by default).
+ ///
+ /// # Example
+ ///
+ /// This example shows how to get the header row of CSV data. Notice that
+ /// the header row does not appear as a record in the iterator!
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ ///
+ /// // We can read the headers before iterating.
+ /// {
+ /// // `headers` borrows from the reader, so we put this in its
+ /// // own scope. That way, the borrow ends before we try iterating
+ /// // below. Alternatively, we could clone the headers.
+ /// let headers = rdr.headers()?;
+ /// assert_eq!(headers, vec!["city", "country", "pop"]);
+ /// }
+ ///
+ /// if let Some(result) = rdr.records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// } else {
+ /// return Err(From::from(
+ /// "expected at least one record but got none"))
+ /// }
+ ///
+ /// // We can also read the headers after iterating.
+ /// let headers = rdr.headers()?;
+ /// assert_eq!(headers, vec!["city", "country", "pop"]);
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn headers(&mut self) -> Result<&StringRecord> {
+ if self.state.headers.is_none() {
+ let mut record = ByteRecord::new();
+ self.read_byte_record_impl(&mut record)?;
+ self.set_headers_impl(Err(record));
+ }
+ let headers = self.state.headers.as_ref().unwrap();
+ match headers.string_record {
+ Ok(ref record) => Ok(record),
+ Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
+ pos: headers.byte_record.position().map(Clone::clone),
+ err: err.clone(),
+ })),
+ }
+ }
+
+ /// Returns a reference to the first row read by this parser as raw bytes.
+ ///
+ /// If no row has been read yet, then this will force parsing of the first
+ /// row.
+ ///
+ /// If there was a problem parsing the row then this returns an error.
+ ///
+ /// If the underlying reader emits EOF before any data, then this returns
+ /// an empty record.
+ ///
+ /// Note that this method may be used regardless of whether `has_headers`
+ /// was enabled (but it is enabled by default).
+ ///
+ /// # Example
+ ///
+ /// This example shows how to get the header row of CSV data. Notice that
+ /// the header row does not appear as a record in the iterator!
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Reader;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ ///
+ /// // We can read the headers before iterating.
+ /// {
+ /// // `headers` borrows from the reader, so we put this in its
+ /// // own scope. That way, the borrow ends before we try iterating
+ /// // below. Alternatively, we could clone the headers.
+ /// let headers = rdr.byte_headers()?;
+ /// assert_eq!(headers, vec!["city", "country", "pop"]);
+ /// }
+ ///
+ /// if let Some(result) = rdr.byte_records().next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// } else {
+ /// return Err(From::from(
+ /// "expected at least one record but got none"))
+ /// }
+ ///
+ /// // We can also read the headers after iterating.
+ /// let headers = rdr.byte_headers()?;
+ /// assert_eq!(headers, vec!["city", "country", "pop"]);
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
+ if self.state.headers.is_none() {
+ let mut record = ByteRecord::new();
+ self.read_byte_record_impl(&mut record)?;
+ self.set_headers_impl(Err(record));
+ }
+ Ok(&self.state.headers.as_ref().unwrap().byte_record)
+ }
+
+ /// Set the headers of this CSV parser manually.
+ ///
+ /// This overrides any other setting (including `set_byte_headers`). Any
+ /// automatic detection of headers is disabled. This may be called at any
+ /// time.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{Reader, StringRecord};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ ///
+ /// assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
+ /// rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
+ /// assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
+ ///
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn set_headers(&mut self, headers: StringRecord) {
+ self.set_headers_impl(Ok(headers));
+ }
+
+ /// Set the headers of this CSV parser manually as raw bytes.
+ ///
+ /// This overrides any other setting (including `set_headers`). Any
+ /// automatic detection of headers is disabled. This may be called at any
+ /// time.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{Reader, ByteRecord};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ ///
+ /// assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
+ /// rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
+ /// assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
+ ///
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn set_byte_headers(&mut self, headers: ByteRecord) {
+ self.set_headers_impl(Err(headers));
+ }
+
+ fn set_headers_impl(
+ &mut self,
+ headers: result::Result<StringRecord, ByteRecord>,
+ ) {
+ // If we have string headers, then get byte headers. But if we have
+ // byte headers, then get the string headers (or a UTF-8 error).
+ let (mut str_headers, mut byte_headers) = match headers {
+ Ok(string) => {
+ let bytes = string.clone().into_byte_record();
+ (Ok(string), bytes)
+ }
+ Err(bytes) => {
+ match StringRecord::from_byte_record(bytes.clone()) {
+ Ok(str_headers) => (Ok(str_headers), bytes),
+ Err(err) => (Err(err.utf8_error().clone()), bytes),
+ }
+ }
+ };
+ if self.state.trim.should_trim_headers() {
+ if let Ok(ref mut str_headers) = str_headers.as_mut() {
+ str_headers.trim();
+ }
+ byte_headers.trim();
+ }
+ self.state.headers = Some(Headers {
+ byte_record: byte_headers,
+ string_record: str_headers,
+ });
+ }
+
+ /// Read a single row into the given record. Returns false when no more
+ /// records could be read.
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this will never read the first record.
+ ///
+ /// This method is useful when you want to read records as fast as
+ /// possible. It's less ergonomic than an iterator, but it permits the
+ /// caller to reuse the `StringRecord` allocation, which usually results
+ /// in higher throughput.
+ ///
+ /// Records read via this method are guaranteed to have a position set
+ /// on them, even if the reader is at EOF or if an error is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{Reader, StringRecord};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// let mut record = StringRecord::new();
+ ///
+ /// if rdr.read_record(&mut record)? {
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
+ let result = record.read(self);
+ // We need to trim again because trimming string records includes
+ // Unicode whitespace. (ByteRecord trimming only includes ASCII
+ // whitespace.)
+ if self.state.trim.should_trim_fields() {
+ record.trim();
+ }
+ result
+ }
+
+ /// Read a single row into the given byte record. Returns false when no
+ /// more records could be read.
+ ///
+ /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
+ /// default), then this will never read the first record.
+ ///
+ /// This method is useful when you want to read records as fast as
+ /// possible. It's less ergonomic than an iterator, but it permits the
+ /// caller to reuse the `ByteRecord` allocation, which usually results
+ /// in higher throughput.
+ ///
+ /// Records read via this method are guaranteed to have a position set
+ /// on them, even if the reader is at EOF or if an error is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ByteRecord, Reader};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,pop
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(data.as_bytes());
+ /// let mut record = ByteRecord::new();
+ ///
+ /// if rdr.read_byte_record(&mut record)? {
+ /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn read_byte_record(
+ &mut self,
+ record: &mut ByteRecord,
+ ) -> Result<bool> {
+ if !self.state.seeked && !self.state.has_headers && !self.state.first {
+ // If the caller indicated "no headers" and we haven't yielded the
+ // first record yet, then we should yield our header row if we have
+ // one.
+ if let Some(ref headers) = self.state.headers {
+ self.state.first = true;
+ record.clone_from(&headers.byte_record);
+ if self.state.trim.should_trim_fields() {
+ record.trim();
+ }
+ return Ok(!record.is_empty());
+ }
+ }
+ let ok = self.read_byte_record_impl(record)?;
+ self.state.first = true;
+ if !self.state.seeked && self.state.headers.is_none() {
+ self.set_headers_impl(Err(record.clone()));
+ // If the end user indicated that we have headers, then we should
+ // never return the first row. Instead, we should attempt to
+ // read and return the next one.
+ if self.state.has_headers {
+ let result = self.read_byte_record_impl(record);
+ if self.state.trim.should_trim_fields() {
+ record.trim();
+ }
+ return result;
+ }
+ } else if self.state.trim.should_trim_fields() {
+ record.trim();
+ }
+ Ok(ok)
+ }
+
+ /// Read a byte record from the underlying CSV reader, without accounting
+ /// for headers.
+ #[inline(always)]
+ fn read_byte_record_impl(
+ &mut self,
+ record: &mut ByteRecord,
+ ) -> Result<bool> {
+ use csv_core::ReadRecordResult::*;
+
+ record.clear();
+ record.set_position(Some(self.state.cur_pos.clone()));
+ if self.state.eof != ReaderEofState::NotEof {
+ return Ok(false);
+ }
+ let (mut outlen, mut endlen) = (0, 0);
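+ // Pump buffered input through the core CSV parser until it either
+ // yields a complete record, needs more room for field data or field
+ // boundaries, or reaches the end of the input.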
+ loop {
+ let (res, nin, nout, nend) = {
+ let input_res = self.rdr.fill_buf();
+ if input_res.is_err() {
+ self.state.eof = ReaderEofState::IOError;
+ }
+ let input = input_res?;
+ let (fields, ends) = record.as_parts();
+ self.core.read_record(
+ input,
+ &mut fields[outlen..],
+ &mut ends[endlen..],
+ )
+ };
+ self.rdr.consume(nin);
+ let byte = self.state.cur_pos.byte();
+ self.state
+ .cur_pos
+ .set_byte(byte + nin as u64)
+ .set_line(self.core.line());
+ outlen += nout;
+ endlen += nend;
+ match res {
+ InputEmpty => continue,
+ OutputFull => {
+ record.expand_fields();
+ continue;
+ }
+ OutputEndsFull => {
+ record.expand_ends();
+ continue;
+ }
+ Record => {
+ record.set_len(endlen);
+ self.state.add_record(record)?;
+ return Ok(true);
+ }
+ End => {
+ self.state.eof = ReaderEofState::Eof;
+ return Ok(false);
+ }
+ }
+ }
+ }
+
+ /// Return the current position of this CSV reader.
+ ///
+ /// The byte offset in the position returned can be used to `seek` this
+ /// reader. In particular, seeking to a position returned here on the same
+ /// data will result in parsing the same subsequent record.
+ ///
+ /// # Example: reading the position
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::io;
+ /// use csv::{Reader, Position};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ";
+ /// let rdr = Reader::from_reader(io::Cursor::new(data));
+ /// let mut iter = rdr.into_records();
+ /// let mut pos = Position::new();
+ /// loop {
+ /// // Read the position immediately before each record.
+ /// let next_pos = iter.reader().position().clone();
+ /// if iter.next().is_none() {
+ /// break;
+ /// }
+ /// pos = next_pos;
+ /// }
+ ///
+ /// // `pos` should now be the position immediately before the last
+ /// // record.
+ /// assert_eq!(pos.byte(), 51);
+ /// assert_eq!(pos.line(), 3);
+ /// assert_eq!(pos.record(), 2);
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn position(&self) -> &Position {
+ &self.state.cur_pos
+ }
+
+ /// Returns true if and only if this reader has been exhausted.
+ ///
+ /// When this returns true, no more records can be read from this reader
+ /// (unless it has been seeked to another position).
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::io;
+ /// use csv::{Reader, Position};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ";
+ /// let mut rdr = Reader::from_reader(io::Cursor::new(data));
+ /// assert!(!rdr.is_done());
+ /// for result in rdr.records() {
+ /// let _ = result?;
+ /// }
+ /// assert!(rdr.is_done());
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn is_done(&self) -> bool {
+ self.state.eof != ReaderEofState::NotEof
+ }
+
+ /// Returns true if and only if this reader has been configured to
+ /// interpret the first record as a header record.
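+ ///
+ /// # Example
+ ///
+ /// A minimal sketch (headers are enabled by default and configured via
+ /// `ReaderBuilder::has_headers`):
+ ///
+ /// ```
+ /// use csv::ReaderBuilder;
+ ///
+ /// let rdr = ReaderBuilder::new().from_reader("a,b,c".as_bytes());
+ /// assert!(rdr.has_headers());
+ ///
+ /// let rdr = ReaderBuilder::new()
+ /// .has_headers(false)
+ /// .from_reader("a,b,c".as_bytes());
+ /// assert!(!rdr.has_headers());
+ /// ```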
+ pub fn has_headers(&self) -> bool {
+ self.state.has_headers
+ }
+
+ /// Returns a reference to the underlying reader.
+ pub fn get_ref(&self) -> &R {
+ self.rdr.get_ref()
+ }
+
+ /// Returns a mutable reference to the underlying reader.
+ pub fn get_mut(&mut self) -> &mut R {
+ self.rdr.get_mut()
+ }
+
+ /// Unwraps this CSV reader, returning the underlying reader.
+ ///
+ /// Note that any leftover data inside this reader's internal buffer is
+ /// lost.
+ pub fn into_inner(self) -> R {
+ self.rdr.into_inner()
+ }
+}
+
+impl<R: io::Read + io::Seek> Reader<R> {
+ /// Seeks the underlying reader to the position given.
+ ///
+ /// This comes with a few caveats:
+ ///
+ /// * Any internal buffer associated with this reader is cleared.
+ /// * If the given position does not correspond to a position immediately
+ /// before the start of a record, then the behavior of this reader is
+ /// unspecified.
+ /// * Any special logic that skips the first record in the CSV reader
+ /// when reading or iterating over records is disabled.
+ ///
+ /// If the given position has a byte offset equivalent to the current
+ /// position, then no seeking is performed.
+ ///
+ /// If the header row has not already been read, then this will attempt
+ /// to read the header row before seeking. Therefore, it is possible that
+ /// this returns an error associated with reading CSV data.
+ ///
+ /// Note that seeking is performed based only on the byte offset in the
+ /// given position. Namely, the record or line numbers in the position may
+ /// be incorrect, and if so, any future position generated by this CSV
+ /// reader will be similarly incorrect.
+ ///
+ /// # Example: seek to parse a record twice
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::io;
+ /// use csv::{Reader, Position};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ";
+ /// let rdr = Reader::from_reader(io::Cursor::new(data));
+ /// let mut iter = rdr.into_records();
+ /// let mut pos = Position::new();
+ /// loop {
+ /// // Read the position immediately before each record.
+ /// let next_pos = iter.reader().position().clone();
+ /// if iter.next().is_none() {
+ /// break;
+ /// }
+ /// pos = next_pos;
+ /// }
+ ///
+ /// // Now seek the reader back to `pos`. This will let us read the
+ /// // last record again.
+ /// iter.reader_mut().seek(pos)?;
+ /// let mut iter = iter.into_reader().into_records();
+ /// if let Some(result) = iter.next() {
+ /// let record = result?;
+ /// assert_eq!(record, vec!["Concord", "United States", "42695"]);
+ /// Ok(())
+ /// } else {
+ /// Err(From::from("expected at least one record but got none"))
+ /// }
+ /// }
+ /// ```
+ pub fn seek(&mut self, pos: Position) -> Result<()> {
+ self.byte_headers()?;
+ self.state.seeked = true;
+ if pos.byte() == self.state.cur_pos.byte() {
+ return Ok(());
+ }
+ self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
+ self.core.reset();
+ self.core.set_line(pos.line());
+ self.state.cur_pos = pos;
+ self.state.eof = ReaderEofState::NotEof;
+ Ok(())
+ }
+
+ /// This is like `seek`, but provides direct control over how the seeking
+ /// operation is performed via `io::SeekFrom`.
+ ///
+ /// The `pos` position given *should* correspond to the position indicated
+ /// by `seek_from`, but this is not required. If the `pos` position
+ /// given is incorrect, then the position information returned by this
+ /// reader will be similarly incorrect.
+ ///
+ /// If the header row has not already been read, then this will attempt
+ /// to read the header row before seeking. Therefore, it is possible that
+ /// this returns an error associated with reading CSV data.
+ ///
+ /// Unlike `seek`, this will always cause an actual seek to be performed.
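+ ///
+ /// # Example
+ ///
+ /// A minimal sketch: perform a raw seek back to the start of the data.
+ /// Note that, as with `seek`, the logic that skips the header row is
+ /// disabled afterward, so the header row is yielded as a regular record.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::io;
+ /// use csv::{Reader, Position};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let data = "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// ";
+ /// let mut rdr = Reader::from_reader(io::Cursor::new(data));
+ /// assert_eq!(rdr.records().count(), 1);
+ ///
+ /// // Seek back to the beginning. `Position::new()` corresponds to
+ /// // byte offset 0, which is where we are seeking to.
+ /// rdr.seek_raw(io::SeekFrom::Start(0), Position::new())?;
+ ///
+ /// // Header skipping is now disabled, so the header row is yielded
+ /// // as a record too.
+ /// assert_eq!(rdr.records().count(), 2);
+ /// Ok(())
+ /// }
+ /// ```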
+ pub fn seek_raw(
+ &mut self,
+ seek_from: io::SeekFrom,
+ pos: Position,
+ ) -> Result<()> {
+ self.byte_headers()?;
+ self.state.seeked = true;
+ self.rdr.seek(seek_from)?;
+ self.core.reset();
+ self.core.set_line(pos.line());
+ self.state.cur_pos = pos;
+ self.state.eof = ReaderEofState::NotEof;
+ Ok(())
+ }
+}
+
+impl ReaderState {
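+ /// Increment the record count in the current position and, unless this
+ /// reader is flexible, return an error if the number of fields in this
+ /// record does not match the number of fields in the first record read.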
+ #[inline(always)]
+ fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
+ let i = self.cur_pos.record();
+ self.cur_pos.set_record(i.checked_add(1).unwrap());
+ if !self.flexible {
+ match self.first_field_count {
+ None => self.first_field_count = Some(record.len() as u64),
+ Some(expected) => {
+ if record.len() as u64 != expected {
+ return Err(Error::new(ErrorKind::UnequalLengths {
+ pos: record.position().map(Clone::clone),
+ expected_len: expected,
+ len: record.len() as u64,
+ }));
+ }
+ }
+ }
+ }
+ Ok(())
+ }
+}
+
+/// An owned iterator over deserialized records.
+///
+/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
+/// refers to the type that this iterator will deserialize a record into.
+pub struct DeserializeRecordsIntoIter<R, D> {
+ rdr: Reader<R>,
+ rec: StringRecord,
+ headers: Option<StringRecord>,
+ _priv: PhantomData<D>,
+}
+
+impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
+ fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
+ let headers = if !rdr.state.has_headers {
+ None
+ } else {
+ rdr.headers().ok().map(Clone::clone)
+ };
+ DeserializeRecordsIntoIter {
+ rdr,
+ rec: StringRecord::new(),
+ headers,
+ _priv: PhantomData,
+ }
+ }
+
+ /// Return a reference to the underlying CSV reader.
+ pub fn reader(&self) -> &Reader<R> {
+ &self.rdr
+ }
+
+ /// Return a mutable reference to the underlying CSV reader.
+ pub fn reader_mut(&mut self) -> &mut Reader<R> {
+ &mut self.rdr
+ }
+
+ /// Drop this iterator and return the underlying CSV reader.
+ pub fn into_reader(self) -> Reader<R> {
+ self.rdr
+ }
+}
+
+impl<R: io::Read, D: DeserializeOwned> Iterator
+ for DeserializeRecordsIntoIter<R, D>
+{
+ type Item = Result<D>;
+
+ fn next(&mut self) -> Option<Result<D>> {
+ match self.rdr.read_record(&mut self.rec) {
+ Err(err) => Some(Err(err)),
+ Ok(false) => None,
+ Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
+ }
+ }
+}
+
+/// A borrowed iterator over deserialized records.
+///
+/// The lifetime parameter `'r` refers to the lifetime of the underlying
+/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
+/// type, and `D` refers to the type that this iterator will deserialize a
+/// record into.
+pub struct DeserializeRecordsIter<'r, R: 'r, D> {
+ rdr: &'r mut Reader<R>,
+ rec: StringRecord,
+ headers: Option<StringRecord>,
+ _priv: PhantomData<D>,
+}
+
+impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
+ fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
+ let headers = if !rdr.state.has_headers {
+ None
+ } else {
+ rdr.headers().ok().map(Clone::clone)
+ };
+ DeserializeRecordsIter {
+ rdr,
+ rec: StringRecord::new(),
+ headers,
+ _priv: PhantomData,
+ }
+ }
+
+ /// Return a reference to the underlying CSV reader.
+ pub fn reader(&self) -> &Reader<R> {
+ &self.rdr
+ }
+
+ /// Return a mutable reference to the underlying CSV reader.
+ pub fn reader_mut(&mut self) -> &mut Reader<R> {
+ &mut self.rdr
+ }
+}
+
+impl<'r, R: io::Read, D: DeserializeOwned> Iterator
+ for DeserializeRecordsIter<'r, R, D>
+{
+ type Item = Result<D>;
+
+ fn next(&mut self) -> Option<Result<D>> {
+ match self.rdr.read_record(&mut self.rec) {
+ Err(err) => Some(Err(err)),
+ Ok(false) => None,
+ Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
+ }
+ }
+}
+
+/// An owned iterator over records as strings.
+pub struct StringRecordsIntoIter<R> {
+ rdr: Reader<R>,
+ rec: StringRecord,
+}
+
+impl<R: io::Read> StringRecordsIntoIter<R> {
+ fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
+ StringRecordsIntoIter { rdr, rec: StringRecord::new() }
+ }
+
+ /// Return a reference to the underlying CSV reader.
+ pub fn reader(&self) -> &Reader<R> {
+ &self.rdr
+ }
+
+ /// Return a mutable reference to the underlying CSV reader.
+ pub fn reader_mut(&mut self) -> &mut Reader<R> {
+ &mut self.rdr
+ }
+
+ /// Drop this iterator and return the underlying CSV reader.
+ pub fn into_reader(self) -> Reader<R> {
+ self.rdr
+ }
+}
+
+impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
+ type Item = Result<StringRecord>;
+
+ fn next(&mut self) -> Option<Result<StringRecord>> {
+ match self.rdr.read_record(&mut self.rec) {
+ Err(err) => Some(Err(err)),
+ Ok(true) => Some(Ok(self.rec.clone())),
+ Ok(false) => None,
+ }
+ }
+}
+
+/// A borrowed iterator over records as strings.
+///
+/// The lifetime parameter `'r` refers to the lifetime of the underlying
+/// CSV `Reader`.
+pub struct StringRecordsIter<'r, R: 'r> {
+ rdr: &'r mut Reader<R>,
+ rec: StringRecord,
+}
+
+impl<'r, R: io::Read> StringRecordsIter<'r, R> {
+ fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
+ StringRecordsIter { rdr, rec: StringRecord::new() }
+ }
+
+ /// Return a reference to the underlying CSV reader.
+ pub fn reader(&self) -> &Reader<R> {
+ &self.rdr
+ }
+
+ /// Return a mutable reference to the underlying CSV reader.
+ pub fn reader_mut(&mut self) -> &mut Reader<R> {
+ &mut self.rdr
+ }
+}
+
+impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
+ type Item = Result<StringRecord>;
+
+ fn next(&mut self) -> Option<Result<StringRecord>> {
+ match self.rdr.read_record(&mut self.rec) {
+ Err(err) => Some(Err(err)),
+ Ok(true) => Some(Ok(self.rec.clone())),
+ Ok(false) => None,
+ }
+ }
+}
+
+/// An owned iterator over records as raw bytes.
+pub struct ByteRecordsIntoIter<R> {
+ rdr: Reader<R>,
+ rec: ByteRecord,
+}
+
+impl<R: io::Read> ByteRecordsIntoIter<R> {
+ fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
+ ByteRecordsIntoIter { rdr, rec: ByteRecord::new() }
+ }
+
+ /// Return a reference to the underlying CSV reader.
+ pub fn reader(&self) -> &Reader<R> {
+ &self.rdr
+ }
+
+ /// Return a mutable reference to the underlying CSV reader.
+ pub fn reader_mut(&mut self) -> &mut Reader<R> {
+ &mut self.rdr
+ }
+
+ /// Drop this iterator and return the underlying CSV reader.
+ pub fn into_reader(self) -> Reader<R> {
+ self.rdr
+ }
+}
+
+impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
+ type Item = Result<ByteRecord>;
+
+ fn next(&mut self) -> Option<Result<ByteRecord>> {
+ match self.rdr.read_byte_record(&mut self.rec) {
+ Err(err) => Some(Err(err)),
+ Ok(true) => Some(Ok(self.rec.clone())),
+ Ok(false) => None,
+ }
+ }
+}
+
+/// A borrowed iterator over records as raw bytes.
+///
+/// The lifetime parameter `'r` refers to the lifetime of the underlying
+/// CSV `Reader`.
+pub struct ByteRecordsIter<'r, R: 'r> {
+ rdr: &'r mut Reader<R>,
+ rec: ByteRecord,
+}
+
+impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
+ fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
+ ByteRecordsIter { rdr, rec: ByteRecord::new() }
+ }
+
+ /// Return a reference to the underlying CSV reader.
+ pub fn reader(&self) -> &Reader<R> {
+ &self.rdr
+ }
+
+ /// Return a mutable reference to the underlying CSV reader.
+ pub fn reader_mut(&mut self) -> &mut Reader<R> {
+ &mut self.rdr
+ }
+}
+
+impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
+ type Item = Result<ByteRecord>;
+
+ fn next(&mut self) -> Option<Result<ByteRecord>> {
+ match self.rdr.read_byte_record(&mut self.rec) {
+ Err(err) => Some(Err(err)),
+ Ok(true) => Some(Ok(self.rec.clone())),
+ Ok(false) => None,
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::io;
+
+ use crate::byte_record::ByteRecord;
+ use crate::error::ErrorKind;
+ use crate::string_record::StringRecord;
+
+ use super::{Position, ReaderBuilder, Trim};
+
+ fn b(s: &str) -> &[u8] {
+ s.as_bytes()
+ }
+ fn s(b: &[u8]) -> &str {
+ ::std::str::from_utf8(b).unwrap()
+ }
+
+ fn newpos(byte: u64, line: u64, record: u64) -> Position {
+ let mut p = Position::new();
+ p.set_byte(byte).set_line(line).set_record(record);
+ p
+ }
+
+ #[test]
+ fn read_byte_record() {
+ let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader(data);
+ let mut rec = ByteRecord::new();
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("foo", s(&rec[0]));
+ assert_eq!("b,ar", s(&rec[1]));
+ assert_eq!("baz", s(&rec[2]));
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("abc", s(&rec[0]));
+ assert_eq!("mno", s(&rec[1]));
+ assert_eq!("xyz", s(&rec[2]));
+
+ assert!(!rdr.read_byte_record(&mut rec).unwrap());
+ }
+
+ #[test]
+ fn read_trimmed_records_and_headers() {
+ let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(true)
+ .trim(Trim::All)
+ .from_reader(data);
+ let mut rec = ByteRecord::new();
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!("1", s(&rec[0]));
+ assert_eq!("2", s(&rec[1]));
+ assert_eq!("3", s(&rec[2]));
+ let mut rec = StringRecord::new();
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!("1", &rec[0]);
+ assert_eq!("", &rec[1]);
+ assert_eq!("3", &rec[2]);
+ {
+ let headers = rdr.headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!("foo", &headers[0]);
+ assert_eq!("bar", &headers[1]);
+ assert_eq!("baz", &headers[2]);
+ }
+ }
+
+ #[test]
+ fn read_trimmed_header() {
+ let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(true)
+ .trim(Trim::Headers)
+ .from_reader(data);
+ let mut rec = ByteRecord::new();
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(" 1", s(&rec[0]));
+ assert_eq!(" 2", s(&rec[1]));
+ assert_eq!(" 3", s(&rec[2]));
+ {
+ let headers = rdr.headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!("foo", &headers[0]);
+ assert_eq!("bar", &headers[1]);
+ assert_eq!("baz", &headers[2]);
+ }
+ }
+
+ #[test]
+ fn read_trimmed_header_invalid_utf8() {
+ let data = &b"foo, b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(true)
+ .trim(Trim::Headers)
+ .from_reader(data);
+ let mut rec = StringRecord::new();
+
+ // Force the headers to be read.
+ let _ = rdr.read_record(&mut rec);
+ // Check that the byte headers are trimmed.
+ {
+ let headers = rdr.byte_headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!(b"foo", &headers[0]);
+ assert_eq!(b"b\xFFar", &headers[1]);
+ assert_eq!(b"baz", &headers[2]);
+ }
+ match *rdr.headers().unwrap_err().kind() {
+ ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
+ assert_eq!(pos, &newpos(0, 1, 0));
+ assert_eq!(err.field(), 1);
+ assert_eq!(err.valid_up_to(), 3);
+ }
+ ref err => panic!("match failed, got {:?}", err),
+ }
+ }
+
+ #[test]
+ fn read_trimmed_records() {
+ let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(true)
+ .trim(Trim::Fields)
+ .from_reader(data);
+ let mut rec = ByteRecord::new();
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!("1", s(&rec[0]));
+ assert_eq!("2", s(&rec[1]));
+ assert_eq!("3", s(&rec[2]));
+ {
+ let headers = rdr.headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!("foo", &headers[0]);
+ assert_eq!(" bar", &headers[1]);
+ assert_eq!("\tbaz", &headers[2]);
+ }
+ }
+
+ #[test]
+ fn read_record_unequal_fails() {
+ let data = b("foo\nbar,baz");
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader(data);
+ let mut rec = ByteRecord::new();
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(1, rec.len());
+ assert_eq!("foo", s(&rec[0]));
+
+ match rdr.read_byte_record(&mut rec) {
+ Err(err) => match *err.kind() {
+ ErrorKind::UnequalLengths {
+ expected_len: 1,
+ ref pos,
+ len: 2,
+ } => {
+ assert_eq!(pos, &Some(newpos(4, 2, 1)));
+ }
+ ref wrong => panic!("match failed, got {:?}", wrong),
+ },
+ wrong => panic!("match failed, got {:?}", wrong),
+ }
+ }
+
+ #[test]
+ fn read_record_unequal_ok() {
+ let data = b("foo\nbar,baz");
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(false)
+ .flexible(true)
+ .from_reader(data);
+ let mut rec = ByteRecord::new();
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(1, rec.len());
+ assert_eq!("foo", s(&rec[0]));
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(2, rec.len());
+ assert_eq!("bar", s(&rec[0]));
+ assert_eq!("baz", s(&rec[1]));
+
+ assert!(!rdr.read_byte_record(&mut rec).unwrap());
+ }
+
+ // This tests that even if we get a CSV error, we can continue reading
+ // if we want.
+ #[test]
+ fn read_record_unequal_continue() {
+ let data = b("foo\nbar,baz\nquux");
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader(data);
+ let mut rec = ByteRecord::new();
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(1, rec.len());
+ assert_eq!("foo", s(&rec[0]));
+
+ match rdr.read_byte_record(&mut rec) {
+ Err(err) => match err.kind() {
+ &ErrorKind::UnequalLengths {
+ expected_len: 1,
+ ref pos,
+ len: 2,
+ } => {
+ assert_eq!(pos, &Some(newpos(4, 2, 1)));
+ }
+ wrong => panic!("match failed, got {:?}", wrong),
+ },
+ wrong => panic!("match failed, got {:?}", wrong),
+ }
+
+ assert!(rdr.read_byte_record(&mut rec).unwrap());
+ assert_eq!(1, rec.len());
+ assert_eq!("quux", s(&rec[0]));
+
+ assert!(!rdr.read_byte_record(&mut rec).unwrap());
+ }
+
+ #[test]
+ fn read_record_headers() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f");
+ let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
+ let mut rec = StringRecord::new();
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("a", &rec[0]);
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("d", &rec[0]);
+
+ assert!(!rdr.read_record(&mut rec).unwrap());
+
+ {
+ let headers = rdr.byte_headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!(b"foo", &headers[0]);
+ assert_eq!(b"bar", &headers[1]);
+ assert_eq!(b"baz", &headers[2]);
+ }
+ {
+ let headers = rdr.headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!("foo", &headers[0]);
+ assert_eq!("bar", &headers[1]);
+ assert_eq!("baz", &headers[2]);
+ }
+ }
+
+ #[test]
+ fn read_record_headers_invalid_utf8() {
+ let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
+ let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
+ let mut rec = StringRecord::new();
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("a", &rec[0]);
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("d", &rec[0]);
+
+ assert!(!rdr.read_record(&mut rec).unwrap());
+
+ // Check that we can read the headers as raw bytes, but that
+ // if we read them as strings, we get an appropriate UTF-8 error.
+ {
+ let headers = rdr.byte_headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!(b"foo", &headers[0]);
+ assert_eq!(b"b\xFFar", &headers[1]);
+ assert_eq!(b"baz", &headers[2]);
+ }
+ match *rdr.headers().unwrap_err().kind() {
+ ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
+ assert_eq!(pos, &newpos(0, 1, 0));
+ assert_eq!(err.field(), 1);
+ assert_eq!(err.valid_up_to(), 1);
+ }
+ ref err => panic!("match failed, got {:?}", err),
+ }
+ }
+
+ #[test]
+ fn read_record_no_headers_before() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f");
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader(data);
+ let mut rec = StringRecord::new();
+
+ {
+ let headers = rdr.headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!("foo", &headers[0]);
+ assert_eq!("bar", &headers[1]);
+ assert_eq!("baz", &headers[2]);
+ }
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("foo", &rec[0]);
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("a", &rec[0]);
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("d", &rec[0]);
+
+ assert!(!rdr.read_record(&mut rec).unwrap());
+ }
+
+ #[test]
+ fn read_record_no_headers_after() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f");
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader(data);
+ let mut rec = StringRecord::new();
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("foo", &rec[0]);
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("a", &rec[0]);
+
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("d", &rec[0]);
+
+ assert!(!rdr.read_record(&mut rec).unwrap());
+
+ let headers = rdr.headers().unwrap();
+ assert_eq!(3, headers.len());
+ assert_eq!("foo", &headers[0]);
+ assert_eq!("bar", &headers[1]);
+ assert_eq!("baz", &headers[2]);
+ }
+
+ #[test]
+ fn seek() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
+ let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
+ rdr.seek(newpos(18, 3, 2)).unwrap();
+
+ let mut rec = StringRecord::new();
+
+ assert_eq!(18, rdr.position().byte());
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("d", &rec[0]);
+
+ assert_eq!(24, rdr.position().byte());
+ assert_eq!(4, rdr.position().line());
+ assert_eq!(3, rdr.position().record());
+ assert!(rdr.read_record(&mut rec).unwrap());
+ assert_eq!(3, rec.len());
+ assert_eq!("g", &rec[0]);
+
+ assert!(!rdr.read_record(&mut rec).unwrap());
+ }
+
+ // Test that we can read headers after seeking even if the headers weren't
+ // explicitly read before seeking.
+ #[test]
+ fn seek_headers_after() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
+ let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
+ rdr.seek(newpos(18, 3, 2)).unwrap();
+ assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
+ }
+
+ // Test that we can read headers after seeking if the headers were read
+ // before seeking.
+ #[test]
+ fn seek_headers_before_after() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
+ let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
+ let headers = rdr.headers().unwrap().clone();
+ rdr.seek(newpos(18, 3, 2)).unwrap();
+ assert_eq!(&headers, rdr.headers().unwrap());
+ }
+
+ // Test that even if we didn't read headers before seeking, if we seek to
+ // the current byte offset, then no seeking is done and therefore we can
+ // still read headers after seeking.
+ #[test]
+ fn seek_headers_no_actual_seek() {
+ let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
+ let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
+ rdr.seek(Position::new()).unwrap();
+ assert_eq!("foo", &rdr.headers().unwrap()[0]);
+ }
+
+ // Test that position info is reported correctly in absence of headers.
+ #[test]
+ fn positions_no_headers() {
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(false)
+ .from_reader("a,b,c\nx,y,z".as_bytes())
+ .into_records();
+
+ let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
+ assert_eq!(pos.byte(), 0);
+ assert_eq!(pos.line(), 1);
+ assert_eq!(pos.record(), 0);
+
+ let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
+ assert_eq!(pos.byte(), 6);
+ assert_eq!(pos.line(), 2);
+ assert_eq!(pos.record(), 1);
+ }
+
+ // Test that position info is reported correctly with headers.
+ #[test]
+ fn positions_headers() {
+ let mut rdr = ReaderBuilder::new()
+ .has_headers(true)
+ .from_reader("a,b,c\nx,y,z".as_bytes())
+ .into_records();
+
+ let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
+ assert_eq!(pos.byte(), 6);
+ assert_eq!(pos.line(), 2);
+ assert_eq!(pos.record(), 1);
+ }
+
+ // Test that reading headers on empty data yields an empty record.
+ #[test]
+ fn headers_on_empty_data() {
+ let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
+ let r = rdr.byte_headers().unwrap();
+ assert_eq!(r.len(), 0);
+ }
+
+ // Test that reading the first record on empty data works.
+ #[test]
+ fn no_headers_on_empty_data() {
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
+ assert_eq!(rdr.records().count(), 0);
+ }
+
+ // Test that reading the first record on empty data works, even if
+ // we've tried to read headers beforehand.
+ #[test]
+ fn no_headers_on_empty_data_after_headers() {
+ let mut rdr =
+ ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
+ assert_eq!(rdr.headers().unwrap().len(), 0);
+ assert_eq!(rdr.records().count(), 0);
+ }
+}
diff --git a/src/serializer.rs b/src/serializer.rs
new file mode 100644
index 0000000..40c94f2
--- /dev/null
+++ b/src/serializer.rs
@@ -0,0 +1,1341 @@
+use std::fmt;
+use std::io;
+use std::mem;
+
+use itoa;
+use ryu;
+use serde::ser::{
+ Error as SerdeError, Serialize, SerializeMap, SerializeSeq,
+ SerializeStruct, SerializeStructVariant, SerializeTuple,
+ SerializeTupleStruct, SerializeTupleVariant, Serializer,
+};
+use serde::serde_if_integer128;
+
+use crate::error::{Error, ErrorKind};
+use crate::writer::Writer;
+
+/// Serialize the given value to the given writer, and return an error if
+/// anything went wrong.
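+///
+/// # Example
+///
+/// A minimal sketch via the public `Writer::serialize` API, which forwards
+/// to this function:
+///
+/// ```
+/// use std::error::Error;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<(), Box<dyn Error>> {
+/// let mut wtr = csv::Writer::from_writer(vec![]);
+/// wtr.serialize(("foo", 42))?;
+/// let data = String::from_utf8(wtr.into_inner()?)?;
+/// assert_eq!(data, "foo,42\n");
+/// Ok(())
+/// }
+/// ```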
+pub fn serialize<S: Serialize, W: io::Write>(
+ wtr: &mut Writer<W>,
+ value: S,
+) -> Result<(), Error> {
+ value.serialize(&mut SeRecord { wtr })
+}
+
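+/// A Serde serializer that writes the fields of a single record to the
+/// underlying CSV writer.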
+struct SeRecord<'w, W: 'w + io::Write> {
+ wtr: &'w mut Writer<W>,
+}
+
+impl<'a, 'w, W: io::Write> Serializer for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+ type SerializeSeq = Self;
+ type SerializeTuple = Self;
+ type SerializeTupleStruct = Self;
+ type SerializeTupleVariant = Self;
+ type SerializeMap = Self;
+ type SerializeStruct = Self;
+ type SerializeStructVariant = Self;
+
+ fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
+ if v {
+ self.wtr.write_field("true")
+ } else {
+ self.wtr.write_field("false")
+ }
+ }
+
+ fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ serde_if_integer128! {
+ fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> {
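+ // 128-bit integers are formatted via their `Display` impl
+ // (`collect_str`) rather than through an `itoa` buffer.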
+ self.collect_str(&v)
+ }
+ }
+
+ fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = itoa::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ serde_if_integer128! {
+ fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> {
+ self.collect_str(&v)
+ }
+ }
+
+ fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = ryu::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
+ let mut buffer = ryu::Buffer::new();
+ self.wtr.write_field(buffer.format(v))
+ }
+
+ fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
+ self.wtr.write_field(v.encode_utf8(&mut [0; 4]))
+ }
+
+ fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
+ self.wtr.write_field(value)
+ }
+
+ fn serialize_bytes(self, value: &[u8]) -> Result<Self::Ok, Self::Error> {
+ self.wtr.write_field(value)
+ }
+
+ fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
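+ // `None` is written as an empty field.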
+ self.wtr.write_field(&[])
+ }
+
+ fn serialize_some<T: ?Sized + Serialize>(
+ self,
+ value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ value.serialize(self)
+ }
+
+ fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
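+ // A unit value is treated like `None`, i.e., it is written as an
+ // empty field.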
+ None::<()>.serialize(self)
+ }
+
+ fn serialize_unit_struct(
+ self,
+ name: &'static str,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.wtr.write_field(name)
+ }
+
+ fn serialize_unit_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ variant: &'static str,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.wtr.write_field(variant)
+ }
+
+ fn serialize_newtype_struct<T: ?Sized + Serialize>(
+ self,
+ _name: &'static str,
+ value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ value.serialize(self)
+ }
+
+ fn serialize_newtype_variant<T: ?Sized + Serialize>(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ value.serialize(self)
+ }
+
+ fn serialize_seq(
+ self,
+ _len: Option<usize>,
+ ) -> Result<Self::SerializeSeq, Self::Error> {
+ Ok(self)
+ }
+
+ fn serialize_tuple(
+ self,
+ _len: usize,
+ ) -> Result<Self::SerializeTuple, Self::Error> {
+ Ok(self)
+ }
+
+ fn serialize_tuple_struct(
+ self,
+ _name: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeTupleStruct, Self::Error> {
+ Ok(self)
+ }
+
+ fn serialize_tuple_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeTupleVariant, Self::Error> {
+ Err(Error::custom("serializing enum tuple variants is not supported"))
+ }
+
+ fn serialize_map(
+ self,
+ _len: Option<usize>,
+ ) -> Result<Self::SerializeMap, Self::Error> {
+ // The right behavior for serializing maps isn't clear.
+ Err(Error::custom(
+ "serializing maps is not supported, \
+ if you have a use case, please file an issue at \
+ https://github.com/BurntSushi/rust-csv",
+ ))
+ }
+
+ fn serialize_struct(
+ self,
+ _name: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeStruct, Self::Error> {
+ Ok(self)
+ }
+
+ fn serialize_struct_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeStructVariant, Self::Error> {
+ Err(Error::custom("serializing enum struct variants is not supported"))
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeSeq for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_element<T: ?Sized + Serialize>(
+ &mut self,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeTuple for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_element<T: ?Sized + Serialize>(
+ &mut self,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeTupleStruct for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeTupleVariant for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ _value: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ unreachable!()
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeMap for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_key<T: ?Sized + Serialize>(
+ &mut self,
+ _key: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn serialize_value<T: ?Sized + Serialize>(
+ &mut self,
+ _value: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ unreachable!()
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeStruct for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ _key: &'static str,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeStructVariant for &'a mut SeRecord<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ _key: &'static str,
+ _value: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ unreachable!()
+ }
+}
+
+impl SerdeError for Error {
+ fn custom<T: fmt::Display>(msg: T) -> Error {
+ Error::new(ErrorKind::Serialize(msg.to_string()))
+ }
+}
+
+fn error_scalar_outside_struct<T: fmt::Display>(name: T) -> Error {
+ Error::custom(format!(
+ "cannot serialize {} scalar outside struct \
+ when writing headers from structs",
+ name
+ ))
+}
+
+fn error_container_inside_struct<T: fmt::Display>(name: T) -> Error {
+ Error::custom(format!(
+ "cannot serialize {} container inside struct \
+ when writing headers from structs",
+ name
+ ))
+}
+
+/// Write header names corresponding to the field names of the value (if the
+/// value has field names).
+///
+/// If the type to be serialized has field names (e.g. it's a struct), then
+/// header names are written, and the `Ok` return value is `true`.
+///
+/// If the type to be serialized doesn't have field names, then nothing is
+/// written, and the `Ok` return value is `false`.
+pub fn serialize_header<S: Serialize, W: io::Write>(
+ wtr: &mut Writer<W>,
+ value: S,
+) -> Result<bool, Error> {
+ let mut ser = SeHeader::new(wtr);
+ value.serialize(&mut ser).map(|_| ser.wrote_header())
+}
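+
+// A usage sketch of the contract above, mirroring the tests at the bottom of
+// this file (assumes an in-memory `Writer` and a `#[derive(Serialize)]`
+// struct `Row { x: bool, y: i32 }`):
+//
+// let mut wtr = Writer::from_writer(vec![]);
+// // A struct has field names: writes the header "x,y", returns Ok(true).
+// assert!(serialize_header(&mut wtr, Row { x: true, y: 5 })?);
+// // A bare scalar has no field names: writes nothing, returns Ok(false).
+// assert!(!serialize_header(&mut wtr, 42)?);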
+
+/// State machine for `SeHeader`.
+///
+/// This is a diagram of the transitions in the state machine. Note that only
+/// some serialization events cause a state transition, and only for certain
+/// states. For example, encountering a scalar causes a transition if the state
+/// is `Write` or `EncounteredStructField`, but not if the state is
+/// `ErrorIfWrite(err)` or `InStructField`.
+///
+/// ```text
+/// +-----+
+/// |Write|
+/// +-----+
+/// |
+/// /------------------+------------------\
+/// | | |
+/// encounter finish encounter
+/// scalar | struct field
+/// | | |
+/// v v v
+/// +-----------------+ Ok(()) +-------------+
+/// |ErrorIfWrite(err)| |InStructField|<--------\
+/// +-----------------+ +-------------+ |
+/// | | |
+/// /------+------\ /-----------------+ |
+/// | | | | |
+/// encounter finish encounter finish encounter
+/// struct field | container field struct field
+/// | | | | |
+/// v v v v |
+/// Err(err) Ok(()) Err(_) +----------------------+ |
+/// |EncounteredStructField| |
+/// +----------------------+ |
+/// | |
+/// /----------+----------------/
+/// | |
+/// encounter finish
+/// scalar |
+/// | |
+/// v v
+/// Err(_) Ok(())
+/// ```
+enum HeaderState {
+ /// Start here. Headers need to be written if the type has field names.
+ Write,
+ /// The serializer still has not encountered a struct field. If one is
+ /// encountered (headers need to be written), return the enclosed error.
+ ErrorIfWrite(Error),
+ /// The serializer encountered one or more struct fields (and wrote their
+ /// names).
+ EncounteredStructField,
+ /// The serializer is currently in a struct field value.
+ InStructField,
+}
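+
+// Tracing the diagram with concrete values (the tests below exercise these
+// paths): serializing `Foo { x: 1 }` enters `InStructField` for the field,
+// writes "x", and finishes in `EncounteredStructField`, so a header is
+// written. Serializing `(3.14, Foo { x: 1 })` first sees the scalar `3.14`
+// (`Write -> ErrorIfWrite`), then fails as soon as the struct field `x` is
+// encountered.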
+
+struct SeHeader<'w, W: 'w + io::Write> {
+ wtr: &'w mut Writer<W>,
+ state: HeaderState,
+}
+
+impl<'w, W: io::Write> SeHeader<'w, W> {
+ fn new(wtr: &'w mut Writer<W>) -> Self {
+ SeHeader { wtr, state: HeaderState::Write }
+ }
+
+ fn wrote_header(&self) -> bool {
+ use self::HeaderState::*;
+ match self.state {
+ Write | ErrorIfWrite(_) => false,
+ EncounteredStructField | InStructField => true,
+ }
+ }
+
+ fn handle_scalar<T: fmt::Display>(
+ &mut self,
+ name: T,
+ ) -> Result<(), Error> {
+ use self::HeaderState::*;
+
+ match self.state {
+ Write => {
+ self.state = ErrorIfWrite(error_scalar_outside_struct(name));
+ Ok(())
+ }
+ ErrorIfWrite(_) | InStructField => Ok(()),
+ EncounteredStructField => Err(error_scalar_outside_struct(name)),
+ }
+ }
+
+ fn handle_container<T: fmt::Display>(
+ &mut self,
+ name: T,
+ ) -> Result<&mut Self, Error> {
+ if let HeaderState::InStructField = self.state {
+ Err(error_container_inside_struct(name))
+ } else {
+ Ok(self)
+ }
+ }
+}
+
+impl<'a, 'w, W: io::Write> Serializer for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+ type SerializeSeq = Self;
+ type SerializeTuple = Self;
+ type SerializeTupleStruct = Self;
+ type SerializeTupleVariant = Self;
+ type SerializeMap = Self;
+ type SerializeStruct = Self;
+ type SerializeStructVariant = Self;
+
+ fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ serde_if_integer128! {
+ fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+ }
+
+ fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ serde_if_integer128! {
+ fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+ }
+
+ fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(v)
+ }
+
+ fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(value)
+ }
+
+ fn serialize_bytes(self, _value: &[u8]) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar("&[u8]")
+ }
+
+ fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar("None")
+ }
+
+ fn serialize_some<T: ?Sized + Serialize>(
+ self,
+ _value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar("Some(_)")
+ }
+
+ fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar("()")
+ }
+
+ fn serialize_unit_struct(
+ self,
+ name: &'static str,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(name)
+ }
+
+ fn serialize_unit_variant(
+ self,
+ name: &'static str,
+ _variant_index: u32,
+ variant: &'static str,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(format!("{}::{}", name, variant))
+ }
+
+ fn serialize_newtype_struct<T: ?Sized + Serialize>(
+ self,
+ name: &'static str,
+ _value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(format!("{}(_)", name))
+ }
+
+ fn serialize_newtype_variant<T: ?Sized + Serialize>(
+ self,
+ name: &'static str,
+ _variant_index: u32,
+ variant: &'static str,
+ _value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ self.handle_scalar(format!("{}::{}(_)", name, variant))
+ }
+
+ fn serialize_seq(
+ self,
+ _len: Option<usize>,
+ ) -> Result<Self::SerializeSeq, Self::Error> {
+ self.handle_container("sequence")
+ }
+
+ fn serialize_tuple(
+ self,
+ _len: usize,
+ ) -> Result<Self::SerializeTuple, Self::Error> {
+ self.handle_container("tuple")
+ }
+
+ fn serialize_tuple_struct(
+ self,
+ name: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeTupleStruct, Self::Error> {
+ self.handle_container(name)
+ }
+
+ fn serialize_tuple_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeTupleVariant, Self::Error> {
+ Err(Error::custom("serializing enum tuple variants is not supported"))
+ }
+
+ fn serialize_map(
+ self,
+ _len: Option<usize>,
+ ) -> Result<Self::SerializeMap, Self::Error> {
+ // The right behavior for serializing maps isn't clear.
+ Err(Error::custom(
+ "serializing maps is not supported, \
+ if you have a use case, please file an issue at \
+ https://github.com/BurntSushi/rust-csv",
+ ))
+ }
+
+ fn serialize_struct(
+ self,
+ name: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeStruct, Self::Error> {
+ self.handle_container(name)
+ }
+
+ fn serialize_struct_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeStructVariant, Self::Error> {
+ Err(Error::custom("serializing enum struct variants is not supported"))
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeSeq for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_element<T: ?Sized + Serialize>(
+ &mut self,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeTuple for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_element<T: ?Sized + Serialize>(
+ &mut self,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeTupleStruct for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ value.serialize(&mut **self)
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeTupleVariant for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ _value: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ unreachable!()
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeMap for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_key<T: ?Sized + Serialize>(
+ &mut self,
+ _key: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn serialize_value<T: ?Sized + Serialize>(
+ &mut self,
+ _value: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ unreachable!()
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeStruct for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ key: &'static str,
+ value: &T,
+ ) -> Result<(), Self::Error> {
+ // Grab old state and update state to `EncounteredStructField`.
+ let old_state =
+ mem::replace(&mut self.state, HeaderState::EncounteredStructField);
+ if let HeaderState::ErrorIfWrite(err) = old_state {
+ return Err(err);
+ }
+ self.wtr.write_field(key)?;
+
+ // Check that there aren't any containers in the value.
+ self.state = HeaderState::InStructField;
+ value.serialize(&mut **self)?;
+ self.state = HeaderState::EncounteredStructField;
+
+ Ok(())
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+}
+
+impl<'a, 'w, W: io::Write> SerializeStructVariant for &'a mut SeHeader<'w, W> {
+ type Ok = ();
+ type Error = Error;
+
+ fn serialize_field<T: ?Sized + Serialize>(
+ &mut self,
+ _key: &'static str,
+ _value: &T,
+ ) -> Result<(), Self::Error> {
+ unreachable!()
+ }
+
+ fn end(self) -> Result<Self::Ok, Self::Error> {
+ unreachable!()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use bstr::ByteSlice;
+ use serde::{serde_if_integer128, Serialize};
+
+ use crate::error::{Error, ErrorKind};
+ use crate::writer::Writer;
+
+ use super::{SeHeader, SeRecord};
+
+ fn serialize<S: Serialize>(s: S) -> String {
+ let mut wtr = Writer::from_writer(vec![]);
+ s.serialize(&mut SeRecord { wtr: &mut wtr }).unwrap();
+ wtr.write_record(None::<&[u8]>).unwrap();
+ String::from_utf8(wtr.into_inner().unwrap()).unwrap()
+ }
+
+ /// Serialize using `SeHeader`. Returns whether a header was written and
+ /// the output of the writer.
+ fn serialize_header<S: Serialize>(s: S) -> (bool, String) {
+ let mut wtr = Writer::from_writer(vec![]);
+ let wrote = {
+ let mut ser = SeHeader::new(&mut wtr);
+ s.serialize(&mut ser).unwrap();
+ ser.wrote_header()
+ };
+ (wrote, String::from_utf8(wtr.into_inner().unwrap()).unwrap())
+ }
+
+ fn serialize_err<S: Serialize>(s: S) -> Error {
+ let mut wtr = Writer::from_writer(vec![]);
+ s.serialize(&mut SeRecord { wtr: &mut wtr }).unwrap_err()
+ }
+
+ fn serialize_header_err<S: Serialize>(s: S) -> Error {
+ let mut wtr = Writer::from_writer(vec![]);
+ s.serialize(&mut SeHeader::new(&mut wtr)).unwrap_err()
+ }
+
+ #[test]
+ fn bool() {
+ let got = serialize(true);
+ assert_eq!(got, "true\n");
+ let (wrote, got) = serialize_header(true);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn integer() {
+ let got = serialize(12345);
+ assert_eq!(got, "12345\n");
+ let (wrote, got) = serialize_header(12345);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ serde_if_integer128! {
+ #[test]
+ fn integer_u128() {
+ let got = serialize(i128::max_value() as u128 + 1);
+ assert_eq!(got, "170141183460469231731687303715884105728\n");
+ let (wrote, got) = serialize_header(12345);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn integer_i128() {
+ let got = serialize(i128::max_value());
+ assert_eq!(got, "170141183460469231731687303715884105727\n");
+ let (wrote, got) = serialize_header(12345);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+ }
+
+ #[test]
+ fn float() {
+ let got = serialize(1.23);
+ assert_eq!(got, "1.23\n");
+ let (wrote, got) = serialize_header(1.23);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn float_nan() {
+ let got = serialize(::std::f64::NAN);
+ assert_eq!(got, "NaN\n");
+ let (wrote, got) = serialize_header(::std::f64::NAN);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn char() {
+ let got = serialize('☃');
+ assert_eq!(got, "☃\n");
+ let (wrote, got) = serialize_header('☃');
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn str() {
+ let got = serialize("how\nare\n\"you\"?");
+ assert_eq!(got, "\"how\nare\n\"\"you\"\"?\"\n");
+ let (wrote, got) = serialize_header("how\nare\n\"you\"?");
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn bytes() {
+ let got = serialize(b"how\nare\n\"you\"?".as_bstr());
+ assert_eq!(got, "\"how\nare\n\"\"you\"\"?\"\n");
+ let (wrote, got) = serialize_header(&b"how\nare\n\"you\"?"[..]);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn option() {
+ let got = serialize(None::<()>);
+ assert_eq!(got, "\"\"\n");
+ let (wrote, got) = serialize_header(None::<()>);
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let got = serialize(Some(5));
+ assert_eq!(got, "5\n");
+ let (wrote, got) = serialize_header(Some(5));
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn unit() {
+ let got = serialize(());
+ assert_eq!(got, "\"\"\n");
+ let (wrote, got) = serialize_header(());
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let got = serialize((5, ()));
+ assert_eq!(got, "5,\n");
+ let (wrote, got) = serialize_header(());
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn struct_unit() {
+ #[derive(Serialize)]
+ struct Foo;
+
+ let got = serialize(Foo);
+ assert_eq!(got, "Foo\n");
+ let (wrote, got) = serialize_header(Foo);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn struct_newtype() {
+ #[derive(Serialize)]
+ struct Foo(f64);
+
+ let got = serialize(Foo(1.5));
+ assert_eq!(got, "1.5\n");
+ let (wrote, got) = serialize_header(Foo(1.5));
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn enum_units() {
+ #[derive(Serialize)]
+ enum Wat {
+ Foo,
+ Bar,
+ Baz,
+ }
+
+ let got = serialize(Wat::Foo);
+ assert_eq!(got, "Foo\n");
+ let (wrote, got) = serialize_header(Wat::Foo);
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let got = serialize(Wat::Bar);
+ assert_eq!(got, "Bar\n");
+ let (wrote, got) = serialize_header(Wat::Bar);
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let got = serialize(Wat::Baz);
+ assert_eq!(got, "Baz\n");
+ let (wrote, got) = serialize_header(Wat::Baz);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn enum_newtypes() {
+ #[derive(Serialize)]
+ enum Wat {
+ Foo(i32),
+ Bar(f32),
+ Baz(bool),
+ }
+
+ let got = serialize(Wat::Foo(5));
+ assert_eq!(got, "5\n");
+ let (wrote, got) = serialize_header(Wat::Foo(5));
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let got = serialize(Wat::Bar(1.5));
+ assert_eq!(got, "1.5\n");
+ let (wrote, got) = serialize_header(Wat::Bar(1.5));
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let got = serialize(Wat::Baz(true));
+ assert_eq!(got, "true\n");
+ let (wrote, got) = serialize_header(Wat::Baz(true));
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn seq() {
+ let got = serialize(vec![1, 2, 3]);
+ assert_eq!(got, "1,2,3\n");
+ let (wrote, got) = serialize_header(vec![1, 2, 3]);
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn tuple() {
+ let row = (true, 1.5, "hi");
+ let got = serialize(row.clone());
+ assert_eq!(got, "true,1.5,hi\n");
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(!wrote);
+ assert_eq!(got, "");
+
+ let row = (true, 1.5, vec![1, 2, 3]);
+ let got = serialize(row.clone());
+ assert_eq!(got, "true,1.5,1,2,3\n");
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn tuple_struct() {
+ #[derive(Clone, Serialize)]
+ struct Foo(bool, i32, String);
+
+ let row = Foo(false, 42, "hi".to_string());
+ let got = serialize(row.clone());
+ assert_eq!(got, "false,42,hi\n");
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(!wrote);
+ assert_eq!(got, "");
+ }
+
+ #[test]
+ fn tuple_variant() {
+ #[derive(Clone, Serialize)]
+ enum Foo {
+ X(bool, i32, String),
+ }
+
+ let row = Foo::X(false, 42, "hi".to_string());
+ let err = serialize_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ let err = serialize_header_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ }
+
+ #[test]
+ fn enum_struct_variant() {
+ #[derive(Clone, Serialize)]
+ enum Foo {
+ X { a: bool, b: i32, c: String },
+ }
+
+ let row = Foo::X { a: false, b: 1, c: "hi".into() };
+ let err = serialize_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ let err = serialize_header_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ }
+
+ #[test]
+ fn struct_no_headers() {
+ #[derive(Serialize)]
+ struct Foo {
+ x: bool,
+ y: i32,
+ z: String,
+ }
+
+ let got = serialize(Foo { x: true, y: 5, z: "hi".into() });
+ assert_eq!(got, "true,5,hi\n");
+ }
+
+ serde_if_integer128! {
+ #[test]
+ fn struct_no_headers_128() {
+ #[derive(Serialize)]
+ struct Foo {
+ x: i128,
+ y: u128,
+ }
+
+ let got =
+ serialize(Foo { x: i128::max_value(), y: u128::max_value() });
+ assert_eq!(
+ got,
+ "170141183460469231731687303715884105727,\
+ 340282366920938463463374607431768211455\n"
+ );
+ }
+ }
+
+ #[test]
+ fn struct_headers() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ x: bool,
+ y: i32,
+ z: String,
+ }
+
+ let row = Foo { x: true, y: 5, z: "hi".into() };
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(wrote);
+ assert_eq!(got, "x,y,z");
+ let got = serialize(row.clone());
+ assert_eq!(got, "true,5,hi\n");
+ }
+
+ #[test]
+ fn struct_headers_nested() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ nest: Nested,
+ }
+ #[derive(Clone, Serialize)]
+ struct Nested {
+ label2: String,
+ value: i32,
+ }
+
+ let row = Foo {
+ label: "foo".into(),
+ nest: Nested { label2: "bar".into(), value: 5 },
+ };
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "foo,bar,5\n");
+
+ let err = serialize_header_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ }
+
+ #[test]
+ fn struct_headers_nested_seq() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ values: Vec<i32>,
+ }
+ let row = Foo { label: "foo".into(), values: vec![1, 2, 3] };
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "foo,1,2,3\n");
+
+ let err = serialize_header_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ }
+
+ #[test]
+ fn struct_headers_inside_tuple() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ num: f64,
+ }
+ #[derive(Clone, Serialize)]
+ struct Bar {
+ label2: bool,
+ value: i32,
+ empty: (),
+ }
+ let row = (
+ Foo { label: "hi".to_string(), num: 5.0 },
+ Bar { label2: true, value: 3, empty: () },
+ Foo { label: "baz".to_string(), num: 2.3 },
+ );
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "hi,5.0,true,3,,baz,2.3\n");
+
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(wrote);
+ assert_eq!(got, "label,num,label2,value,empty,label,num");
+ }
+
+ #[test]
+ fn struct_headers_inside_tuple_scalar_before() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ num: f64,
+ }
+ let row = (3.14, Foo { label: "hi".to_string(), num: 5.0 });
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "3.14,hi,5.0\n");
+
+ let err = serialize_header_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ }
+
+ #[test]
+ fn struct_headers_inside_tuple_scalar_after() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ num: f64,
+ }
+ let row = (Foo { label: "hi".to_string(), num: 5.0 }, 3.14);
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "hi,5.0,3.14\n");
+
+ let err = serialize_header_err(row.clone());
+ match *err.kind() {
+ ErrorKind::Serialize(_) => {}
+ ref x => panic!("expected ErrorKind::Serialize but got '{:?}'", x),
+ }
+ }
+
+ #[test]
+ fn struct_headers_inside_seq() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ num: f64,
+ }
+ let row = vec![
+ Foo { label: "hi".to_string(), num: 5.0 },
+ Foo { label: "baz".to_string(), num: 2.3 },
+ ];
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "hi,5.0,baz,2.3\n");
+
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(wrote);
+ assert_eq!(got, "label,num,label,num");
+ }
+
+ #[test]
+ fn struct_headers_inside_nested_tuple_seq() {
+ #[derive(Clone, Serialize)]
+ struct Foo {
+ label: String,
+ num: f64,
+ }
+ #[derive(Clone, Serialize)]
+ struct Bar {
+ label2: Baz,
+ value: i32,
+ empty: (),
+ }
+ #[derive(Clone, Serialize)]
+ struct Baz(bool);
+ let row = (
+ (
+ Foo { label: "hi".to_string(), num: 5.0 },
+ Bar { label2: Baz(true), value: 3, empty: () },
+ ),
+ vec![(Foo { label: "baz".to_string(), num: 2.3 },)],
+ );
+
+ let got = serialize(row.clone());
+ assert_eq!(got, "hi,5.0,true,3,,baz,2.3\n");
+
+ let (wrote, got) = serialize_header(row.clone());
+ assert!(wrote);
+ assert_eq!(got, "label,num,label2,value,empty,label,num");
+ }
+}
diff --git a/src/string_record.rs b/src/string_record.rs
new file mode 100644
index 0000000..4e807f2
--- /dev/null
+++ b/src/string_record.rs
@@ -0,0 +1,830 @@
+use std::fmt;
+use std::io;
+use std::iter::FromIterator;
+use std::ops::{self, Range};
+use std::result;
+use std::str;
+
+use serde::de::Deserialize;
+
+use crate::byte_record::{ByteRecord, ByteRecordIter, Position};
+use crate::deserializer::deserialize_string_record;
+use crate::error::{Error, ErrorKind, FromUtf8Error, Result};
+use crate::reader::Reader;
+
+/// A single CSV record stored as valid UTF-8 bytes.
+///
+/// A string record permits reading or writing CSV rows that are valid UTF-8.
+/// If string records are used to read CSV data that is not valid UTF-8, then
+/// the CSV reader will return an invalid UTF-8 error. If you do need to read
+/// possibly invalid UTF-8 data, then you should prefer using a
+/// [`ByteRecord`](struct.ByteRecord.html),
+/// since it makes no assumptions about UTF-8.
+///
+/// If you are using the Serde (de)serialization APIs, then you probably never
+/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
+/// are some circumstances in which you might need to use a raw record type
+/// while still using Serde. For example, if you need to deserialize possibly
+/// invalid UTF-8 fields, then you'll need to first read your record into a
+/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
+/// reason for using the raw record deserialization APIs is if you're using
+/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
+///
+/// Two `StringRecord`s are compared on the basis of their field data. Any
+/// position information associated with the records is ignored.
+#[derive(Clone, Eq)]
+pub struct StringRecord(ByteRecord);
+
+impl PartialEq for StringRecord {
+ fn eq(&self, other: &StringRecord) -> bool {
+ self.0.iter_eq(&other.0)
+ }
+}
+
+impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for StringRecord {
+ fn eq(&self, other: &Vec<T>) -> bool {
+ self.0.iter_eq(other)
+ }
+}
+
+impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a StringRecord {
+ fn eq(&self, other: &Vec<T>) -> bool {
+ self.0.iter_eq(other)
+ }
+}
+
+impl<T: AsRef<[u8]>> PartialEq<[T]> for StringRecord {
+ fn eq(&self, other: &[T]) -> bool {
+ self.0.iter_eq(other)
+ }
+}
+
+impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a StringRecord {
+ fn eq(&self, other: &[T]) -> bool {
+ self.0.iter_eq(other)
+ }
+}
+
+impl fmt::Debug for StringRecord {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let fields: Vec<&str> = self.iter().collect();
+ write!(f, "StringRecord({:?})", fields)
+ }
+}
+
+impl Default for StringRecord {
+ #[inline]
+ fn default() -> StringRecord {
+ StringRecord::new()
+ }
+}
+
+impl StringRecord {
+ /// Create a new empty `StringRecord`.
+ ///
+ /// Note that you may find the `StringRecord::from` constructor more
+ /// convenient, which is provided by an impl of the `From` trait.
+ ///
+ /// # Example: create an empty record
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::new();
+ /// assert_eq!(record.len(), 0);
+ /// ```
+ ///
+ /// # Example: initialize a record from a `Vec`
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// ```
+ #[inline]
+ pub fn new() -> StringRecord {
+ StringRecord(ByteRecord::new())
+ }
+
+ /// Create a new empty `StringRecord` with the given capacity.
+ ///
+ /// `buffer` refers to the capacity of the buffer used to store the
+ /// actual row contents. `fields` refers to the number of fields one
+ /// might expect to store.
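+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// // Capacity for roughly 10 bytes of field data and 3 fields; the
+ /// // record itself starts out empty.
+ /// let record = StringRecord::with_capacity(10, 3);
+ /// assert!(record.is_empty());
+ /// ```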
+ #[inline]
+ pub fn with_capacity(buffer: usize, fields: usize) -> StringRecord {
+ StringRecord(ByteRecord::with_capacity(buffer, fields))
+ }
+
+ /// Create a new `StringRecord` from a `ByteRecord`.
+ ///
+ /// Note that this does UTF-8 validation. If the given `ByteRecord` does
+ /// not contain valid UTF-8, then this returns an error. The error includes
+ /// the UTF-8 error and the original `ByteRecord`.
+ ///
+ /// # Example: valid UTF-8
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ByteRecord, StringRecord};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// let str_record = StringRecord::from_byte_record(byte_record)?;
+ /// assert_eq!(str_record.len(), 3);
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Example: invalid UTF-8
+ ///
+ /// ```
+ /// use csv::{ByteRecord, StringRecord};
+ ///
+ /// let byte_record = ByteRecord::from(vec![
+ /// &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..],
+ /// ]);
+ /// let err = StringRecord::from_byte_record(byte_record).unwrap_err();
+ /// assert_eq!(err.utf8_error().field(), 1);
+ /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+ /// ```
+ #[inline]
+ pub fn from_byte_record(
+ record: ByteRecord,
+ ) -> result::Result<StringRecord, FromUtf8Error> {
+ match record.validate() {
+ Ok(()) => Ok(StringRecord(record)),
+ Err(err) => Err(FromUtf8Error::new(record, err)),
+ }
+ }
+
+ /// Lossily create a new `StringRecord` from a `ByteRecord`.
+ ///
+ /// This is like `StringRecord::from_byte_record`, except all invalid UTF-8
+ /// sequences are replaced with the `U+FFFD REPLACEMENT CHARACTER`, which
+ /// looks like this: �.
+ ///
+ /// # Example: valid UTF-8
+ ///
+ /// ```
+ /// use csv::{ByteRecord, StringRecord};
+ ///
+ /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]);
+ /// let str_record = StringRecord::from_byte_record_lossy(byte_record);
+ /// assert_eq!(str_record.len(), 3);
+ /// ```
+ ///
+ /// # Example: invalid UTF-8
+ ///
+ /// ```
+ /// use csv::{ByteRecord, StringRecord};
+ ///
+ /// let byte_record = ByteRecord::from(vec![
+ /// &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..],
+ /// ]);
+ /// let str_record = StringRecord::from_byte_record_lossy(byte_record);
+ /// assert_eq!(&str_record[0], "quux");
+ /// assert_eq!(&str_record[1], "foo�bar");
+ /// assert_eq!(&str_record[2], "c");
+ /// ```
+ #[inline]
+ pub fn from_byte_record_lossy(record: ByteRecord) -> StringRecord {
+ // If the record is valid UTF-8, then take the easy path.
+ if let Ok(()) = record.validate() {
+ return StringRecord(record);
+ }
+ // TODO: We can be faster here. Not sure if it's worth it.
+ let mut str_record =
+ StringRecord::with_capacity(record.as_slice().len(), record.len());
+ for field in &record {
+ str_record.push_field(&String::from_utf8_lossy(field));
+ }
+ str_record
+ }
+
+ /// Deserialize this record.
+ ///
+ /// The `D` type parameter refers to the type that this record should be
+ /// deserialized into. The `'de` lifetime refers to the lifetime of the
+ /// `StringRecord`. The `'de` lifetime permits deserializing into structs
+ /// that borrow field data from this record.
+ ///
+ /// An optional `headers` parameter permits deserializing into a struct
+ /// based on its field names (corresponding to header values) rather than
+ /// the order in which the fields are defined.
+ ///
+ /// # Example: without headers
+ ///
+ /// This shows how to deserialize a single row into a struct based on the
+ /// order in which fields occur. This example also shows how to borrow
+ /// fields from the `StringRecord`, which results in zero allocation
+ /// deserialization.
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::StringRecord;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Deserialize)]
+ /// struct Row<'a> {
+ /// city: &'a str,
+ /// country: &'a str,
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap() }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let record = StringRecord::from(vec![
+ /// "Boston", "United States", "4628910",
+ /// ]);
+ ///
+ /// let row: Row = record.deserialize(None)?;
+ /// assert_eq!(row.city, "Boston");
+ /// assert_eq!(row.country, "United States");
+ /// assert_eq!(row.population, 4628910);
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Example: with headers
+ ///
+ /// This example is like the previous one, but shows how to deserialize
+ /// into a struct based on the struct's field names. For this to work,
+ /// you must provide a header row.
+ ///
+ /// This example also shows that you can deserialize into owned data
+ /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::StringRecord;
+ /// use serde::Deserialize;
+ ///
+ /// #[derive(Deserialize)]
+ /// struct Row {
+ /// city: String,
+ /// country: String,
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap() }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// // Notice that the fields are not in the same order
+ /// // as the fields in the struct!
+ /// let header = StringRecord::from(vec![
+ /// "country", "city", "population",
+ /// ]);
+ /// let record = StringRecord::from(vec![
+ /// "United States", "Boston", "4628910",
+ /// ]);
+ ///
+ /// let row: Row = record.deserialize(Some(&header))?;
+ /// assert_eq!(row.city, "Boston");
+ /// assert_eq!(row.country, "United States");
+ /// assert_eq!(row.population, 4628910);
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn deserialize<'de, D: Deserialize<'de>>(
+ &'de self,
+ headers: Option<&'de StringRecord>,
+ ) -> Result<D> {
+ deserialize_string_record(self, headers)
+ }
+
+ /// Returns an iterator over all fields in this record.
+ ///
+ /// # Example
+ ///
+ /// This example shows how to iterate over each field in a `StringRecord`.
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::from(vec!["a", "b", "c"]);
+ /// for field in record.iter() {
+ /// assert!(field == "a" || field == "b" || field == "c");
+ /// }
+ /// ```
+ #[inline]
+ pub fn iter(&self) -> StringRecordIter {
+ self.into_iter()
+ }
+
+ /// Return the field at index `i`.
+ ///
+ /// If no field at index `i` exists, then this returns `None`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.get(1), Some("b"));
+ /// assert_eq!(record.get(3), None);
+ /// ```
+ #[inline]
+ pub fn get(&self, i: usize) -> Option<&str> {
+ self.0.get(i).map(|bytes| {
+ debug_assert!(str::from_utf8(bytes).is_ok());
+ // This is safe because we guarantee that all string records
+ // have a valid UTF-8 buffer. It's also safe because we
+ // individually check each field for valid UTF-8.
+ unsafe { str::from_utf8_unchecked(bytes) }
+ })
+ }
+
+ /// Returns true if and only if this record is empty.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// assert!(StringRecord::new().is_empty());
+ /// ```
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Returns the number of fields in this record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// ```
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Truncate this record to `n` fields.
+ ///
+ /// If `n` is greater than the number of fields in this record, then this
+ /// has no effect.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let mut record = StringRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// record.truncate(1);
+ /// assert_eq!(record.len(), 1);
+ /// assert_eq!(record, vec!["a"]);
+ /// ```
+ #[inline]
+ pub fn truncate(&mut self, n: usize) {
+ self.0.truncate(n);
+ }
+
+ /// Clear this record so that it has zero fields.
+ ///
+ /// Note that it is not necessary to clear the record to reuse it with
+ /// the CSV reader.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let mut record = StringRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(record.len(), 3);
+ /// record.clear();
+ /// assert_eq!(record.len(), 0);
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ self.0.clear();
+ }
+
+ /// Trim the fields of this record so that leading and trailing whitespace
+ /// is removed.
+ ///
+ /// This method uses the Unicode definition of whitespace.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let mut record = StringRecord::from(vec![
+ /// " ", "\u{3000}\tfoo ", "bar ", "b a z",
+ /// ]);
+ /// record.trim();
+ /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
+ /// ```
+ pub fn trim(&mut self) {
+ let length = self.len();
+ if length == 0 {
+ return;
+ }
+ // TODO: We could likely do this in place, but for now, we allocate.
+ let mut trimmed =
+ StringRecord::with_capacity(self.as_slice().len(), self.len());
+ trimmed.set_position(self.position().cloned());
+ for field in &*self {
+ trimmed.push_field(field.trim());
+ }
+ *self = trimmed;
+ }
+
+ /// Add a new field to this record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let mut record = StringRecord::new();
+ /// record.push_field("foo");
+ /// assert_eq!(&record[0], "foo");
+ /// ```
+ #[inline]
+ pub fn push_field(&mut self, field: &str) {
+ self.0.push_field(field.as_bytes());
+ }
+
+ /// Return the position of this record, if available.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{StringRecord, ReaderBuilder};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut record = StringRecord::new();
+ /// let mut rdr = ReaderBuilder::new()
+ /// .has_headers(false)
+ /// .from_reader("a,b,c\nx,y,z".as_bytes());
+ ///
+ /// assert!(rdr.read_record(&mut record)?);
+ /// {
+ /// let pos = record.position().expect("a record position");
+ /// assert_eq!(pos.byte(), 0);
+ /// assert_eq!(pos.line(), 1);
+ /// assert_eq!(pos.record(), 0);
+ /// }
+ ///
+ /// assert!(rdr.read_record(&mut record)?);
+ /// {
+ /// let pos = record.position().expect("a record position");
+ /// assert_eq!(pos.byte(), 6);
+ /// assert_eq!(pos.line(), 2);
+ /// assert_eq!(pos.record(), 1);
+ /// }
+ ///
+ /// // Finish the CSV reader for good measure.
+ /// assert!(!rdr.read_record(&mut record)?);
+ /// Ok(())
+ /// }
+ /// ```
+ #[inline]
+ pub fn position(&self) -> Option<&Position> {
+ self.0.position()
+ }
+
+ /// Set the position of this record.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::{StringRecord, Position};
+ ///
+ /// let mut record = StringRecord::from(vec!["a", "b", "c"]);
+ /// let mut pos = Position::new();
+ /// pos.set_byte(100);
+ /// pos.set_line(4);
+ /// pos.set_record(2);
+ ///
+ /// record.set_position(Some(pos.clone()));
+ /// assert_eq!(record.position(), Some(&pos));
+ /// ```
+ #[inline]
+ pub fn set_position(&mut self, pos: Option<Position>) {
+ self.0.set_position(pos);
+ }
+
+ /// Return the start and end position of a field in this record.
+ ///
+ /// If no such field exists at the given index, then return `None`.
+ ///
+ /// The range returned can be used with the slice returned by `as_slice`.
+ /// Namely, the range returned is guaranteed to start and end at valid
+ /// UTF-8 sequence boundaries.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::from(vec!["foo", "quux", "z"]);
+ /// let range = record.range(1).expect("a record range");
+ /// assert_eq!(&record.as_slice()[range], "quux");
+ /// ```
+ #[inline]
+ pub fn range(&self, i: usize) -> Option<Range<usize>> {
+ self.0.range(i)
+ }
+
+ /// Return the entire row as a single string slice. The slice returned
+ /// stores all fields contiguously. The boundaries of each field can be
+ /// determined via the `range` method.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let record = StringRecord::from(vec!["foo", "quux", "z"]);
+ /// assert_eq!(record.as_slice(), "fooquuxz");
+ /// ```
+ #[inline]
+ pub fn as_slice(&self) -> &str {
+ debug_assert!(str::from_utf8(self.0.as_slice()).is_ok());
+ // This is safe because we guarantee that each field is valid UTF-8.
+ // If each field is valid UTF-8, then the entire buffer (up to the end
+ // of the last field) must also be valid UTF-8.
+ unsafe { str::from_utf8_unchecked(self.0.as_slice()) }
+ }
+
+ /// Return a reference to this record's raw
+ /// [`ByteRecord`](struct.ByteRecord.html).
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
+ /// let byte_record = str_record.as_byte_record();
+ /// assert_eq!(&byte_record[2], b"c");
+ /// ```
+ #[inline]
+ pub fn as_byte_record(&self) -> &ByteRecord {
+ &self.0
+ }
+
+ /// Convert this `StringRecord` into a
+ /// [`ByteRecord`](struct.ByteRecord.html).
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use csv::StringRecord;
+ ///
+ /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
+ /// let byte_record = str_record.into_byte_record();
+ /// assert_eq!(&byte_record[2], b"c");
+ /// ```
+ ///
+ /// Note that this can also be achieved using the `From` impl:
+ ///
+ /// ```
+ /// use csv::{ByteRecord, StringRecord};
+ ///
+ /// // Using ByteRecord::from...
+ /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
+ /// assert_eq!(ByteRecord::from(str_record).len(), 3);
+ ///
+ /// // Using StringRecord::into...
+ /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
+ /// let byte_record: ByteRecord = str_record.into();
+ /// assert_eq!(byte_record.len(), 3);
+ /// ```
+ #[inline]
+ pub fn into_byte_record(self) -> ByteRecord {
+ self.0
+ }
+
+ /// A safe function for reading CSV data into a `StringRecord`.
+ ///
+ /// This relies on the internal representation of `StringRecord`.
+ #[inline(always)]
+ pub(crate) fn read<R: io::Read>(
+ &mut self,
+ rdr: &mut Reader<R>,
+ ) -> Result<bool> {
+ // SAFETY: This code is critical to upholding the safety of other code
+ // blocks in this module. Namely, after calling `read_byte_record`,
+ // it is possible for `record` to contain invalid UTF-8. We check for
+ // this in the `validate` method, and if it does have invalid UTF-8, we
+ // clear the record. (It is bad for `record` to contain invalid UTF-8
+ // because other accessor methods, like `get`, assume that every field
+ // is valid UTF-8.)
+ let pos = rdr.position().clone();
+ let read_res = rdr.read_byte_record(&mut self.0);
+ let utf8_res = match self.0.validate() {
+ Ok(()) => Ok(()),
+ Err(err) => {
+ // If this record isn't valid UTF-8, then completely wipe it.
+ self.0.clear();
+ Err(err)
+ }
+ };
+ match (read_res, utf8_res) {
+ (Err(err), _) => Err(err),
+ (Ok(_), Err(err)) => {
+ Err(Error::new(ErrorKind::Utf8 { pos: Some(pos), err: err }))
+ }
+ (Ok(eof), Ok(())) => Ok(eof),
+ }
+ }
+}
+
+impl ops::Index<usize> for StringRecord {
+ type Output = str;
+ #[inline]
+ fn index(&self, i: usize) -> &str {
+ self.get(i).unwrap()
+ }
+}
+
+impl<T: AsRef<str>> From<Vec<T>> for StringRecord {
+ #[inline]
+ fn from(xs: Vec<T>) -> StringRecord {
+ StringRecord::from_iter(xs)
+ }
+}
+
+impl<'a, T: AsRef<str>> From<&'a [T]> for StringRecord {
+ #[inline]
+ fn from(xs: &'a [T]) -> StringRecord {
+ StringRecord::from_iter(xs)
+ }
+}
+
+impl<T: AsRef<str>> FromIterator<T> for StringRecord {
+ #[inline]
+ fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> StringRecord {
+ let mut record = StringRecord::new();
+ record.extend(iter);
+ record
+ }
+}
+
+impl<T: AsRef<str>> Extend<T> for StringRecord {
+ #[inline]
+ fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
+ for x in iter {
+ self.push_field(x.as_ref());
+ }
+ }
+}
+
+impl<'a> IntoIterator for &'a StringRecord {
+ type IntoIter = StringRecordIter<'a>;
+ type Item = &'a str;
+
+ #[inline]
+ fn into_iter(self) -> StringRecordIter<'a> {
+ StringRecordIter(self.0.iter())
+ }
+}
+
+/// An iterator over the fields in a string record.
+///
+/// The `'r` lifetime variable refers to the lifetime of the `StringRecord`
+/// that is being iterated over.
+#[derive(Clone)]
+pub struct StringRecordIter<'r>(ByteRecordIter<'r>);
+
+impl<'r> Iterator for StringRecordIter<'r> {
+ type Item = &'r str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'r str> {
+ self.0.next().map(|bytes| {
+ debug_assert!(str::from_utf8(bytes).is_ok());
+ // See StringRecord::get for safety argument.
+ unsafe { str::from_utf8_unchecked(bytes) }
+ })
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.0.size_hint()
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ self.0.len()
+ }
+}
+
+impl<'r> DoubleEndedIterator for StringRecordIter<'r> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'r str> {
+ self.0.next_back().map(|bytes| {
+ debug_assert!(str::from_utf8(bytes).is_ok());
+ // See StringRecord::get for safety argument.
+ unsafe { str::from_utf8_unchecked(bytes) }
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::string_record::StringRecord;
+
+ #[test]
+ fn trim_front() {
+ let mut rec = StringRecord::from(vec![" abc"]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some("abc"));
+
+ let mut rec = StringRecord::from(vec![" abc", " xyz"]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some("abc"));
+ assert_eq!(rec.get(1), Some("xyz"));
+ }
+
+ #[test]
+ fn trim_back() {
+ let mut rec = StringRecord::from(vec!["abc "]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some("abc"));
+
+ let mut rec = StringRecord::from(vec!["abc ", "xyz "]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some("abc"));
+ assert_eq!(rec.get(1), Some("xyz"));
+ }
+
+ #[test]
+ fn trim_both() {
+ let mut rec = StringRecord::from(vec![" abc "]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some("abc"));
+
+ let mut rec = StringRecord::from(vec![" abc ", " xyz "]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some("abc"));
+ assert_eq!(rec.get(1), Some("xyz"));
+ }
+
+ #[test]
+ fn trim_does_not_panic_on_empty_records_1() {
+ let mut rec = StringRecord::from(vec![""]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(""));
+ }
+
+ #[test]
+ fn trim_does_not_panic_on_empty_records_2() {
+ let mut rec = StringRecord::from(vec!["", ""]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(""));
+ assert_eq!(rec.get(1), Some(""));
+ }
+
+ #[test]
+ fn trim_does_not_panic_on_empty_records_3() {
+ let mut rec = StringRecord::new();
+ rec.trim();
+ assert_eq!(rec.as_slice().len(), 0);
+ }
+
+ #[test]
+ fn trim_whitespace_only() {
+ let mut rec = StringRecord::from(vec![
+ "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{0085}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}",
+ ]);
+ rec.trim();
+ assert_eq!(rec.get(0), Some(""));
+ }
+
+ // Check that record equality respects field boundaries.
+ //
+ // Regression test for #138.
+ #[test]
+ fn eq_field_boundaries() {
+ let test1 = StringRecord::from(vec!["12", "34"]);
+ let test2 = StringRecord::from(vec!["123", "4"]);
+
+ assert_ne!(test1, test2);
+ }
+
+ // Check that record equality respects number of fields.
+ //
+ // Regression test for #138.
+ #[test]
+ fn eq_record_len() {
+ let test1 = StringRecord::from(vec!["12", "34", "56"]);
+ let test2 = StringRecord::from(vec!["12", "34"]);
+ assert_ne!(test1, test2);
+ }
+}
diff --git a/src/tutorial.rs b/src/tutorial.rs
new file mode 100644
index 0000000..9d5e607
--- /dev/null
+++ b/src/tutorial.rs
@@ -0,0 +1,2520 @@
+/*!
+A tutorial for handling CSV data in Rust.
+
+This tutorial will cover basic CSV reading and writing, automatic
+(de)serialization with Serde, CSV transformations and performance.
+
+This tutorial is targeted at beginner Rust programmers. Experienced Rust
+programmers may find this tutorial to be too verbose, but skimming may be
+useful. There is also a
+[cookbook](../cookbook/index.html)
+of examples for those who prefer more information density.
+
+For an introduction to Rust, please see the
+[official book](https://doc.rust-lang.org/book/second-edition/).
+If you haven't written any Rust code yet but have written code in another
+language, then this tutorial might be accessible to you without needing to read
+the book first.
+
+# Table of contents
+
+1. [Setup](#setup)
+1. [Basic error handling](#basic-error-handling)
+ * [Switch to recoverable errors](#switch-to-recoverable-errors)
+1. [Reading CSV](#reading-csv)
+ * [Reading headers](#reading-headers)
+ * [Delimiters, quotes and variable length records](#delimiters-quotes-and-variable-length-records)
+ * [Reading with Serde](#reading-with-serde)
+ * [Handling invalid data with Serde](#handling-invalid-data-with-serde)
+1. [Writing CSV](#writing-csv)
+ * [Writing tab separated values](#writing-tab-separated-values)
+ * [Writing with Serde](#writing-with-serde)
+1. [Pipelining](#pipelining)
+ * [Filter by search](#filter-by-search)
+ * [Filter by population count](#filter-by-population-count)
+1. [Performance](#performance)
+ * [Amortizing allocations](#amortizing-allocations)
+ * [Serde and zero allocation](#serde-and-zero-allocation)
+ * [CSV parsing without the standard library](#csv-parsing-without-the-standard-library)
+1. [Closing thoughts](#closing-thoughts)
+
+# Setup
+
+In this section, we'll get you set up with a simple program that reads CSV data
+and prints a "debug" version of each record. This assumes that you have the
+[Rust toolchain installed](https://www.rust-lang.org/install.html),
+which includes both Rust and Cargo.
+
+We'll start by creating a new Cargo project:
+
+```text
+$ cargo new --bin csvtutor
+$ cd csvtutor
+```
+
+Once inside `csvtutor`, open `Cargo.toml` in your favorite text editor and add
+`csv = "1.1"` to your `[dependencies]` section. At this point, your
+`Cargo.toml` should look something like this:
+
+```text
+[package]
+name = "csvtutor"
+version = "0.1.0"
+authors = ["Your Name"]
+
+[dependencies]
+csv = "1.1"
+```
+
+Next, let's build your project. Since you added the `csv` crate as a
+dependency, Cargo will automatically download it and compile it for you. To
+build your project, use Cargo:
+
+```text
+$ cargo build
+```
+
+This will produce a new binary, `csvtutor`, in your `target/debug` directory.
+It won't do much at this point, but you can run it:
+
+```text
+$ ./target/debug/csvtutor
+Hello, world!
+```
+
+Let's make our program do something useful. Our program will read CSV data on
+stdin and print debug output for each record on stdout. To write this program,
+open `src/main.rs` in your favorite text editor and replace its contents with
+this:
+
+```no_run
+//tutorial-setup-01.rs
+// Import the standard library's I/O module so we can read from stdin.
+use std::io;
+
+// The `main` function is where your program starts executing.
+fn main() {
+ // Create a CSV parser that reads data from stdin.
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ // Loop over each record.
+ for result in rdr.records() {
+ // An error may occur, so abort the program in an unfriendly way.
+ // We will make this more friendly later!
+ let record = result.expect("a CSV record");
+ // Print a debug version of the record.
+ println!("{:?}", record);
+ }
+}
+```
+
+Don't worry too much about what this code means; we'll dissect it in the next
+section. For now, try rebuilding your project:
+
+```text
+$ cargo build
+```
+
+Assuming that succeeds, let's try running our program. But first, we will need
+some CSV data to play with! For that, we will use a random selection of 100
+US cities, along with their population size and geographical coordinates. (We
+will use this same CSV data throughout the entire tutorial.) To get the data,
+download it from GitHub:
+
+```text
+$ curl -LO 'https://raw.githubusercontent.com/BurntSushi/rust-csv/master/examples/data/uspop.csv'
+```
+
+And now finally, run your program on `uspop.csv`:
+
+```text
+$ ./target/debug/csvtutor < uspop.csv
+StringRecord(["Davidsons Landing", "AK", "", "65.2419444", "-165.2716667"])
+StringRecord(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])
+StringRecord(["Oakman", "AL", "", "33.7133333", "-87.3886111"])
+# ... and much more
+```
+
+# Basic error handling
+
+Since reading CSV data can result in errors, error handling is pervasive
+throughout the examples in this tutorial. Therefore, we're going to spend a
+little bit of time going over basic error handling, and in particular, fix
+our previous example to show errors in a more friendly way. **If you're already
+comfortable with things like `Result` and `try!`/`?` in Rust, then you can
+safely skip this section.**
+
+Note that
+[The Rust Programming Language Book](https://doc.rust-lang.org/book/second-edition/)
+contains an
+[introduction to general error handling](https://doc.rust-lang.org/book/second-edition/ch09-00-error-handling.html).
+For a deeper dive, see
+[my blog post on error handling in Rust](http://blog.burntsushi.net/rust-error-handling/).
+The blog post is especially important if you plan on building Rust libraries.
+
+With that out of the way, error handling in Rust comes in two different forms:
+unrecoverable errors and recoverable errors.
+
+Unrecoverable errors generally correspond to things like bugs in your program,
+which might occur when an invariant or contract is broken. At that point, the
+state of your program is unpredictable, and there's typically little recourse
+other than *panicking*. In Rust, a panic is similar to simply aborting your
+program, but it will unwind the stack and clean up resources before your
+program exits.
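+
+For instance, this tiny program (not part of our tutorial project) panics
+immediately:
+
+```should_panic
+fn main() {
+ panic!("invariant broken: this is a bug, not a recoverable error");
+}
+```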
+
+On the other hand, recoverable errors generally correspond to predictable
+errors. A non-existent file or invalid CSV data are examples of recoverable
+errors. In Rust, recoverable errors are handled via `Result`. A `Result`
+represents the state of a computation that has either succeeded or failed.
+It is defined like so:
+
+```
+enum Result<T, E> {
+ Ok(T),
+ Err(E),
+}
+```
+
+That is, a `Result` either contains a value of type `T` when the computation
+succeeds, or it contains a value of type `E` when the computation fails.
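+
+As a quick standalone illustration (using only the standard library, not the
+csv crate), parsing a string into an integer yields exactly this shape of
+value:
+
+```
+let ok: Result<i32, std::num::ParseIntError> = "42".parse();
+assert_eq!(ok, Ok(42));
+
+let err: Result<i32, std::num::ParseIntError> = "forty-two".parse();
+assert!(err.is_err());
+```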
+
+The relationship between unrecoverable errors and recoverable errors is
+important. In particular, it is **strongly discouraged** to treat recoverable
+errors as if they were unrecoverable. For example, panicking when a file could
+not be found, or if some CSV data is invalid, is considered bad practice.
+Instead, predictable errors should be handled using Rust's `Result` type.
+
+With our newfound knowledge, let's re-examine our previous example and dissect
+its error handling.
+
+```no_run
+//tutorial-error-01.rs
+use std::io;
+
+fn main() {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ let record = result.expect("a CSV record");
+ println!("{:?}", record);
+ }
+}
+```
+
+There are two places where an error can occur in this program. The first is
+if there was a problem reading a record from stdin. The second is if there is
+a problem writing to stdout. In general, we will ignore the latter problem in
+this tutorial, although robust command line applications should probably try
+to handle it (e.g., when a broken pipe occurs). The former, however, is worth
+looking into in more detail. For example, if a user of this program provides
+invalid CSV data, then the program will panic:
+
+```text
+$ cat invalid
+header1,header2
+foo,bar
+quux,baz,foobar
+$ ./target/debug/csvtutor < invalid
+StringRecord { position: Some(Position { byte: 16, line: 2, record: 1 }), fields: ["foo", "bar"] }
+thread 'main' panicked at 'a CSV record: UnequalLengths { pos: Some(Position { byte: 24, line: 3, record: 2 }), expected_len: 2, len: 3 }', /checkout/src/libcore/result.rs:859
+note: Run with `RUST_BACKTRACE=1` for a backtrace.
+```
+
+What happened here? First and foremost, we should talk about why the CSV data
+is invalid. The CSV data consists of three records: a header and two data
+records. The header and first data record have two fields, but the second
+data record has three fields. By default, the csv crate will treat inconsistent
+record lengths as an error.
+(This behavior can be toggled using the
+[`ReaderBuilder::flexible`](../struct.ReaderBuilder.html#method.flexible)
+config knob.) This explains why the first data record is printed in this
+example, since it has the same number of fields as the header record. That is,
+we don't actually hit an error until we parse the second data record.
+
+(Note that the CSV reader automatically interprets the first record as a
+header. This can be toggled with the
+[`ReaderBuilder::has_headers`](../struct.ReaderBuilder.html#method.has_headers)
+config knob.)
+
+So what actually causes the panic to happen in our program? That would be the
+first line in our loop:
+
+```ignore
+for result in rdr.records() {
+ let record = result.expect("a CSV record"); // this panics
+ println!("{:?}", record);
+}
+```
+
+The key thing to understand here is that `rdr.records()` returns an iterator
+that yields `Result` values. That is, instead of yielding records, it yields
+a `Result` that contains either a record or an error. The `expect` method,
+which is defined on `Result`, *unwraps* the success value inside the `Result`.
+Since the `Result` might contain an error instead, `expect` will *panic* when
+it does contain an error.
+
+It might help to look at the implementation of `expect`:
+
+```ignore
+use std::fmt;
+
+// This says, "for all types T and E, where E can be turned into a human
+// readable debug message, define the `expect` method."
+impl<T, E: fmt::Debug> Result<T, E> {
+ fn expect(self, msg: &str) -> T {
+ match self {
+ Ok(t) => t,
+ Err(e) => panic!("{}: {:?}", msg, e),
+ }
+ }
+}
+```
+
+Since this causes a panic if the CSV data is invalid, and invalid CSV data is
+a perfectly predictable error, we've turned what should be a *recoverable*
+error into an *unrecoverable* error. We did this because panicking is
+expedient, but since it is bad practice, we will endeavor to avoid
+unrecoverable errors throughout the rest of the tutorial.
+
+## Switch to recoverable errors
+
+We'll convert our unrecoverable error to a recoverable error in three steps.
+First,
+let's get rid of the panic and print an error message manually:
+
+```no_run
+//tutorial-error-02.rs
+use std::io;
+use std::process;
+
+fn main() {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ // Examine our Result.
+ // If there was no problem, print the record.
+ // Otherwise, print the error message and quit the program.
+ match result {
+ Ok(record) => println!("{:?}", record),
+ Err(err) => {
+ println!("error reading CSV from <stdin>: {}", err);
+ process::exit(1);
+ }
+ }
+ }
+}
+```
+
+If we run our program again, we'll still see an error message, but it is no
+longer a panic message:
+
+```text
+$ cat invalid
+header1,header2
+foo,bar
+quux,baz,foobar
+$ ./target/debug/csvtutor < invalid
+StringRecord { position: Some(Position { byte: 16, line: 2, record: 1 }), fields: ["foo", "bar"] }
+error reading CSV from <stdin>: CSV error: record 2 (line: 3, byte: 24): found record with 3 fields, but the previous record has 2 fields
+```
+
+The second step for moving to recoverable errors is to put our CSV record loop
+into a separate function. This function then has the option of *returning* an
+error, which our `main` function can then inspect and decide what to do with.
+
+```no_run
+//tutorial-error-03.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ // Examine our Result.
+ // If there was no problem, print the record.
+ // Otherwise, convert our error to a Box<dyn Error> and return it.
+ match result {
+ Err(err) => return Err(From::from(err)),
+ Ok(record) => {
+ println!("{:?}", record);
+ }
+ }
+ }
+ Ok(())
+}
+```
+
+Our new function, `run`, has a return type of `Result<(), Box<dyn Error>>`. In
+simple terms, this says that `run` either returns nothing when successful, or
+if an error occurred, it returns a `Box<dyn Error>`, which stands for "any kind of
+error." A `Box<dyn Error>` is hard to inspect if we care about the specific error
+that occurred. But for our purposes, all we need to do is gracefully print an
+error message and exit the program.
+
+The third and final step is to replace our explicit `match` expression with a
+special Rust language feature: the question mark.
+
+```no_run
+//tutorial-error-04.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ // This is effectively the same code as our `match` in the
+ // previous example. In other words, `?` is syntactic sugar.
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+```
+
+This last step shows how we can use the `?` operator to automatically forward
+errors to our caller without having to do explicit case analysis with `match`
+ourselves. We will use `?` heavily throughout this tutorial, and it's
+important to note that it can **only be used in functions that return
+`Result`** (or another type, such as `Option`, that supports it).
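+
+(As an aside: in recent versions of Rust, `main` itself may return a `Result`,
+which means `?` can be used directly in `main`. A sketch:)
+
+```ignore
+use std::error::Error;
+
+fn main() -> Result<(), Box<dyn Error>> {
+    let mut rdr = csv::Reader::from_reader(std::io::stdin());
+    for result in rdr.records() {
+        println!("{:?}", result?);
+    }
+    Ok(())
+}
+```
+
+(The `run` pattern used in this tutorial is still handy, though, since it
+gives us full control over how errors are printed.)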
+
+We'll end this section with a word of caution: using `Box<dyn Error>` as our error
+type is the minimally acceptable thing we can do here. Namely, while it allows
+our program to gracefully handle errors, it makes it hard for callers to
+inspect the specific error condition that occurred. However, since this is a
+tutorial on writing command line programs that do CSV parsing, we will consider
+ourselves satisfied. If you'd like to know more, or are interested in writing
+a library that handles CSV data, then you should check out my
+[blog post on error handling](http://blog.burntsushi.net/rust-error-handling/).
+
+With all that said, if all you're doing is writing a one-off program to do
+CSV transformations, then using methods like `expect` and panicking when an
+error occurs is a perfectly reasonable thing to do. Nevertheless, this tutorial
+will endeavor to show idiomatic code.
+
+# Reading CSV
+
+Now that we've gotten you set up and covered basic error handling, it's time
+to do what we came here to do: handle CSV data. We've already seen how to read
+CSV data from `stdin`, but this section will cover how to read CSV data from
+files and how to configure our CSV reader to read data formatted with
+different delimiters and quoting strategies.
+
+First up, let's adapt the example we've been working with to accept a file
+path argument instead of stdin.
+
+```no_run
+//tutorial-read-01.rs
+use std::env;
+use std::error::Error;
+use std::ffi::OsString;
+use std::fs::File;
+use std::process;
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let file_path = get_first_arg()?;
+ let file = File::open(file_path)?;
+ let mut rdr = csv::Reader::from_reader(file);
+ for result in rdr.records() {
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+
+/// Returns the first positional argument sent to this process. If there are no
+/// positional arguments, then this returns an error.
+fn get_first_arg() -> Result<OsString, Box<dyn Error>> {
+ match env::args_os().nth(1) {
+ None => Err(From::from("expected 1 argument, but got none")),
+ Some(file_path) => Ok(file_path),
+ }
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+If you replace the contents of your `src/main.rs` file with the above code,
+then you should be able to rebuild your project and try it out:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor uspop.csv
+StringRecord(["Davidsons Landing", "AK", "", "65.2419444", "-165.2716667"])
+StringRecord(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])
+StringRecord(["Oakman", "AL", "", "33.7133333", "-87.3886111"])
+# ... and much more
+```
+
+This example contains two new pieces of code:
+
+1. Code for querying the positional arguments of your program. We put this code
+ into its own function called `get_first_arg`. Our program expects a file
+ path in the first position (which is indexed at `1`; the argument at index
+ `0` is the executable name), so if one doesn't exist, then `get_first_arg`
+ returns an error.
+2. Code for opening a file. In `run`, we open a file using `File::open`. If
+ there was a problem opening the file, we forward the error to the caller of
+ `run` (which is `main` in this program). Note that we do *not* wrap the
+ `File` in a buffer. The CSV reader does buffering internally, so there's
+ no need for the caller to do it.
+
+Now is a good time to introduce an alternate CSV reader constructor, which
+makes it slightly more convenient to open CSV data from a file. That is,
+instead of:
+
+```ignore
+let file_path = get_first_arg()?;
+let file = File::open(file_path)?;
+let mut rdr = csv::Reader::from_reader(file);
+```
+
+you can use:
+
+```ignore
+let file_path = get_first_arg()?;
+let mut rdr = csv::Reader::from_path(file_path)?;
+```
+
+`csv::Reader::from_path` will open the file for you and return an error if
+the file could not be opened.
+
+## Reading headers
+
+If you had a chance to look at the data inside `uspop.csv`, you would notice
+that there is a header record that looks like this:
+
+```text
+City,State,Population,Latitude,Longitude
+```
+
+Now, if you look back at the output of the commands you've run so far, you'll
+notice that the header record is never printed. Why is that? By default, the
+CSV reader will interpret the first record in CSV data as a header, which
+is typically distinct from the actual data in the records that follow.
+Therefore, the header record is always skipped whenever you try to read or
+iterate over the records in CSV data.
+
+The CSV reader does not try to be smart about the header record and does
+**not** employ any heuristics for automatically detecting whether the first
+record is a header or not. Instead, if you don't want to treat the first record
+as a header, you'll need to tell the CSV reader that there are no headers.
+
+To configure a CSV reader to do this, we'll need to use a
+[`ReaderBuilder`](../struct.ReaderBuilder.html)
+to build a CSV reader with our desired configuration. Here's an example that
+does just that. (Note that we've moved back to reading from `stdin`, since it
+produces terser examples.)
+
+```no_run
+//tutorial-read-headers-01.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::ReaderBuilder::new()
+ .has_headers(false)
+ .from_reader(io::stdin());
+ for result in rdr.records() {
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+If you compile and run this program with our `uspop.csv` data, then you'll see
+that the header record is now printed:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop.csv
+StringRecord(["City", "State", "Population", "Latitude", "Longitude"])
+StringRecord(["Davidsons Landing", "AK", "", "65.2419444", "-165.2716667"])
+StringRecord(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])
+StringRecord(["Oakman", "AL", "", "33.7133333", "-87.3886111"])
+```
+
+If you ever need to access the header record directly, then you can use the
+[`Reader::headers`](../struct.Reader.html#method.headers)
+method like so:
+
+```no_run
+//tutorial-read-headers-02.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ {
+ // We nest this call in its own scope because of lifetimes.
+ let headers = rdr.headers()?;
+ println!("{:?}", headers);
+ }
+ for result in rdr.records() {
+ let record = result?;
+ println!("{:?}", record);
+ }
+ // We can ask for the headers at any time. There's no need to nest this
+ // call in its own scope because we never try to borrow the reader again.
+ let headers = rdr.headers()?;
+ println!("{:?}", headers);
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+One interesting thing to note in this example is that we put the call to
+`rdr.headers()` in its own scope. We do this because `rdr.headers()` returns
+a *borrow* of the reader's internal header state. The nested scope in this
+code allows the borrow to end before we try to iterate over the records. If
+we didn't nest the call to `rdr.headers()` in its own scope, then the code
+wouldn't compile because we cannot borrow the reader's headers at the same time
+that we try to borrow the reader to iterate over its records.
+
+Another way of solving this problem is to *clone* the header record:
+
+```ignore
+let headers = rdr.headers()?.clone();
+```
+
+This converts it from a borrow of the CSV reader to a new owned value. This
+makes the code a bit easier to read, but at the cost of copying the header
+record into a new allocation.
+
+## Delimiters, quotes and variable length records
+
+In this section we'll temporarily depart from our `uspop.csv` data set and
+show how to read some CSV data that is a little less clean. This CSV data
+uses `;` as a delimiter, escapes quotes with `\"` (instead of `""`) and has
+records of varying length. Here's the data, which contains a list of WWE
+wrestlers and the year they started, if it's known:
+
+```text
+$ cat strange.csv
+"\"Hacksaw\" Jim Duggan";1987
+"Bret \"Hit Man\" Hart";1984
+# We're not sure when Rafael started, so omit the year.
+Rafael Halperin
+"\"Big Cat\" Ernie Ladd";1964
+"\"Macho Man\" Randy Savage";1985
+"Jake \"The Snake\" Roberts";1986
+```
+
+To read this CSV data, we'll want to do the following:
+
+1. Disable headers, since this data has none.
+2. Change the delimiter from `,` to `;`.
+3. Change the quote strategy from doubled (e.g., `""`) to escaped (e.g., `\"`).
+4. Permit flexible length records, since some omit the year.
+5. Ignore lines beginning with a `#`.
+
+All of this (and more!) can be configured with a
+[`ReaderBuilder`](../struct.ReaderBuilder.html),
+as seen in the following example:
+
+```no_run
+//tutorial-read-delimiter-01.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::ReaderBuilder::new()
+ .has_headers(false)
+ .delimiter(b';')
+ .double_quote(false)
+ .escape(Some(b'\\'))
+ .flexible(true)
+ .comment(Some(b'#'))
+ .from_reader(io::stdin());
+ for result in rdr.records() {
+ let record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Now re-compile your project and try running the program on `strange.csv`:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < strange.csv
+StringRecord(["\"Hacksaw\" Jim Duggan", "1987"])
+StringRecord(["Bret \"Hit Man\" Hart", "1984"])
+StringRecord(["Rafael Halperin"])
+StringRecord(["\"Big Cat\" Ernie Ladd", "1964"])
+StringRecord(["\"Macho Man\" Randy Savage", "1985"])
+StringRecord(["Jake \"The Snake\" Roberts", "1986"])
+```
+
+You should feel encouraged to play around with the settings. Some interesting
+things you might try:
+
+1. If you remove the `escape` setting, notice that no CSV errors are reported.
+ Instead, records are still parsed. This is a feature of the CSV parser. Even
+ though it gets the data slightly wrong, it still provides a parse that you
+ might be able to work with. This is a useful property given the messiness
+ of real world CSV data.
+2. If you remove the `delimiter` setting, parsing still succeeds, although
+ every record has exactly one field.
+3. If you remove the `flexible` setting, the reader will print the first two
+ records (since they both have the same number of fields), but will return a
+ parse error on the third record, since it has only one field.
+
+This covers most of the things you might want to configure on your CSV reader,
+although there are a few other knobs. For example, you can change the record
+terminator from a new line to any other character. (By default, the terminator
+is `CRLF`, which treats each of `\r\n`, `\r` and `\n` as single record
+terminators.) For more details, see the documentation and examples for each of
+the methods on
+[`ReaderBuilder`](../struct.ReaderBuilder.html).
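+
+For instance, here's a sketch (with a contrived terminator choice) of
+configuring `;` as the record terminator:
+
+```ignore
+// Sketch: treat `;` as the record terminator instead of new lines.
+let mut rdr = csv::ReaderBuilder::new()
+    .terminator(csv::Terminator::Any(b';'))
+    .from_reader(io::stdin());
+```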
+
+## Reading with Serde
+
+One of the most convenient features of this crate is its support for
+[Serde](https://serde.rs/).
+Serde is a framework for automatically serializing and deserializing data into
+Rust types. In simpler terms, that means instead of iterating over records
+as an array of string fields, we can iterate over records of a specific type
+of our choosing.
+
+For example, let's take a look at some data from our `uspop.csv` file:
+
+```text
+City,State,Population,Latitude,Longitude
+Davidsons Landing,AK,,65.2419444,-165.2716667
+Kenai,AK,7610,60.5544444,-151.2583333
+```
+
+While some of these fields make sense as strings (`City`, `State`), other
+fields look more like numbers. For example, `Population` looks like it contains
+integers while `Latitude` and `Longitude` appear to contain decimals. If we
+wanted to convert these fields to their "proper" types, then we'd need to do
+a lot of manual work. This next example shows how.
+
+```no_run
+//tutorial-read-serde-01.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.records() {
+ let record = result?;
+
+ let city = &record[0];
+ let state = &record[1];
+ // Some records are missing population counts, so if we can't
+ // parse a number, treat the population count as missing instead
+ // of returning an error.
+ let pop: Option<u64> = record[2].parse().ok();
+ // Lucky us! Latitudes and longitudes are available for every record.
+ // Therefore, if one couldn't be parsed, return an error.
+ let latitude: f64 = record[3].parse()?;
+ let longitude: f64 = record[4].parse()?;
+
+ println!(
+ "city: {:?}, state: {:?}, \
+ pop: {:?}, latitude: {:?}, longitude: {:?}",
+ city, state, pop, latitude, longitude);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+The problem here is that we need to parse each individual field manually, which
+can be labor intensive and repetitive. Serde, however, makes this process
+automatic. For example, we can ask to deserialize every record into a tuple
+type: `(String, String, Option<u64>, f64, f64)`.
+
+```no_run
+//tutorial-read-serde-02.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+// This introduces a type alias so that we can conveniently reference our
+// record type.
+type Record = (String, String, Option<u64>, f64, f64);
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ // Instead of creating an iterator with the `records` method, we create
+ // an iterator with the `deserialize` method.
+ for result in rdr.deserialize() {
+ // We must tell Serde what type we want to deserialize into.
+ let record: Record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Running this code should show similar output as previous examples:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop.csv
+("Davidsons Landing", "AK", None, 65.2419444, -165.2716667)
+("Kenai", "AK", Some(7610), 60.5544444, -151.2583333)
+("Oakman", "AL", None, 33.7133333, -87.3886111)
+# ... and much more
+```
+
+One of the downsides of using Serde this way is that the type you use must
+match the order of fields as they appear in each record. This can be a pain
+if your CSV data has a header record, since you might naturally think of each
+field as a named value rather than as a numbered one. One way to address this
+is to deserialize our record into a map type like
+[`HashMap`](https://doc.rust-lang.org/std/collections/struct.HashMap.html)
+or
+[`BTreeMap`](https://doc.rust-lang.org/std/collections/struct.BTreeMap.html).
+The next example shows how, and in particular, notice that the only thing that
+changed from the last example is the definition of the `Record` type alias and
+a new `use` statement that imports `HashMap` from the standard library:
+
+```no_run
+//tutorial-read-serde-03.rs
+use std::collections::HashMap;
+# use std::error::Error;
+# use std::io;
+# use std::process;
+
+// This introduces a type alias so that we can conveniently reference our
+// record type.
+type Record = HashMap<String, String>;
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.deserialize() {
+ let record: Record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Running this program shows similar results as before, but each record is
+printed as a map:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop.csv
+{"City": "Davidsons Landing", "Latitude": "65.2419444", "State": "AK", "Population": "", "Longitude": "-165.2716667"}
+{"City": "Kenai", "Population": "7610", "State": "AK", "Longitude": "-151.2583333", "Latitude": "60.5544444"}
+{"State": "AL", "City": "Oakman", "Longitude": "-87.3886111", "Population": "", "Latitude": "33.7133333"}
+```
+
+This method works especially well if you need to read CSV data with header
+records, but whose exact structure isn't known until your program runs.
+However, in our case, we know the structure of the data in `uspop.csv`. In
+particular, with the `HashMap` approach, we've lost the specific types we had
+for each field in the previous example when we deserialized each record into a
+`(String, String, Option<u64>, f64, f64)`. Is there a way to identify fields
+by their corresponding header name *and* assign each field its own unique
+type? The answer is yes, but we'll need to bring in Serde's `derive` feature
+first. You can do that by adding this to the `[dependencies]` section of your
+`Cargo.toml` file:
+
+```text
+serde = { version = "1", features = ["derive"] }
+```
+
+With this dependency added to our project, we can now define our own custom struct
+that represents our record. We then ask Serde to automatically write the glue
+code required to populate our struct from a CSV record. The next example shows
+how. Don't miss the new Serde imports!
+
+```no_run
+//tutorial-read-serde-04.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+// This lets us write `#[derive(Deserialize)]`.
+use serde::Deserialize;
+
+// We don't need to derive `Debug` (which doesn't require Serde), but it's a
+// good habit to do it for all your types.
+//
+// Notice that the field names in this struct are NOT in the same order as
+// the fields in the CSV data!
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+ latitude: f64,
+ longitude: f64,
+ population: Option<u64>,
+ city: String,
+ state: String,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.deserialize() {
+ let record: Record = result?;
+ println!("{:?}", record);
+ // Try this if you don't like each record smushed on one line:
+ // println!("{:#?}", record);
+ }
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+Compile and run this program to see similar output as before:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop.csv
+Record { latitude: 65.2419444, longitude: -165.2716667, population: None, city: "Davidsons Landing", state: "AK" }
+Record { latitude: 60.5544444, longitude: -151.2583333, population: Some(7610), city: "Kenai", state: "AK" }
+Record { latitude: 33.7133333, longitude: -87.3886111, population: None, city: "Oakman", state: "AL" }
+```
+
+Once again, we didn't need to change our `run` function at all: we're still
+iterating over records using the `deserialize` iterator that we started with
+in the beginning of this section. The only thing that changed in this example
+was the definition of the `Record` type and a new `use` statement. Our `Record`
+type is now a custom struct that we defined instead of a type alias, and as a
+result, Serde doesn't know how to deserialize it by default. However, Serde
+provides a procedural macro that reads your struct definition at compile time
+and generates code that will deserialize a CSV record
+into a `Record` value. To see what happens if you leave out the automatic
+derive, change `#[derive(Debug, Deserialize)]` to `#[derive(Debug)]`.
+
+One other thing worth mentioning in this example is the use of
+`#[serde(rename_all = "PascalCase")]`. This directive helps Serde map your
+struct's field names to the header names in the CSV data. If you recall, our
+header record is:
+
+```text
+City,State,Population,Latitude,Longitude
+```
+
+Notice that each name is capitalized, but the fields in our struct are not. The
+`#[serde(rename_all = "PascalCase")]` directive fixes that by interpreting each
+field in `PascalCase`, where the first letter of the field is capitalized. If
+we didn't tell Serde about the name remapping, then the program would quit
+with an error:
+
+```text
+$ ./target/debug/csvtutor < uspop.csv
+CSV deserialize error: record 1 (line: 2, byte: 41): missing field `latitude`
+```
+
+We could have fixed this through other means. For example, we could have used
+capital letters in our field names:
+
+```ignore
+#[derive(Debug, Deserialize)]
+struct Record {
+ Latitude: f64,
+ Longitude: f64,
+ Population: Option<u64>,
+ City: String,
+ State: String,
+}
+```
+
+However, this violates Rust naming style. (In fact, the Rust compiler
+will even warn you that the names do not follow convention!)
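+
+(If you really did want to keep the capitalized names, you could silence that
+warning with Rust's standard `non_snake_case` lint attribute, as in this
+sketch:)
+
+```ignore
+#[allow(non_snake_case)]
+#[derive(Debug, Deserialize)]
+struct Record {
+    Latitude: f64,
+    Longitude: f64,
+    Population: Option<u64>,
+    City: String,
+    State: String,
+}
+```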
+
+Another way to fix this is to ask Serde to rename each field individually. This
+is useful when there is no consistent name mapping from fields to header names:
+
+```ignore
+#[derive(Debug, Deserialize)]
+struct Record {
+ #[serde(rename = "Latitude")]
+ latitude: f64,
+ #[serde(rename = "Longitude")]
+ longitude: f64,
+ #[serde(rename = "Population")]
+ population: Option<u64>,
+ #[serde(rename = "City")]
+ city: String,
+ #[serde(rename = "State")]
+ state: String,
+}
+```
+
+To read more about renaming fields and about other Serde directives, please
+consult the
+[Serde documentation on attributes](https://serde.rs/attributes.html).
+
+## Handling invalid data with Serde
+
+In this section we will see a brief example of how to deal with data that isn't
+clean. To do this exercise, we'll work with a slightly tweaked version of the
+US population data we've been using throughout this tutorial. This version of
+the data is slightly messier than what we've been using. You can get it like
+so:
+
+```text
+$ curl -LO 'https://raw.githubusercontent.com/BurntSushi/rust-csv/master/examples/data/uspop-null.csv'
+```
+
+Let's start by running our program from the previous section:
+
+```no_run
+//tutorial-read-serde-invalid-01.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+# use serde::Deserialize;
+#
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+ latitude: f64,
+ longitude: f64,
+ population: Option<u64>,
+ city: String,
+ state: String,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.deserialize() {
+ let record: Record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Compile and run it on our messier data:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop-null.csv
+Record { latitude: 65.2419444, longitude: -165.2716667, population: None, city: "Davidsons Landing", state: "AK" }
+Record { latitude: 60.5544444, longitude: -151.2583333, population: Some(7610), city: "Kenai", state: "AK" }
+Record { latitude: 33.7133333, longitude: -87.3886111, population: None, city: "Oakman", state: "AL" }
+# ... more records
+CSV deserialize error: record 42 (line: 43, byte: 1710): field 2: invalid digit found in string
+```
+
+Oops! What happened? The program printed several records, but stopped when it
+tripped over a deserialization problem. The error message says that it found
+an invalid digit in the field at index `2` (which is the `Population` field)
+on line 43. What does line 43 look like?
+
+```text
+$ head -n 43 uspop-null.csv | tail -n1
+Flint Springs,KY,NULL,37.3433333,-86.7136111
+```
+
+Ah! The third field (index `2`) is supposed to either be empty or contain a
+population count. However, in this data, it seems that `NULL` sometimes appears
+as a value, presumably to indicate that there is no count available.
+
+The problem with our current program is that it fails to read this record
+because it doesn't know how to deserialize a `NULL` string into an
+`Option<u64>`. That is, an `Option<u64>` corresponds to either an empty field
+or an integer.
+
+To fix this, we tell Serde to convert any deserialization errors on this field
+to a `None` value, as shown in this next example:
+
+```no_run
+//tutorial-read-serde-invalid-02.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+# use serde::Deserialize;
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+ latitude: f64,
+ longitude: f64,
+ #[serde(deserialize_with = "csv::invalid_option")]
+ population: Option<u64>,
+ city: String,
+ state: String,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ for result in rdr.deserialize() {
+ let record: Record = result?;
+ println!("{:?}", record);
+ }
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+If you compile and run this example, then it should run to completion just
+like the other examples:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop-null.csv
+Record { latitude: 65.2419444, longitude: -165.2716667, population: None, city: "Davidsons Landing", state: "AK" }
+Record { latitude: 60.5544444, longitude: -151.2583333, population: Some(7610), city: "Kenai", state: "AK" }
+Record { latitude: 33.7133333, longitude: -87.3886111, population: None, city: "Oakman", state: "AL" }
+# ... and more
+```
+
+The only change in this example was adding this attribute to the `population`
+field in our `Record` type:
+
+```ignore
+#[serde(deserialize_with = "csv::invalid_option")]
+```
+
+The
+[`invalid_option`](../fn.invalid_option.html)
+function is a generic helper function that does one very simple thing: when
+applied to `Option` fields, it will convert any deserialization error into a
+`None` value. This is useful when you need to work with messy CSV data.
+
+# Writing CSV
+
+In this section we'll show a few examples that write CSV data. Writing CSV data
+tends to be a bit more straightforward than reading CSV data, since you get to
+control the output format.
+
+Let's start with the most basic example: writing a few CSV records to `stdout`.
+
+```no_run
+//tutorial-write-01.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+ // Since we're writing records manually, we must explicitly write our
+ // header record. A header record is written the same way that other
+ // records are written.
+ wtr.write_record(&["City", "State", "Population", "Latitude", "Longitude"])?;
+ wtr.write_record(&["Davidsons Landing", "AK", "", "65.2419444", "-165.2716667"])?;
+ wtr.write_record(&["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?;
+ wtr.write_record(&["Oakman", "AL", "", "33.7133333", "-87.3886111"])?;
+
+ // A CSV writer maintains an internal buffer, so it's important
+ // to flush the buffer when you're done.
+ wtr.flush()?;
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+Compiling and running this example results in CSV data being printed:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor
+City,State,Population,Latitude,Longitude
+Davidsons Landing,AK,,65.2419444,-165.2716667
+Kenai,AK,7610,60.5544444,-151.2583333
+Oakman,AL,,33.7133333,-87.3886111
+```
+
+Before moving on, it's worth taking a closer look at the `write_record`
+method. In this example, it looks rather simple, but if you're new to Rust then
+its type signature might look a little daunting:
+
+```ignore
+pub fn write_record<I, T>(&mut self, record: I) -> csv::Result<()>
+ where I: IntoIterator<Item=T>, T: AsRef<[u8]>
+{
+ // implementation elided
+}
+```
+
+To understand the type signature, we can break it down piece by piece.
+
+1. The method takes two parameters: `self` and `record`.
+2. `self` is a special parameter that corresponds to the `Writer` itself.
+3. `record` is the CSV record we'd like to write. Its type is `I`, which is
+ a generic type.
+4. In the method's `where` clause, the `I` type is constrained by the
+ `IntoIterator<Item=T>` bound. What that means is that `I` must satisfy the
+ `IntoIterator` trait. If you look at the documentation of the
+ [`IntoIterator` trait](https://doc.rust-lang.org/std/iter/trait.IntoIterator.html),
+ then we can see that it describes types that can build iterators. In this
+ case, we want an iterator that yields *another* generic type `T`, where
+ `T` is the type of each field we want to write.
+5. `T` also appears in the method's `where` clause, but its constraint is the
+ `AsRef<[u8]>` bound. The `AsRef` trait is a way to describe zero cost
+ conversions between types in Rust. In this case, the `[u8]` in `AsRef<[u8]>`
+ means that we want to be able to *borrow* a slice of bytes from `T`.
+ The CSV writer will take these bytes and write them as a single field.
+ The `AsRef<[u8]>` bound is useful because types like `String`, `&str`,
+ `Vec<u8>` and `&[u8]` all satisfy it.
+6. Finally, the method returns a `csv::Result<()>`, which is short-hand for
+ `Result<(), csv::Error>`. That means `write_record` either returns nothing
+ on success or returns a `csv::Error` on failure.
+
+Now, let's apply our newfound understanding of the type signature of
+`write_record`. If you recall, in our previous example, we used it like so:
+
+```ignore
+wtr.write_record(&["field 1", "field 2", "etc"])?;
+```
+
+So how do the types match up? Well, the type of each of our fields in this
+code is `&'static str` (which is the type of a string literal in Rust). Since
+we put them in a slice literal, the type of our parameter is
+`&'static [&'static str]`, or more succinctly written as `&[&str]` without the
+lifetime annotations. Since slices satisfy the `IntoIterator` bound and
+strings satisfy the `AsRef<[u8]>` bound, this ends up being a legal call.
+
+Here are a few more examples of ways you can call `write_record`:
+
+```no_run
+# use csv;
+# let mut wtr = csv::Writer::from_writer(vec![]);
+// A slice of byte strings.
+wtr.write_record(&[b"a", b"b", b"c"]);
+// A vector.
+wtr.write_record(vec!["a", "b", "c"]);
+// A string record.
+wtr.write_record(&csv::StringRecord::from(vec!["a", "b", "c"]));
+// A byte record.
+wtr.write_record(&csv::ByteRecord::from(vec!["a", "b", "c"]));
+```
+
+Finally, the example above can be easily adapted to write to a file instead
+of `stdout`:
+
+```no_run
+//tutorial-write-02.rs
+use std::env;
+use std::error::Error;
+use std::ffi::OsString;
+use std::process;
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let file_path = get_first_arg()?;
+ let mut wtr = csv::Writer::from_path(file_path)?;
+
+ wtr.write_record(&["City", "State", "Population", "Latitude", "Longitude"])?;
+ wtr.write_record(&["Davidsons Landing", "AK", "", "65.2419444", "-165.2716667"])?;
+ wtr.write_record(&["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?;
+ wtr.write_record(&["Oakman", "AL", "", "33.7133333", "-87.3886111"])?;
+
+ wtr.flush()?;
+ Ok(())
+}
+
+/// Returns the first positional argument sent to this process. If there are no
+/// positional arguments, then this returns an error.
+fn get_first_arg() -> Result<OsString, Box<dyn Error>> {
+ match env::args_os().nth(1) {
+ None => Err(From::from("expected 1 argument, but got none")),
+ Some(file_path) => Ok(file_path),
+ }
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+## Writing tab separated values
+
+In the previous section, we saw how to write some simple CSV data to `stdout`
+that looked like this:
+
+```text
+City,State,Population,Latitude,Longitude
+Davidsons Landing,AK,,65.2419444,-165.2716667
+Kenai,AK,7610,60.5544444,-151.2583333
+Oakman,AL,,33.7133333,-87.3886111
+```
+
+You might wonder to yourself: what's the point of using a CSV writer if the
+data is so simple? Well, the benefit of a CSV writer is that it can handle all
+types of data without sacrificing the integrity of your data. That is, it knows
+when to quote fields that contain special CSV characters (like commas or new
+lines) or escape literal quotes that appear in your data. The CSV writer can
+also be easily configured to use different delimiters or quoting strategies.
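+
+For example, here's a quick sketch (with made up field values) showing the
+writer quoting and escaping on our behalf:
+
+```ignore
+// The comma and the literal quotes force the writer to quote and escape
+// these fields for us.
+wtr.write_record(&["Boston, MA", "says \"hi\""])?;
+// Writes: "Boston, MA","says ""hi"""
+```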
+
+In this section, we'll take a look at how to tweak some of the settings
+on a CSV writer. In particular, we'll write TSV ("tab separated values")
+instead of CSV, and we'll ask the CSV writer to quote all non-numeric fields.
+Here's an example:
+
+```no_run
+//tutorial-write-delimiter-01.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut wtr = csv::WriterBuilder::new()
+ .delimiter(b'\t')
+ .quote_style(csv::QuoteStyle::NonNumeric)
+ .from_writer(io::stdout());
+
+ wtr.write_record(&["City", "State", "Population", "Latitude", "Longitude"])?;
+ wtr.write_record(&["Davidsons Landing", "AK", "", "65.2419444", "-165.2716667"])?;
+ wtr.write_record(&["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?;
+ wtr.write_record(&["Oakman", "AL", "", "33.7133333", "-87.3886111"])?;
+
+ wtr.flush()?;
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Compiling and running this example gives:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor
+"City" "State" "Population" "Latitude" "Longitude"
+"Davidsons Landing" "AK" "" 65.2419444 -165.2716667
+"Kenai" "AK" 7610 60.5544444 -151.2583333
+"Oakman" "AL" "" 33.7133333 -87.3886111
+```
+
+In this example, we used a new type
+[`QuoteStyle`](../enum.QuoteStyle.html).
+The `QuoteStyle` type represents the different quoting strategies available
+to you. The default is to add quotes to fields only when necessary. This
+probably works for most use cases, but you can also ask for quotes to always
+be put around fields, to never be put around fields or to always be put around
+non-numeric fields.
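+
+For instance, here's a sketch of asking the writer to quote every field:
+
+```ignore
+// Sketch: put quotes around every field, even numeric ones.
+let mut wtr = csv::WriterBuilder::new()
+    .quote_style(csv::QuoteStyle::Always)
+    .from_writer(io::stdout());
+```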
+
+## Writing with Serde
+
+Just like the CSV reader supports automatic deserialization into Rust types
+with Serde, the CSV writer supports automatic serialization from Rust types
+into CSV records using Serde. In this section, we'll learn how to use it.
+
+As with reading, let's start by seeing how we can serialize a Rust tuple.
+
+```no_run
+//tutorial-write-serde-01.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ // We still need to write headers manually.
+ wtr.write_record(&["City", "State", "Population", "Latitude", "Longitude"])?;
+
+ // But now we can write records by providing a normal Rust value.
+ //
+ // Note that the odd `None::<u64>` syntax is required because `None` on
+ // its own doesn't have a concrete type, but Serde needs a concrete type
+ // in order to serialize it. That is, `None` has type `Option<T>` but
+ // `None::<u64>` has type `Option<u64>`.
+ wtr.serialize(("Davidsons Landing", "AK", None::<u64>, 65.2419444, -165.2716667))?;
+ wtr.serialize(("Kenai", "AK", Some(7610), 60.5544444, -151.2583333))?;
+ wtr.serialize(("Oakman", "AL", None::<u64>, 33.7133333, -87.3886111))?;
+
+ wtr.flush()?;
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Compiling and running this program gives the expected output:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor
+City,State,Population,Latitude,Longitude
+Davidsons Landing,AK,,65.2419444,-165.2716667
+Kenai,AK,7610,60.5544444,-151.2583333
+Oakman,AL,,33.7133333,-87.3886111
+```
+
+The key thing to note in the above example is the use of `serialize` instead
+of `write_record` to write our data. In particular, `write_record` is used
+when writing a simple record that contains string-like data only. On the other
+hand, `serialize` is used when your data consists of more complex values like
+numbers, floats or optional values. Of course, you could always convert the
+complex values to strings and then use `write_record`, but Serde can do it for
+you automatically.
+
+As with reading, we can also serialize custom structs as CSV records. As a
+bonus, the fields in a struct will automatically be written as a header
+record!
+
+To write custom structs as CSV records, we'll need to make use of Serde's
+automatic `derive` feature again. As in the
+[previous section on reading with Serde](#reading-with-serde),
+we'll need to add the Serde crate, with its `derive` feature enabled, to the
+`[dependencies]` section of our `Cargo.toml` (if it isn't already there):
+
+```text
+serde = { version = "1", features = ["derive"] }
+```
+
+And we'll also need to add a new `use` statement to our code, for Serde, as
+shown in the example:
+
+```no_run
+//tutorial-write-serde-02.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+use serde::Serialize;
+
+// Note that structs can derive both Serialize and Deserialize!
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record<'a> {
+ city: &'a str,
+ state: &'a str,
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ wtr.serialize(Record {
+ city: "Davidsons Landing",
+ state: "AK",
+ population: None,
+ latitude: 65.2419444,
+ longitude: -165.2716667,
+ })?;
+ wtr.serialize(Record {
+ city: "Kenai",
+ state: "AK",
+ population: Some(7610),
+ latitude: 60.5544444,
+ longitude: -151.2583333,
+ })?;
+ wtr.serialize(Record {
+ city: "Oakman",
+ state: "AL",
+ population: None,
+ latitude: 33.7133333,
+ longitude: -87.3886111,
+ })?;
+
+ wtr.flush()?;
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+Compiling and running this example has the same output as last time, even
+though we didn't explicitly write a header record:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor
+City,State,Population,Latitude,Longitude
+Davidsons Landing,AK,,65.2419444,-165.2716667
+Kenai,AK,7610,60.5544444,-151.2583333
+Oakman,AL,,33.7133333,-87.3886111
+```
+
+In this case, the `serialize` method noticed that we were writing a struct
+with field names. When this happens, `serialize` will automatically write a
+header record (only if no other records have been written) that consists of
+the fields in the struct in the order in which they are defined. Note that
+this behavior can be disabled with the
+[`WriterBuilder::has_headers`](../struct.WriterBuilder.html#method.has_headers)
+method.
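+
+For example, here's a sketch of disabling the automatic header record:
+
+```ignore
+// Sketch: don't write a header record, even when serializing structs.
+let mut wtr = csv::WriterBuilder::new()
+    .has_headers(false)
+    .from_writer(io::stdout());
+```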
+
+It's also worth pointing out the use of a *lifetime parameter* in our `Record`
+struct:
+
+```ignore
+struct Record<'a> {
+ city: &'a str,
+ state: &'a str,
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+```
+
+The `'a` lifetime parameter corresponds to the lifetime of the `city` and
+`state` string slices. This says that the `Record` struct contains *borrowed*
+data. We could have written our struct without borrowing any data, and
+therefore, without any lifetime parameters:
+
+```ignore
+struct Record {
+ city: String,
+ state: String,
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+```
+
+However, since we had to replace our borrowed `&str` types with owned `String`
+types, we're now forced to allocate a new `String` value for both of `city`
+and `state` for every record that we write. There's no intrinsic problem with
+doing that, but it might be a bit wasteful.
+
+For more examples and more details on the rules for serialization, please see
+the
+[`Writer::serialize`](../struct.Writer.html#method.serialize)
+method.
+
+# Pipelining
+
+In this section, we're going to cover a few examples that demonstrate programs
+that take CSV data as input, and produce possibly transformed or filtered CSV
+data as output. This shows how to write a complete program that efficiently
+reads and writes CSV data. Rust is well positioned to perform this task, since
+you'll get great performance with the convenience of a high level CSV library.
+
+## Filter by search
+
+The first example of CSV pipelining we'll look at is a simple filter. It takes
+as input some CSV data on stdin and a single string query as its only
+positional argument, and it will produce as output CSV data that only contains
+rows with a field that matches the query.
+
+```no_run
+//tutorial-pipeline-search-01.rs
+use std::env;
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn run() -> Result<(), Box<dyn Error>> {
+ // Get the query from the positional arguments.
+ // If one doesn't exist, return an error.
+ let query = match env::args().nth(1) {
+ None => return Err(From::from("expected 1 argument, but got none")),
+ Some(query) => query,
+ };
+
+ // Build CSV readers and writers to stdin and stdout, respectively.
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ // Before reading our data records, we should write the header record.
+ wtr.write_record(rdr.headers()?)?;
+
+ // Iterate over all the records in `rdr`, and write only records containing
+ // `query` to `wtr`.
+ for result in rdr.records() {
+ let record = result?;
+ if record.iter().any(|field| field == &query) {
+ wtr.write_record(&record)?;
+ }
+ }
+
+ // CSV writers use an internal buffer, so we should always flush when done.
+ wtr.flush()?;
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+If we compile and run this program with a query of `MA` on `uspop.csv`, we'll
+see that only one record matches:
+
+```text
+$ cargo build
+$ ./csvtutor MA < uspop.csv
+City,State,Population,Latitude,Longitude
+Reading,MA,23441,42.5255556,-71.0958333
+```
+
+This example doesn't actually introduce anything new. It merely combines what
+you've already learned about CSV readers and writers from previous sections.
+
+Let's add a twist to this example. In the real world, you're often faced with
+messy CSV data that might not be encoded correctly. One example you might come
+across is CSV data encoded in
+[Latin-1](https://en.wikipedia.org/wiki/ISO/IEC_8859-1).
+Unfortunately, for the examples we've seen so far, our CSV reader assumes that
+all of the data is UTF-8. Since all of the data we've worked on has been
+ASCII---which is a subset of both Latin-1 and UTF-8---we haven't had any
+problems. But let's introduce a slightly tweaked version of our `uspop.csv`
+file that contains an encoding of a Latin-1 character that is invalid UTF-8.
+You can get the data like so:
+
+```text
+$ curl -LO 'https://raw.githubusercontent.com/BurntSushi/rust-csv/master/examples/data/uspop-latin1.csv'
+```
+
+Even though I've already given away the problem, let's see what happens when
+we try to run our previous example on this new data:
+
+```text
+$ ./csvtutor MA < uspop-latin1.csv
+City,State,Population,Latitude,Longitude
+CSV parse error: record 3 (line 4, field: 0, byte: 125): invalid utf-8: invalid UTF-8 in field 0 near byte index 0
+```
+
+The error message tells us exactly what's wrong. Let's take a look at line 4
+to see what we're dealing with:
+
+```text
+$ head -n4 uspop-latin1.csv | tail -n1
+Õakman,AL,,33.7133333,-87.3886111
+```
+
+In this case, the very first character is the Latin-1 `Õ`, which is encoded as
+the byte `0xD5`, which is in turn invalid UTF-8. So what do we do now that our
+CSV parser has choked on our data? You have two choices. The first is to go in
+and fix up your CSV data so that it's valid UTF-8. This is probably a good
+idea anyway, and tools like `iconv` can help with the task of transcoding.
+But if you can't or don't want to do that, then you can instead read CSV data
+in a way that is mostly encoding agnostic (so long as ASCII is still a valid
+subset). The trick is to use *byte records* instead of *string records*.
+
+Thus far, we haven't actually talked much about the record types in this
+library, but now is a good time to introduce them. There are two of them,
+[`StringRecord`](../struct.StringRecord.html)
+and
+[`ByteRecord`](../struct.ByteRecord.html).
+Each of them represents a single record in CSV data, where a record is a
+sequence of an arbitrary number of fields. The only difference between
+`StringRecord` and `ByteRecord` is that `StringRecord` is guaranteed to be
+valid UTF-8, whereas `ByteRecord` contains arbitrary bytes.
+
+Armed with that knowledge, we can now begin to understand why we saw an error
+when we ran the last example on data that wasn't UTF-8. Namely, when we call
+`records`, we get back an iterator of `StringRecord`. Since `StringRecord` is
+guaranteed to be valid UTF-8, trying to build a `StringRecord` with invalid
+UTF-8 will result in the error that we see.
+
+All we need to do to make our example work is to switch from a `StringRecord`
+to a `ByteRecord`. This means using `byte_records` to create our iterator
+instead of `records`, and similarly using `byte_headers` instead of `headers`
+if we think our header data might contain invalid UTF-8 as well. Here's the
+change:
+
+```no_run
+//tutorial-pipeline-search-02.rs
+# use std::env;
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<(), Box<dyn Error>> {
+ let query = match env::args().nth(1) {
+ None => return Err(From::from("expected 1 argument, but got none")),
+ Some(query) => query,
+ };
+
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ wtr.write_record(rdr.byte_headers()?)?;
+
+ for result in rdr.byte_records() {
+ let record = result?;
+ // `query` is a `String` while `field` is now a `&[u8]`, so we'll
+ // need to convert `query` to `&[u8]` before doing a comparison.
+ if record.iter().any(|field| field == query.as_bytes()) {
+ wtr.write_record(&record)?;
+ }
+ }
+
+ wtr.flush()?;
+ Ok(())
+}
+#
+# fn main() {
+# if let Err(err) = run() {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+```
+
+Compiling and running this now yields the same results as our first example,
+but this time it works on data that isn't valid UTF-8.
+
+```text
+$ cargo build
+$ ./csvtutor MA < uspop-latin1.csv
+City,State,Population,Latitude,Longitude
+Reading,MA,23441,42.5255556,-71.0958333
+```
+
+## Filter by population count
+
+In this section, we will show another example program that both reads and
+writes CSV data, but instead of dealing with arbitrary records, we will use
+Serde to deserialize and serialize records with specific types.
+
+For this program, we'd like to be able to filter records in our population data
+by population count. Specifically, we'd like to see which records meet a
+certain population threshold. In addition to using a simple inequality, we must
+also account for records that have a missing population count. This is where
+types like `Option<T>` come in handy, because the compiler will force us to
+consider the case when the population count is missing.
+
+Since we're using Serde in this example, don't forget to add the Serde
+dependencies to your `Cargo.toml` in your `[dependencies]` section if they
+aren't already there:
+
+```text
+serde = { version = "1", features = ["derive"] }
+```
+
+Now here's the code:
+
+```no_run
+//tutorial-pipeline-pop-01.rs
+use std::env;
+use std::error::Error;
+use std::io;
+use std::process;
+
+use serde::{Deserialize, Serialize};
+
+// Unlike previous examples, we derive both Deserialize and Serialize. This
+// means we'll be able to automatically deserialize and serialize this type.
+#[derive(Debug, Deserialize, Serialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+ city: String,
+ state: String,
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+ // Get the query from the positional arguments.
+ // If one doesn't exist or isn't an integer, return an error.
+ let minimum_pop: u64 = match env::args().nth(1) {
+ None => return Err(From::from("expected 1 argument, but got none")),
+ Some(arg) => arg.parse()?,
+ };
+
+ // Build CSV readers and writers to stdin and stdout, respectively.
+ // Note that we don't need to write headers explicitly. Since we're
+ // serializing a custom struct, that's done for us automatically.
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ let mut wtr = csv::Writer::from_writer(io::stdout());
+
+ // Iterate over all the records in `rdr`, and write only records containing
+ // a population that is greater than or equal to `minimum_pop`.
+ for result in rdr.deserialize() {
+ // Remember that when deserializing, we must use a type hint to
+ // indicate which type we want to deserialize our record into.
+ let record: Record = result?;
+
+        // `map_or` is a combinator on `Option`. It takes two parameters:
+ // a value to use when the `Option` is `None` (i.e., the record has
+ // no population count) and a closure that returns another value of
+ // the same type when the `Option` is `Some`. In this case, we test it
+ // against our minimum population count that we got from the command
+ // line.
+ if record.population.map_or(false, |pop| pop >= minimum_pop) {
+ wtr.serialize(record)?;
+ }
+ }
+
+ // CSV writers use an internal buffer, so we should always flush when done.
+ wtr.flush()?;
+ Ok(())
+}
+
+fn main() {
+ if let Err(err) = run() {
+ println!("{}", err);
+ process::exit(1);
+ }
+}
+```
+
+If we compile and run our program with a minimum threshold of `100000`, we
+should see three matching records. Notice that the headers were added even
+though we never explicitly wrote them!
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor 100000 < uspop.csv
+City,State,Population,Latitude,Longitude
+Fontana,CA,169160,34.0922222,-117.4341667
+Bridgeport,CT,139090,41.1669444,-73.2052778
+Indianapolis,IN,773283,39.7683333,-86.1580556
+```
+
+# Performance
+
+In this section, we'll go over how to squeeze the most juice out of our CSV
+reader. As it happens, most of the APIs we've seen so far were designed with
+high level convenience in mind, and that often comes with some costs. For the
+most part, those costs revolve around unnecessary allocations. Therefore, most
+of the section will show how to do CSV parsing with as little allocation as
+possible.
+
+There are two critical preliminaries we must cover.
+
+Firstly, when you care about performance, you should compile your code
+with `cargo build --release` instead of `cargo build`. The `--release`
+flag instructs the compiler to spend more time optimizing your code. When
+compiling with the `--release` flag, you'll find your compiled program at
+`target/release/csvtutor` instead of `target/debug/csvtutor`. Throughout this
+tutorial, we've used `cargo build` because our dataset was small and we weren't
+focused on speed. The downside of `cargo build --release` is that compilation
+takes longer than with `cargo build`.
+
+Secondly, the dataset we've used throughout this tutorial only has 100 records.
+We'd have to try really hard to cause our program to run slowly on 100 records,
+even when we compile without the `--release` flag. Therefore, in order to
+actually witness a performance difference, we need a bigger dataset. To get
+such a dataset, we'll use the original source of `uspop.csv`. **Warning: the
+download is 41MB compressed and decompresses to 145MB.**
+
+```text
+$ curl -LO http://burntsushi.net/stuff/worldcitiespop.csv.gz
+$ gunzip worldcitiespop.csv.gz
+$ wc worldcitiespop.csv
+ 3173959 5681543 151492068 worldcitiespop.csv
+$ md5sum worldcitiespop.csv
+6198bd180b6d6586626ecbf044c1cca5 worldcitiespop.csv
+```
+
+Finally, it's worth pointing out that this section is not attempting to
+present a rigorous set of benchmarks. We will stay away from rigorous analysis
+and instead rely a bit more on wall clock times and intuition.
+
+## Amortizing allocations
+
+In order to measure performance, we must be careful about what it is we're
+measuring. We must also be careful to not change the thing we're measuring as
+we make improvements to the code. For this reason, we will focus on measuring
+how long it takes to count the number of records corresponding to city
+population counts in Massachusetts. This represents a very small amount of work
+that requires us to visit every record, and therefore represents a decent way
+to measure how long it takes to do CSV parsing.
+
+Before diving into our first optimization, let's start with a baseline by
+adapting a previous example to count the number of records in
+`worldcitiespop.csv`:
+
+```no_run
+//tutorial-perf-alloc-01.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+fn run() -> Result<u64, Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+
+ let mut count = 0;
+ for result in rdr.records() {
+ let record = result?;
+ if &record[0] == "us" && &record[3] == "MA" {
+ count += 1;
+ }
+ }
+ Ok(count)
+}
+
+fn main() {
+ match run() {
+ Ok(count) => {
+ println!("{}", count);
+ }
+ Err(err) => {
+ println!("{}", err);
+ process::exit(1);
+ }
+ }
+}
+```
+
+Now let's compile and run it and see what kind of timing we get. Don't forget
+to compile with the `--release` flag. (For grins, try compiling without the
+`--release` flag and see how long it takes to run the program!)
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m0.645s
+user 0m0.627s
+sys 0m0.017s
+```
+
+All right, so what's the first thing we can do to make this faster? This
+section promised to speed things up by amortizing allocation, but we can do
+something even simpler first: iterate over
+[`ByteRecord`](../struct.ByteRecord.html)s
+instead of
+[`StringRecord`](../struct.StringRecord.html)s.
+If you recall from a previous section, a `StringRecord` is guaranteed to be
+valid UTF-8, and therefore must validate that its contents are actually UTF-8.
+(If validation fails, then the CSV reader will return an error.) If we remove
+that validation from our program, then we can realize a nice speed boost as
+shown in the next example:
+
+```no_run
+//tutorial-perf-alloc-02.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<u64, Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+
+ let mut count = 0;
+ for result in rdr.byte_records() {
+ let record = result?;
+ if &record[0] == b"us" && &record[3] == b"MA" {
+ count += 1;
+ }
+ }
+ Ok(count)
+}
+#
+# fn main() {
+# match run() {
+# Ok(count) => {
+# println!("{}", count);
+# }
+# Err(err) => {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+# }
+```
+
+And now compile and run:
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m0.429s
+user 0m0.403s
+sys 0m0.023s
+```
+
+Our program is now approximately 30% faster, all because we removed UTF-8
+validation. But was it actually okay to remove UTF-8 validation? What have we
+lost? In this case, it is perfectly acceptable to drop UTF-8 validation and use
+`ByteRecord` instead because all we're doing with the data in the record is
+comparing two of its fields to raw bytes:
+
+```ignore
+if &record[0] == b"us" && &record[3] == b"MA" {
+ count += 1;
+}
+```
+
+In particular, it doesn't matter whether `record` is valid UTF-8 or not, since
+we're checking for equality on the raw bytes themselves.
+
+UTF-8 validation via `StringRecord` is useful because it provides access to
+fields as `&str` types, whereas `ByteRecord` provides fields as `&[u8]` types.
+`&str` is the type of a borrowed string in Rust, which provides convenient
+access to string APIs like substring search. Strings are also frequently used
+in other areas, so they tend to be a useful thing to have. Therefore, sticking
+with `StringRecord` is a good default, but if you need the extra speed and can
+deal with arbitrary bytes, then switching to `ByteRecord` might be a good idea.
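+
+There is also a middle ground: read `ByteRecord`s and validate only the
+fields you actually need as UTF-8 using `std::str::from_utf8` from the
+standard library. A minimal sketch of that approach:
+
+```ignore
+use std::str;
+
+if &record[0] == b"us" && &record[3] == b"MA" {
+    // `str::from_utf8` returns an error instead of panicking on invalid
+    // UTF-8, so we can decide how to handle bad data ourselves.
+    if let Ok(city) = str::from_utf8(&record[1]) {
+        println!("{}", city);
+    }
+}
+```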
+
+Moving on, let's try to get another speed boost by amortizing allocation.
+Amortizing allocation is the technique that creates an allocation once (or
+very rarely), and then attempts to reuse it instead of creating additional
+allocations. In the case of the previous examples, we used iterators created
+by the `records` and `byte_records` methods on a CSV reader. These iterators
+allocate a new record for every item they yield, which in turn corresponds
+to a new allocation. It does this because iterators cannot yield items that
+borrow from the iterator itself, and because creating new allocations tends to
+be a lot more convenient.
+
+If we're willing to forgo use of iterators, then we can amortize allocations
+by creating a *single* `ByteRecord` and asking the CSV reader to read into it.
+We do this by using the
+[`Reader::read_byte_record`](../struct.Reader.html#method.read_byte_record)
+method.
+
+```no_run
+//tutorial-perf-alloc-03.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+fn run() -> Result<u64, Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ let mut record = csv::ByteRecord::new();
+
+ let mut count = 0;
+ while rdr.read_byte_record(&mut record)? {
+ if &record[0] == b"us" && &record[3] == b"MA" {
+ count += 1;
+ }
+ }
+ Ok(count)
+}
+#
+# fn main() {
+# match run() {
+# Ok(count) => {
+# println!("{}", count);
+# }
+# Err(err) => {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+# }
+```
+
+Compile and run:
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m0.308s
+user 0m0.283s
+sys 0m0.023s
+```
+
+Woohoo! This represents *another* 30% boost over the previous example, which is
+a 50% boost over the first example.
+
+Let's dissect this code by taking a look at the type signature of the
+`read_byte_record` method:
+
+```ignore
+fn read_byte_record(&mut self, record: &mut ByteRecord) -> csv::Result<bool>;
+```
+
+This method takes as input a CSV reader (the `self` parameter) and a *mutable
+borrow* of a `ByteRecord`, and returns a `csv::Result<bool>`. (The
+`csv::Result<bool>` is equivalent to `Result<bool, csv::Error>`.) The return
+value is `true` if and only if a record was read. When it's `false`, that means
+the reader has exhausted its input. This method works by copying the contents
+of the next record into the provided `ByteRecord`. Since the same `ByteRecord`
+is used to read every record, it will already have space allocated for data.
+When `read_byte_record` runs, it will overwrite the contents that were there
+with the new record, which means that it can reuse the space that was
+allocated. Thus, we have *amortized allocation*.
+
+An exercise you might consider doing is to use a `StringRecord` instead of a
+`ByteRecord`, and therefore
+[`Reader::read_record`](../struct.Reader.html#method.read_record)
+instead of `read_byte_record`. This will give you easy access to Rust strings
+at the cost of UTF-8 validation but *without* the cost of allocating a new
+`StringRecord` for every record.
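+
+For reference, a sketch of what that exercise might look like (the counting
+logic is unchanged from before):
+
+```ignore
+let mut rdr = csv::Reader::from_reader(io::stdin());
+// A single StringRecord, reused for every row to amortize allocation.
+let mut record = csv::StringRecord::new();
+
+let mut count = 0;
+while rdr.read_record(&mut record)? {
+    // Fields are `&str` here, at the cost of UTF-8 validation.
+    if &record[0] == "us" && &record[3] == "MA" {
+        count += 1;
+    }
+}
+```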
+
+## Serde and zero allocation
+
+In this section, we are going to briefly examine how we use Serde and what we
+can do to speed it up. The key optimization we'll want to make is to---you
+guessed it---amortize allocation.
+
+As with the previous section, let's start with a simple baseline based off an
+example using Serde in a previous section:
+
+```no_run
+//tutorial-perf-serde-01.rs
+use std::error::Error;
+use std::io;
+use std::process;
+
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+ country: String,
+ city: String,
+ accent_city: String,
+ region: String,
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+
+fn run() -> Result<u64, Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+
+ let mut count = 0;
+ for result in rdr.deserialize() {
+ let record: Record = result?;
+ if record.country == "us" && record.region == "MA" {
+ count += 1;
+ }
+ }
+ Ok(count)
+}
+
+fn main() {
+ match run() {
+ Ok(count) => {
+ println!("{}", count);
+ }
+ Err(err) => {
+ println!("{}", err);
+ process::exit(1);
+ }
+ }
+}
+```
+
+Now compile and run this program:
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m1.381s
+user 0m1.367s
+sys 0m0.013s
+```
+
+The first thing you might notice is that this is quite a bit slower than our
+programs in the previous section. This is because deserializing each record
+has a certain amount of overhead to it. In particular, some of the fields need
+to be parsed as integers or floating point numbers, which isn't free. However,
+there is hope yet, because we can speed up this program!
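+
+To get a rough feel for where that time goes, here is approximately the kind
+of work Serde performs for every single record (a hand-written simplification,
+not the actual generated code):
+
+```ignore
+let population: Option<u64> = match &record[4] {
+    "" => None,
+    s => Some(s.parse()?), // integer parsing isn't free...
+};
+let latitude: f64 = record[5].parse()?; // ...and neither is float parsing
+let longitude: f64 = record[6].parse()?;
+```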
+
+Our first attempt to speed up the program will be to amortize allocation. Doing
+this with Serde is a bit trickier than before, because we need to change our
+`Record` type and use the manual deserialization API. Let's see what that looks
+like:
+
+```no_run
+//tutorial-perf-serde-02.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+# use serde::Deserialize;
+#
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record<'a> {
+ country: &'a str,
+ city: &'a str,
+ accent_city: &'a str,
+ region: &'a str,
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+
+fn run() -> Result<u64, Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ let mut raw_record = csv::StringRecord::new();
+ let headers = rdr.headers()?.clone();
+
+ let mut count = 0;
+ while rdr.read_record(&mut raw_record)? {
+ let record: Record = raw_record.deserialize(Some(&headers))?;
+ if record.country == "us" && record.region == "MA" {
+ count += 1;
+ }
+ }
+ Ok(count)
+}
+#
+# fn main() {
+# match run() {
+# Ok(count) => {
+# println!("{}", count);
+# }
+# Err(err) => {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+# }
+```
+
+Compile and run:
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m1.055s
+user 0m1.040s
+sys 0m0.013s
+```
+
+This corresponds to an approximately 24% increase in performance. To achieve
+this, we had to make two important changes.
+
+The first was to make our `Record` type contain `&str` fields instead of
+`String` fields. If you recall from a previous section, `&str` is a *borrowed*
+string where a `String` is an *owned* string. A borrowed string points to
+an already existing allocation, whereas a `String` always implies a new
+allocation. In this case, our `&str` is borrowing from the CSV record itself.
+
+The second change we had to make was to stop using the
+[`Reader::deserialize`](../struct.Reader.html#method.deserialize)
+iterator, and instead read our record into a `StringRecord` explicitly
+and then use the
+[`StringRecord::deserialize`](../struct.StringRecord.html#method.deserialize)
+method to deserialize a single record.
+
+The second change is a bit tricky, because in order for it to work, our
+`Record` type needs to borrow from the data inside the `StringRecord`. That
+means that our `Record` value cannot outlive the `StringRecord` that it was
+created from. Since we overwrite the same `StringRecord` on each iteration
+(in order to amortize allocation), that means our `Record` value must evaporate
+before the next iteration of the loop. Indeed, the compiler will enforce this!
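+
+For example, here is a sketch of code the borrow checker will reject:
+
+```ignore
+// This does NOT compile: each `Record` borrows from `raw_record`, so a
+// `Record` cannot be kept around once `read_record` overwrites the buffer.
+let mut seen: Vec<Record> = vec![];
+while rdr.read_record(&mut raw_record)? {
+    let record: Record = raw_record.deserialize(Some(&headers))?;
+    seen.push(record); // error: `raw_record` is still borrowed here
+}
+```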
+
+There is one more optimization we can make: remove UTF-8 validation. In
+general, this means using `&[u8]` instead of `&str` and `ByteRecord` instead
+of `StringRecord`:
+
+```no_run
+//tutorial-perf-serde-03.rs
+# use std::error::Error;
+# use std::io;
+# use std::process;
+#
+# use serde::Deserialize;
+#
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record<'a> {
+ country: &'a [u8],
+ city: &'a [u8],
+ accent_city: &'a [u8],
+ region: &'a [u8],
+ population: Option<u64>,
+ latitude: f64,
+ longitude: f64,
+}
+
+fn run() -> Result<u64, Box<dyn Error>> {
+ let mut rdr = csv::Reader::from_reader(io::stdin());
+ let mut raw_record = csv::ByteRecord::new();
+ let headers = rdr.byte_headers()?.clone();
+
+ let mut count = 0;
+ while rdr.read_byte_record(&mut raw_record)? {
+ let record: Record = raw_record.deserialize(Some(&headers))?;
+ if record.country == b"us" && record.region == b"MA" {
+ count += 1;
+ }
+ }
+ Ok(count)
+}
+#
+# fn main() {
+# match run() {
+# Ok(count) => {
+# println!("{}", count);
+# }
+# Err(err) => {
+# println!("{}", err);
+# process::exit(1);
+# }
+# }
+# }
+```
+
+Compile and run:
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m0.873s
+user 0m0.850s
+sys 0m0.023s
+```
+
+This corresponds to a 17% increase over the previous example and a 37% increase
+over the first example.
+
+In sum, Serde parsing is still quite fast, but will generally not be the
+fastest way to parse CSV since it necessarily needs to do more work.
+
+## CSV parsing without the standard library
+
+In this section, we will explore a niche use case: parsing CSV without the
+standard library. While the `csv` crate itself requires the standard library,
+the underlying parser is actually part of the
+[`csv-core`](https://docs.rs/csv-core)
+crate, which does not depend on the standard library. The downside of not
+depending on the standard library is that CSV parsing becomes a lot more
+inconvenient.
+
+The `csv-core` crate is structured similarly to the `csv` crate. There is a
+[`Reader`](../../csv_core/struct.Reader.html)
+and a
+[`Writer`](../../csv_core/struct.Writer.html),
+as well as corresponding builders
+[`ReaderBuilder`](../../csv_core/struct.ReaderBuilder.html)
+and
+[`WriterBuilder`](../../csv_core/struct.WriterBuilder.html).
+The `csv-core` crate has no record types or iterators. Instead, CSV data
+can either be read one field at a time or one record at a time. In this
+section, we'll focus on reading a field at a time since it is simpler, but it
+is generally faster to read a record at a time since it does more work per
+function call.
+
+In keeping with this section on performance, let's write a program using only
+`csv-core` that counts the number of records in the state of Massachusetts.
+
+(Note that we unfortunately use the standard library in this example even
+though `csv-core` doesn't technically require it. We do this for convenient
+access to I/O, which would be harder without the standard library.)
+
+```no_run
+//tutorial-perf-core-01.rs
+use std::io::{self, Read};
+use std::process;
+
+use csv_core::{Reader, ReadFieldResult};
+
+fn run(mut data: &[u8]) -> Option<u64> {
+ let mut rdr = Reader::new();
+
+ // Count the number of records in Massachusetts.
+ let mut count = 0;
+ // Indicates the current field index. Reset to 0 at start of each record.
+ let mut fieldidx = 0;
+ // True when the current record is in the United States.
+ let mut inus = false;
+ // Buffer for field data. Must be big enough to hold the largest field.
+ let mut field = [0; 1024];
+ loop {
+ // Attempt to incrementally read the next CSV field.
+ let (result, nread, nwrite) = rdr.read_field(data, &mut field);
+ // nread is the number of bytes read from our input. We should never
+ // pass those bytes to read_field again.
+ data = &data[nread..];
+ // nwrite is the number of bytes written to the output buffer `field`.
+        // The contents of the buffer past `nwrite` are unspecified.
+ let field = &field[..nwrite];
+
+ match result {
+ // We don't need to handle this case because we read all of the
+ // data up front. If we were reading data incrementally, then this
+ // would be a signal to read more.
+ ReadFieldResult::InputEmpty => {}
+ // If we get this case, then we found a field that contains more
+ // than 1024 bytes. We keep this example simple and just fail.
+ ReadFieldResult::OutputFull => {
+ return None;
+ }
+ // This case happens when we've successfully read a field. If the
+ // field is the last field in a record, then `record_end` is true.
+ ReadFieldResult::Field { record_end } => {
+ if fieldidx == 0 && field == b"us" {
+ inus = true;
+ } else if inus && fieldidx == 3 && field == b"MA" {
+ count += 1;
+ }
+ if record_end {
+ fieldidx = 0;
+ inus = false;
+ } else {
+ fieldidx += 1;
+ }
+ }
+ // This case happens when the CSV reader has successfully exhausted
+ // all input.
+ ReadFieldResult::End => {
+ break;
+ }
+ }
+ }
+ Some(count)
+}
+
+fn main() {
+ // Read the entire contents of stdin up front.
+ let mut data = vec![];
+ if let Err(err) = io::stdin().read_to_end(&mut data) {
+ println!("{}", err);
+ process::exit(1);
+ }
+ match run(&data) {
+ None => {
+ println!("error: could not count records, buffer too small");
+ process::exit(1);
+ }
+ Some(count) => {
+ println!("{}", count);
+ }
+ }
+}
+```
+
+And compile and run it:
+
+```text
+$ cargo build --release
+$ time ./target/release/csvtutor < worldcitiespop.csv
+2176
+
+real 0m0.572s
+user 0m0.513s
+sys 0m0.057s
+```
+
+This isn't as fast as some of our previous examples where we used the `csv`
+crate to read into a `StringRecord` or a `ByteRecord`. This is mostly because
+this example reads a field at a time, which incurs more overhead than reading a
+record at a time. To fix this, you would want to use the
+[`Reader::read_record`](../../csv_core/struct.Reader.html#method.read_record)
+method instead, which is defined on `csv_core::Reader`.
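+
+For reference, here is a sketch of that record-at-a-time approach, reusing
+the setup from the example above (the buffer sizes here are arbitrary; a real
+program would size them to fit its data):
+
+```ignore
+use csv_core::{Reader, ReadRecordResult};
+
+let mut rdr = Reader::new();
+// Field data for one whole record, plus the end offset of each field.
+let mut out = [0; 4096];
+let mut ends = [0; 32];
+let mut count = 0;
+loop {
+    let (result, nin, _nout, nend) = rdr.read_record(data, &mut out, &mut ends);
+    data = &data[nin..];
+    match result {
+        ReadRecordResult::InputEmpty => {}
+        // A record didn't fit; a real program would grow its buffers.
+        ReadRecordResult::OutputFull | ReadRecordResult::OutputEndsFull => {
+            return None;
+        }
+        ReadRecordResult::Record => {
+            // `ends[i]` is the offset in `out` just past the end of field `i`.
+            if nend >= 4
+                && &out[..ends[0]] == b"us"
+                && &out[ends[2]..ends[3]] == b"MA"
+            {
+                count += 1;
+            }
+        }
+        ReadRecordResult::End => break,
+    }
+}
+Some(count)
+```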
+
+The other thing to notice here is that the example is considerably longer than
+the other examples. This is because we need to do more bookkeeping to keep
+track of which field we're reading and how much data we've already fed to the
+reader. There are basically two reasons to use the `csv_core` crate:
+
+1. If you're in an environment where the standard library is not usable.
+2. If you wanted to build your own csv-like library, you could build it on top
+ of `csv-core`.
+
+# Closing thoughts
+
+Congratulations on making it to the end! It seems incredible that one could
+write so many words on something as basic as CSV parsing. I wanted this
+guide to be accessible not only to Rust beginners, but to inexperienced
+programmers as well. My hope is that the large number of examples will help
+push you in the right direction.
+
+With that said, here are a few more things you might want to look at:
+
+* The [API documentation for the `csv` crate](../index.html) documents all
+ facets of the library, and is itself littered with even more examples.
+* The [`csv-index` crate](https://docs.rs/csv-index) provides data structures
+  for indexing CSV data, where the indexes themselves are amenable to being
+  written to disk. (This library is still a work in progress.)
+* The [`xsv` command line tool](https://github.com/BurntSushi/xsv) is a high
+ performance CSV swiss army knife. It can slice, select, search, sort, join,
+ concatenate, index, format and compute statistics on arbitrary CSV data. Give
+ it a try!
+
+*/
diff --git a/src/writer.rs b/src/writer.rs
new file mode 100644
index 0000000..6ab442f
--- /dev/null
+++ b/src/writer.rs
@@ -0,0 +1,1415 @@
+use std::fs::File;
+use std::io;
+use std::path::Path;
+use std::result;
+
+use csv_core::{
+ self, WriteResult, Writer as CoreWriter,
+ WriterBuilder as CoreWriterBuilder,
+};
+use serde::Serialize;
+
+use crate::byte_record::ByteRecord;
+use crate::error::{Error, ErrorKind, IntoInnerError, Result};
+use crate::serializer::{serialize, serialize_header};
+use crate::{QuoteStyle, Terminator};
+
+/// Builds a CSV writer with various configuration knobs.
+///
+/// This builder can be used to tweak the field delimiter, record terminator
+/// and more. Once a CSV `Writer` is built, its configuration cannot be
+/// changed.
+#[derive(Debug)]
+pub struct WriterBuilder {
+ builder: CoreWriterBuilder,
+ capacity: usize,
+ flexible: bool,
+ has_headers: bool,
+}
+
+impl Default for WriterBuilder {
+ fn default() -> WriterBuilder {
+ WriterBuilder {
+ builder: CoreWriterBuilder::default(),
+ capacity: 8 * (1 << 10),
+ flexible: false,
+ has_headers: true,
+ }
+ }
+}
+
+impl WriterBuilder {
+ /// Create a new builder for configuring CSV writing.
+ ///
+ /// To convert a builder into a writer, call one of the methods starting
+ /// with `from_`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn new() -> WriterBuilder {
+ WriterBuilder::default()
+ }
+
+ /// Build a CSV writer from this configuration that writes data to the
+ /// given file path. The file is truncated if it already exists.
+ ///
+ /// If there was a problem opening the file at the given path, then this
+ /// returns the corresponding error.
+ ///
+ /// # Example
+ ///
+ /// ```no_run
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new().from_path("foo.csv")?;
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ /// wtr.flush()?;
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Writer<File>> {
+ Ok(Writer::new(self, File::create(path)?))
+ }
+
+ /// Build a CSV writer from this configuration that writes data to `wtr`.
+ ///
+ /// Note that the CSV writer is buffered automatically, so you should not
+ /// wrap `wtr` in a buffered writer like `io::BufWriter`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_writer<W: io::Write>(&self, wtr: W) -> Writer<W> {
+ Writer::new(self, wtr)
+ }
+
+ /// The field delimiter to use when writing CSV.
+ ///
+ /// The default is `b','`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .delimiter(b';')
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a;b;c\nx;y;z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn delimiter(&mut self, delimiter: u8) -> &mut WriterBuilder {
+ self.builder.delimiter(delimiter);
+ self
+ }
+
+ /// Whether to write a header row before writing any other row.
+ ///
+ /// When this is enabled and the `serialize` method is used to write data
+ /// with something that contains field names (i.e., a struct), then a
+ /// header row is written containing the field names before any other row
+ /// is written.
+ ///
+ /// This option has no effect when using other methods to write rows. That
+ /// is, if you don't use `serialize`, then you must write your header row
+ /// explicitly if you want a header row.
+ ///
+ /// This is enabled by default.
+ ///
+ /// # Example: with headers
+ ///
+ /// This shows how the header will be automatically written from the field
+ /// names of a struct.
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::WriterBuilder;
+ /// use serde::Serialize;
+ ///
+ /// #[derive(Serialize)]
+ /// struct Row<'a> {
+ /// city: &'a str,
+ /// country: &'a str,
+ /// // Serde allows us to name our headers exactly,
+ /// // even if they don't match our struct field names.
+ /// #[serde(rename = "popcount")]
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ /// wtr.serialize(Row {
+ /// city: "Boston",
+ /// country: "United States",
+ /// population: 4628910,
+ /// })?;
+ /// wtr.serialize(Row {
+ /// city: "Concord",
+ /// country: "United States",
+ /// population: 42695,
+ /// })?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ");
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Example: without headers
+ ///
+ /// This shows that serializing things that aren't structs (in this case,
+ /// a tuple struct) won't result in a header row being written. This means
+ /// you usually don't need to set `has_headers(false)` unless you
+ /// explicitly want to both write custom headers and serialize structs.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ /// wtr.serialize(("Boston", "United States", 4628910))?;
+ /// wtr.serialize(("Concord", "United States", 42695))?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "\
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn has_headers(&mut self, yes: bool) -> &mut WriterBuilder {
+ self.has_headers = yes;
+ self
+ }
+
+ /// Whether the number of fields in records is allowed to change or not.
+ ///
+ /// When disabled (which is the default), writing CSV data will return an
+ /// error if a record is written with a number of fields different from the
+ /// number of fields written in a previous record.
+ ///
+ /// When enabled, this error checking is turned off.
+ ///
+ /// # Example: writing flexible records
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .flexible(true)
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "b"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Example: error when `flexible` is disabled
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .flexible(false)
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "b"])?;
+ /// let err = wtr.write_record(&["x", "y", "z"]).unwrap_err();
+ /// match *err.kind() {
+ /// csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
+ /// assert_eq!(expected_len, 2);
+ /// assert_eq!(len, 3);
+ /// }
+ /// ref wrong => {
+ /// panic!("expected UnequalLengths but got {:?}", wrong);
+ /// }
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn flexible(&mut self, yes: bool) -> &mut WriterBuilder {
+ self.flexible = yes;
+ self
+ }
+
+ /// The record terminator to use when writing CSV.
+ ///
+ /// A record terminator can be any single byte. The default is `\n`.
+ ///
+ /// Note that RFC 4180 specifies that record terminators should be `\r\n`.
+ /// To use `\r\n`, use the special `Terminator::CRLF` value.
+ ///
+ /// # Example: CRLF
+ ///
+ /// This shows how to use RFC 4180 compliant record terminators.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{Terminator, WriterBuilder};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .terminator(Terminator::CRLF)
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\r\nx,y,z\r\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn terminator(&mut self, term: Terminator) -> &mut WriterBuilder {
+ self.builder.terminator(term.to_core());
+ self
+ }
+
+ /// The quoting style to use when writing CSV.
+ ///
+ /// By default, this is set to `QuoteStyle::Necessary`, which will only
+ /// use quotes when they are necessary to preserve the integrity of data.
+ ///
+ /// Note that unless the quote style is set to `Never`, an empty field is
+ /// quoted if it is the only field in a record.
+ ///
+ /// # Example: non-numeric quoting
+ ///
+ /// This shows how to quote non-numeric fields only.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{QuoteStyle, WriterBuilder};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .quote_style(QuoteStyle::NonNumeric)
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "5", "c"])?;
+ /// wtr.write_record(&["3.14", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "\"a\",5,\"c\"\n3.14,\"y\",\"z\"\n");
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Example: never quote
+ ///
+ /// This shows how the CSV writer can be made to never write quotes, even
+ /// if it sacrifices the integrity of the data.
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{QuoteStyle, WriterBuilder};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .quote_style(QuoteStyle::Never)
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "foo\nbar", "c"])?;
+ /// wtr.write_record(&["g\"h\"i", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,foo\nbar,c\ng\"h\"i,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn quote_style(&mut self, style: QuoteStyle) -> &mut WriterBuilder {
+ self.builder.quote_style(style.to_core());
+ self
+ }
+
+ /// The quote character to use when writing CSV.
+ ///
+ /// The default is `b'"'`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .quote(b'\'')
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "foo\nbar", "c"])?;
+ /// wtr.write_record(&["g'h'i", "y\"y\"y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,'foo\nbar',c\n'g''h''i',y\"y\"y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn quote(&mut self, quote: u8) -> &mut WriterBuilder {
+ self.builder.quote(quote);
+ self
+ }
+
+ /// Enable double quote escapes.
+ ///
+ /// This is enabled by default, but it may be disabled. When disabled,
+ /// quotes in field data are escaped instead of doubled.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .double_quote(false)
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "foo\"bar", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,\"foo\\\"bar\",c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn double_quote(&mut self, yes: bool) -> &mut WriterBuilder {
+ self.builder.double_quote(yes);
+ self
+ }
+
+ /// The escape character to use when writing CSV.
+ ///
+ /// In some variants of CSV, quotes are escaped using a special escape
+ /// character like `\` (instead of escaping quotes by doubling them).
+ ///
+    /// By default, writing these idiosyncratic escapes is disabled; the
+    /// escape character is only used when `double_quote` is disabled.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::WriterBuilder;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .double_quote(false)
+ /// .escape(b'$')
+ /// .from_writer(vec![]);
+ /// wtr.write_record(&["a", "foo\"bar", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,\"foo$\"bar\",c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn escape(&mut self, escape: u8) -> &mut WriterBuilder {
+ self.builder.escape(escape);
+ self
+ }
+
+ /// Set the capacity (in bytes) of the internal buffer used in the CSV
+    /// writer. This defaults to a reasonable setting (currently 8KB).
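+    ///
+    /// # Example
+    ///
+    /// A larger buffer means fewer flushes to the underlying writer:
+    ///
+    /// ```
+    /// use std::error::Error;
+    /// use csv::WriterBuilder;
+    ///
+    /// # fn main() { example().unwrap(); }
+    /// fn example() -> Result<(), Box<dyn Error>> {
+    ///     let mut wtr = WriterBuilder::new()
+    ///         .buffer_capacity(64 * (1 << 10)) // 64KB
+    ///         .from_writer(vec![]);
+    ///     wtr.write_record(&["a", "b", "c"])?;
+    ///
+    ///     let data = String::from_utf8(wtr.into_inner()?)?;
+    ///     assert_eq!(data, "a,b,c\n");
+    ///     Ok(())
+    /// }
+    /// ```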
+ pub fn buffer_capacity(&mut self, capacity: usize) -> &mut WriterBuilder {
+ self.capacity = capacity;
+ self
+ }
+}
+
+/// An already configured CSV writer.
+///
+/// A CSV writer takes as input Rust values and writes those values in a valid
+/// CSV format as output.
+///
+/// While CSV writing is considerably easier than parsing CSV, a proper writer
+/// will do a number of things for you:
+///
+/// 1. Quote fields when necessary.
+/// 2. Check that all records have the same number of fields.
+/// 3. Write records with a single empty field correctly.
+/// 4. Automatically serialize normal Rust types to CSV records. When that
+/// type is a struct, a header row is automatically written corresponding
+/// to the fields of that struct.
+/// 5. Use buffering intelligently and otherwise avoid allocation. (This means
+/// that callers should not do their own buffering.)
+///
+/// All of the above can be configured using a
+/// [`WriterBuilder`](struct.WriterBuilder.html).
+/// However, a `Writer` has a couple of convenience constructors (`from_path`
+/// and `from_writer`) that use the default configuration.
+///
+/// Note that the default configuration of a `Writer` uses `\n` for record
+/// terminators instead of `\r\n` as specified by RFC 4180. Use the
+/// `terminator` method on `WriterBuilder` to set the terminator to `\r\n` if
+/// it's desired.
+#[derive(Debug)]
+pub struct Writer<W: io::Write> {
+ core: CoreWriter,
+ wtr: Option<W>,
+ buf: Buffer,
+ state: WriterState,
+}
+
+#[derive(Debug)]
+struct WriterState {
+ /// Whether the Serde serializer should attempt to write a header row.
+ header: HeaderState,
+ /// Whether inconsistent record lengths are allowed.
+ flexible: bool,
+    /// The number of fields written in the first record. This is compared
+ /// with `fields_written` on all subsequent records to check for
+ /// inconsistent record lengths.
+ first_field_count: Option<u64>,
+ /// The number of fields written in this record. This is used to report
+ /// errors for inconsistent record lengths if `flexible` is disabled.
+ fields_written: u64,
+ /// This is set immediately before flushing the buffer and then unset
+ /// immediately after flushing the buffer. This avoids flushing the buffer
+ /// twice if the inner writer panics.
+ panicked: bool,
+}
+
+/// HeaderState encodes a small state machine for handling header writes.
+#[derive(Debug)]
+enum HeaderState {
+ /// Indicates that we should attempt to write a header.
+ Write,
+    /// Indicates that writing a header was attempted and a header was written.
+ DidWrite,
+ /// Indicates that writing a header was attempted, but no headers were
+ /// written or the attempt failed.
+ DidNotWrite,
+ /// This state is used when headers are disabled. It cannot transition
+ /// to any other state.
+ None,
+}
+
+/// A simple internal buffer for buffering writes.
+///
+/// We need this because the `csv_core` APIs want to write into a `&mut [u8]`,
+/// which is not available with the `std::io::BufWriter` API.
+#[derive(Debug)]
+struct Buffer {
+ /// The contents of the buffer.
+ buf: Vec<u8>,
+ /// The number of bytes written to the buffer.
+ len: usize,
+}
+
+impl<W: io::Write> Drop for Writer<W> {
+ fn drop(&mut self) {
+ if self.wtr.is_some() && !self.state.panicked {
+ let _ = self.flush();
+ }
+ }
+}
+
+impl Writer<File> {
+ /// Build a CSV writer with a default configuration that writes data to the
+ /// given file path. The file is truncated if it already exists.
+ ///
+ /// If there was a problem opening the file at the given path, then this
+ /// returns the corresponding error.
+ ///
+ /// # Example
+ ///
+ /// ```no_run
+ /// use std::error::Error;
+ /// use csv::Writer;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_path("foo.csv")?;
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ /// wtr.flush()?;
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Writer<File>> {
+ WriterBuilder::new().from_path(path)
+ }
+}
+
+impl<W: io::Write> Writer<W> {
+ fn new(builder: &WriterBuilder, wtr: W) -> Writer<W> {
+ let header_state = if builder.has_headers {
+ HeaderState::Write
+ } else {
+ HeaderState::None
+ };
+ Writer {
+ core: builder.builder.build(),
+ wtr: Some(wtr),
+ buf: Buffer { buf: vec![0; builder.capacity], len: 0 },
+ state: WriterState {
+ header: header_state,
+ flexible: builder.flexible,
+ first_field_count: None,
+ fields_written: 0,
+ panicked: false,
+ },
+ }
+ }
+
+ /// Build a CSV writer with a default configuration that writes data to
+ /// `wtr`.
+ ///
+ /// Note that the CSV writer is buffered automatically, so you should not
+ /// wrap `wtr` in a buffered writer like `io::BufWriter`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Writer;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_writer(vec![]);
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn from_writer(wtr: W) -> Writer<W> {
+ WriterBuilder::new().from_writer(wtr)
+ }
+
+ /// Serialize a single record using Serde.
+ ///
+ /// # Example
+ ///
+ /// This shows how to serialize normal Rust structs as CSV records. The
+ /// fields of the struct are used to write a header row automatically.
+ /// (Writing the header row automatically can be disabled by building the
+ /// CSV writer with a [`WriterBuilder`](struct.WriterBuilder.html) and
+ /// calling the `has_headers` method.)
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::Writer;
+ /// use serde::Serialize;
+ ///
+ /// #[derive(Serialize)]
+ /// struct Row<'a> {
+ /// city: &'a str,
+ /// country: &'a str,
+ /// // Serde allows us to name our headers exactly,
+ /// // even if they don't match our struct field names.
+ /// #[serde(rename = "popcount")]
+ /// population: u64,
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_writer(vec![]);
+ /// wtr.serialize(Row {
+ /// city: "Boston",
+ /// country: "United States",
+ /// population: 4628910,
+ /// })?;
+ /// wtr.serialize(Row {
+ /// city: "Concord",
+ /// country: "United States",
+ /// population: 42695,
+ /// })?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "\
+ /// city,country,popcount
+ /// Boston,United States,4628910
+ /// Concord,United States,42695
+ /// ");
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// # Rules
+ ///
+ /// The behavior of `serialize` is fairly simple:
+ ///
+ /// 1. Nested containers (tuples, `Vec`s, structs, etc.) are always
+ /// flattened (depth-first order).
+ ///
+ /// 2. If `has_headers` is `true` and the type contains field names, then
+ /// a header row is automatically generated.
+ ///
+ /// However, some container types cannot be serialized, and if
+ /// `has_headers` is `true`, there are some additional restrictions on the
+ /// types that can be serialized. See below for details.
+ ///
+ /// For the purpose of this section, Rust types can be divided into three
+ /// categories: scalars, non-struct containers, and structs.
+ ///
+ /// ## Scalars
+ ///
+ /// Single values with no field names are written like the following. Note
+ /// that some of the outputs may be quoted, according to the selected
+ /// quoting style.
+ ///
+ /// | Name | Example Type | Example Value | Output |
+ /// | ---- | ---- | ---- | ---- |
+ /// | boolean | `bool` | `true` | `true` |
+ /// | integers | `i8`, `i16`, `i32`, `i64`, `i128`, `u8`, `u16`, `u32`, `u64`, `u128` | `5` | `5` |
+ /// | floats | `f32`, `f64` | `3.14` | `3.14` |
+ /// | character | `char` | `'☃'` | `☃` |
+ /// | string | `&str` | `"hi"` | `hi` |
+ /// | bytes | `&[u8]` | `b"hi"[..]` | `hi` |
+ /// | option | `Option` | `None` | *empty* |
+ /// | option | | `Some(5)` | `5` |
+ /// | unit | `()` | `()` | *empty* |
+ /// | unit struct | `struct Foo;` | `Foo` | `Foo` |
+ /// | unit enum variant | `enum E { A, B }` | `E::A` | `A` |
+ /// | newtype struct | `struct Foo(u8);` | `Foo(5)` | `5` |
+ /// | newtype enum variant | `enum E { A(u8) }` | `E::A(5)` | `5` |
+ ///
+ /// Note that this table includes simple structs and enums. For example, to
+ /// serialize a field from either an integer or a float type, one can do
+ /// this:
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::Writer;
+ /// use serde::Serialize;
+ ///
+ /// #[derive(Serialize)]
+ /// struct Row {
+ /// label: String,
+ /// value: Value,
+ /// }
+ ///
+ /// #[derive(Serialize)]
+ /// enum Value {
+ /// Integer(i64),
+ /// Float(f64),
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_writer(vec![]);
+ /// wtr.serialize(Row {
+ /// label: "foo".to_string(),
+ /// value: Value::Integer(3),
+ /// })?;
+ /// wtr.serialize(Row {
+ /// label: "bar".to_string(),
+ /// value: Value::Float(3.14),
+ /// })?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "\
+ /// label,value
+ /// foo,3
+ /// bar,3.14
+ /// ");
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// ## Non-Struct Containers
+ ///
+ /// Nested containers are flattened to their scalar components, with the
+    /// exception of a few types that are not allowed:
+ ///
+ /// | Name | Example Type | Example Value | Output |
+ /// | ---- | ---- | ---- | ---- |
+ /// | sequence | `Vec<u8>` | `vec![1, 2, 3]` | `1,2,3` |
+ /// | tuple | `(u8, bool)` | `(5, true)` | `5,true` |
+ /// | tuple struct | `Foo(u8, bool)` | `Foo(5, true)` | `5,true` |
+ /// | tuple enum variant | `enum E { A(u8, bool) }` | `E::A(5, true)` | *error* |
+ /// | struct enum variant | `enum E { V { a: u8, b: bool } }` | `E::V { a: 5, b: true }` | *error* |
+ /// | map | `BTreeMap<K, V>` | `BTreeMap::new()` | *error* |
+ ///
+ /// ## Structs
+ ///
+ /// Like the other containers, structs are flattened to their scalar
+ /// components:
+ ///
+ /// | Name | Example Type | Example Value | Output |
+ /// | ---- | ---- | ---- | ---- |
+ /// | struct | `struct Foo { a: u8, b: bool }` | `Foo { a: 5, b: true }` | `5,true` |
+ ///
+ /// If `has_headers` is `false`, then there are no additional restrictions;
+ /// types can be nested arbitrarily. For example:
+ ///
+ /// ```
+ /// use std::error::Error;
+ ///
+ /// use csv::WriterBuilder;
+ /// use serde::Serialize;
+ ///
+ /// #[derive(Serialize)]
+ /// struct Row {
+ /// label: String,
+ /// values: Vec<f64>,
+ /// }
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = WriterBuilder::new()
+ /// .has_headers(false)
+ /// .from_writer(vec![]);
+ /// wtr.serialize(Row {
+ /// label: "foo".to_string(),
+ /// values: vec![1.1234, 2.5678, 3.14],
+ /// })?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "\
+ /// foo,1.1234,2.5678,3.14
+ /// ");
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// However, if `has_headers` were enabled in the above example, then
+    /// serialization would return an error. Specifically, when `has_headers` is
+ /// `true`, there are two restrictions:
+ ///
+ /// 1. Named field values in structs must be scalars.
+ ///
+ /// 2. All scalars must be named field values in structs.
+ ///
+ /// Other than these two restrictions, types can be nested arbitrarily.
+ /// Here are a few examples:
+ ///
+ /// | Value | Header | Record |
+ /// | ---- | ---- | ---- |
+ /// | `(Foo { x: 5, y: 6 }, Bar { z: true })` | `x,y,z` | `5,6,true` |
+ /// | `vec![Foo { x: 5, y: 6 }, Foo { x: 7, y: 8 }]` | `x,y,x,y` | `5,6,7,8` |
+ /// | `(Foo { x: 5, y: 6 }, vec![Bar { z: Baz(true) }])` | `x,y,z` | `5,6,true` |
+ /// | `Foo { x: 5, y: (6, 7) }` | *error: restriction 1* | `5,6,7` |
+    /// | `(5, Foo { x: 6, y: 7 })` | *error: restriction 2* | `5,6,7` |
+ /// | `(Foo { x: 5, y: 6 }, true)` | *error: restriction 2* | `5,6,true` |
+ pub fn serialize<S: Serialize>(&mut self, record: S) -> Result<()> {
+ if let HeaderState::Write = self.state.header {
+ let wrote_header = serialize_header(self, &record)?;
+ if wrote_header {
+ self.write_terminator()?;
+ self.state.header = HeaderState::DidWrite;
+ } else {
+ self.state.header = HeaderState::DidNotWrite;
+ };
+ }
+ serialize(self, &record)?;
+ self.write_terminator()?;
+ Ok(())
+ }
+
+ /// Write a single record.
+ ///
+ /// This method accepts something that can be turned into an iterator that
+ /// yields elements that can be represented by a `&[u8]`.
+ ///
+ /// This may be called with an empty iterator, which will cause a record
+ /// terminator to be written. If no fields had been written, then a single
+    /// terminator to be written. If no fields have been written, then a single
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Writer;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_writer(vec![]);
+ /// wtr.write_record(&["a", "b", "c"])?;
+ /// wtr.write_record(&["x", "y", "z"])?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn write_record<I, T>(&mut self, record: I) -> Result<()>
+ where
+ I: IntoIterator<Item = T>,
+ T: AsRef<[u8]>,
+ {
+ for field in record.into_iter() {
+ self.write_field_impl(field)?;
+ }
+ self.write_terminator()
+ }
+
+ /// Write a single `ByteRecord`.
+ ///
+ /// This method accepts a borrowed `ByteRecord` and writes its contents
+ /// to the underlying writer.
+ ///
+ /// This is similar to `write_record` except that it specifically requires
+ /// a `ByteRecord`. This permits the writer to possibly write the record
+ /// more quickly than the more generic `write_record`.
+ ///
+ /// This may be called with an empty record, which will cause a record
+    /// terminator to be written. If no fields have been written, then a single
+ /// empty field is written before the terminator.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::{ByteRecord, Writer};
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_writer(vec![]);
+ /// wtr.write_byte_record(&ByteRecord::from(&["a", "b", "c"][..]))?;
+ /// wtr.write_byte_record(&ByteRecord::from(&["x", "y", "z"][..]))?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ #[inline(never)]
+ pub fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()> {
+ if record.as_slice().is_empty() {
+ return self.write_record(record);
+ }
+ // The idea here is to find a fast path for shuffling our record into
+ // our buffer as quickly as possible. We do this because the underlying
+ // "core" CSV writer does a lot of book-keeping to maintain its state
+ // oriented API.
+ //
+ // The fast path occurs when we know our record will fit in whatever
+ // space we have left in our buffer. We can actually quickly compute
+ // the upper bound on the space required:
+ let upper_bound =
+ // The data itself plus the worst case: every byte is a quote.
+ (2 * record.as_slice().len())
+ // The number of field delimiters.
+ + (record.len().saturating_sub(1))
+ // The maximum number of quotes inserted around each field.
+ + (2 * record.len())
+ // The maximum number of bytes for the terminator.
+ + 2;
+ if self.buf.writable().len() < upper_bound {
+ return self.write_record(record);
+ }
+ let mut first = true;
+ for field in record.iter() {
+ if !first {
+ self.buf.writable()[0] = self.core.get_delimiter();
+ self.buf.written(1);
+ }
+ first = false;
+
+ if !self.core.should_quote(field) {
+ self.buf.writable()[..field.len()].copy_from_slice(field);
+ self.buf.written(field.len());
+ } else {
+ self.buf.writable()[0] = self.core.get_quote();
+ self.buf.written(1);
+ let (res, nin, nout) = csv_core::quote(
+ field,
+ self.buf.writable(),
+ self.core.get_quote(),
+ self.core.get_escape(),
+ self.core.get_double_quote(),
+ );
+ debug_assert!(res == WriteResult::InputEmpty);
+ debug_assert!(nin == field.len());
+ self.buf.written(nout);
+ self.buf.writable()[0] = self.core.get_quote();
+ self.buf.written(1);
+ }
+ }
+ self.state.fields_written = record.len() as u64;
+ self.write_terminator_into_buffer()
+ }
+
+ /// Write a single field.
+ ///
+ /// One should prefer using `write_record` over this method. It is provided
+ /// for cases where writing a field at a time is more convenient than
+ /// writing a record at a time.
+ ///
+ /// Note that if this API is used, `write_record` should be called with an
+ /// empty iterator to write a record terminator.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use csv::Writer;
+ ///
+ /// # fn main() { example().unwrap(); }
+ /// fn example() -> Result<(), Box<dyn Error>> {
+ /// let mut wtr = Writer::from_writer(vec![]);
+ /// wtr.write_field("a")?;
+ /// wtr.write_field("b")?;
+ /// wtr.write_field("c")?;
+ /// wtr.write_record(None::<&[u8]>)?;
+ /// wtr.write_field("x")?;
+ /// wtr.write_field("y")?;
+ /// wtr.write_field("z")?;
+ /// wtr.write_record(None::<&[u8]>)?;
+ ///
+ /// let data = String::from_utf8(wtr.into_inner()?)?;
+ /// assert_eq!(data, "a,b,c\nx,y,z\n");
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn write_field<T: AsRef<[u8]>>(&mut self, field: T) -> Result<()> {
+ self.write_field_impl(field)
+ }
+
+ /// Implementation of write_field.
+ ///
+ /// This is a separate method so we can force the compiler to inline it
+ /// into write_record.
+ #[inline(always)]
+ fn write_field_impl<T: AsRef<[u8]>>(&mut self, field: T) -> Result<()> {
+ if self.state.fields_written > 0 {
+ self.write_delimiter()?;
+ }
+ let mut field = field.as_ref();
+ loop {
+ let (res, nin, nout) = self.core.field(field, self.buf.writable());
+ field = &field[nin..];
+ self.buf.written(nout);
+ match res {
+ WriteResult::InputEmpty => {
+ self.state.fields_written += 1;
+ return Ok(());
+ }
+ WriteResult::OutputFull => self.flush_buf()?,
+ }
+ }
+ }
+
+ /// Flush the contents of the internal buffer to the underlying writer.
+ ///
+ /// If there was a problem writing to the underlying writer, then an error
+ /// is returned.
+ ///
+ /// Note that this also flushes the underlying writer.
+ pub fn flush(&mut self) -> io::Result<()> {
+ self.flush_buf()?;
+ self.wtr.as_mut().unwrap().flush()?;
+ Ok(())
+ }
+
+ /// Flush the contents of the internal buffer to the underlying writer,
+ /// without flushing the underlying writer.
+ fn flush_buf(&mut self) -> io::Result<()> {
+ self.state.panicked = true;
+ let result = self.wtr.as_mut().unwrap().write_all(self.buf.readable());
+ self.state.panicked = false;
+ result?;
+ self.buf.clear();
+ Ok(())
+ }
+
+ /// Flush the contents of the internal buffer and return the underlying
+ /// writer.
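+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use std::error::Error;
+    /// use csv::Writer;
+    ///
+    /// # fn main() { example().unwrap(); }
+    /// fn example() -> Result<(), Box<dyn Error>> {
+    ///     let mut wtr = Writer::from_writer(vec![]);
+    ///     wtr.write_record(&["a", "b", "c"])?;
+    ///
+    ///     // `into_inner` flushes and returns the `Vec<u8>` that was given
+    ///     // to `from_writer` above.
+    ///     let buf = wtr.into_inner()?;
+    ///     assert_eq!(buf, b"a,b,c\n".to_vec());
+    ///     Ok(())
+    /// }
+    /// ```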
+ pub fn into_inner(
+ mut self,
+ ) -> result::Result<W, IntoInnerError<Writer<W>>> {
+ match self.flush() {
+ Ok(()) => Ok(self.wtr.take().unwrap()),
+ Err(err) => Err(IntoInnerError::new(self, err)),
+ }
+ }
+
+ /// Write a CSV delimiter.
+ fn write_delimiter(&mut self) -> Result<()> {
+ loop {
+ let (res, nout) = self.core.delimiter(self.buf.writable());
+ self.buf.written(nout);
+ match res {
+ WriteResult::InputEmpty => return Ok(()),
+ WriteResult::OutputFull => self.flush_buf()?,
+ }
+ }
+ }
+
+ /// Write a CSV terminator.
+ fn write_terminator(&mut self) -> Result<()> {
+ self.check_field_count()?;
+ loop {
+ let (res, nout) = self.core.terminator(self.buf.writable());
+ self.buf.written(nout);
+ match res {
+ WriteResult::InputEmpty => {
+ self.state.fields_written = 0;
+ return Ok(());
+ }
+ WriteResult::OutputFull => self.flush_buf()?,
+ }
+ }
+ }
+
+ /// Write a CSV terminator that is guaranteed to fit into the current
+ /// buffer.
+ #[inline(never)]
+ fn write_terminator_into_buffer(&mut self) -> Result<()> {
+ self.check_field_count()?;
+ match self.core.get_terminator() {
+ csv_core::Terminator::CRLF => {
+ self.buf.writable()[0] = b'\r';
+ self.buf.writable()[1] = b'\n';
+ self.buf.written(2);
+ }
+ csv_core::Terminator::Any(b) => {
+ self.buf.writable()[0] = b;
+ self.buf.written(1);
+ }
+ _ => unreachable!(),
+ }
+ self.state.fields_written = 0;
+ Ok(())
+ }
+
+ fn check_field_count(&mut self) -> Result<()> {
+ if !self.state.flexible {
+ match self.state.first_field_count {
+ None => {
+ self.state.first_field_count =
+ Some(self.state.fields_written);
+ }
+ Some(expected) if expected != self.state.fields_written => {
+ return Err(Error::new(ErrorKind::UnequalLengths {
+ pos: None,
+ expected_len: expected,
+ len: self.state.fields_written,
+ }))
+ }
+ Some(_) => {}
+ }
+ }
+ Ok(())
+ }
+}
+
+impl Buffer {
+ /// Returns a slice of the buffer's current contents.
+ ///
+ /// The slice returned may be empty.
+ #[inline]
+ fn readable(&self) -> &[u8] {
+ &self.buf[..self.len]
+ }
+
+ /// Returns a mutable slice of the remaining space in this buffer.
+ ///
+ /// The slice returned may be empty.
+ #[inline]
+ fn writable(&mut self) -> &mut [u8] {
+ &mut self.buf[self.len..]
+ }
+
+ /// Indicates that `n` bytes have been written to this buffer.
+ #[inline]
+ fn written(&mut self, n: usize) {
+ self.len += n;
+ }
+
+ /// Clear the buffer.
+ #[inline]
+ fn clear(&mut self) {
+ self.len = 0;
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use serde::{serde_if_integer128, Serialize};
+
+ use std::io::{self, Write};
+
+ use crate::byte_record::ByteRecord;
+ use crate::error::ErrorKind;
+ use crate::string_record::StringRecord;
+
+ use super::{Writer, WriterBuilder};
+
+ fn wtr_as_string(wtr: Writer<Vec<u8>>) -> String {
+ String::from_utf8(wtr.into_inner().unwrap()).unwrap()
+ }
+
+ #[test]
+ fn one_record() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_record(&["a", "b", "c"]).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "a,b,c\n");
+ }
+
+ #[test]
+ fn one_string_record() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_record(&StringRecord::from(vec!["a", "b", "c"])).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "a,b,c\n");
+ }
+
+ #[test]
+ fn one_byte_record() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "a,b,c\n");
+ }
+
+ #[test]
+ fn raw_one_byte_record() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "a,b,c\n");
+ }
+
+ #[test]
+ fn one_empty_record() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_record(&[""]).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "\"\"\n");
+ }
+
+ #[test]
+ fn raw_one_empty_record() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_byte_record(&ByteRecord::from(vec![""])).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "\"\"\n");
+ }
+
+ #[test]
+ fn two_empty_records() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_record(&[""]).unwrap();
+ wtr.write_record(&[""]).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "\"\"\n\"\"\n");
+ }
+
+ #[test]
+ fn raw_two_empty_records() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_byte_record(&ByteRecord::from(vec![""])).unwrap();
+ wtr.write_byte_record(&ByteRecord::from(vec![""])).unwrap();
+
+ assert_eq!(wtr_as_string(wtr), "\"\"\n\"\"\n");
+ }
+
+ #[test]
+ fn unequal_records_bad() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap();
+ let err = wtr.write_record(&ByteRecord::from(vec!["a"])).unwrap_err();
+ match *err.kind() {
+ ErrorKind::UnequalLengths { ref pos, expected_len, len } => {
+ assert!(pos.is_none());
+ assert_eq!(expected_len, 3);
+ assert_eq!(len, 1);
+ }
+ ref x => {
+ panic!("expected UnequalLengths error, but got '{:?}'", x);
+ }
+ }
+ }
+
+ #[test]
+ fn raw_unequal_records_bad() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap();
+ let err =
+ wtr.write_byte_record(&ByteRecord::from(vec!["a"])).unwrap_err();
+ match *err.kind() {
+ ErrorKind::UnequalLengths { ref pos, expected_len, len } => {
+ assert!(pos.is_none());
+ assert_eq!(expected_len, 3);
+ assert_eq!(len, 1);
+ }
+ ref x => {
+ panic!("expected UnequalLengths error, but got '{:?}'", x);
+ }
+ }
+ }
+
+ #[test]
+ fn unequal_records_ok() {
+ let mut wtr = WriterBuilder::new().flexible(true).from_writer(vec![]);
+ wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap();
+ wtr.write_record(&ByteRecord::from(vec!["a"])).unwrap();
+ assert_eq!(wtr_as_string(wtr), "a,b,c\na\n");
+ }
+
+ #[test]
+ fn raw_unequal_records_ok() {
+ let mut wtr = WriterBuilder::new().flexible(true).from_writer(vec![]);
+ wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap();
+ wtr.write_byte_record(&ByteRecord::from(vec!["a"])).unwrap();
+ assert_eq!(wtr_as_string(wtr), "a,b,c\na\n");
+ }
+
+ #[test]
+ fn full_buffer_should_not_flush_underlying() {
+ struct MarkWriteAndFlush(Vec<u8>);
+
+ impl MarkWriteAndFlush {
+ fn into_string(self) -> String {
+ String::from_utf8(self.0).unwrap()
+ }
+ }
+
+ impl Write for MarkWriteAndFlush {
+ fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+ self.0.write_all(b">")?;
+ let written = self.0.write(data)?;
+ self.0.write_all(b"<")?;
+
+ Ok(written)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.0.write_all(b"!")?;
+ Ok(())
+ }
+ }
+
+ let underlying = MarkWriteAndFlush(vec![]);
+ let mut wtr =
+ WriterBuilder::new().buffer_capacity(4).from_writer(underlying);
+
+ wtr.write_byte_record(&ByteRecord::from(vec!["a", "b"])).unwrap();
+ wtr.write_byte_record(&ByteRecord::from(vec!["c", "d"])).unwrap();
+ wtr.flush().unwrap();
+ wtr.write_byte_record(&ByteRecord::from(vec!["e", "f"])).unwrap();
+
+ let got = wtr.into_inner().unwrap().into_string();
+
+ // Because the buffer capacity is 4 bytes, each record is written to
+ // the underlying writer separately. The underlying writer is flushed
+ // once when `flush` is called explicitly and once more, implicitly,
+ // by `into_inner`.
+ assert_eq!(got, ">a,b\n<>c,d\n<!>e,f\n<!");
+ }
+
+ #[test]
+ fn serialize_with_headers() {
+ #[derive(Serialize)]
+ struct Row {
+ foo: i32,
+ bar: f64,
+ baz: bool,
+ }
+
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.serialize(Row { foo: 42, bar: 42.5, baz: true }).unwrap();
+ assert_eq!(wtr_as_string(wtr), "foo,bar,baz\n42,42.5,true\n");
+ }
+
+ #[test]
+ fn serialize_no_headers() {
+ #[derive(Serialize)]
+ struct Row {
+ foo: i32,
+ bar: f64,
+ baz: bool,
+ }
+
+ let mut wtr =
+ WriterBuilder::new().has_headers(false).from_writer(vec![]);
+ wtr.serialize(Row { foo: 42, bar: 42.5, baz: true }).unwrap();
+ assert_eq!(wtr_as_string(wtr), "42,42.5,true\n");
+ }
+
+ serde_if_integer128! {
+ #[test]
+ fn serialize_no_headers_128() {
+ #[derive(Serialize)]
+ struct Row {
+ foo: i128,
+ bar: f64,
+ baz: bool,
+ }
+
+ let mut wtr =
+ WriterBuilder::new().has_headers(false).from_writer(vec![]);
+ wtr.serialize(Row {
+ foo: 9_223_372_036_854_775_808,
+ bar: 42.5,
+ baz: true,
+ }).unwrap();
+ assert_eq!(wtr_as_string(wtr), "9223372036854775808,42.5,true\n");
+ }
+ }
+
+ #[test]
+ fn serialize_tuple() {
+ let mut wtr = WriterBuilder::new().from_writer(vec![]);
+ wtr.serialize((true, 1.3, "hi")).unwrap();
+ assert_eq!(wtr_as_string(wtr), "true,1.3,hi\n");
+ }
+}