//! This example parses, sorts and groups the iris dataset
//! and does some simple manipulations.
//!
//! Iterators and itertools functionality are used throughout.
| 6 | |
| 7 | use itertools::Itertools; |
| 8 | use std::collections::HashMap; |
| 9 | use std::iter::repeat; |
| 10 | use std::num::ParseFloatError; |
| 11 | use std::str::FromStr; |
| 12 | |
| 13 | static DATA: &'static str = include_str!("iris.data"); |
| 14 | |
/// One record of the dataset: a name plus four numeric columns.
#[derive(Clone, Debug, PartialEq)]
struct Iris {
    /// Name read from the field that follows the four numeric columns.
    name: String,
    /// The four numeric columns, in the order they appear on the line.
    data: [f32; 4],
}
| 20 | |
/// Reasons a line of the dataset can fail to parse.
#[derive(Clone, Debug)]
enum ParseError {
    /// A numeric field could not be parsed as a float.
    Numeric(ParseFloatError),
    /// Any other problem, described by a static message.
    Other(&'static str),
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ParseError::Numeric(err) => write!(f, "invalid number: {}", err),
            ParseError::Other(msg) => f.write_str(msg),
        }
    }
}

impl std::error::Error for ParseError {
    /// Expose the underlying float parse error, when there is one, so that
    /// callers walking the error chain can reach it.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ParseError::Numeric(err) => Some(err),
            ParseError::Other(_) => None,
        }
    }
}
| 26 | |
| 27 | impl From<ParseFloatError> for ParseError { |
| 28 | fn from(err: ParseFloatError) -> Self { |
| 29 | ParseError::Numeric(err) |
| 30 | } |
| 31 | } |
| 32 | |
| 33 | /// Parse an Iris from a comma-separated line |
| 34 | impl FromStr for Iris { |
| 35 | type Err = ParseError; |
| 36 | |
| 37 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 38 | let mut iris = Iris { name: "".into(), data: [0.; 4] }; |
| 39 | let mut parts = s.split(",").map(str::trim); |
| 40 | |
| 41 | // using Iterator::by_ref() |
| 42 | for (index, part) in parts.by_ref().take(4).enumerate() { |
| 43 | iris.data[index] = part.parse::<f32>()?; |
| 44 | } |
| 45 | if let Some(name) = parts.next() { |
| 46 | iris.name = name.into(); |
| 47 | } else { |
| 48 | return Err(ParseError::Other("Missing name")) |
| 49 | } |
| 50 | Ok(iris) |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | fn main() { |
| 55 | // using Itertools::fold_results to create the result of parsing |
| 56 | let irises = DATA.lines() |
| 57 | .map(str::parse) |
Joel Galenson | 6f79871 | 2021-04-01 17:03:06 -0700 | [diff] [blame] | 58 | .fold_ok(Vec::new(), |mut v, iris: Iris| { |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 59 | v.push(iris); |
| 60 | v |
| 61 | }); |
| 62 | let mut irises = match irises { |
| 63 | Err(e) => { |
| 64 | println!("Error parsing: {:?}", e); |
| 65 | std::process::exit(1); |
| 66 | } |
| 67 | Ok(data) => data, |
| 68 | }; |
| 69 | |
| 70 | // Sort them and group them |
| 71 | irises.sort_by(|a, b| Ord::cmp(&a.name, &b.name)); |
| 72 | |
| 73 | // using Iterator::cycle() |
| 74 | let mut plot_symbols = "+ox".chars().cycle(); |
| 75 | let mut symbolmap = HashMap::new(); |
| 76 | |
| 77 | // using Itertools::group_by |
| 78 | for (species, species_group) in &irises.iter().group_by(|iris| &iris.name) { |
| 79 | // assign a plot symbol |
| 80 | symbolmap.entry(species).or_insert_with(|| { |
| 81 | plot_symbols.next().unwrap() |
| 82 | }); |
| 83 | println!("{} (symbol={})", species, symbolmap[species]); |
| 84 | |
| 85 | for iris in species_group { |
| 86 | // using Itertools::format for lazy formatting |
| 87 | println!("{:>3.1}", iris.data.iter().format(", ")); |
| 88 | } |
| 89 | |
| 90 | } |
| 91 | |
| 92 | // Look at all combinations of the four columns |
| 93 | // |
| 94 | // See https://en.wikipedia.org/wiki/Iris_flower_data_set |
| 95 | // |
| 96 | let n = 30; // plot size |
| 97 | let mut plot = vec![' '; n * n]; |
| 98 | |
| 99 | // using Itertools::tuple_combinations |
| 100 | for (a, b) in (0..4).tuple_combinations() { |
| 101 | println!("Column {} vs {}:", a, b); |
| 102 | |
| 103 | // Clear plot |
| 104 | // |
| 105 | // using std::iter::repeat; |
| 106 | // using Itertools::set_from |
| 107 | plot.iter_mut().set_from(repeat(' ')); |
| 108 | |
| 109 | // using Itertools::minmax |
| 110 | let min_max = |data: &[Iris], col| { |
| 111 | data.iter() |
| 112 | .map(|iris| iris.data[col]) |
| 113 | .minmax() |
| 114 | .into_option() |
| 115 | .expect("Can't find min/max of empty iterator") |
| 116 | }; |
| 117 | let (min_x, max_x) = min_max(&irises, a); |
| 118 | let (min_y, max_y) = min_max(&irises, b); |
| 119 | |
| 120 | // Plot the data points |
| 121 | let round_to_grid = |x, min, max| ((x - min) / (max - min) * ((n - 1) as f32)) as usize; |
| 122 | let flip = |ix| n - 1 - ix; // reverse axis direction |
| 123 | |
| 124 | for iris in &irises { |
| 125 | let ix = round_to_grid(iris.data[a], min_x, max_x); |
| 126 | let iy = flip(round_to_grid(iris.data[b], min_y, max_y)); |
| 127 | plot[n * iy + ix] = symbolmap[&iris.name]; |
| 128 | } |
| 129 | |
| 130 | // render plot |
| 131 | // |
| 132 | // using Itertools::join |
| 133 | for line in plot.chunks(n) { |
| 134 | println!("{}", line.iter().join(" ")) |
| 135 | } |
| 136 | } |
| 137 | } |