blob: 42049df35c53a48f09ea47faff5cdd15dc711ba6 [file] [log] [blame]
Joel Galensonb593e252021-06-21 13:15:57 -07001use std::hash::Hash;
2
3mod private {
4 use std::collections::HashMap;
5 use std::hash::Hash;
6 use std::fmt;
7
8 #[derive(Clone)]
9 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
10 pub struct DuplicatesBy<I: Iterator, Key, F> {
11 pub(crate) iter: I,
12 pub(crate) meta: Meta<Key, F>,
13 }
14
15 impl<I, V, F> fmt::Debug for DuplicatesBy<I, V, F>
16 where
17 I: Iterator + fmt::Debug,
18 V: fmt::Debug + Hash + Eq,
19 {
20 debug_fmt_fields!(DuplicatesBy, iter, meta.used);
21 }
22
23 impl<I: Iterator, Key: Eq + Hash, F> DuplicatesBy<I, Key, F> {
24 pub(crate) fn new(iter: I, key_method: F) -> Self {
25 DuplicatesBy {
26 iter,
27 meta: Meta {
28 used: HashMap::new(),
29 pending: 0,
30 key_method,
31 },
32 }
33 }
34 }
35
36 #[derive(Clone)]
37 pub struct Meta<Key, F> {
38 used: HashMap<Key, bool>,
39 pending: usize,
40 key_method: F,
41 }
42
43 impl<Key, F> Meta<Key, F>
44 where
45 Key: Eq + Hash,
46 {
47 /// Takes an item and returns it back to the caller if it's the second time we see it.
48 /// Otherwise the item is consumed and None is returned
49 #[inline(always)]
50 fn filter<I>(&mut self, item: I) -> Option<I>
51 where
52 F: KeyMethod<Key, I>,
53 {
54 let kv = self.key_method.make(item);
55 match self.used.get_mut(kv.key_ref()) {
56 None => {
57 self.used.insert(kv.key(), false);
58 self.pending += 1;
59 None
60 }
61 Some(true) => None,
62 Some(produced) => {
63 *produced = true;
64 self.pending -= 1;
65 Some(kv.value())
66 }
67 }
68 }
69 }
70
71 impl<I, Key, F> Iterator for DuplicatesBy<I, Key, F>
72 where
73 I: Iterator,
74 Key: Eq + Hash,
75 F: KeyMethod<Key, I::Item>,
76 {
77 type Item = I::Item;
78
79 fn next(&mut self) -> Option<Self::Item> {
80 let DuplicatesBy { iter, meta } = self;
81 iter.find_map(|v| meta.filter(v))
82 }
83
84 #[inline]
85 fn size_hint(&self) -> (usize, Option<usize>) {
86 let (_, hi) = self.iter.size_hint();
87 // There are `hi` number of items left in the base iterator. In the best case scenario,
88 // these items are exactly the same as the ones pending (i.e items seen exactly once so
89 // far), plus (hi - pending) / 2 pairs of never seen before items.
90 let hi = hi.map(|hi| {
91 let max_pending = std::cmp::min(self.meta.pending, hi);
92 let max_new = std::cmp::max(hi - self.meta.pending, 0) / 2;
93 max_pending + max_new
94 });
95 // The lower bound is always 0 since we might only get unique items from now on
96 (0, hi)
97 }
98 }
99
100 impl<I, Key, F> DoubleEndedIterator for DuplicatesBy<I, Key, F>
101 where
102 I: DoubleEndedIterator,
103 Key: Eq + Hash,
104 F: KeyMethod<Key, I::Item>,
105 {
106 fn next_back(&mut self) -> Option<Self::Item> {
107 let DuplicatesBy { iter, meta } = self;
108 iter.rev().find_map(|v| meta.filter(v))
109 }
110 }
111
112 /// A keying method for use with `DuplicatesBy`
113 pub trait KeyMethod<K, V> {
114 type Container: KeyXorValue<K, V>;
115
116 fn make(&mut self, value: V) -> Self::Container;
117 }
118
119 /// Apply the identity function to elements before checking them for equality.
120 pub struct ById;
121 impl<V> KeyMethod<V, V> for ById {
122 type Container = JustValue<V>;
123
124 fn make(&mut self, v: V) -> Self::Container {
125 JustValue(v)
126 }
127 }
128
129 /// Apply a user-supplied function to elements before checking them for equality.
130 pub struct ByFn<F>(pub(crate) F);
131 impl<K, V, F> KeyMethod<K, V> for ByFn<F>
132 where
133 F: FnMut(&V) -> K,
134 {
135 type Container = KeyValue<K, V>;
136
137 fn make(&mut self, v: V) -> Self::Container {
138 KeyValue((self.0)(&v), v)
139 }
140 }
141
142 // Implementors of this trait can hold onto a key and a value but only give access to one of them
143 // at a time. This allows the key and the value to be the same value internally
144 pub trait KeyXorValue<K, V> {
145 fn key_ref(&self) -> &K;
146 fn key(self) -> K;
147 fn value(self) -> V;
148 }
149
150 pub struct KeyValue<K, V>(K, V);
151 impl<K, V> KeyXorValue<K, V> for KeyValue<K, V> {
152 fn key_ref(&self) -> &K {
153 &self.0
154 }
155 fn key(self) -> K {
156 self.0
157 }
158 fn value(self) -> V {
159 self.1
160 }
161 }
162
163 pub struct JustValue<V>(V);
164 impl<V> KeyXorValue<V, V> for JustValue<V> {
165 fn key_ref(&self) -> &V {
166 &self.0
167 }
168 fn key(self) -> V {
169 self.0
170 }
171 fn value(self) -> V {
172 self.0
173 }
174 }
175}
176
177/// An iterator adapter to filter for duplicate elements.
178///
179/// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information.
180#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
181pub type DuplicatesBy<I, V, F> = private::DuplicatesBy<I, V, private::ByFn<F>>;
182
183/// Create a new `DuplicatesBy` iterator.
184pub fn duplicates_by<I, Key, F>(iter: I, f: F) -> DuplicatesBy<I, Key, F>
185where
186 Key: Eq + Hash,
187 F: FnMut(&I::Item) -> Key,
188 I: Iterator,
189{
190 DuplicatesBy::new(iter, private::ByFn(f))
191}
192
193/// An iterator adapter to filter out duplicate elements.
194///
195/// See [`.duplicates()`](crate::Itertools::duplicates) for more information.
196pub type Duplicates<I> = private::DuplicatesBy<I, <I as Iterator>::Item, private::ById>;
197
198/// Create a new `Duplicates` iterator.
199pub fn duplicates<I>(iter: I) -> Duplicates<I>
200where
201 I: Iterator,
202 I::Item: Eq + Hash,
203{
204 Duplicates::new(iter, private::ById)
205}
206