Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 1 | use crate::size_hint; |
| 2 | use crate::Itertools; |
| 3 | |
Joel Galenson | 6f79871 | 2021-04-01 17:03:06 -0700 | [diff] [blame] | 4 | use alloc::vec::Vec; |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 5 | use std::iter::FusedIterator; |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 6 | use std::mem::replace; |
| 7 | use std::fmt; |
| 8 | |
| 9 | /// Head element and Tail iterator pair |
| 10 | /// |
| 11 | /// `PartialEq`, `Eq`, `PartialOrd` and `Ord` are implemented by comparing sequences based on |
| 12 | /// first items (which are guaranteed to exist). |
| 13 | /// |
| 14 | /// The meanings of `PartialOrd` and `Ord` are reversed so as to turn the heap used in |
| 15 | /// `KMerge` into a min-heap. |
| 16 | #[derive(Debug)] |
| 17 | struct HeadTail<I> |
| 18 | where I: Iterator |
| 19 | { |
| 20 | head: I::Item, |
| 21 | tail: I, |
| 22 | } |
| 23 | |
| 24 | impl<I> HeadTail<I> |
| 25 | where I: Iterator |
| 26 | { |
| 27 | /// Constructs a `HeadTail` from an `Iterator`. Returns `None` if the `Iterator` is empty. |
| 28 | fn new(mut it: I) -> Option<HeadTail<I>> { |
| 29 | let head = it.next(); |
| 30 | head.map(|h| { |
| 31 | HeadTail { |
| 32 | head: h, |
| 33 | tail: it, |
| 34 | } |
| 35 | }) |
| 36 | } |
| 37 | |
| 38 | /// Get the next element and update `head`, returning the old head in `Some`. |
| 39 | /// |
| 40 | /// Returns `None` when the tail is exhausted (only `head` then remains). |
| 41 | fn next(&mut self) -> Option<I::Item> { |
| 42 | if let Some(next) = self.tail.next() { |
| 43 | Some(replace(&mut self.head, next)) |
| 44 | } else { |
| 45 | None |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | /// Hints at the size of the sequence, same as the `Iterator` method. |
| 50 | fn size_hint(&self) -> (usize, Option<usize>) { |
| 51 | size_hint::add_scalar(self.tail.size_hint(), 1) |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | impl<I> Clone for HeadTail<I> |
| 56 | where I: Iterator + Clone, |
| 57 | I::Item: Clone |
| 58 | { |
| 59 | clone_fields!(head, tail); |
| 60 | } |
| 61 | |
| 62 | /// Make `data` a heap (min-heap w.r.t the sorting). |
| 63 | fn heapify<T, S>(data: &mut [T], mut less_than: S) |
| 64 | where S: FnMut(&T, &T) -> bool |
| 65 | { |
| 66 | for i in (0..data.len() / 2).rev() { |
| 67 | sift_down(data, i, &mut less_than); |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | /// Sift down element at `index` (`heap` is a min-heap wrt the ordering) |
| 72 | fn sift_down<T, S>(heap: &mut [T], index: usize, mut less_than: S) |
| 73 | where S: FnMut(&T, &T) -> bool |
| 74 | { |
| 75 | debug_assert!(index <= heap.len()); |
| 76 | let mut pos = index; |
| 77 | let mut child = 2 * pos + 1; |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 78 | // Require the right child to be present |
| 79 | // This allows to find the index of the smallest child without a branch |
| 80 | // that wouldn't be predicted if present |
| 81 | while child + 1 < heap.len() { |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 82 | // pick the smaller of the two children |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 83 | // use aritmethic to avoid an unpredictable branch |
| 84 | child += less_than(&heap[child+1], &heap[child]) as usize; |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 85 | |
| 86 | // sift down is done if we are already in order |
| 87 | if !less_than(&heap[child], &heap[pos]) { |
| 88 | return; |
| 89 | } |
| 90 | heap.swap(pos, child); |
| 91 | pos = child; |
| 92 | child = 2 * pos + 1; |
| 93 | } |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 94 | // Check if the last (left) child was an only child |
| 95 | // if it is then it has to be compared with the parent |
| 96 | if child + 1 == heap.len() && less_than(&heap[child], &heap[pos]) { |
| 97 | heap.swap(pos, child); |
| 98 | } |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | /// An iterator adaptor that merges an abitrary number of base iterators in ascending order. |
| 102 | /// If all base iterators are sorted (ascending), the result is sorted. |
| 103 | /// |
| 104 | /// Iterator element type is `I::Item`. |
| 105 | /// |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 106 | /// See [`.kmerge()`](crate::Itertools::kmerge) for more information. |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 107 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] |
| 108 | pub type KMerge<I> = KMergeBy<I, KMergeByLt>; |
| 109 | |
| 110 | pub trait KMergePredicate<T> { |
| 111 | fn kmerge_pred(&mut self, a: &T, b: &T) -> bool; |
| 112 | } |
| 113 | |
David LeGare | b72e905 | 2022-03-02 16:21:08 +0000 | [diff] [blame] | 114 | #[derive(Clone, Debug)] |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 115 | pub struct KMergeByLt; |
| 116 | |
| 117 | impl<T: PartialOrd> KMergePredicate<T> for KMergeByLt { |
| 118 | fn kmerge_pred(&mut self, a: &T, b: &T) -> bool { |
| 119 | a < b |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | impl<T, F: FnMut(&T, &T)->bool> KMergePredicate<T> for F { |
| 124 | fn kmerge_pred(&mut self, a: &T, b: &T) -> bool { |
| 125 | self(a, b) |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | /// Create an iterator that merges elements of the contained iterators using |
| 130 | /// the ordering function. |
| 131 | /// |
| 132 | /// Equivalent to `iterable.into_iter().kmerge()`. |
| 133 | /// |
| 134 | /// ``` |
| 135 | /// use itertools::kmerge; |
| 136 | /// |
| 137 | /// for elt in kmerge(vec![vec![0, 2, 4], vec![1, 3, 5], vec![6, 7]]) { |
| 138 | /// /* loop body */ |
| 139 | /// } |
| 140 | /// ``` |
| 141 | pub fn kmerge<I>(iterable: I) -> KMerge<<I::Item as IntoIterator>::IntoIter> |
| 142 | where I: IntoIterator, |
| 143 | I::Item: IntoIterator, |
| 144 | <<I as IntoIterator>::Item as IntoIterator>::Item: PartialOrd |
| 145 | { |
| 146 | kmerge_by(iterable, KMergeByLt) |
| 147 | } |
| 148 | |
| 149 | /// An iterator adaptor that merges an abitrary number of base iterators |
| 150 | /// according to an ordering function. |
| 151 | /// |
| 152 | /// Iterator element type is `I::Item`. |
| 153 | /// |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 154 | /// See [`.kmerge_by()`](crate::Itertools::kmerge_by) for more |
Jakub Kotur | a425e55 | 2020-12-21 17:28:15 +0100 | [diff] [blame] | 155 | /// information. |
| 156 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] |
| 157 | pub struct KMergeBy<I, F> |
| 158 | where I: Iterator, |
| 159 | { |
| 160 | heap: Vec<HeadTail<I>>, |
| 161 | less_than: F, |
| 162 | } |
| 163 | |
| 164 | impl<I, F> fmt::Debug for KMergeBy<I, F> |
| 165 | where I: Iterator + fmt::Debug, |
| 166 | I::Item: fmt::Debug, |
| 167 | { |
| 168 | debug_fmt_fields!(KMergeBy, heap); |
| 169 | } |
| 170 | |
| 171 | /// Create an iterator that merges elements of the contained iterators. |
| 172 | /// |
| 173 | /// Equivalent to `iterable.into_iter().kmerge_by(less_than)`. |
| 174 | pub fn kmerge_by<I, F>(iterable: I, mut less_than: F) |
| 175 | -> KMergeBy<<I::Item as IntoIterator>::IntoIter, F> |
| 176 | where I: IntoIterator, |
| 177 | I::Item: IntoIterator, |
| 178 | F: KMergePredicate<<<I as IntoIterator>::Item as IntoIterator>::Item>, |
| 179 | { |
| 180 | let iter = iterable.into_iter(); |
| 181 | let (lower, _) = iter.size_hint(); |
| 182 | let mut heap: Vec<_> = Vec::with_capacity(lower); |
| 183 | heap.extend(iter.filter_map(|it| HeadTail::new(it.into_iter()))); |
| 184 | heapify(&mut heap, |a, b| less_than.kmerge_pred(&a.head, &b.head)); |
| 185 | KMergeBy { heap, less_than } |
| 186 | } |
| 187 | |
| 188 | impl<I, F> Clone for KMergeBy<I, F> |
| 189 | where I: Iterator + Clone, |
| 190 | I::Item: Clone, |
| 191 | F: Clone, |
| 192 | { |
| 193 | clone_fields!(heap, less_than); |
| 194 | } |
| 195 | |
| 196 | impl<I, F> Iterator for KMergeBy<I, F> |
| 197 | where I: Iterator, |
| 198 | F: KMergePredicate<I::Item> |
| 199 | { |
| 200 | type Item = I::Item; |
| 201 | |
| 202 | fn next(&mut self) -> Option<Self::Item> { |
| 203 | if self.heap.is_empty() { |
| 204 | return None; |
| 205 | } |
| 206 | let result = if let Some(next) = self.heap[0].next() { |
| 207 | next |
| 208 | } else { |
| 209 | self.heap.swap_remove(0).head |
| 210 | }; |
| 211 | let less_than = &mut self.less_than; |
| 212 | sift_down(&mut self.heap, 0, |a, b| less_than.kmerge_pred(&a.head, &b.head)); |
| 213 | Some(result) |
| 214 | } |
| 215 | |
| 216 | fn size_hint(&self) -> (usize, Option<usize>) { |
| 217 | self.heap.iter() |
| 218 | .map(|i| i.size_hint()) |
| 219 | .fold1(size_hint::add) |
| 220 | .unwrap_or((0, Some(0))) |
| 221 | } |
| 222 | } |
Joel Galenson | b593e25 | 2021-06-21 13:15:57 -0700 | [diff] [blame] | 223 | |
| 224 | impl<I, F> FusedIterator for KMergeBy<I, F> |
| 225 | where I: Iterator, |
| 226 | F: KMergePredicate<I::Item> |
| 227 | {} |