Snap for 7213262 from 8d529d840c6c556fe14f40cc2c5c846cf202064f to sc-v2-release

Change-Id: I234ae6013f5abc360f4b6c9412a77c1d01282b8b
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..c28c857
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+  "git": {
+    "sha1": "dc13cb7875ad43c7d1ea8b1e504b09c031f7ed5a"
+  }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..274bf18
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+Cargo.lock
+target
+*~
+TAGS
+*.bk
+.idea
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..808c581
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,34 @@
+// This file is generated by cargo2android.py --run --device --dependencies.
+// Do not modify this file as changes will be overridden on upgrade.
+
+rust_library {
+    name: "librayon",
+    // has rustc warnings
+    host_supported: true,
+    crate_name: "rayon",
+    srcs: ["src/lib.rs"],
+    edition: "2018",
+    flags: [
+        "--cfg step_by",
+    ],
+    rustlibs: [
+        "libcrossbeam_deque",
+        "libeither",
+        "librayon_core",
+    ],
+}
+
+// dependent_library ["feature_list"]
+//   autocfg-1.0.1
+//   cfg-if-1.0.0
+//   crossbeam-channel-0.5.0 "crossbeam-utils,default,std"
+//   crossbeam-deque-0.8.0 "crossbeam-epoch,crossbeam-utils,default,std"
+//   crossbeam-epoch-0.9.3 "alloc,lazy_static,std"
+//   crossbeam-utils-0.8.3 "default,lazy_static,std"
+//   either-1.6.1
+//   lazy_static-1.4.0
+//   libc-0.2.88 "default,std"
+//   memoffset-0.6.1 "default"
+//   num_cpus-1.13.0
+//   rayon-core-1.9.0
+//   scopeguard-1.1.0
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..2129b6f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,51 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+edition = "2018"
+name = "rayon"
+version = "1.5.0"
+authors = ["Niko Matsakis <niko@alum.mit.edu>", "Josh Stone <cuviper@gmail.com>"]
+exclude = ["/ci/*", "/scripts/*", "/.github/*", "/bors.toml"]
+description = "Simple work-stealing parallelism for Rust"
+documentation = "https://docs.rs/rayon/"
+readme = "README.md"
+keywords = ["parallel", "thread", "concurrency", "join", "performance"]
+categories = ["concurrency"]
+license = "Apache-2.0/MIT"
+repository = "https://github.com/rayon-rs/rayon"
+[dependencies.crossbeam-deque]
+version = "0.8.0"
+
+[dependencies.either]
+version = "1.0"
+default-features = false
+
+[dependencies.rayon-core]
+version = "1.9.0"
+[dev-dependencies.docopt]
+version = "1"
+
+[dev-dependencies.lazy_static]
+version = "1"
+
+[dev-dependencies.rand]
+version = "0.7"
+
+[dev-dependencies.rand_xorshift]
+version = "0.2"
+
+[dev-dependencies.serde]
+version = "1.0.85"
+features = ["derive"]
+[build-dependencies.autocfg]
+version = "1"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..a7600e1
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,41 @@
+[package]
+name = "rayon"
+# Reminder to update html_root_url in lib.rs when updating version
+version = "1.5.0"
+authors = ["Niko Matsakis <niko@alum.mit.edu>",
+           "Josh Stone <cuviper@gmail.com>"]
+description = "Simple work-stealing parallelism for Rust"
+edition = "2018"
+license = "Apache-2.0/MIT"
+repository = "https://github.com/rayon-rs/rayon"
+documentation = "https://docs.rs/rayon/"
+readme = "README.md"
+keywords = ["parallel", "thread", "concurrency", "join", "performance"]
+categories = ["concurrency"]
+exclude = ["/ci/*", "/scripts/*", "/.github/*", "/bors.toml"]
+
+[workspace]
+members = ["rayon-demo", "rayon-core"]
+exclude = ["ci"]
+
+[dependencies]
+rayon-core = { version = "1.9.0", path = "rayon-core" }
+crossbeam-deque = "0.8.0"
+
+# This is a public dependency!
+[dependencies.either]
+version = "1.0"
+default-features = false
+
+[dev-dependencies]
+docopt = "1"
+lazy_static = "1"
+rand = "0.7"
+rand_xorshift = "0.2"
+
+[dev-dependencies.serde]
+version = "1.0.85"
+features = ["derive"]
+
+[build-dependencies]
+autocfg = "1"
diff --git a/FAQ.md b/FAQ.md
new file mode 100644
index 0000000..11117d3
--- /dev/null
+++ b/FAQ.md
@@ -0,0 +1,227 @@
+# Rayon FAQ
+
+This file is for general questions that don't fit into the README or
+crate docs.
+
+## How many threads will Rayon spawn?
+
+By default, Rayon uses the same number of threads as the number of
+CPUs available. Note that on systems with hyperthreading enabled this
+equals the number of logical cores and not the physical ones.
+
+If you want to alter the number of threads spawned, you can set the
+environment variable `RAYON_NUM_THREADS` to the desired number of
+threads, or use the
+[`ThreadPoolBuilder::build_global`](https://docs.rs/rayon/*/rayon/struct.ThreadPoolBuilder.html#method.build_global)
+method.
+
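+For example, a global pool with four threads can be built in code before
+any parallel work runs. This is only a minimal sketch; `num_threads` and
+`build_global` are real `ThreadPoolBuilder` methods, and `build_global`
+may be called at most once:
+
+```rust
+use rayon::prelude::*;
+
+fn main() {
+    // Configure the global pool before it is used for any parallel work.
+    rayon::ThreadPoolBuilder::new()
+        .num_threads(4)
+        .build_global()
+        .expect("the global thread pool is built only once");
+
+    let sum: i32 = (0..1_000).into_par_iter().sum();
+    println!("{}", sum);
+}
+```
+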
+## How does Rayon balance work between threads?
+
+Behind the scenes, Rayon uses a technique called **work stealing** to
+try and dynamically ascertain how much parallelism is available and
+exploit it. The idea is very simple: we always have a pool of worker
+threads available, waiting for some work to do. When you call `join`
+the first time, we shift over into that pool of threads. But if you
+call `join(a, b)` from a worker thread W, then W will place `b` into
+its work queue, advertising that this is work that other worker
+threads might help out with. W will then start executing `a`.
+
+While W is busy with `a`, other threads might come along and take `b`
+from its queue. That is called *stealing* `b`. Once `a` is done, W
+checks whether `b` was stolen by another thread and, if not, executes
+`b` itself. If W runs out of jobs in its own queue, it will look
+through the other threads' queues and try to steal work from them.
+
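+For example, a divide-and-conquer sum written with `join` exposes the
+second half of the work for other workers to steal (a minimal sketch):
+
+```rust
+fn sum(slice: &[i32]) -> i32 {
+    if slice.len() <= 1024 {
+        // Small enough: just do the work sequentially.
+        return slice.iter().sum();
+    }
+    let (left, right) = slice.split_at(slice.len() / 2);
+    // Both closures may run in parallel; the second one is the job that
+    // sits in this worker's queue and may be stolen by an idle thread.
+    let (a, b) = rayon::join(|| sum(left), || sum(right));
+    a + b
+}
+```
+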
+This technique is not new. It was first introduced by the
+[Cilk project][cilk], done at MIT in the late nineties. The name Rayon
+is an homage to that work.
+
+[cilk]: http://supertech.csail.mit.edu/cilk/
+
+## What should I do if I use `Rc`, `Cell`, `RefCell` or other non-Send-and-Sync types?
+
+There are a number of non-threadsafe types in the Rust standard library,
+and if your code is using them, you will not be able to combine it
+with Rayon. Similarly, even if you don't have such types, but you try
+to have multiple closures mutating the same state, you will get
+compilation errors; for example, this function won't work, because
+both closures access `slice`:
+
+```rust
+/// Increment all values in slice.
+fn increment_all(slice: &mut [i32]) {
+    rayon::join(|| process(slice), || process(slice));
+}
+```
+
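+For this particular slice example, one possible fix (shown only as a
+sketch) is to split the slice so that each closure gets exclusive access
+to a disjoint half:
+
+```rust
+/// Increment all values in slice.
+fn increment_all(slice: &mut [i32]) {
+    let mid = slice.len() / 2;
+    // Each closure now borrows its own half, so both may run at once.
+    let (left, right) = slice.split_at_mut(mid);
+    rayon::join(
+        || left.iter_mut().for_each(|x| *x += 1),
+        || right.iter_mut().for_each(|x| *x += 1),
+    );
+}
+```
+
+The parallel iterator APIs make this even shorter: with `rayon::prelude`
+in scope, `slice.par_iter_mut().for_each(|x| *x += 1)` does the splitting
+for you.
+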
+The correct way to resolve such errors will depend on the case.  Some
+cases are easy: for example, uses of [`Rc`] can typically be replaced
+with [`Arc`], which is basically equivalent, but thread-safe.
+
+Code that uses `Cell` or `RefCell`, however, can be somewhat more complicated.
+If you can refactor your code to avoid those types, that is often the best way
+forward, but otherwise, you can try to replace those types with their threadsafe
+equivalents:
+
+- `Cell` -- replacement: `AtomicUsize`, `AtomicBool`, etc
+- `RefCell` -- replacement: `RwLock`, or perhaps `Mutex`
+
+However, you have to be wary! The parallel versions of these types
+have different atomicity guarantees. For example, with a `Cell`, you
+can increment a counter like so:
+
+```rust
+let value = counter.get();
+counter.set(value + 1);
+```
+
+But when you use the equivalent `AtomicUsize` methods, you are
+actually introducing a potential race condition (not a data race,
+technically, but it can be an awfully fine distinction):
+
+```rust
+let value = tscounter.load(Ordering::SeqCst);
+tscounter.store(value + 1, Ordering::SeqCst);
+```
+
+You can already see that the `AtomicUsize` API is a bit more complex,
+as it requires you to specify an
+[ordering](http://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html). (I
+won't go into the details on ordering here, but suffice to say that if
+you don't know what an ordering is, and probably even if you do, you
+should use `Ordering::SeqCst`.) The danger in this parallel version of
+the counter is that other threads might be running at the same time
+and they could cause our counter to get out of sync. For example, if
+we have two threads, then they might both execute the "load" before
+either has a chance to execute the "store":
+
+```
+Thread 1                                          Thread 2
+let value = tscounter.load(Ordering::SeqCst);
+// value = X                                      let value = tscounter.load(Ordering::SeqCst);
+                                                  // value = X
+tscounter.store(value+1);                         tscounter.store(value+1);
+// tscounter = X+1                                // tscounter = X+1
+```
+
+Now even though we've had two increments, we'll only increase the
+counter by one!  Even though we've got no data race, this is still
+probably not the result we wanted. The problem here is that the `Cell`
+API doesn't make clear the scope of a "transaction" -- that is, the
+set of reads/writes that should occur atomically. In this case, we
+probably wanted the get/set to occur together.
+
+In fact, when using the `Atomic` types, you very rarely want a plain
+`load` or plain `store`. You probably want the more complex
+operations. A counter, for example, would use `fetch_add` to
+atomically load and increment the value in one step. Compare-and-swap
+is another popular building block.
+
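+For example, the racy load/store counter above collapses into a single
+atomic step (a minimal sketch):
+
+```rust
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+let tscounter = AtomicUsize::new(0);
+// Atomically adds 1 and returns the previous value; concurrent callers
+// can no longer lose an increment between a load and a store.
+let previous = tscounter.fetch_add(1, Ordering::SeqCst);
+```
+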
+A similar problem can arise when converting `RefCell` to `RwLock`, but
+it is somewhat less likely, because the `RefCell` API does in fact
+have a notion of a transaction: the scope of the handle returned by
+`borrow` or `borrow_mut`. So if you convert each call to `borrow` to
+`read` (and `borrow_mut` to `write`), things will mostly work fine in
+a parallel setting, but there can still be changes in behavior.
+Consider using a `handle: RefCell<Vec<i32>>` like:
+
+```rust
+let len = handle.borrow().len();
+for i in 0 .. len {
+    let data = handle.borrow()[i];
+    println!("{}", data);
+}
+```
+
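+For concreteness, here is the same loop after a mechanical `borrow` to
+`read` conversion (a sketch, assuming `handle: RwLock<Vec<i32>>`):
+
+```rust
+let len = handle.read().unwrap().len();
+for i in 0 .. len {
+    // The read lock is taken and released on every iteration, so the
+    // length computed above may already be out of date.
+    let data = handle.read().unwrap()[i];
+    println!("{}", data);
+}
+```
+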
+In sequential code, we know that this loop is safe. But if we convert
+this to parallel code with an `RwLock`, we do not: this is because
+another thread could come along and do
+`handle.write().unwrap().pop()`, and thus change the length of the
+vector. In fact, even in *sequential* code, using very small borrow
+sections like this is an anti-pattern: you ought to be enclosing the
+entire transaction together, like so:
+
+```rust
+let vec = handle.borrow();
+let len = vec.len();
+for i in 0 .. len {
+    let data = vec[i];
+    println!("{}", data);
+}
+```
+
+Or, even better, using an iterator instead of indexing:
+
+```rust
+let vec = handle.borrow();
+for data in vec.iter() {
+    println!("{}", data);
+}
+```
+
+There are several reasons to prefer one borrow over many. The most
+obvious is that it is more efficient, since each borrow has to perform
+some safety checks. But it's also more reliable: suppose we modified
+the loop above to not just print things out, but also call into a
+helper function:
+
+```rust
+let vec = handle.borrow();
+for data in vec.iter() {
+    helper(...);
+}
+```
+
+And now suppose, independently, this helper fn evolved and had to pop
+something off of the vector:
+
+```rust
+fn helper(...) {
+    handle.borrow_mut().pop();
+}
+```
+
+Under the old model, where we did lots of small borrows, this would
+yield precisely the same error that we saw in parallel land using an
+`RwLock`: the length would be out of sync and our indexing would fail
+(note that in neither case would there be an actual *data race* and
+hence there would never be undefined behavior). But now that we use a
+single borrow, we'll see a borrow error instead, which is much easier
+to diagnose, since it occurs at the point of the `borrow_mut`, rather
+than downstream. Similarly, if we move to an `RwLock`, we'll find that
+the code either deadlocks (if the write is on the same thread as the
+read) or, if the write is on another thread, works just fine. Both of
+these are preferable to random failures in my experience.
+
+## But wait, isn't Rust supposed to free me from this kind of thinking?
+
+You might think that Rust is supposed to mean that you don't have to
+think about atomicity at all. In fact, if you avoid interior
+mutability (`Cell` and `RefCell` in a sequential setting, or
+`AtomicUsize`, `RwLock`, `Mutex`, et al. in parallel code), then this
+is true: the type system will basically guarantee that you don't have
+to think about atomicity at all. But often there are times when you
+WANT threads to interleave in the ways I showed above.
+
+Consider for example when you are conducting a search in parallel, say
+to find the shortest route. To avoid fruitless search, you might want
+to keep a cell with the shortest route you've found thus far.  This
+way, when you are searching down some path that's already longer than
+this shortest route, you can just stop and avoid wasted effort. In
+sequential land, you might model this "best result" as a shared value
+like `Rc<Cell<usize>>` (here the `usize` represents the length of best
+path found so far); in parallel land, you'd use an `Arc<AtomicUsize>`.
+Now we can make our search function look like:
+
+```rust
+fn search(path: &Path, cost_so_far: usize, best_cost: &Arc<AtomicUsize>) {
+    if cost_so_far >= best_cost.load(Ordering::SeqCst) {
+        return;
+    }
+    ...
+    best_cost.store(...);
+}
+```
+
+Now in this case, we really WANT to see results from other threads
+interjected into our execution!
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..6b579aa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-APACHE
\ No newline at end of file
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..16fe87b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..25597d5
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2010 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..7d115ed
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "rayon"
+description: "Simple work-stealing parallelism for Rust"
+third_party {
+  url {
+    type: HOMEPAGE
+    value: "https://crates.io/crates/rayon"
+  }
+  url {
+    type: ARCHIVE
+    value: "https://static.crates.io/crates/rayon/rayon-1.5.0.crate"
+  }
+  version: "1.5.0"
+  license_type: NOTICE
+  last_upgrade_date {
+    year: 2020
+    month: 12
+    day: 21
+  }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..46fc303
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..869c537
--- /dev/null
+++ b/README.md
@@ -0,0 +1,124 @@
+# Rayon
+
+[![Rayon crate](https://img.shields.io/crates/v/rayon.svg)](https://crates.io/crates/rayon)
+[![Rayon documentation](https://docs.rs/rayon/badge.svg)](https://docs.rs/rayon)
+![minimum rustc 1.36](https://img.shields.io/badge/rustc-1.36+-red.svg)
+![build status](https://github.com/rayon-rs/rayon/workflows/master/badge.svg)
+[![Join the chat at https://gitter.im/rayon-rs/Lobby](https://badges.gitter.im/rayon-rs/Lobby.svg)](https://gitter.im/rayon-rs/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+
+Rayon is a data-parallelism library for Rust. It is extremely
+lightweight and makes it easy to convert a sequential computation into
+a parallel one. It also guarantees data-race freedom. (You may also
+enjoy [this blog post][blog] about Rayon, which gives more background
+and details about how it works, or [this video][video], from the Rust
+Belt Rust conference.) Rayon is
+[available on crates.io](https://crates.io/crates/rayon), and
+[API Documentation is available on docs.rs](https://docs.rs/rayon/).
+
+[blog]: http://smallcultfollowing.com/babysteps/blog/2015/12/18/rayon-data-parallelism-in-rust/
+[video]: https://www.youtube.com/watch?v=gof_OEv71Aw
+
+## Parallel iterators and more
+
+Rayon makes it drop-dead simple to convert sequential iterators into
+parallel ones: usually, you just change your `foo.iter()` call into
+`foo.par_iter()`, and Rayon does the rest:
+
+```rust
+use rayon::prelude::*;
+fn sum_of_squares(input: &[i32]) -> i32 {
+    input.par_iter() // <-- just change that!
+         .map(|&i| i * i)
+         .sum()
+}
+```
+
+[Parallel iterators] take care of deciding how to divide your data
+into tasks; it will dynamically adapt for maximum performance. If you
+need more flexibility than that, Rayon also offers the [join] and
+[scope] functions, which let you create parallel tasks on your own.
+For even more control, you can create [custom threadpools] rather than
+using Rayon's default, global threadpool.
+
+[Parallel iterators]: https://docs.rs/rayon/*/rayon/iter/index.html
+[join]: https://docs.rs/rayon/*/rayon/fn.join.html
+[scope]: https://docs.rs/rayon/*/rayon/fn.scope.html
+[custom threadpools]: https://docs.rs/rayon/*/rayon/struct.ThreadPool.html
+
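+For example, [join] runs two closures that may execute in parallel, and
+[scope] spawns any number of tasks that can borrow from the enclosing
+stack frame. This is only a small sketch:
+
+```rust
+fn count_long_and_short(words: &[&str]) -> (usize, usize) {
+    // The two closures may run on different threads.
+    rayon::join(
+        || words.iter().filter(|w| w.len() > 3).count(),
+        || words.iter().filter(|w| w.len() <= 3).count(),
+    )
+}
+
+fn print_chunk_sums(values: &[i32]) {
+    // Every task spawned here finishes before `scope` returns, so the
+    // tasks are free to borrow `values`.
+    rayon::scope(|s| {
+        for chunk in values.chunks(100) {
+            s.spawn(move |_| {
+                println!("chunk sum: {}", chunk.iter().sum::<i32>());
+            });
+        }
+    });
+}
+```
+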
+## No data races
+
+You may have heard that parallel execution can produce all kinds of
+crazy bugs. Well, rest easy. Rayon's APIs all guarantee **data-race
+freedom**, which generally rules out most parallel bugs (though not
+all). In other words, **if your code compiles**, it typically does the
+same thing it did before.
+
+For the most part, parallel iterators in particular are guaranteed to
+produce the same results as their sequential counterparts. One caveat:
+If your iterator has side effects (for example, sending messages to
+other threads through a [Rust channel] or writing to disk), those side
+effects may occur in a different order. Note also that, in some cases,
+parallel iterators offer alternative versions of the sequential
+iterator methods that can have higher performance.
+
+[Rust channel]: https://doc.rust-lang.org/std/sync/mpsc/fn.channel.html
+
+## Using Rayon
+
+[Rayon is available on crates.io](https://crates.io/crates/rayon). The
+recommended way to use it is to add a line into your Cargo.toml such
+as:
+
+```toml
+[dependencies]
+rayon = "1.5"
+```
+
+To use the Parallel Iterator APIs, a number of traits have to be in
+scope. The easiest way to bring those things into scope is to use the
+[Rayon prelude](https://docs.rs/rayon/*/rayon/prelude/index.html).  In
+each module where you would like to use the parallel iterator APIs,
+just add:
+
+```rust
+use rayon::prelude::*;
+```
+
+Rayon currently requires `rustc 1.36.0` or greater.
+
+## Contribution
+
+Rayon is an open source project! If you'd like to contribute to Rayon, check out [the list of "help wanted" issues](https://github.com/rayon-rs/rayon/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22). These are all (or should be) issues that are suitable for getting started, and they generally include a detailed set of instructions for what to do. Please ask questions if anything is unclear! Also, check out the [Guide to Development](https://github.com/rayon-rs/rayon/wiki/Guide-to-Development) page on the wiki. Note that all code submitted in PRs to Rayon is assumed to [be licensed under Rayon's dual MIT/Apache2 licensing](https://github.com/rayon-rs/rayon/blob/master/README.md#license).
+
+## Quick demo
+
+To see Rayon in action, check out the `rayon-demo` directory, which
+includes a number of demos of code using Rayon. For example, run this
+command to get a visualization of an nbody simulation. To see the
+effect of using Rayon, press `s` to run sequentially and `p` to run in
+parallel.
+
+```text
+> cd rayon-demo
+> cargo run --release -- nbody visualize
+```
+
+For more information on demos, try:
+
+```text
+> cd rayon-demo
+> cargo run --release -- --help
+```
+
+## Other questions?
+
+See [the Rayon FAQ][faq].
+
+[faq]: https://github.com/rayon-rs/rayon/blob/master/FAQ.md
+
+## License
+
+Rayon is distributed under the terms of both the MIT license and the
+Apache License (Version 2.0). See [LICENSE-APACHE](LICENSE-APACHE) and
+[LICENSE-MIT](LICENSE-MIT) for details. Opening a pull request is
+assumed to signal agreement with these licensing terms.
diff --git a/RELEASES.md b/RELEASES.md
new file mode 100644
index 0000000..fa452c3
--- /dev/null
+++ b/RELEASES.md
@@ -0,0 +1,732 @@
+# Release rayon 1.5.0 / rayon-core 1.9.0 (2020-10-21)
+
+- Update crossbeam dependencies.
+- The minimum supported `rustc` is now 1.36.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @mbrubeck
+- @mrksu
+
+# Release rayon 1.4.1 (2020-09-29)
+
+- The new `flat_map_iter` and `flatten_iter` methods can be used to flatten
+  sequential iterators, which may perform better in cases that don't need the
+  nested parallelism of `flat_map` and `flatten`.
+- The new `par_drain` method is a parallel version of the standard `drain` for
+  collections, removing items while keeping the original capacity. Collections
+  that implement this through `ParallelDrainRange` support draining items from
+  arbitrary index ranges, while `ParallelDrainFull` always drains everything.
+- The new `positions` method finds all items that match the given predicate and
+  returns their indices in a new iterator.
+
+# Release rayon-core 1.8.1 (2020-09-17)
+
+- Fixed an overflow panic on high-contention workloads, for a counter that was
+  meant to simply wrap. This panic only occurred with debug assertions enabled,
+  and was much more likely on 32-bit targets.
+
+# Release rayon 1.4.0 / rayon-core 1.8.0 (2020-08-24)
+
+- Implemented a new thread scheduler, [RFC 5], which uses targeted wakeups for
+  new work and for notifications of completed stolen work, reducing wasteful
+  CPU usage in idle threads.
+- Implemented `IntoParallelIterator for Range<char>` and `RangeInclusive<char>`
+  with the same iteration semantics as Rust 1.45.
+- Relaxed the lifetime requirements of the initial `scope` closure.
+
+[RFC 5]: https://github.com/rayon-rs/rfcs/pull/5
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @CAD97
+- @cuviper
+- @kmaork
+- @nikomatsakis
+- @SuperFluffy
+
+
+# Release rayon 1.3.1 / rayon-core 1.7.1 (2020-06-15)
+
+- Fixed a use-after-free race in calls blocked between two rayon thread pools.
+- Collecting to an indexed `Vec` now drops any partial writes while unwinding,
+  rather than just leaking them. If dropping also panics, Rust will abort.
+  - Note: the old leaking behavior is considered _safe_, just not ideal.
+- The new `IndexedParallelIterator::step_by()` adapts an iterator to step
+  through items by the given count, like `Iterator::step_by()`.
+- The new `ParallelSlice::par_chunks_exact()` and mutable equivalent
+  `ParallelSliceMut::par_chunks_exact_mut()` ensure that the chunks always have
+  the exact length requested, leaving any remainder separate, like the slice
+  methods `chunks_exact()` and `chunks_exact_mut()`.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @adrian5
+- @bluss
+- @cuviper
+- @FlyingCanoe
+- @GuillaumeGomez
+- @matthiasbeyer
+- @picoHz
+- @zesterer
+
+
+# Release rayon 1.3.0 / rayon-core 1.7.0 (2019-12-21)
+
+- Tuples up to length 12 now implement `IntoParallelIterator`, creating a
+  `MultiZip` iterator that produces items as similarly-shaped tuples.
+- The `--cfg=rayon_unstable` supporting code for `rayon-futures` is removed.
+- The minimum supported `rustc` is now 1.31.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @c410-f3r
+- @silwol
+
+
+# Release rayon-futures 0.1.1 (2019-12-21)
+
+- `Send` bounds have been added for the `Item` and `Error` associated types on
+  all generic `F: Future` interfaces. While technically a breaking change, this
+  is a soundness fix, so we are not increasing the semantic version for this.
+- This crate is now deprecated, and the `--cfg=rayon_unstable` supporting code
+  will be removed in `rayon-core 1.7.0`. This only supported the now-obsolete
+  `Future` from `futures 0.1`, while support for `std::future::Future` is
+  expected to come directly in `rayon-core` -- although that is not ready yet.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @kornelski
+- @jClaireCodesStuff
+- @jwass
+- @seanchen1991
+
+
+# Release rayon 1.2.1 / rayon-core 1.6.1 (2019-11-20)
+
+- Update crossbeam dependencies.
+- Add top-level doc links for the iterator traits.
+- Document that the iterator traits are not object safe.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @dnaka91
+- @matklad
+- @nikomatsakis
+- @Qqwy
+- @vorner
+
+
+# Release rayon 1.2.0 / rayon-core 1.6.0 (2019-08-30)
+
+- The new `ParallelIterator::copied()` converts an iterator of references into
+  copied values, like `Iterator::copied()`.
+- `ParallelExtend` is now implemented for the unit `()`.
+- Internal updates were made to improve test determinism, reduce closure type
+  sizes, reduce task allocations, and update dependencies.
+- The minimum supported `rustc` is now 1.28.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @Aaron1011
+- @cuviper
+- @ralfbiedert
+
+
+# Release rayon 1.1.0 / rayon-core 1.5.0 (2019-06-12)
+
+- FIFO spawns are now supported using the new `spawn_fifo()` and `scope_fifo()`
+  global functions, and their corresponding `ThreadPool` methods.
+  - Normally when tasks are queued on a thread, the most recent is processed
+    first (LIFO) while other threads will steal the oldest (FIFO). With FIFO
+    spawns, those tasks are processed locally in FIFO order too.
+  - Regular spawns and other tasks like `join` are not affected.
+  - The `breadth_first` configuration flag, which globally approximated this
+    effect, is now deprecated.
+  - For more design details, please see [RFC 1].
+- `ThreadPoolBuilder` can now take a custom `spawn_handler` to control how
+  threads will be created in the pool.
+  - `ThreadPoolBuilder::build_scoped()` uses this to create a scoped thread
+    pool, where the threads are able to use non-static data.
+  - This may also be used to support threading in exotic environments, like
+    WebAssembly, which don't support the normal `std::thread`.
+- `ParallelIterator` has 3 new methods: `find_map_any()`, `find_map_first()`,
+  and `find_map_last()`, like `Iterator::find_map()` with ordering constraints.
+- The new `ParallelIterator::panic_fuse()` makes a parallel iterator halt as soon
+  as possible if any of its threads panic. Otherwise, the panic state is not
+  usually noticed until the iterator joins its parallel tasks back together.
+- `IntoParallelIterator` is now implemented for integral `RangeInclusive`.
+- Several internal `Folder`s now have optimized `consume_iter` implementations.
+- `rayon_core::current_thread_index()` is now re-exported in `rayon`.
+- The minimum `rustc` is now 1.26, following the update policy defined in [RFC 3].
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @didroe
+- @GuillaumeGomez
+- @huonw
+- @janriemer
+- @kornelski
+- @nikomatsakis
+- @seanchen1991
+- @yegeun542
+
+[RFC 1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md
+[RFC 3]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0003-minimum-rustc.md
+
+
+# Release rayon 1.0.3 (2018-11-02)
+
+- `ParallelExtend` is now implemented for tuple pairs, enabling nested
+  `unzip()` and `partition_map()` operations.  For instance, `(A, (B, C))`
+  items can be unzipped into `(Vec<A>, (Vec<B>, Vec<C>))`.
+  - `ParallelExtend<(A, B)>` works like `unzip()`.
+  - `ParallelExtend<Either<A, B>>` works like `partition_map()`.
+- `ParallelIterator` now has a method `map_init()` which calls an `init`
+  function for a value to pair with items, like `map_with()` but dynamically
+  constructed.  That value type has no constraints, not even `Send` or `Sync`.
+  - The new `for_each_init()` is a variant of this for simple iteration.
+  - The new `try_for_each_init()` is a variant for fallible iteration.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @dan-zheng
+- @dholbert
+- @ignatenkobrain
+- @mdonoughe
+
+
+# Release rayon 1.0.2 / rayon-core 1.4.1 (2018-07-17)
+
+- The `ParallelBridge` trait with method `par_bridge()` makes it possible to
+  use any `Send`able `Iterator` in parallel!
+  - This trait has been added to `rayon::prelude`.
+  - It automatically implements internal synchronization and queueing to
+    spread the `Item`s across the thread pool.  Iteration order is not
+    preserved by this adaptor.
+  - "Native" Rayon iterators like `par_iter()` should still be preferred when
+    possible for better efficiency.
+- `ParallelString` now has additional methods for parity with `std` string
+  iterators: `par_char_indices()`, `par_bytes()`, `par_encode_utf16()`,
+  `par_matches()`, and `par_match_indices()`.
+- `ParallelIterator` now has fallible methods `try_fold()`, `try_reduce()`,
+  and `try_for_each`, plus `*_with()` variants of each, for automatically
+  short-circuiting iterators on `None` or `Err` values.  These are inspired by
+  `Iterator::try_fold()` and `try_for_each()` that were stabilized in Rust 1.27.
+- `Range<i128>` and `Range<u128>` are now supported with Rust 1.26 and later.
+- Small improvements have been made to the documentation.
+- `rayon-core` now only depends on `rand` for testing.
+- Rayon tests now work on stable Rust.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @AndyGauge
+- @cuviper
+- @ignatenkobrain
+- @LukasKalbertodt
+- @MajorBreakfast
+- @nikomatsakis
+- @paulkernfeld
+- @QuietMisdreavus
+
+
+# Release rayon 1.0.1 (2018-03-16)
+
+- Added more documentation for `rayon::iter::split()`.
+- Corrected links and typos in documentation.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @HadrienG2
+- @matthiasbeyer
+- @nikomatsakis
+
+
+# Release rayon 1.0.0 / rayon-core 1.4.0 (2018-02-15)
+
+- `ParallelIterator` added the `update` method which applies a function to
+  mutable references, inspired by `itertools`.
+- `IndexedParallelIterator` added the `chunks` method which yields vectors of
+  consecutive items from the base iterator, inspired by `itertools`.
+- `String` now implements `FromParallelIterator<Cow<str>>` and
+  `ParallelExtend<Cow<str>>`, inspired by `std`.
+- `()` now implements `FromParallelIterator<()>`, inspired by `std`.
+- The new `ThreadPoolBuilder` replaces and deprecates `Configuration`.
+  - Errors from initialization now have the concrete `ThreadPoolBuildError`
+    type, rather than `Box<Error>`, and this type implements `Send` and `Sync`.
+  - `ThreadPool::new` is deprecated in favor of `ThreadPoolBuilder::build`.
+  - `initialize` is deprecated in favor of `ThreadPoolBuilder::build_global`.
+- Examples have been added to most of the parallel iterator methods.
+- A lot of the documentation has been reorganized and extended.
+
+## Breaking changes
+
+- Rayon now requires rustc 1.13 or greater.
+- `IndexedParallelIterator::len` and `ParallelIterator::opt_len` now operate on
+  `&self` instead of `&mut self`.
+- `IndexedParallelIterator::collect_into` is now `collect_into_vec`.
+- `IndexedParallelIterator::unzip_into` is now `unzip_into_vecs`.
+- Rayon no longer exports the deprecated `Configuration` and `initialize` from
+  rayon-core.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @Bilkow
+- @cuviper
+- @Enet4
+- @ignatenkobrain
+- @iwillspeak
+- @jeehoonkang
+- @jwass
+- @Kerollmops
+- @KodrAus
+- @kornelski
+- @MaloJaffre
+- @nikomatsakis
+- @obv-mikhail
+- @oddg
+- @phimuemue
+- @stjepang
+- @tmccombs
+- bors[bot]
+
+
+# Release rayon 0.9.0 / rayon-core 1.3.0 / rayon-futures 0.1.0 (2017-11-09)
+
+- `Configuration` now has a `build` method.
+- `ParallelIterator` added `flatten` and `intersperse`, both inspired by
+  itertools.
+- `IndexedParallelIterator` added `interleave`, `interleave_shortest`, and
+  `zip_eq`, all inspired by itertools.
+- The new functions `iter::empty` and `once` create parallel iterators of
+  exactly zero or one item, like their `std` counterparts.
+- The new functions `iter::repeat` and `repeatn` create parallel iterators
+  repeating an item indefinitely or `n` times, respectively.
+- The new function `join_context` works like `join`, with an added `FnContext`
+  parameter that indicates whether the job was stolen.
+- `Either` (used by `ParallelIterator::partition_map`) is now re-exported from
+  the `either` crate, instead of defining our own type.
+  - `Either` also now implements `ParallelIterator`, `IndexedParallelIterator`,
+    and `ParallelExtend` when both of its `Left` and `Right` types do.
+- All public types now implement `Debug`.
+- Many of the parallel iterators now implement `Clone` where possible.
+- Much of the documentation has been extended (but it could still use more help!).
+- All rayon crates have improved metadata.
+- Rayon was evaluated in the Libz Blitz, leading to many of these improvements.
+- Rayon pull requests are now guarded by bors-ng.
+
+## Futures
+
+The `spawn_future()` method has been refactored into its own `rayon-futures`
+crate, now through a `ScopeFutureExt` trait for `ThreadPool` and `Scope`.  The
+supporting `rayon-core` APIs are still gated by `--cfg rayon_unstable`.
+
+## Breaking changes
+
+- Two breaking changes have been made to `rayon-core`, but since they're fixing
+  soundness bugs, we are considering these _minor_ changes for semver.
+  - `Scope::spawn` now requires `Send` for the closure.
+  - `ThreadPool::install` now requires `Send` for the return value.
+- The `iter::internal` module has been renamed to `iter::plumbing`, to hopefully
+  indicate that while these are low-level details, they're not really internal
+  or private to rayon.  The contents of that module are needed for third-parties
+  to implement new parallel iterators, and we'll treat them with normal semver
+  stability guarantees.
+- The function `rayon::iter::split` is no longer re-exported as `rayon::split`.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @AndyGauge
+- @ChristopherDavenport
+- @chrisvittal
+- @cuviper
+- @dns2utf8
+- @dtolnay
+- @frewsxcv
+- @gsquire
+- @Hittherhod
+- @jdr023
+- @laumann
+- @leodasvacas
+- @lvillani
+- @MajorBreakfast
+- @mamuleanu
+- @marmistrz
+- @mbrubeck
+- @mgattozzi
+- @nikomatsakis
+- @smt923
+- @stjepang
+- @tmccombs
+- @vishalsodani
+- bors[bot]
+
+
+# Release rayon 0.8.2 (2017-06-28)
+
+- `ParallelSliceMut` now has six parallel sorting methods with the same
+  variations as the standard library.
+  - `par_sort`, `par_sort_by`, and `par_sort_by_key` perform stable sorts in
+    parallel, using the default order, a custom comparator, or a key extraction
+    function, respectively.
+  - `par_sort_unstable`, `par_sort_unstable_by`, and `par_sort_unstable_by_key`
+    perform unstable sorts with the same comparison options.
+  - Thanks to @stjepang!
+
+
+# Release rayon 0.8.1 / rayon-core 1.2.0 (2017-06-14)
+
+- The following core APIs are being stabilized:
+  - `rayon::spawn()` -- spawns a task into the Rayon threadpool; as it
+    is contained in the global scope (rather than a user-created
+    scope), the task cannot capture anything from the current stack
+    frame.
+  - `ThreadPool::join()`, `ThreadPool::spawn()`, `ThreadPool::scope()`
+    -- convenience APIs for launching new work within a thread-pool.
+- The various iterator adapters are now tagged with `#[must_use]`
+- Parallel iterators now offer a `for_each_with` adapter, similar to
+  `map_with`.
+- We are adopting a new approach to handling the remaining unstable
+  APIs (which primarily pertain to futures integration). As always,
+  unstable APIs are intended for experimentation, but do not come with
+  any promise of compatibility (in other words, we might change them
+  in arbitrary ways in any release). Previously, we designated such
+  APIs using a Cargo feature "unstable". Now, we are using a regular
+  `#[cfg]` flag. This means that to see the unstable APIs, you must do
+  `RUSTFLAGS='--cfg rayon_unstable' cargo build`. This is
+  intentionally inconvenient; in particular, if you are a library,
+  then your clients must also modify their environment, signaling
+  their agreement to instability.
+
+
+# Release rayon 0.8.0 / rayon-core 1.1.0 (2017-06-13)
+
+## Rayon 0.8.0
+
+- Added the `map_with` and `fold_with` combinators, which help for
+  passing along state (like channels) that cannot be shared between
+  threads but which can be cloned on each thread split.
+- Added the `while_some` combinator, which helps for writing short-circuiting iterators.
+- Added support for "short-circuiting" collection: e.g., collecting
+  from an iterator producing `Option<T>` or `Result<T, E>` into a
+  `Option<Collection<T>>` or `Result<Collection<T>, E>`.
+- Support `FromParallelIterator` for `Cow`.
+- Removed the deprecated weight APIs.
+- Simplified the parallel iterator trait hierarchy by removing the
+  `BoundedParallelIterator` and `ExactParallelIterator` traits,
+  which were not serving much purpose.
+- Improved documentation.
+- Added some missing `Send` impls.
+- Fixed some small bugs.
+
+## Rayon-core 1.1.0
+
+- We now have more documentation.
+- Renamed the (unstable) methods `spawn_async` and
+  `spawn_future_async` -- which spawn tasks that cannot hold
+  references -- to simply `spawn` and `spawn_future`, respectively.
+- We are now using the coco library for our deque.
+- Individual threadpools can now be configured in "breadth-first"
+  mode, which causes them to execute spawned tasks in the reverse
+  order that they used to.  In some specific scenarios, this can be a
+  win (though it is not generally the right choice).
+- Added top-level functions:
+  - `current_thread_index`, for querying the index of the current worker thread within
+    its thread-pool (previously available as `thread_pool.current_thread_index()`);
+  - `current_thread_has_pending_tasks`, for querying whether the
+    current worker thread has an empty task deque or not. This can be
+    useful when deciding whether to spawn a task.
+- The environment variables for controlling Rayon are now
+  `RAYON_NUM_THREADS` and `RAYON_LOG`. The older variables (e.g.,
+  `RAYON_RS_NUM_CPUS`) are still supported but deprecated.
+
+## Rayon-demo
+
+- Added a new game-of-life benchmark.
+
+## Contributors
+
+Thanks to the following contributors:
+
+- @ChristopherDavenport
+- @SuperFluffy
+- @antoinewdg
+- @crazymykl
+- @cuviper
+- @glandium
+- @julian-seward1
+- @leodasvacas
+- @leshow
+- @lilianmoraru
+- @mschmo
+- @nikomatsakis
+- @stjepang
+
+
+# Release rayon 0.7.1 / rayon-core 1.0.2 (2017-05-30)
+
+This release is a targeted performance fix for #343, an issue where
+rayon threads could sometimes enter into a spin loop where they would
+be unable to make progress until they are pre-empted.
+
+
+# Release rayon 0.7 / rayon-core 1.0 (2017-04-06)
+
+This release marks the first step towards Rayon 1.0. **For best
+performance, it is important that all Rayon users update to at least
+Rayon 0.7.** This is because, as of Rayon 0.7, we have taken steps to
+ensure that, no matter how many versions of rayon are actively in use,
+there will only be a single global scheduler. This is achieved via the
+`rayon-core` crate, which is being released at version 1.0, and which
+encapsulates the core scheduling APIs like `join()`. (Note: the
+`rayon-core` crate is, to some degree, an implementation detail, and
+not intended to be imported directly; its entire API surface is
+mirrored through the rayon crate.)
+
+We have also done a lot of work reorganizing the API for Rayon 0.7 in
+preparation for 1.0. The names of iterator types have been changed and
+reorganized (but few users are expected to be naming those types
+explicitly anyhow). In addition, a number of parallel iterator methods
+have been adjusted to match those in the standard iterator traits more
+closely. See the "Breaking Changes" section below for
+details.
+
+Finally, Rayon 0.7 includes a number of new features and new parallel
+iterator methods. **As of this release, Rayon's parallel iterators
+have officially reached parity with sequential iterators** -- that is,
+every sequential iterator method that makes any sense in parallel is
+supported in some capacity.
+
+### New features and methods
+
+- The internal `Producer` trait now features `fold_with`, which enables
+  better performance for some parallel iterators.
+- Strings now support `par_split()` and `par_split_whitespace()`.
+- The `Configuration` API is expanded and simplified:
+    - `num_threads(0)` no longer triggers an error
+    - you can now supply a closure to name the Rayon threads that get created
+      by using `Configuration::thread_name`.
+    - you can now inject code when Rayon threads start up and finish
+    - you can now set a custom panic handler to handle panics in various odd situations
+- Threadpools are now able to more gracefully put threads to sleep when not needed.
+- Parallel iterators now support `find_first()`, `find_last()`, `position_first()`,
+  and `position_last()`.
+- Parallel iterators now support `rev()`, which primarily affects subsequent calls
+  to `enumerate()`.
+- The `scope()` API is now considered stable (and part of `rayon-core`).
+- There is now a useful `rayon::split` function for creating custom
+  Rayon parallel iterators.
+- Parallel iterators now allow you to customize the min/max number of
+  items to be processed in a given thread. This mechanism replaces the
+  older `weight` mechanism, which is deprecated.
+- `sum()` and friends now use the standard `Sum` traits
+
+### Breaking changes
+
+In the move towards 1.0, there have been a number of minor breaking changes:
+
+- Configuration setters like `Configuration::set_num_threads()` lost the `set_` prefix,
+  and hence become something like `Configuration::num_threads()`.
+- `Configuration` getters are removed
+- Iterator types have been shuffled around and exposed more consistently:
+    - combinator types live in `rayon::iter`, e.g. `rayon::iter::Filter`
+    - iterators over various types live in a module named after their type,
+      e.g. `rayon::slice::Windows`
+- When doing a `sum()` or `product()`, type annotations are needed for the result
+  since it is now possible to have the resulting sum be of a type other than the value
+  you are iterating over (this mirrors sequential iterators).
+
+### Experimental features
+
+Experimental features require the use of the `unstable` feature. Their
+APIs may change or disappear entirely in future releases (even minor
+releases) and hence they should be avoided for production code.
+
+- We now have (unstable) support for futures integration. You can use
+  `Scope::spawn_future` or `rayon::spawn_future_async()`.
+- There is now a `rayon::spawn_async()` function for using the Rayon
+  threadpool to run tasks that do not have references to the stack.
+
+### Contributors
+
+Thanks to the following people for their contributions to this release:
+
+- @Aaronepower
+- @ChristopherDavenport
+- @bluss
+- @cuviper
+- @froydnj
+- @gaurikholkar
+- @hniksic
+- @leodasvacas
+- @leshow
+- @martinhath
+- @mbrubeck
+- @nikomatsakis
+- @pegomes
+- @schuster
+- @torkleyy
+
+
+# Release 0.6 (2016-12-21)
+
+This release includes a lot of progress towards the goal of parity
+with the sequential iterator API, though there are still a few methods
+that are not yet complete. If you'd like to help with that effort,
+[check out the milestone](https://github.com/nikomatsakis/rayon/issues?q=is%3Aopen+is%3Aissue+milestone%3A%22Parity+with+the+%60Iterator%60+trait%22)
+to see the remaining issues.
+
+**Announcement:** @cuviper has been added as a collaborator to the
+Rayon repository for all of his outstanding work on Rayon, which
+includes both internal refactoring and helping to shape the public
+API. Thanks @cuviper! Keep it up.
+
+- We now support `collect()` and not just `collect_with()`.
+  You can use `collect()` to build a number of collections,
+  including vectors, maps, and sets. Moreover, when building a vector
+  with `collect()`, you are no longer limited to exact parallel iterators
+  (see the sketch after this list). Thanks @nikomatsakis, @cuviper!
+- We now support `skip()` and `take()` on parallel iterators.
+  Thanks @martinhath!
+- **Breaking change:** We now match the sequential APIs for `min()` and `max()`.
+  We also support `min_by_key()` and `max_by_key()`. Thanks @tapeinosyne!
+- **Breaking change:** The `mul()` method is now renamed to `product()`,
+  to match sequential iterators. Thanks @jonathandturner!
+- We now support parallel iterators over ranges of `u64` values. Thanks @cuviper!
+- We now offer a `par_chars()` method on strings for iterating over characters
+  in parallel. Thanks @cuviper!
+- We now have new demos: a traveling salesman problem solver as well as matrix
+  multiplication. Thanks @nikomatsakis, @edre!
+- We are now documenting our minimum rustc requirement (currently
+  v1.12.0).  We will attempt to maintain compatibility with rustc
+  stable v1.12.0 as long as it remains convenient, but if new features
+  are stabilized or added that would be helpful to Rayon, or there are
+  bug fixes that we need, we will bump to the most recent rustc. Thanks @cuviper!
+- The `reduce()` functionality now has better inlining.
+  Thanks @bluss!
+- The `join()` function now has some documentation. Thanks @gsquire!
+- The project source has now been fully run through rustfmt.
+  Thanks @ChristopherDavenport!
+- Exposed helper methods for accessing the current thread index.
+  Thanks @bholley!
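+
+A short sketch combining a few of the items above (`collect()` into different
+collection types, `u64` ranges, and `par_chars()`), written against the rayon
+prelude; the values are illustrative only:
+
+```rust
+use rayon::prelude::*;
+use std::collections::HashSet;
+
+fn main() {
+    // collect() now builds many kinds of collections, not just vectors.
+    let squares: Vec<u64> = (0..10u64).into_par_iter().map(|x| x * x).collect();
+    assert_eq!(squares[3], 9);
+
+    // par_chars() iterates the characters of a string in parallel.
+    let unique: HashSet<char> = "hello world".par_chars().collect();
+    assert!(unique.contains(&'w'));
+}
+```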
+
+
+# Release 0.5 (2016-11-04)
+
+- **Breaking change:** The `reduce` method has been vastly
+  simplified, and `reduce_with_identity` has been deprecated.
+- **Breaking change:** The `fold` method has been changed. It used to
+  always reduce the values, but now instead it is a combinator that
+  returns a parallel iterator which can itself be reduced. See the
+  docs for more information, and the sketch after this list.
+- The following parallel iterator combinators are now available (thanks @cuviper!):
+  - `find_any()`: similar to `find` on a sequential iterator,
+    but doesn't necessarily return the *first* matching item
+  - `position_any()`: similar to `position` on a sequential iterator,
+    but doesn't necessarily return the index of the *first* matching item
+  - `any()`, `all()`: just like their sequential counterparts
+- The `count()` combinator is now available for parallel iterators.
+- We now build with older versions of rustc again (thanks @durango!),
+  as we removed a stray semicolon from `thread_local!`.
+- Various improvements to the (unstable) `scope()` API implementation.
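+
+A minimal sketch of the new `fold` behavior: it now produces a parallel
+iterator of partial results, which is then combined with a reduction such as
+`sum()`. The input data here is illustrative:
+
+```rust
+use rayon::prelude::*;
+
+fn main() {
+    let words = vec!["a", "bb", "ccc", "dddd"];
+    // fold() yields per-split partial sums; sum() combines them.
+    let total_len: usize = words
+        .par_iter()
+        .fold(|| 0usize, |acc, w| acc + w.len())
+        .sum();
+    assert_eq!(total_len, 10);
+}
+```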
+
+
+# Release 0.4.3 (2016-10-25)
+
+- Parallel iterators now offer an adaptive weight scheme,
+  which means that explicit weights should no longer
+  be necessary in most cases! Thanks @cuviper!
+  - We are considering removing weights or changing the weight mechanism
+    before 1.0. Examples of scenarios where you still need weights even
+    with this adaptive mechanism would be great. Join the discussion
+    at <https://github.com/nikomatsakis/rayon/issues/111>.
+- New (unstable) scoped threads API, see `rayon::scope` for details
+  and the sketch after this list.
+  - You will need to supply the [cargo feature] `unstable`.
+- The various demos and benchmarks have been consolidated into one
+  program, `rayon-demo`.
+- Optimizations in Rayon's inner workings. Thanks @emilio!
+- Update `num_cpus` to 1.0. Thanks @jamwt!
+- Various internal cleanup in the implementation and typo fixes.
+  Thanks @cuviper, @Eh2406, and @spacejam!
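+
+A rough sketch of the scoped API, shown as it later stabilized (at this
+release it still required the `unstable` feature); the values are
+illustrative:
+
+```rust
+fn main() {
+    let mut left = 0;
+    let mut right = 0;
+    // scope() lets spawned tasks borrow from the enclosing stack frame;
+    // the call does not return until both tasks have finished.
+    rayon::scope(|s| {
+        s.spawn(|_| left = 1);
+        s.spawn(|_| right = 2);
+    });
+    assert_eq!(left + right, 3);
+}
+```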
+
+[cargo feature]: http://doc.crates.io/manifest.html#the-features-section
+
+
+# Release 0.4.2 (2016-09-15)
+
+- Updated crates.io metadata.
+
+
+# Release 0.4.1 (2016-09-14)
+
+- New `chain` combinator for parallel iterators (see the sketch after this list).
+- `Option`, `Result`, as well as many more collection types now have
+  parallel iterators.
+- New mergesort demo.
+- Misc fixes.
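+
+A small sketch of `chain`, written against today's rayon API (the use of
+`cloned()` and `collect()` here is just for illustration):
+
+```rust
+use rayon::prelude::*;
+
+fn main() {
+    let a = vec![1, 2, 3];
+    let b = vec![4, 5, 6];
+    // chain() appends one parallel iterator after another, preserving order.
+    let all: Vec<i32> = a.par_iter().chain(&b).cloned().collect();
+    assert_eq!(all, vec![1, 2, 3, 4, 5, 6]);
+}
+```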
+
+Thanks to @cuviper, @edre, @jdanford, @frewsxcv for their contributions!
+
+
+# Release 0.4 (2016-05-16)
+
+- Make use of latest versions of catch-panic and various fixes to panic propagation.
+- Add new prime sieve demo.
+- Add `cloned()` and `inspect()` combinators.
+- Misc fixes for Rust RFC 1214.
+
+Thanks to @areilb1, @Amanieu, @SharplEr, and @cuviper for their contributions!
+
+
+# Release 0.3 (2016-02-23)
+
+- Expanded `par_iter` APIs now available:
+  - `into_par_iter` is now supported on vectors (taking ownership of the elements)
+- Panic handling is much improved:
+  - if you use the Nightly feature, experimental panic recovery is available
+  - otherwise, panics propagate out and poison the workpool
+- New `Configuration` object to control number of threads and other details
+- New demos and benchmarks
+  - try `cargo run --release -- visualize` in `demo/nbody` :)
+    - Note: a nightly compiler is required for this demo due to the
+      use of the `+=` syntax
+
+Thanks to @bjz, @cuviper, @Amanieu, and @willi-kappler for their contributions!
+
+
+# Release 0.2 and earlier
+
+No release notes were being kept at this time.
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..70e301b
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,6 @@
+fn main() {
+    let ac = autocfg::new();
+    if ac.probe_expression("(0..10).step_by(2).rev()") {
+        autocfg::emit("step_by");
+    }
+}
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..ec08a4d
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,3 @@
+We use this directory for interactive tests that can't be run in an
+automatic fashion. For examples of how to use Rayon, or benchmarks,
+see `rayon-demo`.
diff --git a/examples/cpu_monitor.rs b/examples/cpu_monitor.rs
new file mode 100644
index 0000000..bbe1316
--- /dev/null
+++ b/examples/cpu_monitor.rs
@@ -0,0 +1,81 @@
+use docopt::Docopt;
+use std::io;
+use std::process;
+
+const USAGE: &str = "
+Usage: cpu_monitor [options] <scenario>
+       cpu_monitor --help
+
+A test for monitoring how much CPU usage Rayon consumes under various
+scenarios. This test is intended to be executed interactively, like so:
+
+    cargo run --example cpu_monitor -- tasks_ended
+
+The list of scenarios you can try are as follows:
+
+- tasks_ended: after all tasks have finished, go to sleep
+- task_stall_root: a root task stalls for a very long time
+- task_stall_scope: a task in a scope stalls for a very long time
+
+Options:
+    -h, --help                   Show this message.
+    -d N, --depth N              Control how hard the dummy task works [default: 27]
+";
+
+#[derive(serde::Deserialize)]
+pub struct Args {
+    arg_scenario: String,
+    flag_depth: usize,
+}
+
+fn main() {
+    let args: &Args = &Docopt::new(USAGE)
+        .and_then(|d| d.deserialize())
+        .unwrap_or_else(|e| e.exit());
+
+    match &args.arg_scenario[..] {
+        "tasks_ended" => tasks_ended(args),
+        "task_stall_root" => task_stall_root(args),
+        "task_stall_scope" => task_stall_scope(args),
+        _ => {
+            println!("unknown scenario: `{}`", args.arg_scenario);
+            println!("try --help");
+            process::exit(1);
+        }
+    }
+}
+
+fn wait_for_user() {
+    let mut input = String::new();
+    io::stdin().read_line(&mut input).unwrap();
+}
+
+fn task(args: &Args) {
+    fn join_recursively(n: usize) {
+        if n == 0 {
+            return;
+        }
+        rayon::join(|| join_recursively(n - 1), || join_recursively(n - 1));
+    }
+
+    println!("Starting heavy work at depth {}...wait.", args.flag_depth);
+    join_recursively(args.flag_depth);
+    println!("Heavy work done; check top. You should see CPU usage drop to zero soon.");
+    println!("Press <enter> to quit...");
+}
+
+fn tasks_ended(args: &Args) {
+    task(args);
+    wait_for_user();
+}
+
+fn task_stall_root(args: &Args) {
+    rayon::join(|| task(args), wait_for_user);
+}
+
+fn task_stall_scope(args: &Args) {
+    rayon::scope(|scope| {
+        scope.spawn(move |_| task(args));
+        scope.spawn(move |_| wait_for_user());
+    });
+}
diff --git a/src/collections/binary_heap.rs b/src/collections/binary_heap.rs
new file mode 100644
index 0000000..fa90312
--- /dev/null
+++ b/src/collections/binary_heap.rs
@@ -0,0 +1,120 @@
+//! This module contains the parallel iterator types for heaps
+//! (`BinaryHeap<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::BinaryHeap;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a binary heap
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Ord + Send> {
+    inner: vec::IntoIter<T>,
+}
+
+impl<T: Ord + Send> IntoParallelIterator for BinaryHeap<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter {
+            inner: Vec::from(self).into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Ord + Send>
+}
+
+/// Parallel iterator over an immutable reference to a binary heap
+#[derive(Debug)]
+pub struct Iter<'a, T: Ord + Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Ord + Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a BinaryHeap<T> => Iter<'a, T>,
+    impl<'a, T: Ord + Sync>
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Ord + Sync + 'a>
+}
+
+// `BinaryHeap` doesn't have a mutable `Iterator`
+
+/// Draining parallel iterator that moves out of a binary heap,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, T: Ord + Send> {
+    heap: &'a mut BinaryHeap<T>,
+}
+
+impl<'a, T: Ord + Send> ParallelDrainFull for &'a mut BinaryHeap<T> {
+    type Iter = Drain<'a, T>;
+    type Item = T;
+
+    fn par_drain(self) -> Self::Iter {
+        Drain { heap: self }
+    }
+}
+
+impl<'a, T: Ord + Send> ParallelIterator for Drain<'a, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'a, T: Ord + Send> IndexedParallelIterator for Drain<'a, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.heap.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        super::DrainGuard::new(self.heap)
+            .par_drain(..)
+            .with_producer(callback)
+    }
+}
+
+impl<'a, T: Ord + Send> Drop for Drain<'a, T> {
+    fn drop(&mut self) {
+        if !self.heap.is_empty() {
+            // We must not have produced, so just call a normal drain to remove the items.
+            self.heap.drain();
+        }
+    }
+}
diff --git a/src/collections/btree_map.rs b/src/collections/btree_map.rs
new file mode 100644
index 0000000..12436dc
--- /dev/null
+++ b/src/collections/btree_map.rs
@@ -0,0 +1,66 @@
+//! This module contains the parallel iterator types for B-Tree maps
+//! (`BTreeMap<K, V>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::BTreeMap;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a B-Tree map
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<K: Ord + Send, V: Send> {
+    inner: vec::IntoIter<(K, V)>,
+}
+
+into_par_vec! {
+    BTreeMap<K, V> => IntoIter<K, V>,
+    impl<K: Ord + Send, V: Send>
+}
+
+delegate_iterator! {
+    IntoIter<K, V> => (K, V),
+    impl<K: Ord + Send, V: Send>
+}
+
+/// Parallel iterator over an immutable reference to a B-Tree map
+#[derive(Debug)]
+pub struct Iter<'a, K: Ord + Sync, V: Sync> {
+    inner: vec::IntoIter<(&'a K, &'a V)>,
+}
+
+impl<'a, K: Ord + Sync, V: Sync> Clone for Iter<'a, K, V> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a BTreeMap<K, V> => Iter<'a, K, V>,
+    impl<'a, K: Ord + Sync, V: Sync>
+}
+
+delegate_iterator! {
+    Iter<'a, K, V> => (&'a K, &'a V),
+    impl<'a, K: Ord + Sync + 'a, V: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a B-Tree map
+#[derive(Debug)]
+pub struct IterMut<'a, K: Ord + Sync, V: Send> {
+    inner: vec::IntoIter<(&'a K, &'a mut V)>,
+}
+
+into_par_vec! {
+    &'a mut BTreeMap<K, V> => IterMut<'a, K, V>,
+    impl<'a, K: Ord + Sync, V: Send>
+}
+
+delegate_iterator! {
+    IterMut<'a, K, V> => (&'a K, &'a mut V),
+    impl<'a, K: Ord + Sync + 'a, V: Send + 'a>
+}
diff --git a/src/collections/btree_set.rs b/src/collections/btree_set.rs
new file mode 100644
index 0000000..061d37c
--- /dev/null
+++ b/src/collections/btree_set.rs
@@ -0,0 +1,52 @@
+//! This module contains the parallel iterator types for B-Tree sets
+//! (`BTreeSet<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::BTreeSet;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a B-Tree set
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<T: Ord + Send> {
+    inner: vec::IntoIter<T>,
+}
+
+into_par_vec! {
+    BTreeSet<T> => IntoIter<T>,
+    impl<T: Ord + Send>
+}
+
+delegate_iterator! {
+    IntoIter<T> => T,
+    impl<T: Ord + Send>
+}
+
+/// Parallel iterator over an immutable reference to a B-Tree set
+#[derive(Debug)]
+pub struct Iter<'a, T: Ord + Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Ord + Sync + 'a> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a BTreeSet<T> => Iter<'a, T>,
+    impl<'a, T: Ord + Sync>
+}
+
+delegate_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Ord + Sync + 'a>
+}
+
+// `BTreeSet` doesn't have a mutable `Iterator`
diff --git a/src/collections/hash_map.rs b/src/collections/hash_map.rs
new file mode 100644
index 0000000..b657851
--- /dev/null
+++ b/src/collections/hash_map.rs
@@ -0,0 +1,96 @@
+//! This module contains the parallel iterator types for hash maps
+//! (`HashMap<K, V>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::HashMap;
+use std::hash::{BuildHasher, Hash};
+use std::marker::PhantomData;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a hash map
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<K: Hash + Eq + Send, V: Send> {
+    inner: vec::IntoIter<(K, V)>,
+}
+
+into_par_vec! {
+    HashMap<K, V, S> => IntoIter<K, V>,
+    impl<K: Hash + Eq + Send, V: Send, S: BuildHasher>
+}
+
+delegate_iterator! {
+    IntoIter<K, V> => (K, V),
+    impl<K: Hash + Eq + Send, V: Send>
+}
+
+/// Parallel iterator over an immutable reference to a hash map
+#[derive(Debug)]
+pub struct Iter<'a, K: Hash + Eq + Sync, V: Sync> {
+    inner: vec::IntoIter<(&'a K, &'a V)>,
+}
+
+impl<'a, K: Hash + Eq + Sync, V: Sync> Clone for Iter<'a, K, V> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a HashMap<K, V, S> => Iter<'a, K, V>,
+    impl<'a, K: Hash + Eq + Sync, V: Sync, S: BuildHasher>
+}
+
+delegate_iterator! {
+    Iter<'a, K, V> => (&'a K, &'a V),
+    impl<'a, K: Hash + Eq + Sync + 'a, V: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a hash map
+#[derive(Debug)]
+pub struct IterMut<'a, K: Hash + Eq + Sync, V: Send> {
+    inner: vec::IntoIter<(&'a K, &'a mut V)>,
+}
+
+into_par_vec! {
+    &'a mut HashMap<K, V, S> => IterMut<'a, K, V>,
+    impl<'a, K: Hash + Eq + Sync, V: Send, S: BuildHasher>
+}
+
+delegate_iterator! {
+    IterMut<'a, K, V> => (&'a K, &'a mut V),
+    impl<'a, K: Hash + Eq + Sync + 'a, V: Send + 'a>
+}
+
+/// Draining parallel iterator that moves out of a hash map,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, K: Hash + Eq + Send, V: Send> {
+    inner: vec::IntoIter<(K, V)>,
+    marker: PhantomData<&'a mut HashMap<K, V>>,
+}
+
+impl<'a, K: Hash + Eq + Send, V: Send, S: BuildHasher> ParallelDrainFull
+    for &'a mut HashMap<K, V, S>
+{
+    type Iter = Drain<'a, K, V>;
+    type Item = (K, V);
+
+    fn par_drain(self) -> Self::Iter {
+        let vec: Vec<_> = self.drain().collect();
+        Drain {
+            inner: vec.into_par_iter(),
+            marker: PhantomData,
+        }
+    }
+}
+
+delegate_iterator! {
+    Drain<'_, K, V> => (K, V),
+    impl<K: Hash + Eq + Send, V: Send>
+}
diff --git a/src/collections/hash_set.rs b/src/collections/hash_set.rs
new file mode 100644
index 0000000..b6ee1c1
--- /dev/null
+++ b/src/collections/hash_set.rs
@@ -0,0 +1,80 @@
+//! This module contains the parallel iterator types for hash sets
+//! (`HashSet<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::HashSet;
+use std::hash::{BuildHasher, Hash};
+use std::marker::PhantomData;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a hash set
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<T: Hash + Eq + Send> {
+    inner: vec::IntoIter<T>,
+}
+
+into_par_vec! {
+    HashSet<T, S> => IntoIter<T>,
+    impl<T: Hash + Eq + Send, S: BuildHasher>
+}
+
+delegate_iterator! {
+    IntoIter<T> => T,
+    impl<T: Hash + Eq + Send>
+}
+
+/// Parallel iterator over an immutable reference to a hash set
+#[derive(Debug)]
+pub struct Iter<'a, T: Hash + Eq + Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Hash + Eq + Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a HashSet<T, S> => Iter<'a, T>,
+    impl<'a, T: Hash + Eq + Sync, S: BuildHasher>
+}
+
+delegate_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Hash + Eq + Sync + 'a>
+}
+
+// `HashSet` doesn't have a mutable `Iterator`
+
+/// Draining parallel iterator that moves out of a hash set,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, T: Hash + Eq + Send> {
+    inner: vec::IntoIter<T>,
+    marker: PhantomData<&'a mut HashSet<T>>,
+}
+
+impl<'a, T: Hash + Eq + Send, S: BuildHasher> ParallelDrainFull for &'a mut HashSet<T, S> {
+    type Iter = Drain<'a, T>;
+    type Item = T;
+
+    fn par_drain(self) -> Self::Iter {
+        let vec: Vec<_> = self.drain().collect();
+        Drain {
+            inner: vec.into_par_iter(),
+            marker: PhantomData,
+        }
+    }
+}
+
+delegate_iterator! {
+    Drain<'_, T> => T,
+    impl<T: Hash + Eq + Send>
+}
diff --git a/src/collections/linked_list.rs b/src/collections/linked_list.rs
new file mode 100644
index 0000000..bddd2b0
--- /dev/null
+++ b/src/collections/linked_list.rs
@@ -0,0 +1,66 @@
+//! This module contains the parallel iterator types for linked lists
+//! (`LinkedList<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::LinkedList;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a linked list
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: vec::IntoIter<T>,
+}
+
+into_par_vec! {
+    LinkedList<T> => IntoIter<T>,
+    impl<T: Send>
+}
+
+delegate_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a linked list
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a LinkedList<T> => Iter<'a, T>,
+    impl<'a, T: Sync>
+}
+
+delegate_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a linked list
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: vec::IntoIter<&'a mut T>,
+}
+
+into_par_vec! {
+    &'a mut LinkedList<T> => IterMut<'a, T>,
+    impl<'a, T: Send>
+}
+
+delegate_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
diff --git a/src/collections/mod.rs b/src/collections/mod.rs
new file mode 100644
index 0000000..d9b7988
--- /dev/null
+++ b/src/collections/mod.rs
@@ -0,0 +1,84 @@
+//! Parallel iterator types for [standard collections][std::collections]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::collections]: https://doc.rust-lang.org/stable/std/collections/
+
+/// Convert an iterable collection into a parallel iterator by first
+/// collecting into a temporary `Vec`, then iterating that.
+macro_rules! into_par_vec {
+    ($t:ty => $iter:ident<$($i:tt),*>, impl $($args:tt)*) => {
+        impl $($args)* IntoParallelIterator for $t {
+            type Item = <$t as IntoIterator>::Item;
+            type Iter = $iter<$($i),*>;
+
+            fn into_par_iter(self) -> Self::Iter {
+                use std::iter::FromIterator;
+                $iter { inner: Vec::from_iter(self).into_par_iter() }
+            }
+        }
+    };
+}
+
+pub mod binary_heap;
+pub mod btree_map;
+pub mod btree_set;
+pub mod hash_map;
+pub mod hash_set;
+pub mod linked_list;
+pub mod vec_deque;
+
+use self::drain_guard::DrainGuard;
+
+mod drain_guard {
+    use crate::iter::ParallelDrainRange;
+    use std::mem;
+    use std::ops::RangeBounds;
+
+    /// A proxy for draining a collection by converting to a `Vec` and back.
+    ///
+    /// This is used for draining `BinaryHeap` and `VecDeque`, which both have
+    /// zero-allocation conversions to/from `Vec`, though not zero-cost:
+    /// - `BinaryHeap` will heapify from `Vec`, but at least that will be empty.
+    /// - `VecDeque` has to shift items to offset 0 when converting to `Vec`.
+    #[allow(missing_debug_implementations)]
+    pub(super) struct DrainGuard<'a, T, C: From<Vec<T>>> {
+        collection: &'a mut C,
+        vec: Vec<T>,
+    }
+
+    impl<'a, T, C> DrainGuard<'a, T, C>
+    where
+        C: Default + From<Vec<T>>,
+        Vec<T>: From<C>,
+    {
+        pub(super) fn new(collection: &'a mut C) -> Self {
+            Self {
+                // Temporarily steal the inner `Vec` so we can drain in place.
+                vec: Vec::from(mem::replace(collection, C::default())),
+                collection,
+            }
+        }
+    }
+
+    impl<'a, T, C: From<Vec<T>>> Drop for DrainGuard<'a, T, C> {
+        fn drop(&mut self) {
+            // Restore the collection from the `Vec` with its original capacity.
+            *self.collection = C::from(mem::replace(&mut self.vec, Vec::new()));
+        }
+    }
+
+    impl<'a, T, C> ParallelDrainRange<usize> for &'a mut DrainGuard<'_, T, C>
+    where
+        T: Send,
+        C: From<Vec<T>>,
+    {
+        type Iter = crate::vec::Drain<'a, T>;
+        type Item = T;
+
+        fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+            self.vec.par_drain(range)
+        }
+    }
+}
diff --git a/src/collections/vec_deque.rs b/src/collections/vec_deque.rs
new file mode 100644
index 0000000..f87ce6b
--- /dev/null
+++ b/src/collections/vec_deque.rs
@@ -0,0 +1,159 @@
+//! This module contains the parallel iterator types for double-ended queues
+//! (`VecDeque<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::VecDeque;
+use std::ops::{Range, RangeBounds};
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::math::simplify_range;
+
+use crate::slice;
+use crate::vec;
+
+/// Parallel iterator over a double-ended queue
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: vec::IntoIter<T>,
+}
+
+impl<T: Send> IntoParallelIterator for VecDeque<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        // NOTE: requires data movement if the deque doesn't start at offset 0.
+        let inner = Vec::from(self).into_par_iter();
+        IntoIter { inner }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a double-ended queue
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: Chain<slice::Iter<'a, T>, slice::Iter<'a, T>>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync> IntoParallelIterator for &'a VecDeque<T> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        let (a, b) = self.as_slices();
+        Iter {
+            inner: a.into_par_iter().chain(b),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a double-ended queue
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: Chain<slice::IterMut<'a, T>, slice::IterMut<'a, T>>,
+}
+
+impl<'a, T: Send> IntoParallelIterator for &'a mut VecDeque<T> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        let (a, b) = self.as_mut_slices();
+        IterMut {
+            inner: a.into_par_iter().chain(b),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Draining parallel iterator that moves a range out of a double-ended queue,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, T: Send> {
+    deque: &'a mut VecDeque<T>,
+    range: Range<usize>,
+    orig_len: usize,
+}
+
+impl<'a, T: Send> ParallelDrainRange<usize> for &'a mut VecDeque<T> {
+    type Iter = Drain<'a, T>;
+    type Item = T;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain {
+            orig_len: self.len(),
+            range: simplify_range(range, self.len()),
+            deque: self,
+        }
+    }
+}
+
+impl<'a, T: Send> ParallelIterator for Drain<'a, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'a, T: Send> IndexedParallelIterator for Drain<'a, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // NOTE: requires data movement if the deque doesn't start at offset 0.
+        super::DrainGuard::new(self.deque)
+            .par_drain(self.range.clone())
+            .with_producer(callback)
+    }
+}
+
+impl<'a, T: Send> Drop for Drain<'a, T> {
+    fn drop(&mut self) {
+        if self.deque.len() != self.orig_len - self.range.len() {
+            // We must not have produced, so just call a normal drain to remove the items.
+            assert_eq!(self.deque.len(), self.orig_len);
+            self.deque.drain(self.range.clone());
+        }
+    }
+}
diff --git a/src/compile_fail/cannot_collect_filtermap_data.rs b/src/compile_fail/cannot_collect_filtermap_data.rs
new file mode 100644
index 0000000..65ff875
--- /dev/null
+++ b/src/compile_fail/cannot_collect_filtermap_data.rs
@@ -0,0 +1,16 @@
+/*! ```compile_fail,E0599
+
+use rayon::prelude::*;
+
+// zip requires data of exact size, but filter yields only bounded
+// size, so check that we cannot apply it.
+
+fn main() {
+    let a: Vec<usize> = (0..1024).collect();
+    let mut v = vec![];
+    a.par_iter()
+     .filter_map(|&x| Some(x as f32))
+     .collect_into_vec(&mut v); //~ ERROR no method
+}
+
+``` */
diff --git a/src/compile_fail/cannot_zip_filtered_data.rs b/src/compile_fail/cannot_zip_filtered_data.rs
new file mode 100644
index 0000000..de43fca
--- /dev/null
+++ b/src/compile_fail/cannot_zip_filtered_data.rs
@@ -0,0 +1,16 @@
+/*! ```compile_fail,E0277
+
+use rayon::prelude::*;
+
+// zip requires data of exact size, but filter yields only bounded
+// size, so check that we cannot apply it.
+
+fn main() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter()
+     .zip(b.par_iter().filter(|&&x| x > 3)); //~ ERROR
+}
+
+``` */
diff --git a/src/compile_fail/cell_par_iter.rs b/src/compile_fail/cell_par_iter.rs
new file mode 100644
index 0000000..4af04b1
--- /dev/null
+++ b/src/compile_fail/cell_par_iter.rs
@@ -0,0 +1,15 @@
+/*! ```compile_fail,E0277
+
+// Check that we can't use the par-iter API to access contents of a `Cell`.
+
+use rayon::prelude::*;
+use std::cell::Cell;
+
+fn main() {
+    let c = Cell::new(42_i32);
+    (0_i32..1024).into_par_iter()
+             .map(|_| c.get()) //~ ERROR E0277
+             .min();
+}
+
+``` */
diff --git a/src/compile_fail/mod.rs b/src/compile_fail/mod.rs
new file mode 100644
index 0000000..13209a4
--- /dev/null
+++ b/src/compile_fail/mod.rs
@@ -0,0 +1,7 @@
+// These modules contain `compile_fail` doc tests.
+mod cannot_collect_filtermap_data;
+mod cannot_zip_filtered_data;
+mod cell_par_iter;
+mod must_use;
+mod no_send_par_iter;
+mod rc_par_iter;
diff --git a/src/compile_fail/must_use.rs b/src/compile_fail/must_use.rs
new file mode 100644
index 0000000..ac50a62
--- /dev/null
+++ b/src/compile_fail/must_use.rs
@@ -0,0 +1,67 @@
+// Check that we are flagged for ignoring `must_use` parallel adaptors.
+// (unfortunately there's no error code for `unused_must_use`)
+
+macro_rules! must_use {
+    ($( $name:ident #[$expr:meta] )*) => {$(
+        /// First sanity check that the expression is OK.
+        ///
+        /// ```
+        /// #![deny(unused_must_use)]
+        ///
+        /// use rayon::prelude::*;
+        ///
+        /// let v: Vec<_> = (0..100).map(Some).collect();
+        /// let _ =
+        #[$expr]
+        /// ```
+        ///
+        /// Now trigger the `must_use`.
+        ///
+        /// ```compile_fail
+        /// #![deny(unused_must_use)]
+        ///
+        /// use rayon::prelude::*;
+        ///
+        /// let v: Vec<_> = (0..100).map(Some).collect();
+        #[$expr]
+        /// ```
+        mod $name {}
+    )*}
+}
+
+must_use! {
+    step_by             /** v.par_iter().step_by(2); */
+    chain               /** v.par_iter().chain(&v); */
+    chunks              /** v.par_iter().chunks(2); */
+    cloned              /** v.par_iter().cloned(); */
+    copied              /** v.par_iter().copied(); */
+    enumerate           /** v.par_iter().enumerate(); */
+    filter              /** v.par_iter().filter(|_| true); */
+    filter_map          /** v.par_iter().filter_map(|x| *x); */
+    flat_map            /** v.par_iter().flat_map(|x| *x); */
+    flat_map_iter       /** v.par_iter().flat_map_iter(|x| *x); */
+    flatten             /** v.par_iter().flatten(); */
+    flatten_iter        /** v.par_iter().flatten_iter(); */
+    fold                /** v.par_iter().fold(|| 0, |x, _| x); */
+    fold_with           /** v.par_iter().fold_with(0, |x, _| x); */
+    try_fold            /** v.par_iter().try_fold(|| 0, |x, _| Some(x)); */
+    try_fold_with       /** v.par_iter().try_fold_with(0, |x, _| Some(x)); */
+    inspect             /** v.par_iter().inspect(|_| {}); */
+    interleave          /** v.par_iter().interleave(&v); */
+    interleave_shortest /** v.par_iter().interleave_shortest(&v); */
+    intersperse         /** v.par_iter().intersperse(&None); */
+    map                 /** v.par_iter().map(|x| x); */
+    map_with            /** v.par_iter().map_with(0, |_, x| x); */
+    map_init            /** v.par_iter().map_init(|| 0, |_, x| x); */
+    panic_fuse          /** v.par_iter().panic_fuse(); */
+    positions           /** v.par_iter().positions(|_| true); */
+    rev                 /** v.par_iter().rev(); */
+    skip                /** v.par_iter().skip(1); */
+    take                /** v.par_iter().take(1); */
+    update              /** v.par_iter().update(|_| {}); */
+    while_some          /** v.par_iter().cloned().while_some(); */
+    with_max_len        /** v.par_iter().with_max_len(1); */
+    with_min_len        /** v.par_iter().with_min_len(1); */
+    zip                 /** v.par_iter().zip(&v); */
+    zip_eq              /** v.par_iter().zip_eq(&v); */
+}
diff --git a/src/compile_fail/no_send_par_iter.rs b/src/compile_fail/no_send_par_iter.rs
new file mode 100644
index 0000000..1362c98
--- /dev/null
+++ b/src/compile_fail/no_send_par_iter.rs
@@ -0,0 +1,64 @@
+// Check that `!Send` types fail early.
+
+/** ```compile_fail,E0277
+
+use rayon::prelude::*;
+use std::ptr::null;
+
+#[derive(Copy, Clone)]
+struct NoSend(*const ());
+
+unsafe impl Sync for NoSend {}
+
+fn main() {
+    let x = Some(NoSend(null()));
+
+    x.par_iter()
+        .map(|&x| x) //~ ERROR
+        .count(); //~ ERROR
+}
+
+``` */
+mod map {}
+
+/** ```compile_fail,E0277
+
+use rayon::prelude::*;
+use std::ptr::null;
+
+#[derive(Copy, Clone)]
+struct NoSend(*const ());
+
+unsafe impl Sync for NoSend {}
+
+fn main() {
+    let x = Some(NoSend(null()));
+
+    x.par_iter()
+        .filter_map(|&x| Some(x)) //~ ERROR
+        .count(); //~ ERROR
+}
+
+``` */
+mod filter_map {}
+
+/** ```compile_fail,E0277
+
+use rayon::prelude::*;
+use std::ptr::null;
+
+#[derive(Copy, Clone)]
+struct NoSend(*const ());
+
+unsafe impl Sync for NoSend {}
+
+fn main() {
+    let x = Some(NoSend(null()));
+
+    x.par_iter()
+        .cloned() //~ ERROR
+        .count(); //~ ERROR
+}
+
+``` */
+mod cloned {}
diff --git a/src/compile_fail/rc_par_iter.rs b/src/compile_fail/rc_par_iter.rs
new file mode 100644
index 0000000..feaedb3
--- /dev/null
+++ b/src/compile_fail/rc_par_iter.rs
@@ -0,0 +1,17 @@
+/*! ```compile_fail,E0599
+
+// Check that we can't use the par-iter API to access contents of an
+// `Rc`.
+
+use rayon::prelude::*;
+use std::rc::Rc;
+
+fn main() {
+    let x = vec![Rc::new(22), Rc::new(23)];
+    let mut y = vec![];
+    x.into_par_iter() //~ ERROR no method named `into_par_iter`
+     .map(|rc| *rc)
+     .collect_into_vec(&mut y);
+}
+
+``` */
diff --git a/src/delegate.rs b/src/delegate.rs
new file mode 100644
index 0000000..a537489
--- /dev/null
+++ b/src/delegate.rs
@@ -0,0 +1,70 @@
+//! Macros for delegating newtype iterators to inner types.
+
+// Note: these place `impl` bounds at the end, as token gobbling is the only way
+// I know how to consume an arbitrary list of constraints, with `$($args:tt)*`.
+
+/// Creates a parallel iterator implementation which simply wraps an inner type
+/// and delegates all methods inward.  The actual struct must already be
+/// declared with an `inner` field.
+///
+/// The implementation of `IntoParallelIterator` should be added separately.
+///
+/// # Example
+///
+/// ```
+/// delegate_iterator!{
+///     MyIntoIter<T, U> => (T, U),
+///     impl<T: Ord + Send, U: Send>
+/// }
+/// ```
+macro_rules! delegate_iterator {
+    ($iter:ty => $item:ty ,
+     impl $( $args:tt )*
+     ) => {
+        impl $( $args )* ParallelIterator for $iter {
+            type Item = $item;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+                where C: UnindexedConsumer<Self::Item>
+            {
+                self.inner.drive_unindexed(consumer)
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                self.inner.opt_len()
+            }
+        }
+    }
+}
+
+/// Creates an indexed parallel iterator implementation which simply wraps an
+/// inner type and delegates all methods inward.  The actual struct must already
+/// be declared with an `inner` field.
+macro_rules! delegate_indexed_iterator {
+    ($iter:ty => $item:ty ,
+     impl $( $args:tt )*
+     ) => {
+        delegate_iterator!{
+            $iter => $item ,
+            impl $( $args )*
+        }
+
+        impl $( $args )* IndexedParallelIterator for $iter {
+            fn drive<C>(self, consumer: C) -> C::Result
+                where C: Consumer<Self::Item>
+            {
+                self.inner.drive(consumer)
+            }
+
+            fn len(&self) -> usize {
+                self.inner.len()
+            }
+
+            fn with_producer<CB>(self, callback: CB) -> CB::Output
+                where CB: ProducerCallback<Self::Item>
+            {
+                self.inner.with_producer(callback)
+            }
+        }
+    }
+}
diff --git a/src/iter/chain.rs b/src/iter/chain.rs
new file mode 100644
index 0000000..48fce07
--- /dev/null
+++ b/src/iter/chain.rs
@@ -0,0 +1,268 @@
+use super::plumbing::*;
+use super::*;
+use rayon_core::join;
+use std::cmp;
+use std::iter;
+
+/// `Chain` is an iterator that joins `b` after `a` in one continuous iterator.
+/// This struct is created by the [`chain()`] method on [`ParallelIterator`]
+///
+/// [`chain()`]: trait.ParallelIterator.html#method.chain
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Chain<A, B>
+where
+    A: ParallelIterator,
+    B: ParallelIterator<Item = A::Item>,
+{
+    a: A,
+    b: B,
+}
+
+impl<A, B> Chain<A, B>
+where
+    A: ParallelIterator,
+    B: ParallelIterator<Item = A::Item>,
+{
+    /// Creates a new `Chain` iterator.
+    pub(super) fn new(a: A, b: B) -> Self {
+        Chain { a, b }
+    }
+}
+
+impl<A, B> ParallelIterator for Chain<A, B>
+where
+    A: ParallelIterator,
+    B: ParallelIterator<Item = A::Item>,
+{
+    type Item = A::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let Chain { a, b } = self;
+
+        // If we returned a value from our own `opt_len`, then the collect consumer in particular
+        // will balk at being treated like an actual `UnindexedConsumer`.  But when we do know the
+        // length, we can use `Consumer::split_at` instead, and this is still harmless for other
+        // truly-unindexed consumers too.
+        let (left, right, reducer) = if let Some(len) = a.opt_len() {
+            consumer.split_at(len)
+        } else {
+            let reducer = consumer.to_reducer();
+            (consumer.split_off_left(), consumer, reducer)
+        };
+
+        let (a, b) = join(|| a.drive_unindexed(left), || b.drive_unindexed(right));
+        reducer.reduce(a, b)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.a.opt_len()?.checked_add(self.b.opt_len()?)
+    }
+}
+
+impl<A, B> IndexedParallelIterator for Chain<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator<Item = A::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let Chain { a, b } = self;
+        let (left, right, reducer) = consumer.split_at(a.len());
+        let (a, b) = join(|| a.drive(left), || b.drive(right));
+        reducer.reduce(a, b)
+    }
+
+    fn len(&self) -> usize {
+        self.a.len().checked_add(self.b.len()).expect("overflow")
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let a_len = self.a.len();
+        return self.a.with_producer(CallbackA {
+            callback,
+            a_len,
+            b: self.b,
+        });
+
+        struct CallbackA<CB, B> {
+            callback: CB,
+            a_len: usize,
+            b: B,
+        }
+
+        impl<CB, B> ProducerCallback<B::Item> for CallbackA<CB, B>
+        where
+            B: IndexedParallelIterator,
+            CB: ProducerCallback<B::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<A>(self, a_producer: A) -> Self::Output
+            where
+                A: Producer<Item = B::Item>,
+            {
+                self.b.with_producer(CallbackB {
+                    callback: self.callback,
+                    a_len: self.a_len,
+                    a_producer,
+                })
+            }
+        }
+
+        struct CallbackB<CB, A> {
+            callback: CB,
+            a_len: usize,
+            a_producer: A,
+        }
+
+        impl<CB, A> ProducerCallback<A::Item> for CallbackB<CB, A>
+        where
+            A: Producer,
+            CB: ProducerCallback<A::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<B>(self, b_producer: B) -> Self::Output
+            where
+                B: Producer<Item = A::Item>,
+            {
+                let producer = ChainProducer::new(self.a_len, self.a_producer, b_producer);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct ChainProducer<A, B>
+where
+    A: Producer,
+    B: Producer<Item = A::Item>,
+{
+    a_len: usize,
+    a: A,
+    b: B,
+}
+
+impl<A, B> ChainProducer<A, B>
+where
+    A: Producer,
+    B: Producer<Item = A::Item>,
+{
+    fn new(a_len: usize, a: A, b: B) -> Self {
+        ChainProducer { a_len, a, b }
+    }
+}
+
+impl<A, B> Producer for ChainProducer<A, B>
+where
+    A: Producer,
+    B: Producer<Item = A::Item>,
+{
+    type Item = A::Item;
+    type IntoIter = ChainSeq<A::IntoIter, B::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        ChainSeq::new(self.a.into_iter(), self.b.into_iter())
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.a.min_len(), self.b.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.a.max_len(), self.b.max_len())
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        if index <= self.a_len {
+            let a_rem = self.a_len - index;
+            let (a_left, a_right) = self.a.split_at(index);
+            let (b_left, b_right) = self.b.split_at(0);
+            (
+                ChainProducer::new(index, a_left, b_left),
+                ChainProducer::new(a_rem, a_right, b_right),
+            )
+        } else {
+            let (a_left, a_right) = self.a.split_at(self.a_len);
+            let (b_left, b_right) = self.b.split_at(index - self.a_len);
+            (
+                ChainProducer::new(self.a_len, a_left, b_left),
+                ChainProducer::new(0, a_right, b_right),
+            )
+        }
+    }
+
+    fn fold_with<F>(self, mut folder: F) -> F
+    where
+        F: Folder<A::Item>,
+    {
+        folder = self.a.fold_with(folder);
+        if folder.full() {
+            folder
+        } else {
+            self.b.fold_with(folder)
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Wrapper for Chain to implement ExactSizeIterator
+
+struct ChainSeq<A, B> {
+    chain: iter::Chain<A, B>,
+}
+
+impl<A, B> ChainSeq<A, B> {
+    fn new(a: A, b: B) -> ChainSeq<A, B>
+    where
+        A: ExactSizeIterator,
+        B: ExactSizeIterator<Item = A::Item>,
+    {
+        ChainSeq { chain: a.chain(b) }
+    }
+}
+
+impl<A, B> Iterator for ChainSeq<A, B>
+where
+    A: Iterator,
+    B: Iterator<Item = A::Item>,
+{
+    type Item = A::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.chain.next()
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.chain.size_hint()
+    }
+}
+
+impl<A, B> ExactSizeIterator for ChainSeq<A, B>
+where
+    A: ExactSizeIterator,
+    B: ExactSizeIterator<Item = A::Item>,
+{
+}
+
+impl<A, B> DoubleEndedIterator for ChainSeq<A, B>
+where
+    A: DoubleEndedIterator,
+    B: DoubleEndedIterator<Item = A::Item>,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.chain.next_back()
+    }
+}
diff --git a/src/iter/chunks.rs b/src/iter/chunks.rs
new file mode 100644
index 0000000..be5f84c
--- /dev/null
+++ b/src/iter/chunks.rs
@@ -0,0 +1,216 @@
+use std::cmp::min;
+
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+
+/// `Chunks` is an iterator that groups elements of an underlying iterator.
+///
+/// This struct is created by the [`chunks()`] method on [`IndexedParallelIterator`]
+///
+/// [`chunks()`]: trait.IndexedParallelIterator.html#method.chunks
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    size: usize,
+    i: I,
+}
+
+impl<I> Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Chunks` iterator
+    pub(super) fn new(i: I, size: usize) -> Self {
+        Chunks { i, size }
+    }
+}
+
+impl<I> ParallelIterator for Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = Vec<I::Item>;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Vec<I::Item>>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.i.len(), self.size)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.i.len();
+        return self.i.with_producer(Callback {
+            size: self.size,
+            len,
+            callback,
+        });
+
+        struct Callback<CB> {
+            size: usize,
+            len: usize,
+            callback: CB,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<Vec<T>>,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                self.callback.callback(ChunkProducer {
+                    chunk_size: self.size,
+                    len: self.len,
+                    base,
+                })
+            }
+        }
+    }
+}
+
+struct ChunkProducer<P>
+where
+    P: Producer,
+{
+    chunk_size: usize,
+    len: usize,
+    base: P,
+}
+
+impl<P> Producer for ChunkProducer<P>
+where
+    P: Producer,
+{
+    type Item = Vec<P::Item>;
+    type IntoIter = ChunkSeq<P>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        ChunkSeq {
+            chunk_size: self.chunk_size,
+            len: self.len,
+            inner: if self.len > 0 { Some(self.base) } else { None },
+        }
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = min(index * self.chunk_size, self.len);
+        let (left, right) = self.base.split_at(elem_index);
+        (
+            ChunkProducer {
+                chunk_size: self.chunk_size,
+                len: elem_index,
+                base: left,
+            },
+            ChunkProducer {
+                chunk_size: self.chunk_size,
+                len: self.len - elem_index,
+                base: right,
+            },
+        )
+    }
+
+    fn min_len(&self) -> usize {
+        div_round_up(self.base.min_len(), self.chunk_size)
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len() / self.chunk_size
+    }
+}
+
+struct ChunkSeq<P> {
+    chunk_size: usize,
+    len: usize,
+    inner: Option<P>,
+}
+
+impl<P> Iterator for ChunkSeq<P>
+where
+    P: Producer,
+{
+    type Item = Vec<P::Item>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let producer = self.inner.take()?;
+        if self.len > self.chunk_size {
+            let (left, right) = producer.split_at(self.chunk_size);
+            self.inner = Some(right);
+            self.len -= self.chunk_size;
+            Some(left.into_iter().collect())
+        } else {
+            debug_assert!(self.len > 0);
+            self.len = 0;
+            Some(producer.into_iter().collect())
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.len();
+        (len, Some(len))
+    }
+}
+
+impl<P> ExactSizeIterator for ChunkSeq<P>
+where
+    P: Producer,
+{
+    #[inline]
+    fn len(&self) -> usize {
+        div_round_up(self.len, self.chunk_size)
+    }
+}
+
+impl<P> DoubleEndedIterator for ChunkSeq<P>
+where
+    P: Producer,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let producer = self.inner.take()?;
+        if self.len > self.chunk_size {
+            let mut size = self.len % self.chunk_size;
+            if size == 0 {
+                size = self.chunk_size;
+            }
+            let (left, right) = producer.split_at(self.len - size);
+            self.inner = Some(left);
+            self.len -= size;
+            Some(right.into_iter().collect())
+        } else {
+            debug_assert!(self.len > 0);
+            self.len = 0;
+            Some(producer.into_iter().collect())
+        }
+    }
+}
diff --git a/src/iter/cloned.rs b/src/iter/cloned.rs
new file mode 100644
index 0000000..8d5f420
--- /dev/null
+++ b/src/iter/cloned.rs
@@ -0,0 +1,223 @@
+use super::plumbing::*;
+use super::*;
+
+use std::iter;
+
+/// `Cloned` is an iterator that clones the elements of an underlying iterator.
+///
+/// This struct is created by the [`cloned()`] method on [`ParallelIterator`]
+///
+/// [`cloned()`]: trait.ParallelIterator.html#method.cloned
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Cloned<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> Cloned<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Cloned` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Cloned { base }
+    }
+}
+
+impl<'a, T, I> ParallelIterator for Cloned<I>
+where
+    I: ParallelIterator<Item = &'a T>,
+    T: 'a + Clone + Send + Sync,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = ClonedConsumer::new(consumer);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<'a, T, I> IndexedParallelIterator for Cloned<I>
+where
+    I: IndexedParallelIterator<Item = &'a T>,
+    T: 'a + Clone + Send + Sync,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = ClonedConsumer::new(consumer);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<'a, T, CB> ProducerCallback<&'a T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+            T: 'a + Clone + Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = &'a T>,
+            {
+                let producer = ClonedProducer { base };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct ClonedProducer<P> {
+    base: P,
+}
+
+impl<'a, T, P> Producer for ClonedProducer<P>
+where
+    P: Producer<Item = &'a T>,
+    T: 'a + Clone,
+{
+    type Item = T;
+    type IntoIter = iter::Cloned<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().cloned()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            ClonedProducer { base: left },
+            ClonedProducer { base: right },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(ClonedFolder { base: folder }).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct ClonedConsumer<C> {
+    base: C,
+}
+
+impl<C> ClonedConsumer<C> {
+    fn new(base: C) -> Self {
+        ClonedConsumer { base }
+    }
+}
+
+impl<'a, T, C> Consumer<&'a T> for ClonedConsumer<C>
+where
+    C: Consumer<T>,
+    T: 'a + Clone,
+{
+    type Folder = ClonedFolder<C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            ClonedConsumer::new(left),
+            ClonedConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ClonedFolder {
+            base: self.base.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'a, T, C> UnindexedConsumer<&'a T> for ClonedConsumer<C>
+where
+    C: UnindexedConsumer<T>,
+    T: 'a + Clone,
+{
+    fn split_off_left(&self) -> Self {
+        ClonedConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct ClonedFolder<F> {
+    base: F,
+}
+
+impl<'a, T, F> Folder<&'a T> for ClonedFolder<F>
+where
+    F: Folder<T>,
+    T: 'a + Clone,
+{
+    type Result = F::Result;
+
+    fn consume(self, item: &'a T) -> Self {
+        ClonedFolder {
+            base: self.base.consume(item.clone()),
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = &'a T>,
+    {
+        self.base = self.base.consume_iter(iter.into_iter().cloned());
+        self
+    }
+
+    fn complete(self) -> F::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/collect/consumer.rs b/src/iter/collect/consumer.rs
new file mode 100644
index 0000000..689f29c
--- /dev/null
+++ b/src/iter/collect/consumer.rs
@@ -0,0 +1,159 @@
+use super::super::plumbing::*;
+use std::marker::PhantomData;
+use std::ptr;
+use std::slice;
+
+pub(super) struct CollectConsumer<'c, T: Send> {
+    /// A slice covering the target memory, not yet initialized!
+    target: &'c mut [T],
+}
+
+pub(super) struct CollectFolder<'c, T: Send> {
+    /// The folder writes into `result` and must extend the result
+    /// up to exactly this number of elements.
+    final_len: usize,
+
+    /// The current written-to part of our slice of the target
+    result: CollectResult<'c, T>,
+}
+
+impl<'c, T: Send + 'c> CollectConsumer<'c, T> {
+    /// The target memory is considered uninitialized, and will be
+    /// overwritten without reading or dropping existing values.
+    pub(super) fn new(target: &'c mut [T]) -> Self {
+        CollectConsumer { target }
+    }
+}
+
+/// CollectResult represents an initialized part of the target slice.
+///
+/// This is a proxy owner of the elements in the slice; when it drops,
+/// the elements will be dropped, unless its ownership is released before then.
+#[must_use]
+pub(super) struct CollectResult<'c, T> {
+    start: *mut T,
+    len: usize,
+    invariant_lifetime: PhantomData<&'c mut &'c mut [T]>,
+}
+
+unsafe impl<'c, T> Send for CollectResult<'c, T> where T: Send {}
+
+impl<'c, T> CollectResult<'c, T> {
+    /// The current length of the collect result
+    pub(super) fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Release ownership of the slice of elements, and return the length
+    pub(super) fn release_ownership(mut self) -> usize {
+        let ret = self.len;
+        self.len = 0;
+        ret
+    }
+}
+
+impl<'c, T> Drop for CollectResult<'c, T> {
+    fn drop(&mut self) {
+        // Drop the first `self.len` elements, which have been recorded
+        // to be initialized by the folder.
+        unsafe {
+            ptr::drop_in_place(slice::from_raw_parts_mut(self.start, self.len));
+        }
+    }
+}
+
+impl<'c, T: Send + 'c> Consumer<T> for CollectConsumer<'c, T> {
+    type Folder = CollectFolder<'c, T>;
+    type Reducer = CollectReducer;
+    type Result = CollectResult<'c, T>;
+
+    fn split_at(self, index: usize) -> (Self, Self, CollectReducer) {
+        let CollectConsumer { target } = self;
+
+        // Produce new consumers. Normal slicing ensures that the
+        // memory range given to each consumer is disjoint.
+        let (left, right) = target.split_at_mut(index);
+        (
+            CollectConsumer::new(left),
+            CollectConsumer::new(right),
+            CollectReducer,
+        )
+    }
+
+    fn into_folder(self) -> CollectFolder<'c, T> {
+        // Create a folder that consumes values and writes them
+        // into target. The initial result has length 0.
+        CollectFolder {
+            final_len: self.target.len(),
+            result: CollectResult {
+                start: self.target.as_mut_ptr(),
+                len: 0,
+                invariant_lifetime: PhantomData,
+            },
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'c, T: Send + 'c> Folder<T> for CollectFolder<'c, T> {
+    type Result = CollectResult<'c, T>;
+
+    fn consume(mut self, item: T) -> CollectFolder<'c, T> {
+        if self.result.len >= self.final_len {
+            panic!("too many values pushed to consumer");
+        }
+
+        // Compute target pointer and write to it, and
+        // extend the current result by one element
+        unsafe {
+            self.result.start.add(self.result.len).write(item);
+            self.result.len += 1;
+        }
+
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        // NB: We don't explicitly check that the local writes were complete,
+        // but Collect will assert the total result length in the end.
+        self.result
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+/// Pretend to be unindexed for `special_extend`,
+/// but we should never actually get used that way...
+impl<'c, T: Send + 'c> UnindexedConsumer<T> for CollectConsumer<'c, T> {
+    fn split_off_left(&self) -> Self {
+        unreachable!("CollectConsumer must be indexed!")
+    }
+    fn to_reducer(&self) -> Self::Reducer {
+        CollectReducer
+    }
+}
+
+/// CollectReducer combines adjacent chunks; the result must always
+/// be contiguous so that it is one combined slice.
+pub(super) struct CollectReducer;
+
+impl<'c, T> Reducer<CollectResult<'c, T>> for CollectReducer {
+    fn reduce(
+        self,
+        mut left: CollectResult<'c, T>,
+        right: CollectResult<'c, T>,
+    ) -> CollectResult<'c, T> {
+        // Merge if the CollectResults are adjacent and in left-to-right order;
+        // otherwise, drop the right piece now and the total length will come up
+        // short when the correctness of the collected result is asserted.
+        if left.start.wrapping_add(left.len) == right.start {
+            left.len += right.release_ownership();
+        }
+        left
+    }
+}
diff --git a/src/iter/collect/mod.rs b/src/iter/collect/mod.rs
new file mode 100644
index 0000000..e18298e
--- /dev/null
+++ b/src/iter/collect/mod.rs
@@ -0,0 +1,171 @@
+use super::{IndexedParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator};
+use std::slice;
+
+mod consumer;
+use self::consumer::CollectConsumer;
+use self::consumer::CollectResult;
+use super::unzip::unzip_indexed;
+
+mod test;
+
+/// Collects the results of the exact iterator into the specified vector.
+///
+/// This is called by `IndexedParallelIterator::collect_into_vec`.
+pub(super) fn collect_into_vec<I, T>(pi: I, v: &mut Vec<T>)
+where
+    I: IndexedParallelIterator<Item = T>,
+    T: Send,
+{
+    v.truncate(0); // clear any old data
+    let len = pi.len();
+    Collect::new(v, len).with_consumer(|consumer| pi.drive(consumer));
+}
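+
+// Illustrative sketch (not part of the upstream source): this helper backs the
+// public `IndexedParallelIterator::collect_into_vec` API. A minimal usage
+// example, assuming `rayon::prelude::*` is in scope:
+//
+//     let mut v = vec![];
+//     (0..5).into_par_iter().map(|i| i * i).collect_into_vec(&mut v);
+//     assert_eq!(v, [0, 1, 4, 9, 16]);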
+
+/// Collects the results of the iterator into the specified vector.
+///
+/// Technically, this only works for `IndexedParallelIterator`, but we're faking a
+/// bit of specialization here until Rust can do that natively.  Callers are
+/// using `opt_len` to find the length before calling this, and only exact
+/// iterators will return anything but `None` there.
+///
+/// Since the type system doesn't understand that contract, we have to allow
+/// *any* `ParallelIterator` here, and `CollectConsumer` has to also implement
+/// `UnindexedConsumer`.  That implementation panics with `unreachable!` in case
+/// there's a bug where we actually do try to use this unindexed.
+fn special_extend<I, T>(pi: I, len: usize, v: &mut Vec<T>)
+where
+    I: ParallelIterator<Item = T>,
+    T: Send,
+{
+    Collect::new(v, len).with_consumer(|consumer| pi.drive_unindexed(consumer));
+}
+
+/// Unzips the results of the exact iterator into the specified vectors.
+///
+/// This is called by `IndexedParallelIterator::unzip_into_vecs`.
+pub(super) fn unzip_into_vecs<I, A, B>(pi: I, left: &mut Vec<A>, right: &mut Vec<B>)
+where
+    I: IndexedParallelIterator<Item = (A, B)>,
+    A: Send,
+    B: Send,
+{
+    // clear any old data
+    left.truncate(0);
+    right.truncate(0);
+
+    let len = pi.len();
+    Collect::new(right, len).with_consumer(|right_consumer| {
+        let mut right_result = None;
+        Collect::new(left, len).with_consumer(|left_consumer| {
+            let (left_r, right_r) = unzip_indexed(pi, left_consumer, right_consumer);
+            right_result = Some(right_r);
+            left_r
+        });
+        right_result.unwrap()
+    });
+}
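+
+// Illustrative sketch (not part of the upstream source): this helper backs the
+// public `IndexedParallelIterator::unzip_into_vecs` API. A minimal usage
+// example, assuming `rayon::prelude::*` is in scope:
+//
+//     let (mut evens, mut odds) = (vec![], vec![]);
+//     (0..4).into_par_iter()
+//         .map(|i| (i * 2, i * 2 + 1))
+//         .unzip_into_vecs(&mut evens, &mut odds);
+//     assert_eq!(evens, [0, 2, 4, 6]);
+//     assert_eq!(odds, [1, 3, 5, 7]);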
+
+/// Manage the collection vector.
+struct Collect<'c, T: Send> {
+    vec: &'c mut Vec<T>,
+    len: usize,
+}
+
+impl<'c, T: Send + 'c> Collect<'c, T> {
+    fn new(vec: &'c mut Vec<T>, len: usize) -> Self {
+        Collect { vec, len }
+    }
+
+    /// Create a consumer on the slice of memory we are collecting into.
+    ///
+    /// The consumer needs to be used inside the scope function, and the
+    /// complete collect result passed back.
+    ///
+    /// This method will verify the collect result, and panic if the slice
+    /// was not fully written into. Otherwise, in the successful case,
+    /// the vector is complete with the collected result.
+    fn with_consumer<F>(mut self, scope_fn: F)
+    where
+        F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>,
+    {
+        unsafe {
+            let slice = Self::reserve_get_tail_slice(&mut self.vec, self.len);
+            let result = scope_fn(CollectConsumer::new(slice));
+
+            // The CollectResult represents a contiguous part of the
+            // slice that has been written to.
+            // On unwind here, the CollectResult will be dropped.
+            // If some producers on the way did not produce enough elements,
+            // partial CollectResults may have been dropped without
+            // being reduced to the final result, and we will see
+            // that as the length coming up short.
+            //
+            // Here, we assert that `slice` is fully initialized. This is
+            // checked by the following assert, which verifies that a
+            // complete CollectResult was produced; if the length is
+            // correct, it is necessarily covering the target slice.
+            // Since we know that the consumer cannot have escaped from
+            // `drive` (by parametricity, essentially), we know that any
+            // stores that will happen, have happened. Unless some code is buggy,
+            // that means we should have seen `len` total writes.
+            let actual_writes = result.len();
+            assert!(
+                actual_writes == self.len,
+                "expected {} total writes, but got {}",
+                self.len,
+                actual_writes
+            );
+
+            // Release the result's mutable borrow and "proxy ownership"
+            // of the elements, before the vector takes it over.
+            result.release_ownership();
+
+            let new_len = self.vec.len() + self.len;
+            self.vec.set_len(new_len);
+        }
+    }
+
+    /// Reserve space for `len` more elements in the vector,
+    /// and return a slice to the uninitialized tail of the vector
+    ///
+    /// Safety: The tail slice is uninitialized
+    unsafe fn reserve_get_tail_slice(vec: &mut Vec<T>, len: usize) -> &mut [T] {
+        // Reserve the new space.
+        vec.reserve(len);
+
+        // Get a correct borrow, then extend it for the newly added length.
+        let start = vec.len();
+        let slice = &mut vec[start..];
+        slice::from_raw_parts_mut(slice.as_mut_ptr(), len)
+    }
+}
+
+/// Extends a vector with items from a parallel iterator.
+impl<T> ParallelExtend<T> for Vec<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        // See the vec_collect benchmarks in rayon-demo for different strategies.
+        let par_iter = par_iter.into_par_iter();
+        match par_iter.opt_len() {
+            Some(len) => {
+                // When Rust gets specialization, we can get here for indexed iterators
+                // without relying on `opt_len`.  Until then, `special_extend()` fakes
+                // an unindexed mode on the promise that `opt_len()` is accurate.
+                special_extend(par_iter, len, self);
+            }
+            None => {
+                // This works like `extend`, but `Vec::append` is more efficient.
+                let list = super::extend::collect(par_iter);
+                self.reserve(super::extend::len(&list));
+                for mut vec in list {
+                    self.append(&mut vec);
+                }
+            }
+        }
+    }
+}
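+
+// Illustrative sketch (not part of the upstream source): how the two branches
+// above are exercised through the public `ParallelExtend` API, assuming
+// `rayon::prelude::*` is in scope:
+//
+//     let mut v = vec![0];
+//     // Indexed source: `opt_len()` is `Some(_)`, so `special_extend` writes
+//     // directly into the reserved tail of the vector.
+//     v.par_extend(1..4);
+//     // Filtered source: `opt_len()` is `None`, so per-thread vectors are
+//     // collected in a list and appended sequentially.
+//     v.par_extend((4..10).into_par_iter().filter(|x| x % 2 == 0));
+//     assert_eq!(v, [0, 1, 2, 3, 4, 6, 8]);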
diff --git a/src/iter/collect/test.rs b/src/iter/collect/test.rs
new file mode 100644
index 0000000..00c16c4
--- /dev/null
+++ b/src/iter/collect/test.rs
@@ -0,0 +1,385 @@
+#![cfg(test)]
+#![allow(unused_assignments)]
+
+// These tests are primarily targeting "abusive" producers that will
+// try to drive the "collect consumer" incorrectly. These should
+// result in panics.
+
+use super::Collect;
+use crate::iter::plumbing::*;
+use rayon_core::join;
+
+use std::fmt;
+use std::panic;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::thread::Result as ThreadResult;
+
+/// Promises to produce 2 items, but then produces 3.  Does not do any
+/// splits at all.
+#[test]
+#[should_panic(expected = "too many values")]
+fn produce_too_many_items() {
+    let mut v = vec![];
+    Collect::new(&mut v, 2).with_consumer(|consumer| {
+        let mut folder = consumer.into_folder();
+        folder = folder.consume(22);
+        folder = folder.consume(23);
+        folder.consume(24);
+        unreachable!("folder does not complete")
+    });
+}
+
+/// Produces fewer items than promised. Does not do any
+/// splits at all.
+#[test]
+#[should_panic(expected = "expected 5 total writes, but got 2")]
+fn produce_fewer_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 5);
+    collect.with_consumer(|consumer| {
+        let mut folder = consumer.into_folder();
+        folder = folder.consume(22);
+        folder = folder.consume(23);
+        folder.complete()
+    });
+}
+
+// Complete is not called by the consumer. Hence, the collection vector is not fully initialized.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn left_produces_items_with_no_complete() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        right_folder.complete()
+    });
+}
+
+// Complete is not called by the right consumer. Hence, the
+// collection vector is not fully initialized.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn right_produces_items_with_no_complete() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        left_folder.complete()
+    });
+}
+
+// Complete is not called by the consumer. Hence, the collection vector is not fully initialized.
+#[test]
+fn produces_items_with_no_complete() {
+    let counter = DropCounter::default();
+    let mut v = vec![];
+    let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+        let collect = Collect::new(&mut v, 2);
+        collect.with_consumer(|consumer| {
+            let mut folder = consumer.into_folder();
+            folder = folder.consume(counter.element());
+            folder = folder.consume(counter.element());
+            panic!("folder does not complete");
+        });
+    }));
+    assert!(v.is_empty());
+    assert_is_panic_with_message(&panic_result, "folder does not complete");
+    counter.assert_drop_count();
+}
+
+// The left consumer produces too many items while the right
+// consumer produces the correct number.
+#[test]
+#[should_panic(expected = "too many values")]
+fn left_produces_too_many_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1).consume(2);
+        right_folder = right_folder.consume(2).consume(3);
+        let _ = right_folder.complete();
+        unreachable!("folder does not complete");
+    });
+}
+
+// The right consumer produces too many items while the left
+// consumer produces the correct number.
+#[test]
+#[should_panic(expected = "too many values")]
+fn right_produces_too_many_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3).consume(4);
+        let _ = left_folder.complete();
+        unreachable!("folder does not complete");
+    });
+}
+
+// The left consumer produces fewer items while the right
+// consumer produces the correct number.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 1")]
+fn left_produces_fewer_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0);
+        right_folder = right_folder.consume(2).consume(3);
+        let left_result = left_folder.complete();
+        let right_result = right_folder.complete();
+        reducer.reduce(left_result, right_result)
+    });
+}
+
+// The left and right consumers produce the correct number, but
+// only the left result is returned
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn only_left_result() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        let left_result = left_folder.complete();
+        let _ = right_folder.complete();
+        left_result
+    });
+}
+
+// The left and right consumers produce the correct number, but
+// only the right result is returned
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn only_right_result() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        let _ = left_folder.complete();
+        right_folder.complete()
+    });
+}
+
+// The left and right consumers produce the correct number but reduce
+// in the wrong order.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn reducer_does_not_preserve_order() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        let left_result = left_folder.complete();
+        let right_result = right_folder.complete();
+        reducer.reduce(right_result, left_result)
+    });
+}
+
+// The right consumer produces fewer items while the left
+// consumer produces the correct number.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 3")]
+fn right_produces_fewer_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2);
+        let left_result = left_folder.complete();
+        let right_result = right_folder.complete();
+        reducer.reduce(left_result, right_result)
+    });
+}
+
+// The left consumer panics and the right stops short, like `panic_fuse()`.
+// We should get the left panic without finishing `Collect::with_consumer`.
+#[test]
+#[should_panic(expected = "left consumer panic")]
+fn left_panics() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let (left_result, right_result) = join(
+            || {
+                let mut left_folder = left_consumer.into_folder();
+                left_folder = left_folder.consume(0);
+                panic!("left consumer panic");
+            },
+            || {
+                let mut right_folder = right_consumer.into_folder();
+                right_folder = right_folder.consume(2);
+                right_folder.complete() // early return
+            },
+        );
+        reducer.reduce(left_result, right_result)
+    });
+    unreachable!();
+}
+
+// The right consumer panics and the left stops short, like `panic_fuse()`.
+// We should get the right panic without finishing `Collect::with_consumer`.
+#[test]
+#[should_panic(expected = "right consumer panic")]
+fn right_panics() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let (left_result, right_result) = join(
+            || {
+                let mut left_folder = left_consumer.into_folder();
+                left_folder = left_folder.consume(0);
+                left_folder.complete() // early return
+            },
+            || {
+                let mut right_folder = right_consumer.into_folder();
+                right_folder = right_folder.consume(2);
+                panic!("right consumer panic");
+            },
+        );
+        reducer.reduce(left_result, right_result)
+    });
+    unreachable!();
+}
+
+// The left consumer produces fewer items while the right
+// consumer produces the correct number; check that created elements are dropped
+#[test]
+fn left_produces_fewer_items_drops() {
+    let counter = DropCounter::default();
+    let mut v = vec![];
+    let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+        let collect = Collect::new(&mut v, 4);
+        collect.with_consumer(|consumer| {
+            let reducer = consumer.to_reducer();
+            let (left_consumer, right_consumer, _) = consumer.split_at(2);
+            let mut left_folder = left_consumer.into_folder();
+            let mut right_folder = right_consumer.into_folder();
+            left_folder = left_folder.consume(counter.element());
+            right_folder = right_folder
+                .consume(counter.element())
+                .consume(counter.element());
+            let left_result = left_folder.complete();
+            let right_result = right_folder.complete();
+            reducer.reduce(left_result, right_result)
+        });
+    }));
+    assert!(v.is_empty());
+    assert_is_panic_with_message(&panic_result, "expected 4 total writes, but got 1");
+    counter.assert_drop_count();
+}
+
+/// This counter can create elements, and then count and verify
+/// how many of them have actually been dropped again.
+#[derive(Default)]
+struct DropCounter {
+    created: AtomicUsize,
+    dropped: AtomicUsize,
+}
+
+struct Element<'a>(&'a AtomicUsize);
+
+impl DropCounter {
+    fn created(&self) -> usize {
+        self.created.load(Ordering::SeqCst)
+    }
+
+    fn dropped(&self) -> usize {
+        self.dropped.load(Ordering::SeqCst)
+    }
+
+    fn element(&self) -> Element<'_> {
+        self.created.fetch_add(1, Ordering::SeqCst);
+        Element(&self.dropped)
+    }
+
+    fn assert_drop_count(&self) {
+        assert_eq!(
+            self.created(),
+            self.dropped(),
+            "Expected {} dropped elements, but found {}",
+            self.created(),
+            self.dropped()
+        );
+    }
+}
+
+impl<'a> Drop for Element<'a> {
+    fn drop(&mut self) {
+        self.0.fetch_add(1, Ordering::SeqCst);
+    }
+}
+
+/// Assert that the result from `catch_unwind` is a panic that contains the expected message
+fn assert_is_panic_with_message<T>(result: &ThreadResult<T>, expected: &str)
+where
+    T: fmt::Debug,
+{
+    match result {
+        Ok(value) => {
+            panic!(
+                "assertion failure: Expected panic, got successful {:?}",
+                value
+            );
+        }
+        Err(error) => {
+            let message_str = error.downcast_ref::<&'static str>().cloned();
+            let message_string = error.downcast_ref::<String>().map(String::as_str);
+            if let Some(message) = message_str.or(message_string) {
+                if !message.contains(expected) {
+                    panic!(
+                        "assertion failure: Expected {:?}, but found panic with {:?}",
+                        expected, message
+                    );
+                }
+            // assertion passes
+            } else {
+                panic!(
+                    "assertion failure: Expected {:?}, but found panic with unknown value",
+                    expected
+                );
+            }
+        }
+    }
+}
diff --git a/src/iter/copied.rs b/src/iter/copied.rs
new file mode 100644
index 0000000..12c9c5b
--- /dev/null
+++ b/src/iter/copied.rs
@@ -0,0 +1,223 @@
+use super::plumbing::*;
+use super::*;
+
+use std::iter;
+
+/// `Copied` is an iterator that copies the elements of an underlying iterator.
+///
+/// This struct is created by the [`copied()`] method on [`ParallelIterator`]
+///
+/// [`copied()`]: trait.ParallelIterator.html#method.copied
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Copied<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> Copied<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Copied` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Copied { base }
+    }
+}
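+
+// Illustrative sketch (not part of the upstream source): `copied()` turns a
+// parallel iterator over `&T` into one over `T` for `T: Copy`, assuming
+// `rayon::prelude::*` is in scope:
+//
+//     let data = vec![1, 2, 3, 4];
+//     let sum: i32 = data.par_iter().copied().sum();
+//     assert_eq!(sum, 10);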
+
+impl<'a, T, I> ParallelIterator for Copied<I>
+where
+    I: ParallelIterator<Item = &'a T>,
+    T: 'a + Copy + Send + Sync,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = CopiedConsumer::new(consumer);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<'a, T, I> IndexedParallelIterator for Copied<I>
+where
+    I: IndexedParallelIterator<Item = &'a T>,
+    T: 'a + Copy + Send + Sync,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = CopiedConsumer::new(consumer);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<'a, T, CB> ProducerCallback<&'a T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+            T: 'a + Copy + Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = &'a T>,
+            {
+                let producer = CopiedProducer { base };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct CopiedProducer<P> {
+    base: P,
+}
+
+impl<'a, T, P> Producer for CopiedProducer<P>
+where
+    P: Producer<Item = &'a T>,
+    T: 'a + Copy,
+{
+    type Item = T;
+    type IntoIter = iter::Copied<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().copied()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            CopiedProducer { base: left },
+            CopiedProducer { base: right },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(CopiedFolder { base: folder }).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct CopiedConsumer<C> {
+    base: C,
+}
+
+impl<C> CopiedConsumer<C> {
+    fn new(base: C) -> Self {
+        CopiedConsumer { base }
+    }
+}
+
+impl<'a, T, C> Consumer<&'a T> for CopiedConsumer<C>
+where
+    C: Consumer<T>,
+    T: 'a + Copy,
+{
+    type Folder = CopiedFolder<C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            CopiedConsumer::new(left),
+            CopiedConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        CopiedFolder {
+            base: self.base.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'a, T, C> UnindexedConsumer<&'a T> for CopiedConsumer<C>
+where
+    C: UnindexedConsumer<T>,
+    T: 'a + Copy,
+{
+    fn split_off_left(&self) -> Self {
+        CopiedConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct CopiedFolder<F> {
+    base: F,
+}
+
+impl<'a, T, F> Folder<&'a T> for CopiedFolder<F>
+where
+    F: Folder<T>,
+    T: 'a + Copy,
+{
+    type Result = F::Result;
+
+    fn consume(self, &item: &'a T) -> Self {
+        CopiedFolder {
+            base: self.base.consume(item),
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = &'a T>,
+    {
+        self.base = self.base.consume_iter(iter.into_iter().copied());
+        self
+    }
+
+    fn complete(self) -> F::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/empty.rs b/src/iter/empty.rs
new file mode 100644
index 0000000..85a2e5f
--- /dev/null
+++ b/src/iter/empty.rs
@@ -0,0 +1,104 @@
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use std::fmt;
+use std::marker::PhantomData;
+
+/// Creates a parallel iterator that produces nothing.
+///
+/// This admits no parallelism on its own, but it could be used for code that
+/// deals with generic parallel iterators.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::empty;
+///
+/// let pi = (0..1234).into_par_iter()
+///     .chain(empty())
+///     .chain(1234..10_000);
+///
+/// assert_eq!(pi.count(), 10_000);
+/// ```
+pub fn empty<T: Send>() -> Empty<T> {
+    Empty {
+        marker: PhantomData,
+    }
+}
+
+/// Iterator adaptor for [the `empty()` function](fn.empty.html).
+pub struct Empty<T: Send> {
+    marker: PhantomData<T>,
+}
+
+impl<T: Send> Clone for Empty<T> {
+    fn clone(&self) -> Self {
+        empty()
+    }
+}
+
+impl<T: Send> fmt::Debug for Empty<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.pad("Empty")
+    }
+}
+
+impl<T: Send> ParallelIterator for Empty<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.drive(consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(0)
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for Empty<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        consumer.into_folder().complete()
+    }
+
+    fn len(&self) -> usize {
+        0
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(EmptyProducer(PhantomData))
+    }
+}
+
+/// Private empty producer
+struct EmptyProducer<T: Send>(PhantomData<T>);
+
+impl<T: Send> Producer for EmptyProducer<T> {
+    type Item = T;
+    type IntoIter = std::iter::Empty<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        std::iter::empty()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        debug_assert_eq!(index, 0);
+        (self, EmptyProducer(PhantomData))
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder
+    }
+}
diff --git a/src/iter/enumerate.rs b/src/iter/enumerate.rs
new file mode 100644
index 0000000..980ee7c
--- /dev/null
+++ b/src/iter/enumerate.rs
@@ -0,0 +1,133 @@
+use super::plumbing::*;
+use super::*;
+use std::iter;
+use std::ops::Range;
+use std::usize;
+
+/// `Enumerate` is an iterator that returns the current count along with the element.
+/// This struct is created by the [`enumerate()`] method on [`IndexedParallelIterator`]
+///
+/// [`enumerate()`]: trait.IndexedParallelIterator.html#method.enumerate
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Enumerate<I: IndexedParallelIterator> {
+    base: I,
+}
+
+impl<I> Enumerate<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Enumerate` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Enumerate { base }
+    }
+}
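+
+// Illustrative sketch (not part of the upstream source): `enumerate()` pairs
+// each item with its index, assuming `rayon::prelude::*` is in scope:
+//
+//     let pairs: Vec<(usize, char)> = vec!['a', 'b', 'c']
+//         .into_par_iter()
+//         .enumerate()
+//         .collect();
+//     assert_eq!(pairs, [(0, 'a'), (1, 'b'), (2, 'c')]);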
+
+impl<I> ParallelIterator for Enumerate<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = (usize, I::Item);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Enumerate<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<I, CB> ProducerCallback<I> for Callback<CB>
+        where
+            CB: ProducerCallback<(usize, I)>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = I>,
+            {
+                let producer = EnumerateProducer { base, offset: 0 };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Producer implementation
+
+struct EnumerateProducer<P> {
+    base: P,
+    offset: usize,
+}
+
+impl<P> Producer for EnumerateProducer<P>
+where
+    P: Producer,
+{
+    type Item = (usize, P::Item);
+    type IntoIter = iter::Zip<Range<usize>, P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        // Enumerate only works for IndexedParallelIterators. Since those
+        // have a max length of usize::MAX, their max index is
+        // usize::MAX - 1, so the range 0..usize::MAX includes all
+        // possible indices.
+        //
+        // However, we should use a precise end to the range; otherwise
+        // reversing the iterator may have to walk back a long way before
+        // `Zip::next_back` can produce anything.
+        let base = self.base.into_iter();
+        let end = self.offset + base.len();
+        (self.offset..end).zip(base)
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            EnumerateProducer {
+                base: left,
+                offset: self.offset,
+            },
+            EnumerateProducer {
+                base: right,
+                offset: self.offset + index,
+            },
+        )
+    }
+}
diff --git a/src/iter/extend.rs b/src/iter/extend.rs
new file mode 100644
index 0000000..fb89249
--- /dev/null
+++ b/src/iter/extend.rs
@@ -0,0 +1,376 @@
+use super::noop::NoopConsumer;
+use super::{IntoParallelIterator, ParallelExtend, ParallelIterator};
+
+use std::borrow::Cow;
+use std::collections::LinkedList;
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BinaryHeap, VecDeque};
+use std::hash::{BuildHasher, Hash};
+
+/// Performs a generic `par_extend` by collecting to a `LinkedList<Vec<_>>` in
+/// parallel, then extending the collection sequentially.
+fn extend<C, I, F>(collection: &mut C, par_iter: I, reserve: F)
+where
+    I: IntoParallelIterator,
+    F: FnOnce(&mut C, &LinkedList<Vec<I::Item>>),
+    C: Extend<I::Item>,
+{
+    let list = collect(par_iter);
+    reserve(collection, &list);
+    for vec in list {
+        collection.extend(vec);
+    }
+}
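+
+// Illustrative sketch (not part of the upstream source): any of the impls
+// below can be exercised the same way through `ParallelExtend::par_extend`,
+// e.g. for a `HashSet`, assuming `rayon::prelude::*` is in scope:
+//
+//     use std::collections::HashSet;
+//
+//     let mut set = HashSet::new();
+//     set.par_extend((0..10).into_par_iter().map(|x| x / 2));
+//     assert_eq!(set.len(), 5);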
+
+pub(super) fn collect<I>(par_iter: I) -> LinkedList<Vec<I::Item>>
+where
+    I: IntoParallelIterator,
+{
+    par_iter
+        .into_par_iter()
+        .fold(Vec::new, vec_push)
+        .map(as_list)
+        .reduce(LinkedList::new, list_append)
+}
+
+fn vec_push<T>(mut vec: Vec<T>, elem: T) -> Vec<T> {
+    vec.push(elem);
+    vec
+}
+
+fn as_list<T>(item: T) -> LinkedList<T> {
+    let mut list = LinkedList::new();
+    list.push_back(item);
+    list
+}
+
+fn list_append<T>(mut list1: LinkedList<T>, mut list2: LinkedList<T>) -> LinkedList<T> {
+    list1.append(&mut list2);
+    list1
+}
+
+/// Computes the total length of a `LinkedList<Vec<_>>`.
+pub(super) fn len<T>(list: &LinkedList<Vec<T>>) -> usize {
+    list.iter().map(Vec::len).sum()
+}
+
+fn no_reserve<C, T>(_: &mut C, _: &LinkedList<Vec<T>>) {}
+
+fn heap_reserve<T: Ord, U>(heap: &mut BinaryHeap<T>, list: &LinkedList<Vec<U>>) {
+    heap.reserve(len(list));
+}
+
+/// Extends a binary heap with items from a parallel iterator.
+impl<T> ParallelExtend<T> for BinaryHeap<T>
+where
+    T: Ord + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, heap_reserve);
+    }
+}
+
+/// Extends a binary heap with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for BinaryHeap<T>
+where
+    T: 'a + Copy + Ord + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, heap_reserve);
+    }
+}
+
+/// Extends a B-tree map with items from a parallel iterator.
+impl<K, V> ParallelExtend<(K, V)> for BTreeMap<K, V>
+where
+    K: Ord + Send,
+    V: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+/// Extends a B-tree map with copied items from a parallel iterator.
+impl<'a, K: 'a, V: 'a> ParallelExtend<(&'a K, &'a V)> for BTreeMap<K, V>
+where
+    K: Copy + Ord + Send + Sync,
+    V: Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (&'a K, &'a V)>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+/// Extends a B-tree set with items from a parallel iterator.
+impl<T> ParallelExtend<T> for BTreeSet<T>
+where
+    T: Ord + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+/// Extends a B-tree set with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for BTreeSet<T>
+where
+    T: 'a + Copy + Ord + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+fn map_reserve<K, V, S, U>(map: &mut HashMap<K, V, S>, list: &LinkedList<Vec<U>>)
+where
+    K: Eq + Hash,
+    S: BuildHasher,
+{
+    map.reserve(len(list));
+}
+
+/// Extends a hash map with items from a parallel iterator.
+impl<K, V, S> ParallelExtend<(K, V)> for HashMap<K, V, S>
+where
+    K: Eq + Hash + Send,
+    V: Send,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        // See the map_collect benchmarks in rayon-demo for different strategies.
+        extend(self, par_iter, map_reserve);
+    }
+}
+
+/// Extends a hash map with copied items from a parallel iterator.
+impl<'a, K: 'a, V: 'a, S> ParallelExtend<(&'a K, &'a V)> for HashMap<K, V, S>
+where
+    K: Copy + Eq + Hash + Send + Sync,
+    V: Copy + Send + Sync,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (&'a K, &'a V)>,
+    {
+        extend(self, par_iter, map_reserve);
+    }
+}
+
+fn set_reserve<T, S, U>(set: &mut HashSet<T, S>, list: &LinkedList<Vec<U>>)
+where
+    T: Eq + Hash,
+    S: BuildHasher,
+{
+    set.reserve(len(list));
+}
+
+/// Extends a hash set with items from a parallel iterator.
+impl<T, S> ParallelExtend<T> for HashSet<T, S>
+where
+    T: Eq + Hash + Send,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, set_reserve);
+    }
+}
+
+/// Extends a hash set with copied items from a parallel iterator.
+impl<'a, T, S> ParallelExtend<&'a T> for HashSet<T, S>
+where
+    T: 'a + Copy + Eq + Hash + Send + Sync,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, set_reserve);
+    }
+}
+
+fn list_push_back<T>(mut list: LinkedList<T>, elem: T) -> LinkedList<T> {
+    list.push_back(elem);
+    list
+}
+
+/// Extends a linked list with items from a parallel iterator.
+impl<T> ParallelExtend<T> for LinkedList<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        let mut list = par_iter
+            .into_par_iter()
+            .fold(LinkedList::new, list_push_back)
+            .reduce(LinkedList::new, list_append);
+        self.append(&mut list);
+    }
+}
+
+/// Extends a linked list with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for LinkedList<T>
+where
+    T: 'a + Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        self.par_extend(par_iter.into_par_iter().cloned())
+    }
+}
+
+fn string_push(mut string: String, ch: char) -> String {
+    string.push(ch);
+    string
+}
+
+/// Extends a string with characters from a parallel iterator.
+impl ParallelExtend<char> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = char>,
+    {
+        // This is like `extend`, but `Vec<char>` is less efficient to deal
+        // with than `String`, so instead collect to `LinkedList<String>`.
+        let list: LinkedList<_> = par_iter
+            .into_par_iter()
+            .fold(String::new, string_push)
+            .map(as_list)
+            .reduce(LinkedList::new, list_append);
+
+        self.reserve(list.iter().map(String::len).sum());
+        self.extend(list)
+    }
+}
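+
+// Illustrative sketch (not part of the upstream source): extending a `String`
+// from a parallel iterator of `char`s, assuming `rayon::prelude::*` is in
+// scope (which also provides `par_chars()`):
+//
+//     let mut s = String::from("abc");
+//     s.par_extend("def".par_chars());
+//     assert_eq!(s, "abcdef");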
+
+/// Extends a string with copied characters from a parallel iterator.
+impl<'a> ParallelExtend<&'a char> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a char>,
+    {
+        self.par_extend(par_iter.into_par_iter().cloned())
+    }
+}
+
+fn string_reserve<T: AsRef<str>>(string: &mut String, list: &LinkedList<Vec<T>>) {
+    let len = list.iter().flatten().map(T::as_ref).map(str::len).sum();
+    string.reserve(len);
+}
+
+/// Extends a string with string slices from a parallel iterator.
+impl<'a> ParallelExtend<&'a str> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a str>,
+    {
+        extend(self, par_iter, string_reserve);
+    }
+}
+
+/// Extends a string with strings from a parallel iterator.
+impl ParallelExtend<String> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = String>,
+    {
+        extend(self, par_iter, string_reserve);
+    }
+}
+
+/// Extends a string with string slices from a parallel iterator.
+impl<'a> ParallelExtend<Cow<'a, str>> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = Cow<'a, str>>,
+    {
+        extend(self, par_iter, string_reserve);
+    }
+}
+
+fn deque_reserve<T, U>(deque: &mut VecDeque<T>, list: &LinkedList<Vec<U>>) {
+    deque.reserve(len(list));
+}
+
+/// Extends a deque with items from a parallel iterator.
+impl<T> ParallelExtend<T> for VecDeque<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, deque_reserve);
+    }
+}
+
+/// Extends a deque with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for VecDeque<T>
+where
+    T: 'a + Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, deque_reserve);
+    }
+}
+
+// See the `collect` module for the `Vec<T>` implementation.
+// impl<T> ParallelExtend<T> for Vec<T>
+
+/// Extends a vector with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for Vec<T>
+where
+    T: 'a + Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        self.par_extend(par_iter.into_par_iter().cloned())
+    }
+}
+
+/// Collapses all unit items from a parallel iterator into one.
+impl ParallelExtend<()> for () {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = ()>,
+    {
+        par_iter.into_par_iter().drive_unindexed(NoopConsumer)
+    }
+}
diff --git a/src/iter/filter.rs b/src/iter/filter.rs
new file mode 100644
index 0000000..38627f7
--- /dev/null
+++ b/src/iter/filter.rs
@@ -0,0 +1,141 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `Filter` takes a predicate `filter_op` and keeps only the elements that match.
+/// This struct is created by the [`filter()`] method on [`ParallelIterator`]
+///
+/// [`filter()`]: trait.ParallelIterator.html#method.filter
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Filter<I: ParallelIterator, P> {
+    base: I,
+    filter_op: P,
+}
+
+impl<I: ParallelIterator + Debug, P> Debug for Filter<I, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Filter").field("base", &self.base).finish()
+    }
+}
+
+impl<I, P> Filter<I, P>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Filter` iterator.
+    pub(super) fn new(base: I, filter_op: P) -> Self {
+        Filter { base, filter_op }
+    }
+}
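+
+// Illustrative sketch (not part of the upstream source): `filter()` keeps the
+// elements for which the predicate returns `true`, assuming
+// `rayon::prelude::*` is in scope:
+//
+//     let sum: i32 = (0..10).into_par_iter().filter(|&x| x % 2 == 0).sum();
+//     assert_eq!(sum, 0 + 2 + 4 + 6 + 8);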
+
+impl<I, P> ParallelIterator for Filter<I, P>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync + Send,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = FilterConsumer::new(consumer, &self.filter_op);
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FilterConsumer<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, C, P> FilterConsumer<'p, C, P> {
+    fn new(base: C, filter_op: &'p P) -> Self {
+        FilterConsumer { base, filter_op }
+    }
+}
+
+impl<'p, T, C, P: 'p> Consumer<T> for FilterConsumer<'p, C, P>
+where
+    C: Consumer<T>,
+    P: Fn(&T) -> bool + Sync,
+{
+    type Folder = FilterFolder<'p, C::Folder, P>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FilterConsumer::new(left, self.filter_op),
+            FilterConsumer::new(right, self.filter_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FilterFolder {
+            base: self.base.into_folder(),
+            filter_op: self.filter_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'p, T, C, P: 'p> UnindexedConsumer<T> for FilterConsumer<'p, C, P>
+where
+    C: UnindexedConsumer<T>,
+    P: Fn(&T) -> bool + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        FilterConsumer::new(self.base.split_off_left(), &self.filter_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FilterFolder<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, C, P, T> Folder<T> for FilterFolder<'p, C, P>
+where
+    C: Folder<T>,
+    P: Fn(&T) -> bool + 'p,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let filter_op = self.filter_op;
+        if filter_op(&item) {
+            let base = self.base.consume(item);
+            FilterFolder { base, filter_op }
+        } else {
+            self
+        }
+    }
+
+    // This cannot easily specialize `consume_iter` to be better than
+    // the default, because that requires checking `self.base.full()`
+    // during a call to `self.base.consume_iter()`. (#632)
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/filter_map.rs b/src/iter/filter_map.rs
new file mode 100644
index 0000000..f19c385
--- /dev/null
+++ b/src/iter/filter_map.rs
@@ -0,0 +1,142 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `FilterMap` creates an iterator that uses `filter_op` to both filter and map elements.
+/// This struct is created by the [`filter_map()`] method on [`ParallelIterator`].
+///
+/// [`filter_map()`]: trait.ParallelIterator.html#method.filter_map
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FilterMap<I: ParallelIterator, P> {
+    base: I,
+    filter_op: P,
+}
+
+impl<I: ParallelIterator + Debug, P> Debug for FilterMap<I, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FilterMap")
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl<I: ParallelIterator, P> FilterMap<I, P> {
+    /// Creates a new `FilterMap` iterator.
+    pub(super) fn new(base: I, filter_op: P) -> Self {
+        FilterMap { base, filter_op }
+    }
+}
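+
+// Illustrative sketch (not part of the upstream source): `filter_map()` maps
+// and filters in one pass, keeping only the `Some` results, assuming
+// `rayon::prelude::*` is in scope:
+//
+//     let sum: i32 = (0..10)
+//         .into_par_iter()
+//         .filter_map(|x| if x % 3 == 0 { Some(x * x) } else { None })
+//         .sum();
+//     assert_eq!(sum, 0 + 9 + 36 + 81);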
+
+impl<I, P, R> ParallelIterator for FilterMap<I, P>
+where
+    I: ParallelIterator,
+    P: Fn(I::Item) -> Option<R> + Sync + Send,
+    R: Send,
+{
+    type Item = R;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FilterMapConsumer::new(consumer, &self.filter_op);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FilterMapConsumer<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, C, P: 'p> FilterMapConsumer<'p, C, P> {
+    fn new(base: C, filter_op: &'p P) -> Self {
+        FilterMapConsumer { base, filter_op }
+    }
+}
+
+impl<'p, T, U, C, P> Consumer<T> for FilterMapConsumer<'p, C, P>
+where
+    C: Consumer<U>,
+    P: Fn(T) -> Option<U> + Sync + 'p,
+{
+    type Folder = FilterMapFolder<'p, C::Folder, P>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FilterMapConsumer::new(left, self.filter_op),
+            FilterMapConsumer::new(right, self.filter_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        let base = self.base.into_folder();
+        FilterMapFolder {
+            base,
+            filter_op: self.filter_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'p, T, U, C, P> UnindexedConsumer<T> for FilterMapConsumer<'p, C, P>
+where
+    C: UnindexedConsumer<U>,
+    P: Fn(T) -> Option<U> + Sync + 'p,
+{
+    fn split_off_left(&self) -> Self {
+        FilterMapConsumer::new(self.base.split_off_left(), &self.filter_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FilterMapFolder<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, T, U, C, P> Folder<T> for FilterMapFolder<'p, C, P>
+where
+    C: Folder<U>,
+    P: Fn(T) -> Option<U> + Sync + 'p,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let filter_op = self.filter_op;
+        if let Some(mapped_item) = filter_op(item) {
+            let base = self.base.consume(mapped_item);
+            FilterMapFolder { base, filter_op }
+        } else {
+            self
+        }
+    }
+
+    // This cannot easily specialize `consume_iter` to be better than
+    // the default, because that requires checking `self.base.full()`
+    // during a call to `self.base.consume_iter()`. (#632)
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/find.rs b/src/iter/find.rs
new file mode 100644
index 0000000..971db2b
--- /dev/null
+++ b/src/iter/find.rs
@@ -0,0 +1,120 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+pub(super) fn find<I, P>(pi: I, find_op: P) -> Option<I::Item>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync,
+{
+    let found = AtomicBool::new(false);
+    let consumer = FindConsumer::new(&find_op, &found);
+    pi.drive_unindexed(consumer)
+}
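+
+// Illustrative sketch (not part of the upstream source): this helper backs
+// `ParallelIterator::find_any`, which may return *any* matching element,
+// because the shared `AtomicBool` stops all threads as soon as one of them
+// finds a match. Assuming `rayon::prelude::*` is in scope:
+//
+//     let found = (0..1000).into_par_iter().find_any(|&x| x % 137 == 0);
+//     assert!(matches!(found, Some(x) if x % 137 == 0));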
+
+struct FindConsumer<'p, P> {
+    find_op: &'p P,
+    found: &'p AtomicBool,
+}
+
+impl<'p, P> FindConsumer<'p, P> {
+    fn new(find_op: &'p P, found: &'p AtomicBool) -> Self {
+        FindConsumer { find_op, found }
+    }
+}
+
+impl<'p, T, P: 'p> Consumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    type Folder = FindFolder<'p, T, P>;
+    type Reducer = FindReducer;
+    type Result = Option<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        (self.split_off_left(), self, FindReducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FindFolder {
+            find_op: self.find_op,
+            found: self.found,
+            item: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.found.load(Ordering::Relaxed)
+    }
+}
+
+impl<'p, T, P: 'p> UnindexedConsumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        FindConsumer::new(self.find_op, self.found)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        FindReducer
+    }
+}
+
+struct FindFolder<'p, T, P> {
+    find_op: &'p P,
+    found: &'p AtomicBool,
+    item: Option<T>,
+}
+
+impl<'p, T, P> Folder<T> for FindFolder<'p, T, P>
+where
+    P: Fn(&T) -> bool + 'p,
+{
+    type Result = Option<T>;
+
+    fn consume(mut self, item: T) -> Self {
+        if (self.find_op)(&item) {
+            self.found.store(true, Ordering::Relaxed);
+            self.item = Some(item);
+        }
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn not_full<T>(found: &AtomicBool) -> impl Fn(&T) -> bool + '_ {
+            move |_| !found.load(Ordering::Relaxed)
+        }
+
+        self.item = iter
+            .into_iter()
+            // stop iterating if another thread has found something
+            .take_while(not_full(&self.found))
+            .find(self.find_op);
+        if self.item.is_some() {
+            self.found.store(true, Ordering::Relaxed)
+        }
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        self.item
+    }
+
+    fn full(&self) -> bool {
+        self.found.load(Ordering::Relaxed)
+    }
+}
+
+struct FindReducer;
+
+impl<T> Reducer<Option<T>> for FindReducer {
+    fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
+        left.or(right)
+    }
+}
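
This consumer backs order-insensitive searches such as `find_any`; the shared `AtomicBool` is what lets every other folder report `full()` and stop early once any thread finds a match. A small illustrative sketch, not part of the patch:

```
use rayon::prelude::*;

let v = vec![1, 3, 5, 6, 9];
// find_any returns *some* matching item, not necessarily the first in order.
let even = v.par_iter().find_any(|&&x| x % 2 == 0);
assert_eq!(even, Some(&6));
```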
diff --git a/src/iter/find_first_last/mod.rs b/src/iter/find_first_last/mod.rs
new file mode 100644
index 0000000..e5da8f0
--- /dev/null
+++ b/src/iter/find_first_last/mod.rs
@@ -0,0 +1,238 @@
+use super::plumbing::*;
+use super::*;
+use std::cell::Cell;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+#[cfg(test)]
+mod test;
+
+// The key optimization for find_first is that a consumer can stop its search if
+// some consumer to its left already found a match (and similarly for consumers
+// to the right for find_last). To make this work, all consumers need some
+// notion of their position in the data relative to other consumers, including
+// unindexed consumers that have no built-in notion of position.
+//
+// To solve this, we assign each consumer a lower and upper bound for an
+// imaginary "range" of data that it consumes. The initial consumer starts with
+// the range 0..usize::max_value(). The split divides this range in half so that
+// one resulting consumer has the range 0..(usize::max_value() / 2), and the
+// other has (usize::max_value() / 2)..usize::max_value(). Every subsequent
+// split divides the range in half again until it cannot be split anymore
+// (i.e. its length is 1), in which case the split returns two consumers with
+// the same range. In that case both consumers will continue to consume all
+// their data regardless of whether a better match is found, but the reducer
+// will still return the correct answer.
+
+#[derive(Copy, Clone)]
+enum MatchPosition {
+    Leftmost,
+    Rightmost,
+}
+
+/// Returns true if pos1 is a better match than pos2 according to MatchPosition
+#[inline]
+fn better_position(pos1: usize, pos2: usize, mp: MatchPosition) -> bool {
+    match mp {
+        MatchPosition::Leftmost => pos1 < pos2,
+        MatchPosition::Rightmost => pos1 > pos2,
+    }
+}
+
+pub(super) fn find_first<I, P>(pi: I, find_op: P) -> Option<I::Item>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync,
+{
+    let best_found = AtomicUsize::new(usize::max_value());
+    let consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &best_found);
+    pi.drive_unindexed(consumer)
+}
+
+pub(super) fn find_last<I, P>(pi: I, find_op: P) -> Option<I::Item>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync,
+{
+    let best_found = AtomicUsize::new(0);
+    let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &best_found);
+    pi.drive_unindexed(consumer)
+}
+
+struct FindConsumer<'p, P> {
+    find_op: &'p P,
+    lower_bound: Cell<usize>,
+    upper_bound: usize,
+    match_position: MatchPosition,
+    best_found: &'p AtomicUsize,
+}
+
+impl<'p, P> FindConsumer<'p, P> {
+    fn new(find_op: &'p P, match_position: MatchPosition, best_found: &'p AtomicUsize) -> Self {
+        FindConsumer {
+            find_op,
+            lower_bound: Cell::new(0),
+            upper_bound: usize::max_value(),
+            match_position,
+            best_found,
+        }
+    }
+
+    fn current_index(&self) -> usize {
+        match self.match_position {
+            MatchPosition::Leftmost => self.lower_bound.get(),
+            MatchPosition::Rightmost => self.upper_bound,
+        }
+    }
+}
+
+impl<'p, T, P> Consumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    type Folder = FindFolder<'p, T, P>;
+    type Reducer = FindReducer;
+    type Result = Option<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        let dir = self.match_position;
+        (
+            self.split_off_left(),
+            self,
+            FindReducer {
+                match_position: dir,
+            },
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FindFolder {
+            find_op: self.find_op,
+            boundary: self.current_index(),
+            match_position: self.match_position,
+            best_found: self.best_found,
+            item: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        // can stop consuming if the best found index so far is *strictly*
+        // better than anything this consumer will find
+        better_position(
+            self.best_found.load(Ordering::Relaxed),
+            self.current_index(),
+            self.match_position,
+        )
+    }
+}
+
+impl<'p, T, P> UnindexedConsumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        // Upper bound for one consumer will be lower bound for the other. This
+        // overlap is okay, because only one of the bounds will be used for
+        // comparing against best_found; the other is kept only to be able to
+        // divide the range in half.
+        //
+        // When the resolution of usize has been exhausted (i.e. when
+        // upper_bound = lower_bound), both results of this split will have the
+        // same range. When that happens, we lose the ability to tell one
+        // consumer to stop working when the other finds a better match, but the
+        // reducer ensures that the best answer is still returned (see the
+        // tests in the `test` module).
+        let old_lower_bound = self.lower_bound.get();
+        let median = old_lower_bound + ((self.upper_bound - old_lower_bound) / 2);
+        self.lower_bound.set(median);
+
+        FindConsumer {
+            find_op: self.find_op,
+            lower_bound: Cell::new(old_lower_bound),
+            upper_bound: median,
+            match_position: self.match_position,
+            best_found: self.best_found,
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        FindReducer {
+            match_position: self.match_position,
+        }
+    }
+}
+
+struct FindFolder<'p, T, P> {
+    find_op: &'p P,
+    boundary: usize,
+    match_position: MatchPosition,
+    best_found: &'p AtomicUsize,
+    item: Option<T>,
+}
+
+impl<'p, P: 'p + Fn(&T) -> bool, T> Folder<T> for FindFolder<'p, T, P> {
+    type Result = Option<T>;
+
+    fn consume(mut self, item: T) -> Self {
+        let found_best_in_range = match self.match_position {
+            MatchPosition::Leftmost => self.item.is_some(),
+            MatchPosition::Rightmost => false,
+        };
+
+        if !found_best_in_range && (self.find_op)(&item) {
+            // Continuously try to set best_found until we succeed or we
+            // discover a better match was already found.
+            let mut current = self.best_found.load(Ordering::Relaxed);
+            loop {
+                if better_position(current, self.boundary, self.match_position) {
+                    break;
+                }
+                match self.best_found.compare_exchange_weak(
+                    current,
+                    self.boundary,
+                    Ordering::Relaxed,
+                    Ordering::Relaxed,
+                ) {
+                    Ok(_) => {
+                        self.item = Some(item);
+                        break;
+                    }
+                    Err(v) => current = v,
+                }
+            }
+        }
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        self.item
+    }
+
+    fn full(&self) -> bool {
+        let found_best_in_range = match self.match_position {
+            MatchPosition::Leftmost => self.item.is_some(),
+            MatchPosition::Rightmost => false,
+        };
+
+        found_best_in_range
+            || better_position(
+                self.best_found.load(Ordering::Relaxed),
+                self.boundary,
+                self.match_position,
+            )
+    }
+}
+
+struct FindReducer {
+    match_position: MatchPosition,
+}
+
+impl<T> Reducer<Option<T>> for FindReducer {
+    fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
+        match self.match_position {
+            MatchPosition::Leftmost => left.or(right),
+            MatchPosition::Rightmost => right.or(left),
+        }
+    }
+}
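
In contrast to `find_any`, the ranged bounds above let `find_first` and `find_last` respect the original position of items even for unindexed splits. An illustrative sketch, not part of the imported sources:

```
use rayon::prelude::*;

let v = vec![1, 2, 3, 4, 5, 6];
// These honor the data's order: first/last match rather than an arbitrary one.
assert_eq!(v.par_iter().find_first(|&&x| x % 2 == 0), Some(&2));
assert_eq!(v.par_iter().find_last(|&&x| x % 2 == 0), Some(&6));
```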
diff --git a/src/iter/find_first_last/test.rs b/src/iter/find_first_last/test.rs
new file mode 100644
index 0000000..05271bc
--- /dev/null
+++ b/src/iter/find_first_last/test.rs
@@ -0,0 +1,106 @@
+use super::*;
+use std::sync::atomic::AtomicUsize;
+
+#[test]
+fn same_range_first_consumers_return_correct_answer() {
+    let find_op = |x: &i32| x % 2 == 0;
+    let first_found = AtomicUsize::new(usize::max_value());
+    let far_right_consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found);
+
+    // We save a consumer that will be far to the right of the main consumer (and therefore not
+    // sharing an index range with that consumer) for fullness testing
+    let consumer = far_right_consumer.split_off_left();
+
+    // split until we have an indivisible range
+    let bits_in_usize = usize::min_value().count_zeros();
+
+    for _ in 0..bits_in_usize {
+        consumer.split_off_left();
+    }
+
+    let reducer = consumer.to_reducer();
+    // the left and right folders should now have the same range, having
+    // exhausted the resolution of usize
+    let left_folder = consumer.split_off_left().into_folder();
+    let right_folder = consumer.into_folder();
+
+    let left_folder = left_folder.consume(0).consume(1);
+    assert_eq!(left_folder.boundary, right_folder.boundary);
+    // expect not full even though a better match has been found because the
+    // ranges are the same
+    assert!(!right_folder.full());
+    assert!(far_right_consumer.full());
+    let right_folder = right_folder.consume(2).consume(3);
+    assert_eq!(
+        reducer.reduce(left_folder.complete(), right_folder.complete()),
+        Some(0)
+    );
+}
+
+#[test]
+fn same_range_last_consumers_return_correct_answer() {
+    let find_op = |x: &i32| x % 2 == 0;
+    let last_found = AtomicUsize::new(0);
+    let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found);
+
+    // We save a consumer that will be far to the left of the main consumer (and therefore not
+    // sharing an index range with that consumer) for fullness testing
+    let far_left_consumer = consumer.split_off_left();
+
+    // split until we have an indivisible range
+    let bits_in_usize = usize::min_value().count_zeros();
+    for _ in 0..bits_in_usize {
+        consumer.split_off_left();
+    }
+
+    let reducer = consumer.to_reducer();
+    // due to the exact calculation in split_off_left, the very last consumer has a
+    // range of width 2, so we use the second-to-last consumer instead to get
+    // the same boundary on both folders
+    let consumer = consumer.split_off_left();
+    let left_folder = consumer.split_off_left().into_folder();
+    let right_folder = consumer.into_folder();
+    let right_folder = right_folder.consume(2).consume(3);
+    assert_eq!(left_folder.boundary, right_folder.boundary);
+    // expect not full even though a better match has been found because the
+    // ranges are the same
+    assert!(!left_folder.full());
+    assert!(far_left_consumer.full());
+    let left_folder = left_folder.consume(0).consume(1);
+    assert_eq!(
+        reducer.reduce(left_folder.complete(), right_folder.complete()),
+        Some(2)
+    );
+}
+
+// These tests require that a folder be assigned to an iterator with more than
+// one element. We can't necessarily determine when that will happen for a given
+// input to find_first/find_last, so we test the folder directly here instead.
+#[test]
+fn find_first_folder_does_not_clobber_first_found() {
+    let best_found = AtomicUsize::new(usize::max_value());
+    let f = FindFolder {
+        find_op: &(|&_: &i32| -> bool { true }),
+        boundary: 0,
+        match_position: MatchPosition::Leftmost,
+        best_found: &best_found,
+        item: None,
+    };
+    let f = f.consume(0_i32).consume(1_i32).consume(2_i32);
+    assert!(f.full());
+    assert_eq!(f.complete(), Some(0_i32));
+}
+
+#[test]
+fn find_last_folder_yields_last_match() {
+    let best_found = AtomicUsize::new(0);
+    let f = FindFolder {
+        find_op: &(|&_: &i32| -> bool { true }),
+        boundary: 0,
+        match_position: MatchPosition::Rightmost,
+        best_found: &best_found,
+        item: None,
+    };
+    let f = f.consume(0_i32).consume(1_i32).consume(2_i32);
+    assert_eq!(f.complete(), Some(2_i32));
+}
diff --git a/src/iter/flat_map.rs b/src/iter/flat_map.rs
new file mode 100644
index 0000000..f264e1e
--- /dev/null
+++ b/src/iter/flat_map.rs
@@ -0,0 +1,154 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `FlatMap` maps each element to a parallel iterator, then flattens these iterators together.
+/// This struct is created by the [`flat_map()`] method on [`ParallelIterator`]
+///
+/// [`flat_map()`]: trait.ParallelIterator.html#method.flat_map
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FlatMap<I: ParallelIterator, F> {
+    base: I,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for FlatMap<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlatMap").field("base", &self.base).finish()
+    }
+}
+
+impl<I: ParallelIterator, F> FlatMap<I, F> {
+    /// Creates a new `FlatMap` iterator.
+    pub(super) fn new(base: I, map_op: F) -> Self {
+        FlatMap { base, map_op }
+    }
+}
+
+impl<I, F, PI> ParallelIterator for FlatMap<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(I::Item) -> PI + Sync + Send,
+    PI: IntoParallelIterator,
+{
+    type Item = PI::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlatMapConsumer::new(consumer, &self.map_op);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlatMapConsumer<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, C, F> FlatMapConsumer<'f, C, F> {
+    fn new(base: C, map_op: &'f F) -> Self {
+        FlatMapConsumer { base, map_op }
+    }
+}
+
+impl<'f, T, U, C, F> Consumer<T> for FlatMapConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoParallelIterator,
+{
+    type Folder = FlatMapFolder<'f, C, F, C::Result>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlatMapConsumer::new(left, self.map_op),
+            FlatMapConsumer::new(right, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlatMapFolder {
+            base: self.base,
+            map_op: self.map_op,
+            previous: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, U, C, F> UnindexedConsumer<T> for FlatMapConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoParallelIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlatMapConsumer::new(self.base.split_off_left(), self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlatMapFolder<'f, C, F, R> {
+    base: C,
+    map_op: &'f F,
+    previous: Option<R>,
+}
+
+impl<'f, T, U, C, F> Folder<T> for FlatMapFolder<'f, C, F, C::Result>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoParallelIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let map_op = self.map_op;
+        let par_iter = map_op(item).into_par_iter();
+        let consumer = self.base.split_off_left();
+        let result = par_iter.drive_unindexed(consumer);
+
+        let previous = match self.previous {
+            None => Some(result),
+            Some(previous) => {
+                let reducer = self.base.to_reducer();
+                Some(reducer.reduce(previous, result))
+            }
+        };
+
+        FlatMapFolder {
+            base: self.base,
+            map_op,
+            previous,
+        }
+    }
+
+    fn complete(self) -> Self::Result {
+        match self.previous {
+            Some(previous) => previous,
+            None => self.base.into_folder().complete(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
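
A short usage sketch of `flat_map` as implemented above (illustrative, not part of the diff): each closure result is driven as a parallel iterator of its own, via `split_off_left` on the base consumer.

```
use rayon::prelude::*;

let words = vec!["hello", "world"];
// Every closure call yields something that is IntoParallelIterator (a Vec here),
// and the per-item results are reduced back together.
let chars: Vec<char> = words
    .par_iter()
    .flat_map(|s| s.chars().collect::<Vec<char>>())
    .collect();
assert_eq!(chars.len(), 10);
```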
diff --git a/src/iter/flat_map_iter.rs b/src/iter/flat_map_iter.rs
new file mode 100644
index 0000000..c76cf68
--- /dev/null
+++ b/src/iter/flat_map_iter.rs
@@ -0,0 +1,147 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `FlatMapIter` maps each element to a serial iterator, then flattens these iterators together.
+/// This struct is created by the [`flat_map_iter()`] method on [`ParallelIterator`]
+///
+/// [`flat_map_iter()`]: trait.ParallelIterator.html#method.flat_map_iter
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FlatMapIter<I: ParallelIterator, F> {
+    base: I,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for FlatMapIter<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlatMapIter")
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl<I: ParallelIterator, F> FlatMapIter<I, F> {
+    /// Creates a new `FlatMapIter` iterator.
+    pub(super) fn new(base: I, map_op: F) -> Self {
+        FlatMapIter { base, map_op }
+    }
+}
+
+impl<I, F, SI> ParallelIterator for FlatMapIter<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(I::Item) -> SI + Sync + Send,
+    SI: IntoIterator,
+    SI::Item: Send,
+{
+    type Item = SI::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlatMapIterConsumer::new(consumer, &self.map_op);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlatMapIterConsumer<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, C, F> FlatMapIterConsumer<'f, C, F> {
+    fn new(base: C, map_op: &'f F) -> Self {
+        FlatMapIterConsumer { base, map_op }
+    }
+}
+
+impl<'f, T, U, C, F> Consumer<T> for FlatMapIterConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoIterator,
+{
+    type Folder = FlatMapIterFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlatMapIterConsumer::new(left, self.map_op),
+            FlatMapIterConsumer::new(right, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlatMapIterFolder {
+            base: self.base.into_folder(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, U, C, F> UnindexedConsumer<T> for FlatMapIterConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlatMapIterConsumer::new(self.base.split_off_left(), self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlatMapIterFolder<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, T, U, C, F> Folder<T> for FlatMapIterFolder<'f, C, F>
+where
+    C: Folder<U::Item>,
+    F: Fn(T) -> U,
+    U: IntoIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let map_op = self.map_op;
+        let base = self.base.consume_iter(map_op(item));
+        FlatMapIterFolder { base, map_op }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let map_op = self.map_op;
+        let iter = iter.into_iter().flat_map(map_op);
+        let base = self.base.consume_iter(iter);
+        FlatMapIterFolder { base, map_op }
+    }
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
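
The closure for `flat_map_iter` returns a plain serial iterator, which the folder feeds through `consume_iter`; this can be cheaper than `flat_map` when each group is small, since no nested parallel drive is needed. Illustrative sketch only:

```
use rayon::prelude::*;

let words = vec!["hello", "world"];
// Each sub-iterator (str::chars) is consumed sequentially within one folder.
let chars: Vec<char> = words
    .par_iter()
    .flat_map_iter(|s| s.chars())
    .collect();
assert_eq!(chars.len(), 10);
```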
diff --git a/src/iter/flatten.rs b/src/iter/flatten.rs
new file mode 100644
index 0000000..29d88f9
--- /dev/null
+++ b/src/iter/flatten.rs
@@ -0,0 +1,140 @@
+use super::plumbing::*;
+use super::*;
+
+/// `Flatten` turns each element into a parallel iterator, then flattens these iterators
+/// together. This struct is created by the [`flatten()`] method on [`ParallelIterator`].
+///
+/// [`flatten()`]: trait.ParallelIterator.html#method.flatten
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Flatten<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> Flatten<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoParallelIterator,
+{
+    /// Creates a new `Flatten` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Flatten { base }
+    }
+}
+
+impl<I> ParallelIterator for Flatten<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoParallelIterator,
+{
+    type Item = <I::Item as IntoParallelIterator>::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlattenConsumer::new(consumer);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlattenConsumer<C> {
+    base: C,
+}
+
+impl<C> FlattenConsumer<C> {
+    fn new(base: C) -> Self {
+        FlattenConsumer { base }
+    }
+}
+
+impl<T, C> Consumer<T> for FlattenConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoParallelIterator,
+{
+    type Folder = FlattenFolder<C, C::Result>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlattenConsumer::new(left),
+            FlattenConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlattenFolder {
+            base: self.base,
+            previous: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<T, C> UnindexedConsumer<T> for FlattenConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoParallelIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlattenConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlattenFolder<C, R> {
+    base: C,
+    previous: Option<R>,
+}
+
+impl<T, C> Folder<T> for FlattenFolder<C, C::Result>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoParallelIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let par_iter = item.into_par_iter();
+        let consumer = self.base.split_off_left();
+        let result = par_iter.drive_unindexed(consumer);
+
+        let previous = match self.previous {
+            None => Some(result),
+            Some(previous) => {
+                let reducer = self.base.to_reducer();
+                Some(reducer.reduce(previous, result))
+            }
+        };
+
+        FlattenFolder {
+            base: self.base,
+            previous,
+        }
+    }
+
+    fn complete(self) -> Self::Result {
+        match self.previous {
+            Some(previous) => previous,
+            None => self.base.into_folder().complete(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
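
`flatten()` is the closure-free counterpart of `flat_map`: every item is already `IntoParallelIterator` and is driven in parallel by `FlattenFolder::consume`. Illustrative sketch:

```
use rayon::prelude::*;

let nested = vec![vec![1, 2], vec![3, 4, 5]];
// Each inner Vec becomes its own parallel iterator before being merged.
let flat: Vec<i32> = nested.into_par_iter().flatten().collect();
assert_eq!(flat, vec![1, 2, 3, 4, 5]);
```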
diff --git a/src/iter/flatten_iter.rs b/src/iter/flatten_iter.rs
new file mode 100644
index 0000000..3ce0a3c
--- /dev/null
+++ b/src/iter/flatten_iter.rs
@@ -0,0 +1,132 @@
+use super::plumbing::*;
+use super::*;
+
+/// `FlattenIter` turns each element into a serial iterator, then flattens these iterators
+/// together. This struct is created by the [`flatten_iter()`] method on [`ParallelIterator`].
+///
+/// [`flatten_iter()`]: trait.ParallelIterator.html#method.flatten_iter
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct FlattenIter<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> FlattenIter<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoIterator,
+    <I::Item as IntoIterator>::Item: Send,
+{
+    /// Creates a new `FlattenIter` iterator.
+    pub(super) fn new(base: I) -> Self {
+        FlattenIter { base }
+    }
+}
+
+impl<I> ParallelIterator for FlattenIter<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoIterator,
+    <I::Item as IntoIterator>::Item: Send,
+{
+    type Item = <I::Item as IntoIterator>::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlattenIterConsumer::new(consumer);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlattenIterConsumer<C> {
+    base: C,
+}
+
+impl<C> FlattenIterConsumer<C> {
+    fn new(base: C) -> Self {
+        FlattenIterConsumer { base }
+    }
+}
+
+impl<T, C> Consumer<T> for FlattenIterConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoIterator,
+{
+    type Folder = FlattenIterFolder<C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlattenIterConsumer::new(left),
+            FlattenIterConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlattenIterFolder {
+            base: self.base.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<T, C> UnindexedConsumer<T> for FlattenIterConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlattenIterConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlattenIterFolder<C> {
+    base: C,
+}
+
+impl<T, C> Folder<T> for FlattenIterFolder<C>
+where
+    C: Folder<T::Item>,
+    T: IntoIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let base = self.base.consume_iter(item);
+        FlattenIterFolder { base }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let iter = iter.into_iter().flatten();
+        let base = self.base.consume_iter(iter);
+        FlattenIterFolder { base }
+    }
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
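
`flatten_iter()` mirrors `flatten()` but hands each inner value to `consume_iter` as a serial iterator instead of driving it in parallel. Illustrative sketch:

```
use rayon::prelude::*;

let nested = vec![vec![1, 2], vec![3, 4, 5]];
// Inner collections are iterated sequentially inside each folder.
let flat: Vec<i32> = nested.into_par_iter().flatten_iter().collect();
assert_eq!(flat, vec![1, 2, 3, 4, 5]);
```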
diff --git a/src/iter/fold.rs b/src/iter/fold.rs
new file mode 100644
index 0000000..345afbd
--- /dev/null
+++ b/src/iter/fold.rs
@@ -0,0 +1,302 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+impl<U, I, ID, F> Fold<I, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U + Sync + Send,
+    U: Send,
+{
+    pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self {
+        Fold {
+            base,
+            identity,
+            fold_op,
+        }
+    }
+}
+
+/// `Fold` is an iterator that applies a function over an iterator producing a single value.
+/// This struct is created by the [`fold()`] method on [`ParallelIterator`]
+///
+/// [`fold()`]: trait.ParallelIterator.html#method.fold
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Fold<I, ID, F> {
+    base: I,
+    identity: ID,
+    fold_op: F,
+}
+
+impl<I: ParallelIterator + Debug, ID, F> Debug for Fold<I, ID, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Fold").field("base", &self.base).finish()
+    }
+}
+
+impl<U, I, ID, F> ParallelIterator for Fold<I, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U + Sync + Send,
+    U: Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = FoldConsumer {
+            base: consumer,
+            fold_op: &self.fold_op,
+            identity: &self.identity,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct FoldConsumer<'c, C, ID, F> {
+    base: C,
+    fold_op: &'c F,
+    identity: &'c ID,
+}
+
+impl<'r, U, T, C, ID, F> Consumer<T> for FoldConsumer<'r, C, ID, F>
+where
+    C: Consumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    ID: Fn() -> U + Sync,
+    U: Send,
+{
+    type Folder = FoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FoldConsumer { base: left, ..self },
+            FoldConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FoldFolder {
+            base: self.base.into_folder(),
+            item: (self.identity)(),
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, ID, F> UnindexedConsumer<T> for FoldConsumer<'r, C, ID, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    ID: Fn() -> U + Sync,
+    U: Send,
+{
+    fn split_off_left(&self) -> Self {
+        FoldConsumer {
+            base: self.base.split_off_left(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FoldFolder<'r, C, ID, F> {
+    base: C,
+    fold_op: &'r F,
+    item: ID,
+}
+
+impl<'r, C, ID, F, T> Folder<T> for FoldFolder<'r, C, ID, F>
+where
+    C: Folder<ID>,
+    F: Fn(ID, T) -> ID + Sync,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let item = (self.fold_op)(self.item, item);
+        FoldFolder {
+            base: self.base,
+            fold_op: self.fold_op,
+            item,
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn not_full<C, ID, T>(base: &C) -> impl Fn(&T) -> bool + '_
+        where
+            C: Folder<ID>,
+        {
+            move |_| !base.full()
+        }
+
+        let base = self.base;
+        let item = iter
+            .into_iter()
+            // stop iterating if another thread has finished
+            .take_while(not_full(&base))
+            .fold(self.item, self.fold_op);
+
+        FoldFolder {
+            base,
+            item,
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.consume(self.item).complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+// ///////////////////////////////////////////////////////////////////////////
+
+impl<U, I, F> FoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    U: Send + Clone,
+{
+    pub(super) fn new(base: I, item: U, fold_op: F) -> Self {
+        FoldWith {
+            base,
+            item,
+            fold_op,
+        }
+    }
+}
+
+/// `FoldWith` is an iterator that applies a function over an iterator producing a single value.
+/// This struct is created by the [`fold_with()`] method on [`ParallelIterator`]
+///
+/// [`fold_with()`]: trait.ParallelIterator.html#method.fold_with
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FoldWith<I, U, F> {
+    base: I,
+    item: U,
+    fold_op: F,
+}
+
+impl<I: ParallelIterator + Debug, U: Debug, F> Debug for FoldWith<I, U, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FoldWith")
+            .field("base", &self.base)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<U, I, F> ParallelIterator for FoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    U: Send + Clone,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = FoldWithConsumer {
+            base: consumer,
+            item: self.item,
+            fold_op: &self.fold_op,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct FoldWithConsumer<'c, C, U, F> {
+    base: C,
+    item: U,
+    fold_op: &'c F,
+}
+
+impl<'r, U, T, C, F> Consumer<T> for FoldWithConsumer<'r, C, U, F>
+where
+    C: Consumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    U: Send + Clone,
+{
+    type Folder = FoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FoldWithConsumer {
+                base: left,
+                item: self.item.clone(),
+                ..self
+            },
+            FoldWithConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FoldFolder {
+            base: self.base.into_folder(),
+            item: self.item,
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, F> UnindexedConsumer<T> for FoldWithConsumer<'r, C, U, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    U: Send + Clone,
+{
+    fn split_off_left(&self) -> Self {
+        FoldWithConsumer {
+            base: self.base.split_off_left(),
+            item: self.item.clone(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
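
`fold` yields one partially folded value per folder rather than a single result, so it is normally followed by a reduction such as `sum` or `reduce`; `fold_with` clones a seed value instead of calling an identity closure. A minimal sketch, illustrative only:

```
use rayon::prelude::*;

// Each folder accumulates a partial sum; sum() then combines the partials.
let total: u32 = (0..100u32)
    .into_par_iter()
    .fold(|| 0u32, |acc, x| acc + x)
    .sum();
assert_eq!(total, 4950);

// fold_with starts every folder from a clone of the given seed.
let total2: u32 = (0..100u32)
    .into_par_iter()
    .fold_with(0u32, |acc, x| acc + x)
    .sum();
assert_eq!(total2, 4950);
```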
diff --git a/src/iter/for_each.rs b/src/iter/for_each.rs
new file mode 100644
index 0000000..3b77beb
--- /dev/null
+++ b/src/iter/for_each.rs
@@ -0,0 +1,77 @@
+use super::noop::*;
+use super::plumbing::*;
+use super::ParallelIterator;
+
+pub(super) fn for_each<I, F, T>(pi: I, op: &F)
+where
+    I: ParallelIterator<Item = T>,
+    F: Fn(T) + Sync,
+    T: Send,
+{
+    let consumer = ForEachConsumer { op };
+    pi.drive_unindexed(consumer)
+}
+
+struct ForEachConsumer<'f, F> {
+    op: &'f F,
+}
+
+impl<'f, F, T> Consumer<T> for ForEachConsumer<'f, F>
+where
+    F: Fn(T) + Sync,
+{
+    type Folder = ForEachConsumer<'f, F>;
+    type Reducer = NoopReducer;
+    type Result = ();
+
+    fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) {
+        (self.split_off_left(), self, NoopReducer)
+    }
+
+    fn into_folder(self) -> Self {
+        self
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'f, F, T> Folder<T> for ForEachConsumer<'f, F>
+where
+    F: Fn(T) + Sync,
+{
+    type Result = ();
+
+    fn consume(self, item: T) -> Self {
+        (self.op)(item);
+        self
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        iter.into_iter().for_each(self.op);
+        self
+    }
+
+    fn complete(self) {}
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'f, F, T> UnindexedConsumer<T> for ForEachConsumer<'f, F>
+where
+    F: Fn(T) + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        ForEachConsumer { op: self.op }
+    }
+
+    fn to_reducer(&self) -> NoopReducer {
+        NoopReducer
+    }
+}
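
`for_each` has nothing to reduce, which is why the consumer doubles as its own folder and uses `NoopReducer`. Illustrative sketch:

```
use rayon::prelude::*;
use std::sync::atomic::{AtomicUsize, Ordering};

let counter = AtomicUsize::new(0);
// The side-effecting closure runs once per item across the thread pool.
(0..1000).into_par_iter().for_each(|_| {
    counter.fetch_add(1, Ordering::Relaxed);
});
assert_eq!(counter.load(Ordering::Relaxed), 1000);
```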
diff --git a/src/iter/from_par_iter.rs b/src/iter/from_par_iter.rs
new file mode 100644
index 0000000..3240f32
--- /dev/null
+++ b/src/iter/from_par_iter.rs
@@ -0,0 +1,228 @@
+use super::noop::NoopConsumer;
+use super::{FromParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator};
+
+use std::borrow::Cow;
+use std::collections::LinkedList;
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BinaryHeap, VecDeque};
+use std::hash::{BuildHasher, Hash};
+
+/// Creates an empty default collection and extends it.
+fn collect_extended<C, I>(par_iter: I) -> C
+where
+    I: IntoParallelIterator,
+    C: ParallelExtend<I::Item> + Default,
+{
+    let mut collection = C::default();
+    collection.par_extend(par_iter);
+    collection
+}
+
+/// Collects items from a parallel iterator into a vector.
+impl<T> FromParallelIterator<T> for Vec<T>
+where
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects items from a parallel iterator into a vecdeque.
+impl<T> FromParallelIterator<T> for VecDeque<T>
+where
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        Vec::from_par_iter(par_iter).into()
+    }
+}
+
+/// Collects items from a parallel iterator into a binaryheap.
+/// The heap-ordering is calculated serially after all items are collected.
+impl<T> FromParallelIterator<T> for BinaryHeap<T>
+where
+    T: Ord + Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        Vec::from_par_iter(par_iter).into()
+    }
+}
+
+/// Collects items from a parallel iterator into a freshly allocated
+/// linked list.
+impl<T> FromParallelIterator<T> for LinkedList<T>
+where
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects (key, value) pairs from a parallel iterator into a
+/// hashmap. If multiple pairs correspond to the same key, then the
+/// ones produced earlier in the parallel iterator will be
+/// overwritten, just as with a sequential iterator.
+impl<K, V, S> FromParallelIterator<(K, V)> for HashMap<K, V, S>
+where
+    K: Eq + Hash + Send,
+    V: Send,
+    S: BuildHasher + Default + Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects (key, value) pairs from a parallel iterator into a
+/// btreemap. If multiple pairs correspond to the same key, then the
+/// ones produced earlier in the parallel iterator will be
+/// overwritten, just as with a sequential iterator.
+impl<K, V> FromParallelIterator<(K, V)> for BTreeMap<K, V>
+where
+    K: Ord + Send,
+    V: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects values from a parallel iterator into a hashset.
+impl<V, S> FromParallelIterator<V> for HashSet<V, S>
+where
+    V: Eq + Hash + Send,
+    S: BuildHasher + Default + Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = V>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects values from a parallel iterator into a btreeset.
+impl<V> FromParallelIterator<V> for BTreeSet<V>
+where
+    V: Send + Ord,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = V>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects characters from a parallel iterator into a string.
+impl FromParallelIterator<char> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = char>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects characters from a parallel iterator into a string.
+impl<'a> FromParallelIterator<&'a char> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = &'a char>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects string slices from a parallel iterator into a string.
+impl<'a> FromParallelIterator<&'a str> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = &'a str>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects strings from a parallel iterator into one large string.
+impl FromParallelIterator<String> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = String>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects string slices from a parallel iterator into a string.
+impl<'a> FromParallelIterator<Cow<'a, str>> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = Cow<'a, str>>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects an arbitrary `Cow` collection.
+///
+/// Note, the standard library only has `FromIterator` for `Cow<'a, str>` and
+/// `Cow<'a, [T]>`, because no one thought to add a blanket implementation
+/// before it was stabilized.
+impl<'a, C: ?Sized, T> FromParallelIterator<T> for Cow<'a, C>
+where
+    C: ToOwned,
+    C::Owned: FromParallelIterator<T>,
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        Cow::Owned(C::Owned::from_par_iter(par_iter))
+    }
+}
+
+/// Collapses all unit items from a parallel iterator into one.
+///
+/// This is more useful when combined with higher-level abstractions, like
+/// collecting to a `Result<(), E>` where you only care about errors:
+///
+/// ```
+/// use std::io::*;
+/// use rayon::prelude::*;
+///
+/// let data = vec![1, 2, 3, 4, 5];
+/// let res: Result<()> = data.par_iter()
+///     .map(|x| writeln!(stdout(), "{}", x))
+///     .collect();
+/// assert!(res.is_ok());
+/// ```
+impl FromParallelIterator<()> for () {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = ()>,
+    {
+        par_iter.into_par_iter().drive_unindexed(NoopConsumer)
+    }
+}
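
`collect()` on a parallel iterator dispatches through these `FromParallelIterator` impls. Illustrative sketch, not part of the imported sources:

```
use rayon::prelude::*;
use std::collections::HashMap;

// Pairs collect into a map; later duplicates would overwrite earlier ones.
let squares: HashMap<u32, u32> = (0..10u32)
    .into_par_iter()
    .map(|x| (x, x * x))
    .collect();
assert_eq!(squares[&3], 9);

// String pieces concatenate in the original order.
let joined: String = vec!["par", "allel"].into_par_iter().collect();
assert_eq!(joined, "parallel");
```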
diff --git a/src/iter/inspect.rs b/src/iter/inspect.rs
new file mode 100644
index 0000000..9b1cd09
--- /dev/null
+++ b/src/iter/inspect.rs
@@ -0,0 +1,257 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+use std::iter;
+
+/// `Inspect` is an iterator that calls a function with a reference to each
+/// element before yielding it.
+///
+/// This struct is created by the [`inspect()`] method on [`ParallelIterator`]
+///
+/// [`inspect()`]: trait.ParallelIterator.html#method.inspect
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Inspect<I: ParallelIterator, F> {
+    base: I,
+    inspect_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for Inspect<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Inspect").field("base", &self.base).finish()
+    }
+}
+
+impl<I, F> Inspect<I, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Inspect` iterator.
+    pub(super) fn new(base: I, inspect_op: F) -> Self {
+        Inspect { base, inspect_op }
+    }
+}
+
+impl<I, F> ParallelIterator for Inspect<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(&I::Item) + Sync + Send,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = InspectConsumer::new(consumer, &self.inspect_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, F> IndexedParallelIterator for Inspect<I, F>
+where
+    I: IndexedParallelIterator,
+    F: Fn(&I::Item) + Sync + Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = InspectConsumer::new(consumer, &self.inspect_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            inspect_op: self.inspect_op,
+        });
+
+        struct Callback<CB, F> {
+            callback: CB,
+            inspect_op: F,
+        }
+
+        impl<T, F, CB> ProducerCallback<T> for Callback<CB, F>
+        where
+            CB: ProducerCallback<T>,
+            F: Fn(&T) + Sync,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = InspectProducer {
+                    base,
+                    inspect_op: &self.inspect_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct InspectProducer<'f, P, F> {
+    base: P,
+    inspect_op: &'f F,
+}
+
+impl<'f, P, F> Producer for InspectProducer<'f, P, F>
+where
+    P: Producer,
+    F: Fn(&P::Item) + Sync,
+{
+    type Item = P::Item;
+    type IntoIter = iter::Inspect<P::IntoIter, &'f F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().inspect(self.inspect_op)
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            InspectProducer {
+                base: left,
+                inspect_op: self.inspect_op,
+            },
+            InspectProducer {
+                base: right,
+                inspect_op: self.inspect_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = InspectFolder {
+            base: folder,
+            inspect_op: self.inspect_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct InspectConsumer<'f, C, F> {
+    base: C,
+    inspect_op: &'f F,
+}
+
+impl<'f, C, F> InspectConsumer<'f, C, F> {
+    fn new(base: C, inspect_op: &'f F) -> Self {
+        InspectConsumer { base, inspect_op }
+    }
+}
+
+impl<'f, T, C, F> Consumer<T> for InspectConsumer<'f, C, F>
+where
+    C: Consumer<T>,
+    F: Fn(&T) + Sync,
+{
+    type Folder = InspectFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            InspectConsumer::new(left, self.inspect_op),
+            InspectConsumer::new(right, self.inspect_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        InspectFolder {
+            base: self.base.into_folder(),
+            inspect_op: self.inspect_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, C, F> UnindexedConsumer<T> for InspectConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<T>,
+    F: Fn(&T) + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        InspectConsumer::new(self.base.split_off_left(), &self.inspect_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct InspectFolder<'f, C, F> {
+    base: C,
+    inspect_op: &'f F,
+}
+
+impl<'f, T, C, F> Folder<T> for InspectFolder<'f, C, F>
+where
+    C: Folder<T>,
+    F: Fn(&T),
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        (self.inspect_op)(&item);
+        InspectFolder {
+            base: self.base.consume(item),
+            inspect_op: self.inspect_op,
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        self.base = self
+            .base
+            .consume_iter(iter.into_iter().inspect(self.inspect_op));
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
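
`inspect` passes each item through untouched after calling the closure on a reference, which makes it useful for peeking into a pipeline; the closure may run on any worker thread and in no particular order. Illustrative sketch:

```
use rayon::prelude::*;

let sum: i32 = (1..6)
    .into_par_iter()
    .inspect(|x| println!("about to add {}", x))
    .sum();
assert_eq!(sum, 15);
```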
diff --git a/src/iter/interleave.rs b/src/iter/interleave.rs
new file mode 100644
index 0000000..b5d43d5
--- /dev/null
+++ b/src/iter/interleave.rs
@@ -0,0 +1,336 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp;
+use std::iter::Fuse;
+
+/// `Interleave` is an iterator that interleaves elements of iterators
+/// `i` and `j` in one continuous iterator. This struct is created by
+/// the [`interleave()`] method on [`IndexedParallelIterator`]
+///
+/// [`interleave()`]: trait.IndexedParallelIterator.html#method.interleave
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    i: I,
+    j: J,
+}
+
+impl<I, J> Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    /// Creates a new `Interleave` iterator
+    pub(super) fn new(i: I, j: J) -> Self {
+        Interleave { i, j }
+    }
+}
+
+impl<I, J> ParallelIterator for Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<I::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I, J> IndexedParallelIterator for Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.i.len().checked_add(self.j.len()).expect("overflow")
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let (i_len, j_len) = (self.i.len(), self.j.len());
+        return self.i.with_producer(CallbackI {
+            callback,
+            i_len,
+            j_len,
+            i_next: false,
+            j: self.j,
+        });
+
+        struct CallbackI<CB, J> {
+            callback: CB,
+            i_len: usize,
+            j_len: usize,
+            i_next: bool,
+            j: J,
+        }
+
+        impl<CB, J> ProducerCallback<J::Item> for CallbackI<CB, J>
+        where
+            J: IndexedParallelIterator,
+            CB: ProducerCallback<J::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<I>(self, i_producer: I) -> Self::Output
+            where
+                I: Producer<Item = J::Item>,
+            {
+                self.j.with_producer(CallbackJ {
+                    i_producer,
+                    i_len: self.i_len,
+                    j_len: self.j_len,
+                    i_next: self.i_next,
+                    callback: self.callback,
+                })
+            }
+        }
+
+        struct CallbackJ<CB, I> {
+            callback: CB,
+            i_len: usize,
+            j_len: usize,
+            i_next: bool,
+            i_producer: I,
+        }
+
+        impl<CB, I> ProducerCallback<I::Item> for CallbackJ<CB, I>
+        where
+            I: Producer,
+            CB: ProducerCallback<I::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<J>(self, j_producer: J) -> Self::Output
+            where
+                J: Producer<Item = I::Item>,
+            {
+                let producer = InterleaveProducer::new(
+                    self.i_producer,
+                    j_producer,
+                    self.i_len,
+                    self.j_len,
+                    self.i_next,
+                );
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+struct InterleaveProducer<I, J>
+where
+    I: Producer,
+    J: Producer<Item = I::Item>,
+{
+    i: I,
+    j: J,
+    i_len: usize,
+    j_len: usize,
+    i_next: bool,
+}
+
+impl<I, J> InterleaveProducer<I, J>
+where
+    I: Producer,
+    J: Producer<Item = I::Item>,
+{
+    fn new(i: I, j: J, i_len: usize, j_len: usize, i_next: bool) -> InterleaveProducer<I, J> {
+        InterleaveProducer {
+            i,
+            j,
+            i_len,
+            j_len,
+            i_next,
+        }
+    }
+}
+
+impl<I, J> Producer for InterleaveProducer<I, J>
+where
+    I: Producer,
+    J: Producer<Item = I::Item>,
+{
+    type Item = I::Item;
+    type IntoIter = InterleaveSeq<I::IntoIter, J::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        InterleaveSeq {
+            i: self.i.into_iter().fuse(),
+            j: self.j.into_iter().fuse(),
+            i_next: self.i_next,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.i.min_len(), self.j.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.i.max_len(), self.j.max_len())
+    }
+
+    /// We know 0 < index <= self.i_len + self.j_len
+    ///
+    /// Find a, b satisfying:
+    ///
+    ///  (1) 0 < a <= self.i_len
+    ///  (2) 0 < b <= self.j_len
+    ///  (3) a + b == index
+    ///
+    /// For even splits, set a = b = index/2.
+    /// For odd splits, set a = (index/2)+1, b = index/2, if `i`
+    /// should yield the next element, otherwise, if `j` should yield
+    /// the next element, set a = index/2 and b = (index/2)+1
+    fn split_at(self, index: usize) -> (Self, Self) {
+        #[inline]
+        fn odd_offset(flag: bool) -> usize {
+            (!flag) as usize
+        }
+
+        let even = index % 2 == 0;
+        let idx = index >> 1;
+
+        // desired split
+        let (i_idx, j_idx) = (
+            idx + odd_offset(even || self.i_next),
+            idx + odd_offset(even || !self.i_next),
+        );
+
+        let (i_split, j_split) = if self.i_len >= i_idx && self.j_len >= j_idx {
+            (i_idx, j_idx)
+        } else if self.i_len >= i_idx {
+            // j too short
+            (index - self.j_len, self.j_len)
+        } else {
+            // i too short
+            (self.i_len, index - self.i_len)
+        };
+
+        let trailing_i_next = even == self.i_next;
+        let (i_left, i_right) = self.i.split_at(i_split);
+        let (j_left, j_right) = self.j.split_at(j_split);
+
+        (
+            InterleaveProducer::new(i_left, j_left, i_split, j_split, self.i_next),
+            InterleaveProducer::new(
+                i_right,
+                j_right,
+                self.i_len - i_split,
+                self.j_len - j_split,
+                trailing_i_next,
+            ),
+        )
+    }
+}
+
+/// Wrapper for Interleave to implement DoubleEndedIterator and
+/// ExactSizeIterator.
+///
+/// This iterator is fused.
+struct InterleaveSeq<I, J> {
+    i: Fuse<I>,
+    j: Fuse<J>,
+
+    /// Flag to control which iterator should provide the next element. When
+    /// `false`, `i` produces the next element; otherwise `j` produces the
+    /// next element.
+    i_next: bool,
+}
+
+/// Iterator implementation for InterleaveSeq. This implementation is
+/// taken more or less verbatim from itertools. It is replicated here
+/// (instead of calling itertools directly), because we also need to
+/// implement `DoubleEndedIterator` and `ExactSizeIterator`.
+impl<I, J> Iterator for InterleaveSeq<I, J>
+where
+    I: Iterator,
+    J: Iterator<Item = I::Item>,
+{
+    type Item = I::Item;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.i_next = !self.i_next;
+        if self.i_next {
+            match self.i.next() {
+                None => self.j.next(),
+                r => r,
+            }
+        } else {
+            match self.j.next() {
+                None => self.i.next(),
+                r => r,
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (ih, jh) = (self.i.size_hint(), self.j.size_hint());
+        let min = ih.0.saturating_add(jh.0);
+        let max = match (ih.1, jh.1) {
+            (Some(x), Some(y)) => x.checked_add(y),
+            _ => None,
+        };
+        (min, max)
+    }
+}
+
+// The implementation for DoubleEndedIterator requires
+// ExactSizeIterator to provide `next_back()`. The last element will
+// come from the iterator that runs out last (i.e. has the most elements
+// in it). If the iterators have the same number of elements, then the
+// iterator scheduled to yield last provides the last element.
+impl<I, J> DoubleEndedIterator for InterleaveSeq<I, J>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    J: DoubleEndedIterator<Item = I::Item> + ExactSizeIterator<Item = I::Item>,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<I::Item> {
+        if self.i.len() == self.j.len() {
+            if self.i_next {
+                self.i.next_back()
+            } else {
+                self.j.next_back()
+            }
+        } else if self.i.len() < self.j.len() {
+            self.j.next_back()
+        } else {
+            self.i.next_back()
+        }
+    }
+}
+
+impl<I, J> ExactSizeIterator for InterleaveSeq<I, J>
+where
+    I: ExactSizeIterator,
+    J: ExactSizeIterator<Item = I::Item>,
+{
+    #[inline]
+    fn len(&self) -> usize {
+        self.i.len() + self.j.len()
+    }
+}
diff --git a/src/iter/interleave_shortest.rs b/src/iter/interleave_shortest.rs
new file mode 100644
index 0000000..7d81369
--- /dev/null
+++ b/src/iter/interleave_shortest.rs
@@ -0,0 +1,85 @@
+use super::plumbing::*;
+use super::*;
+
+/// `InterleaveShortest` is an iterator that works similarly to
+/// `Interleave`, but this version stops returning elements once one
+/// of the iterators runs out.
+///
+/// This struct is created by the [`interleave_shortest()`] method on
+/// [`IndexedParallelIterator`].
+///
+/// [`interleave_shortest()`]: trait.IndexedParallelIterator.html#method.interleave_shortest
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
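+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative only):
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let r: Vec<i32> = vec![1, 2, 3]
+///     .into_par_iter()
+///     .interleave_shortest(vec![10, 20, 30, 40])
+///     .collect();
+/// // Alternation starts with the left iterator and stops when the
+/// // shorter input (here the left one) is exhausted.
+/// assert_eq!(r, vec![1, 10, 2, 20, 3, 30]);
+/// ```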
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    interleave: Interleave<Take<I>, Take<J>>,
+}
+
+impl<I, J> InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    /// Creates a new `InterleaveShortest` iterator
+    pub(super) fn new(i: I, j: J) -> Self {
+        InterleaveShortest {
+            interleave: if i.len() <= j.len() {
+                // take equal lengths from both iterators
+                let n = i.len();
+                i.take(n).interleave(j.take(n))
+            } else {
+                // take one extra item from the first iterator
+                let n = j.len();
+                i.take(n + 1).interleave(j.take(n))
+            },
+        }
+    }
+}
+
+impl<I, J> ParallelIterator for InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<I::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I, J> IndexedParallelIterator for InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.interleave.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        self.interleave.with_producer(callback)
+    }
+}
diff --git a/src/iter/intersperse.rs b/src/iter/intersperse.rs
new file mode 100644
index 0000000..798bdc1
--- /dev/null
+++ b/src/iter/intersperse.rs
@@ -0,0 +1,410 @@
+use super::plumbing::*;
+use super::*;
+use std::cell::Cell;
+use std::iter::{self, Fuse};
+
+/// `Intersperse` is an iterator that inserts a particular item between each
+/// item of the adapted iterator.  This struct is created by the
+/// [`intersperse()`] method on [`ParallelIterator`]
+///
+/// [`intersperse()`]: trait.ParallelIterator.html#method.intersperse
+/// [`ParallelIterator`]: trait.ParallelIterator.html
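+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative only):
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let r: Vec<i32> = vec![1, 2, 3].into_par_iter().intersperse(0).collect();
+/// // The separator is cloned in between every pair of adjacent items.
+/// assert_eq!(r, vec![1, 0, 2, 0, 3]);
+/// ```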
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone, Debug)]
+pub struct Intersperse<I>
+where
+    I: ParallelIterator,
+    I::Item: Clone,
+{
+    base: I,
+    item: I::Item,
+}
+
+impl<I> Intersperse<I>
+where
+    I: ParallelIterator,
+    I::Item: Clone,
+{
+    /// Creates a new `Intersperse` iterator
+    pub(super) fn new(base: I, item: I::Item) -> Self {
+        Intersperse { base, item }
+    }
+}
+
+impl<I> ParallelIterator for Intersperse<I>
+where
+    I: ParallelIterator,
+    I::Item: Clone + Send,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<I::Item>,
+    {
+        let consumer1 = IntersperseConsumer::new(consumer, self.item);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        match self.base.opt_len()? {
+            0 => Some(0),
+            len => len.checked_add(len - 1),
+        }
+    }
+}
+
+impl<I> IndexedParallelIterator for Intersperse<I>
+where
+    I: IndexedParallelIterator,
+    I::Item: Clone + Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = IntersperseConsumer::new(consumer, self.item);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        let len = self.base.len();
+        if len > 0 {
+            len.checked_add(len - 1).expect("overflow")
+        } else {
+            0
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.len();
+        return self.base.with_producer(Callback {
+            callback,
+            item: self.item,
+            len,
+        });
+
+        struct Callback<CB, T> {
+            callback: CB,
+            item: T,
+            len: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB, T>
+        where
+            CB: ProducerCallback<T>,
+            T: Clone + Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = IntersperseProducer::new(base, self.item, self.len);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+struct IntersperseProducer<P>
+where
+    P: Producer,
+{
+    base: P,
+    item: P::Item,
+    len: usize,
+    clone_first: bool,
+}
+
+impl<P> IntersperseProducer<P>
+where
+    P: Producer,
+{
+    fn new(base: P, item: P::Item, len: usize) -> Self {
+        IntersperseProducer {
+            base,
+            item,
+            len,
+            clone_first: false,
+        }
+    }
+}
+
+impl<P> Producer for IntersperseProducer<P>
+where
+    P: Producer,
+    P::Item: Clone + Send,
+{
+    type Item = P::Item;
+    type IntoIter = IntersperseIter<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntersperseIter {
+            base: self.base.into_iter().fuse(),
+            item: self.item,
+            clone_first: self.len > 0 && self.clone_first,
+
+            // If there's more than one item, then even lengths end the opposite
+            // of how they started with respect to interspersed clones.
+            clone_last: self.len > 1 && ((self.len & 1 == 0) ^ self.clone_first),
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        debug_assert!(index <= self.len);
+
+        // The left needs half of the items from the base producer, and the
+        // other half will be our interspersed item.  If we're not leading with
+        // a cloned item, then we need to round up the base number of items,
+        // otherwise round down.
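+        //
+        // For example (worked from the rule above): with `len == 5` (base items
+        // `a, b, c` interspersed as `a x b x c`) and `clone_first == false`,
+        // splitting at `index == 3` gives `base_index == 2`, so the left side
+        // covers `a x b` and the right side covers `x c`, with the right side
+        // leading with a cloned separator.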
+        let base_index = (index + !self.clone_first as usize) / 2;
+        let (left_base, right_base) = self.base.split_at(base_index);
+
+        let left = IntersperseProducer {
+            base: left_base,
+            item: self.item.clone(),
+            len: index,
+            clone_first: self.clone_first,
+        };
+
+        let right = IntersperseProducer {
+            base: right_base,
+            item: self.item,
+            len: self.len - index,
+
+            // If the index is odd, the right side toggles `clone_first`.
+            clone_first: (index & 1 == 1) ^ self.clone_first,
+        };
+
+        (left, right)
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        let folder1 = IntersperseFolder {
+            base: folder,
+            item: self.item,
+            clone_first: self.clone_first,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+struct IntersperseIter<I>
+where
+    I: Iterator,
+{
+    base: Fuse<I>,
+    item: I::Item,
+    clone_first: bool,
+    clone_last: bool,
+}
+
+impl<I> Iterator for IntersperseIter<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    I::Item: Clone,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.clone_first {
+            self.clone_first = false;
+            Some(self.item.clone())
+        } else if let next @ Some(_) = self.base.next() {
+            // If there are any items left, we'll need another clone in front.
+            self.clone_first = self.base.len() != 0;
+            next
+        } else if self.clone_last {
+            self.clone_last = false;
+            Some(self.item.clone())
+        } else {
+            None
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.len();
+        (len, Some(len))
+    }
+}
+
+impl<I> DoubleEndedIterator for IntersperseIter<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    I::Item: Clone,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.clone_last {
+            self.clone_last = false;
+            Some(self.item.clone())
+        } else if let next_back @ Some(_) = self.base.next_back() {
+            // If there are any items left, we'll need another clone in back.
+            self.clone_last = self.base.len() != 0;
+            next_back
+        } else if self.clone_first {
+            self.clone_first = false;
+            Some(self.item.clone())
+        } else {
+            None
+        }
+    }
+}
+
+impl<I> ExactSizeIterator for IntersperseIter<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    I::Item: Clone,
+{
+    fn len(&self) -> usize {
+        let len = self.base.len();
+        len + len.saturating_sub(1) + self.clone_first as usize + self.clone_last as usize
+    }
+}
+
+struct IntersperseConsumer<C, T> {
+    base: C,
+    item: T,
+    clone_first: Cell<bool>,
+}
+
+impl<C, T> IntersperseConsumer<C, T>
+where
+    C: Consumer<T>,
+{
+    fn new(base: C, item: T) -> Self {
+        IntersperseConsumer {
+            base,
+            item,
+            clone_first: false.into(),
+        }
+    }
+}
+
+impl<C, T> Consumer<T> for IntersperseConsumer<C, T>
+where
+    C: Consumer<T>,
+    T: Clone + Send,
+{
+    type Folder = IntersperseFolder<C::Folder, T>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(mut self, index: usize) -> (Self, Self, Self::Reducer) {
+        // We'll feed twice as many items to the base consumer, except if we're
+        // not currently leading with a cloned item, then it's one less.
+        let base_index = index + index.saturating_sub(!self.clone_first.get() as usize);
+        let (left, right, reducer) = self.base.split_at(base_index);
+
+        let right = IntersperseConsumer {
+            base: right,
+            item: self.item.clone(),
+            clone_first: true.into(),
+        };
+        self.base = left;
+        (self, right, reducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        IntersperseFolder {
+            base: self.base.into_folder(),
+            item: self.item,
+            clone_first: self.clone_first.get(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<C, T> UnindexedConsumer<T> for IntersperseConsumer<C, T>
+where
+    C: UnindexedConsumer<T>,
+    T: Clone + Send,
+{
+    fn split_off_left(&self) -> Self {
+        let left = IntersperseConsumer {
+            base: self.base.split_off_left(),
+            item: self.item.clone(),
+            clone_first: self.clone_first.clone(),
+        };
+        self.clone_first.set(true);
+        left
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct IntersperseFolder<C, T> {
+    base: C,
+    item: T,
+    clone_first: bool,
+}
+
+impl<C, T> Folder<T> for IntersperseFolder<C, T>
+where
+    C: Folder<T>,
+    T: Clone,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        if self.clone_first {
+            self.base = self.base.consume(self.item.clone());
+            if self.base.full() {
+                return self;
+            }
+        } else {
+            self.clone_first = true;
+        }
+        self.base = self.base.consume(item);
+        self
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let mut clone_first = self.clone_first;
+        let between_item = self.item;
+        let base = self.base.consume_iter(iter.into_iter().flat_map(|item| {
+            let first = if clone_first {
+                Some(between_item.clone())
+            } else {
+                clone_first = true;
+                None
+            };
+            first.into_iter().chain(iter::once(item))
+        }));
+        IntersperseFolder {
+            base,
+            item: between_item,
+            clone_first,
+        }
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/len.rs b/src/iter/len.rs
new file mode 100644
index 0000000..e65b3c0
--- /dev/null
+++ b/src/iter/len.rs
@@ -0,0 +1,271 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp;
+
+/// `MinLen` is an iterator that imposes a minimum length on iterator splits.
+/// This struct is created by the [`with_min_len()`] method on [`IndexedParallelIterator`]
+///
+/// [`with_min_len()`]: trait.IndexedParallelIterator.html#method.with_min_len
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
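+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative; uses the `with_min_len()` adaptor on
+/// `IndexedParallelIterator`): each split handed to a worker covers at least
+/// 1000 elements.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let sum: i32 = (0..10_000).into_par_iter()
+///     .with_min_len(1000)
+///     .sum();
+/// assert_eq!(sum, (0..10_000).sum());
+/// ```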
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct MinLen<I: IndexedParallelIterator> {
+    base: I,
+    min: usize,
+}
+
+impl<I> MinLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `MinLen` iterator.
+    pub(super) fn new(base: I, min: usize) -> Self {
+        MinLen { base, min }
+    }
+}
+
+impl<I> ParallelIterator for MinLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for MinLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            min: self.min,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            min: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MinLenProducer {
+                    base,
+                    min: self.min,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// `MinLenProducer` implementation
+
+struct MinLenProducer<P> {
+    base: P,
+    min: usize,
+}
+
+impl<P> Producer for MinLenProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = P::IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter()
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.min, self.base.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MinLenProducer {
+                base: left,
+                min: self.min,
+            },
+            MinLenProducer {
+                base: right,
+                min: self.min,
+            },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(folder)
+    }
+}
+
+/// `MaxLen` is an iterator that imposes a maximum length on iterator splits.
+/// This struct is created by the [`with_max_len()`] method on [`IndexedParallelIterator`]
+///
+/// [`with_max_len()`]: trait.IndexedParallelIterator.html#method.with_max_len
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
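+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative; uses the `with_max_len()` adaptor on
+/// `IndexedParallelIterator`): no split handed to a worker covers more than
+/// 100 elements.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let max = (0..10_000).into_par_iter()
+///     .with_max_len(100)
+///     .max();
+/// assert_eq!(max, Some(9_999));
+/// ```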
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct MaxLen<I: IndexedParallelIterator> {
+    base: I,
+    max: usize,
+}
+
+impl<I> MaxLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `MaxLen` iterator.
+    pub(super) fn new(base: I, max: usize) -> Self {
+        MaxLen { base, max }
+    }
+}
+
+impl<I> ParallelIterator for MaxLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for MaxLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            max: self.max,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            max: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MaxLenProducer {
+                    base,
+                    max: self.max,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// `MaxLenProducer` implementation
+
+struct MaxLenProducer<P> {
+    base: P,
+    max: usize,
+}
+
+impl<P> Producer for MaxLenProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = P::IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.max, self.base.max_len())
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MaxLenProducer {
+                base: left,
+                max: self.max,
+            },
+            MaxLenProducer {
+                base: right,
+                max: self.max,
+            },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(folder)
+    }
+}
diff --git a/src/iter/map.rs b/src/iter/map.rs
new file mode 100644
index 0000000..f2a35ff
--- /dev/null
+++ b/src/iter/map.rs
@@ -0,0 +1,259 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+use std::iter;
+
+/// `Map` is an iterator that transforms the elements of an underlying iterator.
+///
+/// This struct is created by the [`map()`] method on [`ParallelIterator`]
+///
+/// [`map()`]: trait.ParallelIterator.html#method.map
+/// [`ParallelIterator`]: trait.ParallelIterator.html
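+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative only):
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let doubled: Vec<i32> = vec![1, 2, 3].into_par_iter().map(|x| x * 2).collect();
+/// assert_eq!(doubled, vec![2, 4, 6]);
+/// ```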
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Map<I: ParallelIterator, F> {
+    base: I,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for Map<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Map").field("base", &self.base).finish()
+    }
+}
+
+impl<I, F> Map<I, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Map` iterator.
+    pub(super) fn new(base: I, map_op: F) -> Self {
+        Map { base, map_op }
+    }
+}
+
+impl<I, F, R> ParallelIterator for Map<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    type Item = F::Output;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = MapConsumer::new(consumer, &self.map_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, F, R> IndexedParallelIterator for Map<I, F>
+where
+    I: IndexedParallelIterator,
+    F: Fn(I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = MapConsumer::new(consumer, &self.map_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            map_op: self.map_op,
+        });
+
+        struct Callback<CB, F> {
+            callback: CB,
+            map_op: F,
+        }
+
+        impl<T, F, R, CB> ProducerCallback<T> for Callback<CB, F>
+        where
+            CB: ProducerCallback<R>,
+            F: Fn(T) -> R + Sync,
+            R: Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MapProducer {
+                    base,
+                    map_op: &self.map_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct MapProducer<'f, P, F> {
+    base: P,
+    map_op: &'f F,
+}
+
+impl<'f, P, F, R> Producer for MapProducer<'f, P, F>
+where
+    P: Producer,
+    F: Fn(P::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = F::Output;
+    type IntoIter = iter::Map<P::IntoIter, &'f F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().map(self.map_op)
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MapProducer {
+                base: left,
+                map_op: self.map_op,
+            },
+            MapProducer {
+                base: right,
+                map_op: self.map_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = MapFolder {
+            base: folder,
+            map_op: self.map_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct MapConsumer<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, C, F> MapConsumer<'f, C, F> {
+    fn new(base: C, map_op: &'f F) -> Self {
+        MapConsumer { base, map_op }
+    }
+}
+
+impl<'f, T, R, C, F> Consumer<T> for MapConsumer<'f, C, F>
+where
+    C: Consumer<F::Output>,
+    F: Fn(T) -> R + Sync,
+    R: Send,
+{
+    type Folder = MapFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            MapConsumer::new(left, self.map_op),
+            MapConsumer::new(right, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        MapFolder {
+            base: self.base.into_folder(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, R, C, F> UnindexedConsumer<T> for MapConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<F::Output>,
+    F: Fn(T) -> R + Sync,
+    R: Send,
+{
+    fn split_off_left(&self) -> Self {
+        MapConsumer::new(self.base.split_off_left(), &self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct MapFolder<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, T, R, C, F> Folder<T> for MapFolder<'f, C, F>
+where
+    C: Folder<F::Output>,
+    F: Fn(T) -> R,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let mapped_item = (self.map_op)(item);
+        MapFolder {
+            base: self.base.consume(mapped_item),
+            map_op: self.map_op,
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        self.base = self.base.consume_iter(iter.into_iter().map(self.map_op));
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/map_with.rs b/src/iter/map_with.rs
new file mode 100644
index 0000000..10b1b4c
--- /dev/null
+++ b/src/iter/map_with.rs
@@ -0,0 +1,573 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `MapWith` is an iterator that transforms the elements of an underlying iterator,
+/// threading a cloned state value (by mutable reference) through the map closure.
+///
+/// This struct is created by the [`map_with()`] method on [`ParallelIterator`]
+///
+/// [`map_with()`]: trait.ParallelIterator.html#method.map_with
+/// [`ParallelIterator`]: trait.ParallelIterator.html
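+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative only): the initial value is cloned as
+/// the work is split, and each job passes its copy to the closure by mutable
+/// reference.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let labels: Vec<String> = (0..3).into_par_iter()
+///     .map_with(String::from("item-"), |prefix, i| format!("{}{}", prefix, i))
+///     .collect();
+/// assert_eq!(labels, ["item-0", "item-1", "item-2"]);
+/// ```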
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct MapWith<I: ParallelIterator, T, F> {
+    base: I,
+    item: T,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, T: Debug, F> Debug for MapWith<I, T, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("MapWith")
+            .field("base", &self.base)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<I, T, F> MapWith<I, T, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `MapWith` iterator.
+    pub(super) fn new(base: I, item: T, map_op: F) -> Self {
+        MapWith { base, item, map_op }
+    }
+}
+
+impl<I, T, F, R> ParallelIterator for MapWith<I, T, F>
+where
+    I: ParallelIterator,
+    T: Send + Clone,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    type Item = R;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = MapWithConsumer::new(consumer, self.item, &self.map_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, T, F, R> IndexedParallelIterator for MapWith<I, T, F>
+where
+    I: IndexedParallelIterator,
+    T: Send + Clone,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = MapWithConsumer::new(consumer, self.item, &self.map_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            item: self.item,
+            map_op: self.map_op,
+        });
+
+        struct Callback<CB, U, F> {
+            callback: CB,
+            item: U,
+            map_op: F,
+        }
+
+        impl<T, U, F, R, CB> ProducerCallback<T> for Callback<CB, U, F>
+        where
+            CB: ProducerCallback<R>,
+            U: Send + Clone,
+            F: Fn(&mut U, T) -> R + Sync,
+            R: Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MapWithProducer {
+                    base,
+                    item: self.item,
+                    map_op: &self.map_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct MapWithProducer<'f, P, U, F> {
+    base: P,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, P, U, F, R> Producer for MapWithProducer<'f, P, U, F>
+where
+    P: Producer,
+    U: Send + Clone,
+    F: Fn(&mut U, P::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = R;
+    type IntoIter = MapWithIter<'f, P::IntoIter, U, F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        MapWithIter {
+            base: self.base.into_iter(),
+            item: self.item,
+            map_op: self.map_op,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MapWithProducer {
+                base: left,
+                item: self.item.clone(),
+                map_op: self.map_op,
+            },
+            MapWithProducer {
+                base: right,
+                item: self.item,
+                map_op: self.map_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = MapWithFolder {
+            base: folder,
+            item: self.item,
+            map_op: self.map_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+struct MapWithIter<'f, I, U, F> {
+    base: I,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, I, U, F, R> Iterator for MapWithIter<'f, I, U, F>
+where
+    I: Iterator,
+    F: Fn(&mut U, I::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = R;
+
+    fn next(&mut self) -> Option<R> {
+        let item = self.base.next()?;
+        Some((self.map_op)(&mut self.item, item))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.base.size_hint()
+    }
+}
+
+impl<'f, I, U, F, R> DoubleEndedIterator for MapWithIter<'f, I, U, F>
+where
+    I: DoubleEndedIterator,
+    F: Fn(&mut U, I::Item) -> R + Sync,
+    R: Send,
+{
+    fn next_back(&mut self) -> Option<R> {
+        let item = self.base.next_back()?;
+        Some((self.map_op)(&mut self.item, item))
+    }
+}
+
+impl<'f, I, U, F, R> ExactSizeIterator for MapWithIter<'f, I, U, F>
+where
+    I: ExactSizeIterator,
+    F: Fn(&mut U, I::Item) -> R + Sync,
+    R: Send,
+{
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct MapWithConsumer<'f, C, U, F> {
+    base: C,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, C, U, F> MapWithConsumer<'f, C, U, F> {
+    fn new(base: C, item: U, map_op: &'f F) -> Self {
+        MapWithConsumer { base, item, map_op }
+    }
+}
+
+impl<'f, T, U, R, C, F> Consumer<T> for MapWithConsumer<'f, C, U, F>
+where
+    C: Consumer<R>,
+    U: Send + Clone,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    type Folder = MapWithFolder<'f, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            MapWithConsumer::new(left, self.item.clone(), self.map_op),
+            MapWithConsumer::new(right, self.item, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        MapWithFolder {
+            base: self.base.into_folder(),
+            item: self.item,
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, U, R, C, F> UnindexedConsumer<T> for MapWithConsumer<'f, C, U, F>
+where
+    C: UnindexedConsumer<R>,
+    U: Send + Clone,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    fn split_off_left(&self) -> Self {
+        MapWithConsumer::new(self.base.split_off_left(), self.item.clone(), self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct MapWithFolder<'f, C, U, F> {
+    base: C,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, T, U, R, C, F> Folder<T> for MapWithFolder<'f, C, U, F>
+where
+    C: Folder<R>,
+    F: Fn(&mut U, T) -> R,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        let mapped_item = (self.map_op)(&mut self.item, item);
+        self.base = self.base.consume(mapped_item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn with<'f, T, U, R>(
+            item: &'f mut U,
+            map_op: impl Fn(&mut U, T) -> R + 'f,
+        ) -> impl FnMut(T) -> R + 'f {
+            move |x| map_op(item, x)
+        }
+
+        {
+            let mapped_iter = iter.into_iter().map(with(&mut self.item, self.map_op));
+            self.base = self.base.consume_iter(mapped_iter);
+        }
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+// ------------------------------------------------------------------------------------------------
+
+/// `MapInit` is an iterator that transforms the elements of an underlying iterator,
+/// threading locally initialized state (by mutable reference) through the map closure.
+///
+/// This struct is created by the [`map_init()`] method on [`ParallelIterator`]
+///
+/// [`map_init()`]: trait.ParallelIterator.html#method.map_init
+/// [`ParallelIterator`]: trait.ParallelIterator.html
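+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative only): `init` builds a fresh local
+/// value for each rayon job, and that value is reused (by mutable reference)
+/// for every item the job processes.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let squares: Vec<i32> = (0..5).into_par_iter()
+///     .map_init(|| 0i32, |scratch, x| { *scratch = x * x; *scratch })
+///     .collect();
+/// assert_eq!(squares, vec![0, 1, 4, 9, 16]);
+/// ```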
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct MapInit<I: ParallelIterator, INIT, F> {
+    base: I,
+    init: INIT,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, INIT, F> Debug for MapInit<I, INIT, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("MapInit").field("base", &self.base).finish()
+    }
+}
+
+impl<I, INIT, F> MapInit<I, INIT, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `MapInit` iterator.
+    pub(super) fn new(base: I, init: INIT, map_op: F) -> Self {
+        MapInit { base, init, map_op }
+    }
+}
+
+impl<I, INIT, T, F, R> ParallelIterator for MapInit<I, INIT, F>
+where
+    I: ParallelIterator,
+    INIT: Fn() -> T + Sync + Send,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    type Item = R;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, INIT, T, F, R> IndexedParallelIterator for MapInit<I, INIT, F>
+where
+    I: IndexedParallelIterator,
+    INIT: Fn() -> T + Sync + Send,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            init: self.init,
+            map_op: self.map_op,
+        });
+
+        struct Callback<CB, INIT, F> {
+            callback: CB,
+            init: INIT,
+            map_op: F,
+        }
+
+        impl<T, INIT, U, F, R, CB> ProducerCallback<T> for Callback<CB, INIT, F>
+        where
+            CB: ProducerCallback<R>,
+            INIT: Fn() -> U + Sync,
+            F: Fn(&mut U, T) -> R + Sync,
+            R: Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MapInitProducer {
+                    base,
+                    init: &self.init,
+                    map_op: &self.map_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct MapInitProducer<'f, P, INIT, F> {
+    base: P,
+    init: &'f INIT,
+    map_op: &'f F,
+}
+
+impl<'f, P, INIT, U, F, R> Producer for MapInitProducer<'f, P, INIT, F>
+where
+    P: Producer,
+    INIT: Fn() -> U + Sync,
+    F: Fn(&mut U, P::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = R;
+    type IntoIter = MapWithIter<'f, P::IntoIter, U, F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        MapWithIter {
+            base: self.base.into_iter(),
+            item: (self.init)(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MapInitProducer {
+                base: left,
+                init: self.init,
+                map_op: self.map_op,
+            },
+            MapInitProducer {
+                base: right,
+                init: self.init,
+                map_op: self.map_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = MapWithFolder {
+            base: folder,
+            item: (self.init)(),
+            map_op: self.map_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct MapInitConsumer<'f, C, INIT, F> {
+    base: C,
+    init: &'f INIT,
+    map_op: &'f F,
+}
+
+impl<'f, C, INIT, F> MapInitConsumer<'f, C, INIT, F> {
+    fn new(base: C, init: &'f INIT, map_op: &'f F) -> Self {
+        MapInitConsumer { base, init, map_op }
+    }
+}
+
+impl<'f, T, INIT, U, R, C, F> Consumer<T> for MapInitConsumer<'f, C, INIT, F>
+where
+    C: Consumer<R>,
+    INIT: Fn() -> U + Sync,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    type Folder = MapWithFolder<'f, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            MapInitConsumer::new(left, self.init, self.map_op),
+            MapInitConsumer::new(right, self.init, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        MapWithFolder {
+            base: self.base.into_folder(),
+            item: (self.init)(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, INIT, U, R, C, F> UnindexedConsumer<T> for MapInitConsumer<'f, C, INIT, F>
+where
+    C: UnindexedConsumer<R>,
+    INIT: Fn() -> U + Sync,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    fn split_off_left(&self) -> Self {
+        MapInitConsumer::new(self.base.split_off_left(), self.init, self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
new file mode 100644
index 0000000..0c82933
--- /dev/null
+++ b/src/iter/mod.rs
@@ -0,0 +1,3121 @@
+//! Traits for writing parallel programs using an iterator-style interface
+//!
+//! You will rarely need to interact with this module directly unless you have
+//! need to name one of the iterator types.
+//!
+//! Parallel iterators make it easy to write iterator-like chains that
+//! execute in parallel: typically all you have to do is convert the
+//! first `.iter()` (or `iter_mut()`, `into_iter()`, etc) method into
+//! `par_iter()` (or `par_iter_mut()`, `into_par_iter()`, etc). For
+//! example, to compute the sum of the squares of a sequence of
+//! integers, one might write:
+//!
+//! ```rust
+//! use rayon::prelude::*;
+//! fn sum_of_squares(input: &[i32]) -> i32 {
+//!     input.par_iter()
+//!          .map(|i| i * i)
+//!          .sum()
+//! }
+//! ```
+//!
+//! Or, to increment all the integers in a slice, you could write:
+//!
+//! ```rust
+//! use rayon::prelude::*;
+//! fn increment_all(input: &mut [i32]) {
+//!     input.par_iter_mut()
+//!          .for_each(|p| *p += 1);
+//! }
+//! ```
+//!
+//! To use parallel iterators, first import the traits by adding
+//! something like `use rayon::prelude::*` to your module. You can
+//! then call `par_iter`, `par_iter_mut`, or `into_par_iter` to get a
+//! parallel iterator. Like a [regular iterator][], parallel
+//! iterators work by first constructing a computation and then
+//! executing it.
+//!
+//! In addition to `par_iter()` and friends, some types offer other
+//! ways to create (or consume) parallel iterators:
+//!
+//! - Slices (`&[T]`, `&mut [T]`) offer methods like `par_split` and
+//!   `par_windows`, as well as various parallel sorting
+//!   operations. See [the `ParallelSlice` trait] for the full list.
+//! - Strings (`&str`) offer methods like `par_split` and `par_lines`.
+//!   See [the `ParallelString` trait] for the full list.
+//! - Various collections offer [`par_extend`], which grows a
+//!   collection given a parallel iterator. (If you don't have a
+//!   collection to extend, you can use [`collect()`] to create a new
+//!   one from scratch.)
+//!
+//! [the `ParallelSlice` trait]: ../slice/trait.ParallelSlice.html
+//! [the `ParallelString` trait]: ../str/trait.ParallelString.html
+//! [`par_extend`]: trait.ParallelExtend.html
+//! [`collect()`]: trait.ParallelIterator.html#method.collect
+//!
+//! To see the full range of methods available on parallel iterators,
+//! check out the [`ParallelIterator`] and [`IndexedParallelIterator`]
+//! traits.
+//!
+//! If you'd like to build a custom parallel iterator, or to write your own
+//! combinator, then check out the [split] function and the [plumbing] module.
+//!
+//! [regular iterator]: http://doc.rust-lang.org/std/iter/trait.Iterator.html
+//! [`ParallelIterator`]: trait.ParallelIterator.html
+//! [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+//! [split]: fn.split.html
+//! [plumbing]: plumbing/index.html
+//!
+//! Note: Several of the `ParallelIterator` methods rely on a `Try` trait which
+//! has been deliberately obscured from the public API.  This trait is intended
+//! to mirror the unstable `std::ops::Try` with implementations for `Option` and
+//! `Result`, where `Some`/`Ok` values will let those iterators continue, but
+//! `None`/`Err` values will exit early.
+//!
+//! A note about object safety: It is currently _not_ possible to wrap
+//! a `ParallelIterator` (or any trait that depends on it) using a
+//! `Box<dyn ParallelIterator>` or other kind of dynamic allocation,
+//! because `ParallelIterator` is **not object-safe**.
+//! (This keeps the implementation simpler and allows extra optimizations.)
+
+use self::plumbing::*;
+use self::private::Try;
+pub use either::Either;
+use std::cmp::{self, Ordering};
+use std::iter::{Product, Sum};
+use std::ops::{Fn, RangeBounds};
+
+pub mod plumbing;
+
+#[cfg(test)]
+mod test;
+
+// There is a method to the madness here:
+//
+// - These modules are private but expose certain types to the end-user
+//   (e.g., `enumerate::Enumerate`) -- specifically, the types that appear in the
+//   public API surface of the `ParallelIterator` traits.
+// - In **this** module, those public types are always used unprefixed, which forces
+//   us to add a `pub use` and helps identify if we missed anything.
+// - In contrast, items that appear **only** in the body of a method,
+//   e.g. `find::find()`, are always used **prefixed**, so that they
+//   can be readily distinguished.
+
+mod chain;
+mod chunks;
+mod cloned;
+mod collect;
+mod copied;
+mod empty;
+mod enumerate;
+mod extend;
+mod filter;
+mod filter_map;
+mod find;
+mod find_first_last;
+mod flat_map;
+mod flat_map_iter;
+mod flatten;
+mod flatten_iter;
+mod fold;
+mod for_each;
+mod from_par_iter;
+mod inspect;
+mod interleave;
+mod interleave_shortest;
+mod intersperse;
+mod len;
+mod map;
+mod map_with;
+mod multizip;
+mod noop;
+mod once;
+mod panic_fuse;
+mod par_bridge;
+mod positions;
+mod product;
+mod reduce;
+mod repeat;
+mod rev;
+mod skip;
+mod splitter;
+mod sum;
+mod take;
+mod try_fold;
+mod try_reduce;
+mod try_reduce_with;
+mod unzip;
+mod update;
+mod while_some;
+mod zip;
+mod zip_eq;
+
+pub use self::{
+    chain::Chain,
+    chunks::Chunks,
+    cloned::Cloned,
+    copied::Copied,
+    empty::{empty, Empty},
+    enumerate::Enumerate,
+    filter::Filter,
+    filter_map::FilterMap,
+    flat_map::FlatMap,
+    flat_map_iter::FlatMapIter,
+    flatten::Flatten,
+    flatten_iter::FlattenIter,
+    fold::{Fold, FoldWith},
+    inspect::Inspect,
+    interleave::Interleave,
+    interleave_shortest::InterleaveShortest,
+    intersperse::Intersperse,
+    len::{MaxLen, MinLen},
+    map::Map,
+    map_with::{MapInit, MapWith},
+    multizip::MultiZip,
+    once::{once, Once},
+    panic_fuse::PanicFuse,
+    par_bridge::{IterBridge, ParallelBridge},
+    positions::Positions,
+    repeat::{repeat, repeatn, Repeat, RepeatN},
+    rev::Rev,
+    skip::Skip,
+    splitter::{split, Split},
+    take::Take,
+    try_fold::{TryFold, TryFoldWith},
+    update::Update,
+    while_some::WhileSome,
+    zip::Zip,
+    zip_eq::ZipEq,
+};
+
+mod step_by;
+#[cfg(step_by)]
+pub use self::step_by::StepBy;
+
+/// `IntoParallelIterator` implements the conversion to a [`ParallelIterator`].
+///
+/// By implementing `IntoParallelIterator` for a type, you define how it will
+/// transformed into an iterator. This is a parallel version of the standard
+/// library's [`std::iter::IntoIterator`] trait.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`std::iter::IntoIterator`]: https://doc.rust-lang.org/std/iter/trait.IntoIterator.html
+pub trait IntoParallelIterator {
+    /// The parallel iterator type that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    type Item: Send;
+
+    /// Converts `self` into a parallel iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// println!("counting in parallel:");
+    /// (0..100).into_par_iter()
+    ///     .for_each(|i| println!("{}", i));
+    /// ```
+    ///
+    /// This conversion is often implicit for arguments to methods like [`zip`].
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let v: Vec<_> = (0..5).into_par_iter().zip(5..10).collect();
+    /// assert_eq!(v, [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]);
+    /// ```
+    ///
+    /// [`zip`]: trait.IndexedParallelIterator.html#method.zip
+    fn into_par_iter(self) -> Self::Iter;
+}
+
+/// `IntoParallelRefIterator` implements the conversion to a
+/// [`ParallelIterator`], providing shared references to the data.
+///
+/// This is a parallel version of the `iter()` method
+/// defined by various collections.
+///
+/// This trait is automatically implemented
+/// `for I where &I: IntoParallelIterator`. In most cases, users
+/// will want to implement [`IntoParallelIterator`] rather than implement
+/// this trait directly.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`IntoParallelIterator`]: trait.IntoParallelIterator.html
+pub trait IntoParallelRefIterator<'data> {
+    /// The type of the parallel iterator that will be returned.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    /// This will typically be an `&'data T` reference type.
+    type Item: Send + 'data;
+
+    /// Converts `self` into a parallel iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let v: Vec<_> = (0..100).collect();
+    /// assert_eq!(v.par_iter().sum::<i32>(), 100 * 99 / 2);
+    ///
+    /// // `v.par_iter()` is shorthand for `(&v).into_par_iter()`,
+    /// // producing the exact same references.
+    /// assert!(v.par_iter().zip(&v)
+    ///          .all(|(a, b)| std::ptr::eq(a, b)));
+    /// ```
+    fn par_iter(&'data self) -> Self::Iter;
+}
+
+impl<'data, I: 'data + ?Sized> IntoParallelRefIterator<'data> for I
+where
+    &'data I: IntoParallelIterator,
+{
+    type Iter = <&'data I as IntoParallelIterator>::Iter;
+    type Item = <&'data I as IntoParallelIterator>::Item;
+
+    fn par_iter(&'data self) -> Self::Iter {
+        self.into_par_iter()
+    }
+}
+
+/// `IntoParallelRefMutIterator` implements the conversion to a
+/// [`ParallelIterator`], providing mutable references to the data.
+///
+/// This is a parallel version of the `iter_mut()` method
+/// defined by various collections.
+///
+/// This trait is automatically implemented
+/// `for I where &mut I: IntoParallelIterator`. In most cases, users
+/// will want to implement [`IntoParallelIterator`] rather than implement
+/// this trait directly.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`IntoParallelIterator`]: trait.IntoParallelIterator.html
+pub trait IntoParallelRefMutIterator<'data> {
+    /// The type of iterator that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that will be produced; this is typically an
+    /// `&'data mut T` reference.
+    type Item: Send + 'data;
+
+    /// Creates the parallel iterator from `self`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = vec![0usize; 5];
+    /// v.par_iter_mut().enumerate().for_each(|(i, x)| *x = i);
+    /// assert_eq!(v, [0, 1, 2, 3, 4]);
+    /// ```
+    fn par_iter_mut(&'data mut self) -> Self::Iter;
+}
+
+impl<'data, I: 'data + ?Sized> IntoParallelRefMutIterator<'data> for I
+where
+    &'data mut I: IntoParallelIterator,
+{
+    type Iter = <&'data mut I as IntoParallelIterator>::Iter;
+    type Item = <&'data mut I as IntoParallelIterator>::Item;
+
+    fn par_iter_mut(&'data mut self) -> Self::Iter {
+        self.into_par_iter()
+    }
+}
+
+/// Parallel version of the standard iterator trait.
+///
+/// The combinators on this trait are available on **all** parallel
+/// iterators.  Additional methods can be found on the
+/// [`IndexedParallelIterator`] trait: those methods are only
+/// available for parallel iterators where the number of items is
+/// known in advance (so, e.g., after invoking `filter`, those methods
+/// become unavailable).
+///
+/// For examples of using parallel iterators, see [the docs on the
+/// `iter` module][iter].
+///
+/// [iter]: index.html
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+pub trait ParallelIterator: Sized + Send {
+    /// The type of item that this parallel iterator produces.
+    /// For example, if you use the [`for_each`] method, this is the type of
+    /// item that your closure will be invoked with.
+    ///
+    /// [`for_each`]: #method.for_each
+    type Item: Send;
+
+    /// Executes `OP` on each item produced by the iterator, in parallel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// (0..100).into_par_iter().for_each(|x| println!("{:?}", x));
+    /// ```
+    fn for_each<OP>(self, op: OP)
+    where
+        OP: Fn(Self::Item) + Sync + Send,
+    {
+        for_each::for_each(self, &op)
+    }
+
+    /// Executes `OP` on the given `init` value with each item produced by
+    /// the iterator, in parallel.
+    ///
+    /// The `init` value will be cloned only as needed to be paired with
+    /// the group of items in each rayon job.  It does not require the type
+    /// to be `Sync`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::mpsc::channel;
+    /// use rayon::prelude::*;
+    ///
+    /// let (sender, receiver) = channel();
+    ///
+    /// (0..5).into_par_iter().for_each_with(sender, |s, x| s.send(x).unwrap());
+    ///
+    /// let mut res: Vec<_> = receiver.iter().collect();
+    ///
+    /// res.sort();
+    ///
+    /// assert_eq!(&res[..], &[0, 1, 2, 3, 4])
+    /// ```
+    fn for_each_with<OP, T>(self, init: T, op: OP)
+    where
+        OP: Fn(&mut T, Self::Item) + Sync + Send,
+        T: Send + Clone,
+    {
+        self.map_with(init, op).collect()
+    }
+
+    /// Executes `OP` on a value returned by `init` with each item produced by
+    /// the iterator, in parallel.
+    ///
+    /// The `init` function will be called only as needed for a value to be
+    /// paired with the group of items in each rayon job.  There is no
+    /// constraint on that returned type at all!
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rand::Rng;
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = vec![0u8; 1_000_000];
+    ///
+    /// v.par_chunks_mut(1000)
+    ///     .for_each_init(
+    ///         || rand::thread_rng(),
+    ///         |rng, chunk| rng.fill(chunk),
+    ///     );
+    ///
+    /// // There's a remote chance that this will fail...
+    /// for i in 0u8..=255 {
+    ///     assert!(v.contains(&i));
+    /// }
+    /// ```
+    fn for_each_init<OP, INIT, T>(self, init: INIT, op: OP)
+    where
+        OP: Fn(&mut T, Self::Item) + Sync + Send,
+        INIT: Fn() -> T + Sync + Send,
+    {
+        self.map_init(init, op).collect()
+    }
+
+    /// Executes a fallible `OP` on each item produced by the iterator, in parallel.
+    ///
+    /// If the `OP` returns `Result::Err` or `Option::None`, we will attempt to
+    /// stop processing the rest of the items in the iterator as soon as
+    /// possible, and we will return that terminating value.  Otherwise, we will
+    /// return an empty `Result::Ok(())` or `Option::Some(())`.  If there are
+    /// multiple errors in parallel, it is not specified which will be returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::io::{self, Write};
+    ///
+    /// // This will stop iteration early if there's any write error, like
+    /// // having piped output get closed on the other end.
+    /// (0..100).into_par_iter()
+    ///     .try_for_each(|x| writeln!(io::stdout(), "{:?}", x))
+    ///     .expect("expected no write errors");
+    /// ```
+    fn try_for_each<OP, R>(self, op: OP) -> R
+    where
+        OP: Fn(Self::Item) -> R + Sync + Send,
+        R: Try<Ok = ()> + Send,
+    {
+        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
+            R::from_ok(())
+        }
+
+        self.map(op).try_reduce(<()>::default, ok)
+    }
+
+    /// Executes a fallible `OP` on the given `init` value with each item
+    /// produced by the iterator, in parallel.
+    ///
+    /// This combines the `init` semantics of [`for_each_with()`] and the
+    /// failure semantics of [`try_for_each()`].
+    ///
+    /// [`for_each_with()`]: #method.for_each_with
+    /// [`try_for_each()`]: #method.try_for_each
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::mpsc::channel;
+    /// use rayon::prelude::*;
+    ///
+    /// let (sender, receiver) = channel();
+    ///
+    /// (0..5).into_par_iter()
+    ///     .try_for_each_with(sender, |s, x| s.send(x))
+    ///     .expect("expected no send errors");
+    ///
+    /// let mut res: Vec<_> = receiver.iter().collect();
+    ///
+    /// res.sort();
+    ///
+    /// assert_eq!(&res[..], &[0, 1, 2, 3, 4])
+    /// ```
+    fn try_for_each_with<OP, T, R>(self, init: T, op: OP) -> R
+    where
+        OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        T: Send + Clone,
+        R: Try<Ok = ()> + Send,
+    {
+        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
+            R::from_ok(())
+        }
+
+        self.map_with(init, op).try_reduce(<()>::default, ok)
+    }
+
+    /// Executes a fallible `OP` on a value returned by `init` with each item
+    /// produced by the iterator, in parallel.
+    ///
+    /// This combines the `init` semantics of [`for_each_init()`] and the
+    /// failure semantics of [`try_for_each()`].
+    ///
+    /// [`for_each_init()`]: #method.for_each_init
+    /// [`try_for_each()`]: #method.try_for_each
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rand::Rng;
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = vec![0u8; 1_000_000];
+    ///
+    /// v.par_chunks_mut(1000)
+    ///     .try_for_each_init(
+    ///         || rand::thread_rng(),
+    ///         |rng, chunk| rng.try_fill(chunk),
+    ///     )
+    ///     .expect("expected no rand errors");
+    ///
+    /// // There's a remote chance that this will fail...
+    /// for i in 0u8..=255 {
+    ///     assert!(v.contains(&i));
+    /// }
+    /// ```
+    fn try_for_each_init<OP, INIT, T, R>(self, init: INIT, op: OP) -> R
+    where
+        OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        INIT: Fn() -> T + Sync + Send,
+        R: Try<Ok = ()> + Send,
+    {
+        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
+            R::from_ok(())
+        }
+
+        self.map_init(init, op).try_reduce(<()>::default, ok)
+    }
+
+    /// Counts the number of items in this parallel iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let count = (0..100).into_par_iter().count();
+    ///
+    /// assert_eq!(count, 100);
+    /// ```
+    fn count(self) -> usize {
+        fn one<T>(_: T) -> usize {
+            1
+        }
+
+        self.map(one).sum()
+    }
+
+    /// Applies `map_op` to each item of this iterator, producing a new
+    /// iterator with the results.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut par_iter = (0..5).into_par_iter().map(|x| x * 2);
+    ///
+    /// let doubles: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]);
+    /// ```
+    fn map<F, R>(self, map_op: F) -> Map<Self, F>
+    where
+        F: Fn(Self::Item) -> R + Sync + Send,
+        R: Send,
+    {
+        Map::new(self, map_op)
+    }
+
+    /// Applies `map_op` to the given `init` value with each item of this
+    /// iterator, producing a new iterator with the results.
+    ///
+    /// The `init` value will be cloned only as needed to be paired with
+    /// the group of items in each rayon job.  It does not require the type
+    /// to be `Sync`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::mpsc::channel;
+    /// use rayon::prelude::*;
+    ///
+    /// let (sender, receiver) = channel();
+    ///
+    /// let a: Vec<_> = (0..5)
+    ///                 .into_par_iter()            // iterating over i32
+    ///                 .map_with(sender, |s, x| {
+    ///                     s.send(x).unwrap();     // sending i32 values through the channel
+    ///                     x                       // returning i32
+    ///                 })
+    ///                 .collect();                 // collecting the returned values into a vector
+    ///
+    /// let mut b: Vec<_> = receiver.iter()         // iterating over the values in the channel
+    ///                             .collect();     // and collecting them
+    /// b.sort();
+    ///
+    /// assert_eq!(a, b);
+    /// ```
+    fn map_with<F, T, R>(self, init: T, map_op: F) -> MapWith<Self, T, F>
+    where
+        F: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        T: Send + Clone,
+        R: Send,
+    {
+        MapWith::new(self, init, map_op)
+    }
+
+    /// Applies `map_op` to a value returned by `init` with each item of this
+    /// iterator, producing a new iterator with the results.
+    ///
+    /// The `init` function will be called only as needed for a value to be
+    /// paired with the group of items in each rayon job.  There is no
+    /// constraint on that returned type at all!
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rand::Rng;
+    /// use rayon::prelude::*;
+    ///
+    /// let a: Vec<_> = (1i32..1_000_000)
+    ///     .into_par_iter()
+    ///     .map_init(
+    ///         || rand::thread_rng(),  // get the thread-local RNG
+    ///         |rng, x| if rng.gen() { // randomly negate items
+    ///             -x
+    ///         } else {
+    ///             x
+    ///         },
+    ///     ).collect();
+    ///
+    /// // There's a remote chance that this will fail...
+    /// assert!(a.iter().any(|&x| x < 0));
+    /// assert!(a.iter().any(|&x| x > 0));
+    /// ```
+    fn map_init<F, INIT, T, R>(self, init: INIT, map_op: F) -> MapInit<Self, INIT, F>
+    where
+        F: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        INIT: Fn() -> T + Sync + Send,
+        R: Send,
+    {
+        MapInit::new(self, init, map_op)
+    }
+
+    /// Creates an iterator which clones all of its elements.  This may be
+    /// useful when you have an iterator over `&T`, but you need `T`, and
+    /// that type implements `Clone`. See also [`copied()`].
+    ///
+    /// [`copied()`]: #method.copied
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3];
+    ///
+    /// let v_cloned: Vec<_> = a.par_iter().cloned().collect();
+    ///
+    /// // cloned is the same as .map(|&x| x), for integers
+    /// let v_map: Vec<_> = a.par_iter().map(|&x| x).collect();
+    ///
+    /// assert_eq!(v_cloned, vec![1, 2, 3]);
+    /// assert_eq!(v_map, vec![1, 2, 3]);
+    /// ```
+    fn cloned<'a, T>(self) -> Cloned<Self>
+    where
+        T: 'a + Clone + Send,
+        Self: ParallelIterator<Item = &'a T>,
+    {
+        Cloned::new(self)
+    }
+
+    /// Creates an iterator which copies all of its elements.  This may be
+    /// useful when you have an iterator over `&T`, but you need `T`, and
+    /// that type implements `Copy`. See also [`cloned()`].
+    ///
+    /// [`cloned()`]: #method.cloned
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3];
+    ///
+    /// let v_copied: Vec<_> = a.par_iter().copied().collect();
+    ///
+    /// // copied is the same as .map(|&x| x), for integers
+    /// let v_map: Vec<_> = a.par_iter().map(|&x| x).collect();
+    ///
+    /// assert_eq!(v_copied, vec![1, 2, 3]);
+    /// assert_eq!(v_map, vec![1, 2, 3]);
+    /// ```
+    fn copied<'a, T>(self) -> Copied<Self>
+    where
+        T: 'a + Copy + Send,
+        Self: ParallelIterator<Item = &'a T>,
+    {
+        Copied::new(self)
+    }
+
+    /// Applies `inspect_op` to a reference to each item of this iterator,
+    /// producing a new iterator passing through the original items.  This is
+    /// often useful for debugging to see what's happening in iterator stages.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 4, 2, 3];
+    ///
+    /// // this iterator sequence is complex.
+    /// let sum = a.par_iter()
+    ///             .cloned()
+    ///             .filter(|&x| x % 2 == 0)
+    ///             .reduce(|| 0, |sum, i| sum + i);
+    ///
+    /// println!("{}", sum);
+    ///
+    /// // let's add some inspect() calls to investigate what's happening
+    /// let sum = a.par_iter()
+    ///             .cloned()
+    ///             .inspect(|x| println!("about to filter: {}", x))
+    ///             .filter(|&x| x % 2 == 0)
+    ///             .inspect(|x| println!("made it through filter: {}", x))
+    ///             .reduce(|| 0, |sum, i| sum + i);
+    ///
+    /// println!("{}", sum);
+    /// ```
+    fn inspect<OP>(self, inspect_op: OP) -> Inspect<Self, OP>
+    where
+        OP: Fn(&Self::Item) + Sync + Send,
+    {
+        Inspect::new(self, inspect_op)
+    }
+
+    /// Mutates each item of this iterator before yielding it.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let par_iter = (0..5).into_par_iter().update(|x| {*x *= 2;});
+    ///
+    /// let doubles: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]);
+    /// ```
+    fn update<F>(self, update_op: F) -> Update<Self, F>
+    where
+        F: Fn(&mut Self::Item) + Sync + Send,
+    {
+        Update::new(self, update_op)
+    }
+
+    /// Applies `filter_op` to each item of this iterator, producing a new
+    /// iterator with only the items that gave `true` results.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut par_iter = (0..10).into_par_iter().filter(|x| x % 2 == 0);
+    ///
+    /// let even_numbers: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&even_numbers[..], &[0, 2, 4, 6, 8]);
+    /// ```
+    fn filter<P>(self, filter_op: P) -> Filter<Self, P>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        Filter::new(self, filter_op)
+    }
+
+    /// Applies `filter_op` to each item of this iterator to get an `Option`,
+    /// producing a new iterator with only the items from `Some` results.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut par_iter = (0..10).into_par_iter()
+    ///                         .filter_map(|x| {
+    ///                             if x % 2 == 0 { Some(x * 3) }
+    ///                             else { None }
+    ///                         });
+    ///
+    /// let even_numbers: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&even_numbers[..], &[0, 6, 12, 18, 24]);
+    /// ```
+    fn filter_map<P, R>(self, filter_op: P) -> FilterMap<Self, P>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        FilterMap::new(self, filter_op)
+    }
+
+    /// Applies `map_op` to each item of this iterator to get nested parallel iterators,
+    /// producing a new parallel iterator that flattens these back into one.
+    ///
+    /// See also [`flat_map_iter`](#method.flat_map_iter).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]];
+    ///
+    /// let par_iter = a.par_iter().cloned().flat_map(|a| a.to_vec());
+    ///
+    /// let vec: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]);
+    /// ```
+    fn flat_map<F, PI>(self, map_op: F) -> FlatMap<Self, F>
+    where
+        F: Fn(Self::Item) -> PI + Sync + Send,
+        PI: IntoParallelIterator,
+    {
+        FlatMap::new(self, map_op)
+    }
+
+    /// Applies `map_op` to each item of this iterator to get nested serial iterators,
+    /// producing a new parallel iterator that flattens these back into one.
+    ///
+    /// # `flat_map_iter` versus `flat_map`
+    ///
+    /// These two methods are similar but behave slightly differently. With [`flat_map`],
+    /// each of the nested iterators must be a parallel iterator, and they will be further
+    /// split up with nested parallelism. With `flat_map_iter`, each nested iterator is a
+    /// sequential `Iterator`, and we only parallelize _between_ them, while the items
+    /// produced by each nested iterator are processed sequentially.
+    ///
+    /// When choosing between these methods, consider whether nested parallelism suits the
+    /// potential iterators at hand. If there's little computation involved, or if the nested
+    /// iterators are much shorter than the outer parallel iterator, then it may perform better
+    /// to avoid the overhead of parallelism and just flatten sequentially with `flat_map_iter`.
+    /// If there is a lot of computation, potentially outweighing the outer parallel
+    /// iterator, then the nested parallelism of `flat_map` may be worthwhile.
+    ///
+    /// [`flat_map`]: #method.flat_map
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::cell::RefCell;
+    ///
+    /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]];
+    ///
+    /// let par_iter = a.par_iter().flat_map_iter(|a| {
+    ///     // The serial iterator doesn't have to be thread-safe, just its items.
+    ///     let cell_iter = RefCell::new(a.iter().cloned());
+    ///     std::iter::from_fn(move || cell_iter.borrow_mut().next())
+    /// });
+    ///
+    /// let vec: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]);
+    /// ```
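+    ///
+    /// For comparison, a rough sketch of the same flattening done with nested
+    /// parallelism via [`flat_map`]:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]];
+    ///
+    /// // Each nested `par_iter()` is itself a parallel iterator here.
+    /// let vec: Vec<_> = a.par_iter().flat_map(|a| a.par_iter().cloned()).collect();
+    ///
+    /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]);
+    /// ```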
+    fn flat_map_iter<F, SI>(self, map_op: F) -> FlatMapIter<Self, F>
+    where
+        F: Fn(Self::Item) -> SI + Sync + Send,
+        SI: IntoIterator,
+        SI::Item: Send,
+    {
+        FlatMapIter::new(self, map_op)
+    }
+
+    /// An adaptor that flattens parallel-iterable `Item`s into one large iterator.
+    ///
+    /// See also [`flatten_iter`](#method.flatten_iter).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x: Vec<Vec<_>> = vec![vec![1, 2], vec![3, 4]];
+    /// let y: Vec<_> = x.into_par_iter().flatten().collect();
+    ///
+    /// assert_eq!(y, vec![1, 2, 3, 4]);
+    /// ```
+    fn flatten(self) -> Flatten<Self>
+    where
+        Self::Item: IntoParallelIterator,
+    {
+        Flatten::new(self)
+    }
+
+    /// An adaptor that flattens serial-iterable `Item`s into one large iterator.
+    ///
+    /// See also [`flatten`](#method.flatten) and the analogous comparison of
+    /// [`flat_map_iter` versus `flat_map`](#flat_map_iter-versus-flat_map).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x: Vec<Vec<_>> = vec![vec![1, 2], vec![3, 4]];
+    /// let iters: Vec<_> = x.into_iter().map(Vec::into_iter).collect();
+    /// let y: Vec<_> = iters.into_par_iter().flatten_iter().collect();
+    ///
+    /// assert_eq!(y, vec![1, 2, 3, 4]);
+    /// ```
+    fn flatten_iter(self) -> FlattenIter<Self>
+    where
+        Self::Item: IntoIterator,
+        <Self::Item as IntoIterator>::Item: Send,
+    {
+        FlattenIter::new(self)
+    }
+
+    /// Reduces the items in the iterator into one item using `op`.
+    /// The argument `identity` should be a closure that can produce
+    /// "identity" value which may be inserted into the sequence as
+    /// needed to create opportunities for parallel execution. So, for
+    /// example, if you are doing a summation, then `identity()` ought
+    /// to produce something that represents the zero for your type
+    /// (but consider just calling `sum()` in that case).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// // Iterate over a sequence of pairs `(x0, y0), ..., (xN, yN)`
+    /// // and use reduce to compute one pair `(x0 + ... + xN, y0 + ... + yN)`
+    /// // where the first/second elements are summed separately.
+    /// use rayon::prelude::*;
+    /// let sums = [(0, 1), (5, 6), (16, 2), (8, 9)]
+    ///            .par_iter()        // iterating over &(i32, i32)
+    ///            .cloned()          // iterating over (i32, i32)
+    ///            .reduce(|| (0, 0), // the "identity" is 0 in both columns
+    ///                    |a, b| (a.0 + b.0, a.1 + b.1));
+    /// assert_eq!(sums, (0 + 5 + 16 + 8, 1 + 6 + 2 + 9));
+    /// ```
+    ///
+    /// **Note:** unlike a sequential `fold` operation, the order in
+    /// which `op` will be applied to reduce the result is not fully
+    /// specified. So `op` should be [associative] or else the results
+    /// will be non-deterministic. And of course `identity()` should
+    /// produce a true identity.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    fn reduce<OP, ID>(self, identity: ID, op: OP) -> Self::Item
+    where
+        OP: Fn(Self::Item, Self::Item) -> Self::Item + Sync + Send,
+        ID: Fn() -> Self::Item + Sync + Send,
+    {
+        reduce::reduce(self, identity, op)
+    }
+
+    /// Reduces the items in the iterator into one item using `op`.
+    /// If the iterator is empty, `None` is returned; otherwise,
+    /// `Some` is returned.
+    ///
+    /// This version of `reduce` is simple but somewhat less
+    /// efficient. If possible, it is better to call `reduce()`, which
+    /// requires an identity element.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let sums = [(0, 1), (5, 6), (16, 2), (8, 9)]
+    ///            .par_iter()        // iterating over &(i32, i32)
+    ///            .cloned()          // iterating over (i32, i32)
+    ///            .reduce_with(|a, b| (a.0 + b.0, a.1 + b.1))
+    ///            .unwrap();
+    /// assert_eq!(sums, (0 + 5 + 16 + 8, 1 + 6 + 2 + 9));
+    /// ```
+    ///
+    /// **Note:** unlike a sequential `fold` operation, the order in
+    /// which `op` will be applied to reduce the result is not fully
+    /// specified. So `op` should be [associative] or else the results
+    /// will be non-deterministic.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    fn reduce_with<OP>(self, op: OP) -> Option<Self::Item>
+    where
+        OP: Fn(Self::Item, Self::Item) -> Self::Item + Sync + Send,
+    {
+        fn opt_fold<T>(op: impl Fn(T, T) -> T) -> impl Fn(Option<T>, T) -> Option<T> {
+            move |opt_a, b| match opt_a {
+                Some(a) => Some(op(a, b)),
+                None => Some(b),
+            }
+        }
+
+        fn opt_reduce<T>(op: impl Fn(T, T) -> T) -> impl Fn(Option<T>, Option<T>) -> Option<T> {
+            move |opt_a, opt_b| match (opt_a, opt_b) {
+                (Some(a), Some(b)) => Some(op(a, b)),
+                (Some(v), None) | (None, Some(v)) => Some(v),
+                (None, None) => None,
+            }
+        }
+
+        self.fold(<_>::default, opt_fold(&op))
+            .reduce(<_>::default, opt_reduce(&op))
+    }
+
+    /// Reduces the items in the iterator into one item using a fallible `op`.
+    /// The `identity` argument is used the same way as in [`reduce()`].
+    ///
+    /// [`reduce()`]: #method.reduce
+    ///
+    /// If a `Result::Err` or `Option::None` item is found, or if `op` reduces
+    /// to one, we will attempt to stop processing the rest of the items in the
+    /// iterator as soon as possible, and we will return that terminating value.
+    /// Otherwise, we will return the final reduced `Result::Ok(T)` or
+    /// `Option::Some(T)`.  If there are multiple errors in parallel, it is not
+    /// specified which will be returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// // Compute the sum of squares, being careful about overflow.
+    /// fn sum_squares<I: IntoParallelIterator<Item = i32>>(iter: I) -> Option<i32> {
+    ///     iter.into_par_iter()
+    ///         .map(|i| i.checked_mul(i))            // square each item,
+    ///         .try_reduce(|| 0, i32::checked_add)   // and add them up!
+    /// }
+    /// assert_eq!(sum_squares(0..5), Some(0 + 1 + 4 + 9 + 16));
+    ///
+    /// // The sum might overflow
+    /// assert_eq!(sum_squares(0..10_000), None);
+    ///
+    /// // Or the squares might overflow before it even reaches `try_reduce`
+    /// assert_eq!(sum_squares(1_000_000..1_000_001), None);
+    /// ```
+    fn try_reduce<T, OP, ID>(self, identity: ID, op: OP) -> Self::Item
+    where
+        OP: Fn(T, T) -> Self::Item + Sync + Send,
+        ID: Fn() -> T + Sync + Send,
+        Self::Item: Try<Ok = T>,
+    {
+        try_reduce::try_reduce(self, identity, op)
+    }
+
+    /// Reduces the items in the iterator into one item using a fallible `op`.
+    ///
+    /// Like [`reduce_with()`], if the iterator is empty, `None` is returned;
+    /// otherwise, `Some` is returned.  Beyond that, it behaves like
+    /// [`try_reduce()`] for handling `Err`/`None`.
+    ///
+    /// [`reduce_with()`]: #method.reduce_with
+    /// [`try_reduce()`]: #method.try_reduce
+    ///
+    /// For instance, with `Option` items, the return value may be:
+    /// - `None`, the iterator was empty
+    /// - `Some(None)`, we stopped after encountering `None`.
+    /// - `Some(Some(x))`, the entire iterator reduced to `x`.
+    ///
+    /// With `Result` items, the nesting is more obvious:
+    /// - `None`, the iterator was empty
+    /// - `Some(Err(e))`, we stopped after encountering an error `e`.
+    /// - `Some(Ok(x))`, the entire iterator reduced to `x`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let files = ["/dev/null", "/does/not/exist"];
+    ///
+    /// // Find the biggest file
+    /// files.into_par_iter()
+    ///     .map(|path| std::fs::metadata(path).map(|m| (path, m.len())))
+    ///     .try_reduce_with(|a, b| {
+    ///         Ok(if a.1 >= b.1 { a } else { b })
+    ///     })
+    ///     .expect("Some value, since the iterator is not empty")
+    ///     .expect_err("not found");
+    /// ```
+    fn try_reduce_with<T, OP>(self, op: OP) -> Option<Self::Item>
+    where
+        OP: Fn(T, T) -> Self::Item + Sync + Send,
+        Self::Item: Try<Ok = T>,
+    {
+        try_reduce_with::try_reduce_with(self, op)
+    }
+
+    /// Parallel fold is similar to sequential fold except that the
+    /// sequence of items may be subdivided before it is
+    /// folded. Consider a list of numbers like `22 3 77 89 46`. If
+    /// you used sequential fold to add them (`fold(0, |a,b| a+b)`),
+    /// you would wind up first adding 0 + 22, then 22 + 3, then 25 +
+    /// 77, and so forth. The **parallel fold** works similarly except
+    /// that it first breaks up your list into sublists, and hence
+    /// instead of yielding up a single sum at the end, it yields up
+    /// multiple sums. The number of results is nondeterministic, as
+    /// is the point where the breaks occur.
+    ///
+    /// So if we did the same parallel fold (`fold(0, |a,b| a+b)`) on
+    /// our example list, we might wind up with a sequence of two numbers,
+    /// like so:
+    ///
+    /// ```notrust
+    /// 22 3 77 89 46
+    ///       |     |
+    ///     102   135
+    /// ```
+    ///
+    /// Or perhaps these three numbers:
+    ///
+    /// ```notrust
+    /// 22 3 77 89 46
+    ///       |  |  |
+    ///     102 89 46
+    /// ```
+    ///
+    /// In general, Rayon will attempt to find good breaking points
+    /// that keep all of your cores busy.
+    ///
+    /// ### Fold versus reduce
+    ///
+    /// The `fold()` and `reduce()` methods each take an identity element
+    /// and a combining function, but they operate rather differently.
+    ///
+    /// `reduce()` requires that the identity function has the same
+    /// type as the things you are iterating over, and it fully
+    /// reduces the list of items into a single item. So, for example,
+    /// imagine we are iterating over a list of bytes `bytes: [128_u8,
+    /// 64_u8, 64_u8]`. If we used `bytes.reduce(|| 0_u8, |a: u8, b:
+    /// u8| a + b)`, we would get an overflow. This is because `0`,
+    /// `a`, and `b` here are all bytes, just like the numbers in the
+    /// list (I wrote the types explicitly above, but those are the
+    /// only types you can use). To avoid the overflow, we would need
+    /// to do something like `bytes.map(|b| b as u32).reduce(|| 0, |a,
+    /// b| a + b)`, in which case our result would be `256`.
+    ///
+    /// In contrast, with `fold()`, the identity function does not
+    /// have to have the same type as the things you are iterating
+    /// over, and you potentially get back many results. So, if we
+    /// continue with the `bytes` example from the previous paragraph,
+    /// we could do `bytes.fold(|| 0_u32, |a, b| a + (b as u32))` to
+    /// convert our bytes into `u32`. And of course we might not get
+    /// back a single sum.
+    ///
+    /// There is a more subtle distinction as well, though it's
+    /// actually implied by the above points. When you use `reduce()`,
+    /// your reduction function is sometimes called with values that
+    /// were never part of your original parallel iterator (for
+    /// example, both the left and right might be a partial sum). With
+    /// `fold()`, in contrast, the left value in the fold function is
+    /// always the accumulator, and the right value is always from
+    /// your original sequence.
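+    ///
+    /// As a rough sketch of the byte-overflow scenario described above:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = [128_u8, 64_u8, 64_u8];
+    ///
+    /// // Folding into a `u32` accumulator avoids the `u8` overflow,
+    /// // and a final reduce combines the per-group partial sums.
+    /// let sum = bytes.par_iter()
+    ///                .fold(|| 0_u32, |a, &b| a + (b as u32))
+    ///                .reduce(|| 0, |a, b| a + b);
+    ///
+    /// assert_eq!(sum, 256);
+    /// ```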
+    ///
+    /// ### Fold vs Map/Reduce
+    ///
+    /// Fold makes sense if you have some operation where it is
+    /// cheaper to create groups of elements at a time. For example,
+    /// imagine collecting characters into a string. If you were going
+    /// to use map/reduce, you might try this:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let s =
+    ///     ['a', 'b', 'c', 'd', 'e']
+    ///     .par_iter()
+    ///     .map(|c: &char| format!("{}", c))
+    ///     .reduce(|| String::new(),
+    ///             |mut a: String, b: String| { a.push_str(&b); a });
+    ///
+    /// assert_eq!(s, "abcde");
+    /// ```
+    ///
+    /// Because reduce produces the same type of element as its input,
+    /// you have to first map each character into a string, and then
+    /// you can reduce them. This means we create one string per
+    /// element in our iterator -- not so great. Using `fold`, we can
+    /// do this instead:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let s =
+    ///     ['a', 'b', 'c', 'd', 'e']
+    ///     .par_iter()
+    ///     .fold(|| String::new(),
+    ///             |mut s: String, c: &char| { s.push(*c); s })
+    ///     .reduce(|| String::new(),
+    ///             |mut a: String, b: String| { a.push_str(&b); a });
+    ///
+    /// assert_eq!(s, "abcde");
+    /// ```
+    ///
+    /// Now `fold` will process groups of our characters at a time,
+    /// and we only make one string per group. We should wind up with
+    /// some small-ish number of strings roughly proportional to the
+    /// number of CPUs you have (it will ultimately depend on how busy
+    /// your processors are). Note that we still need to do a reduce
+    /// afterwards to combine those groups of strings into a single
+    /// string.
+    ///
+    /// You could use a similar trick to save partial results (e.g., a
+    /// cache) or something similar.
+    ///
+    /// ### Combining fold with other operations
+    ///
+    /// You can combine `fold` with `reduce` if you want to produce a
+    /// single value. This is then roughly equivalent to a map/reduce
+    /// combination in effect:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .fold(|| 0_u32, |a: u32, b: u8| a + (b as u32))
+    ///                .sum::<u32>();
+    ///
+    /// assert_eq!(sum, (0..22).sum()); // compare to sequential
+    /// ```
+    fn fold<T, ID, F>(self, identity: ID, fold_op: F) -> Fold<Self, ID, F>
+    where
+        F: Fn(T, Self::Item) -> T + Sync + Send,
+        ID: Fn() -> T + Sync + Send,
+        T: Send,
+    {
+        Fold::new(self, identity, fold_op)
+    }
+
+    /// Applies `fold_op` to the given `init` value with each item of this
+    /// iterator, finally producing the value for further use.
+    ///
+    /// This works essentially like `fold(|| init.clone(), fold_op)`, except
+    /// it doesn't require the `init` type to be `Sync`, nor any other form
+    /// of added synchronization.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .fold_with(0_u32, |a: u32, b: u8| a + (b as u32))
+    ///                .sum::<u32>();
+    ///
+    /// assert_eq!(sum, (0..22).sum()); // compare to sequential
+    /// ```
+    fn fold_with<F, T>(self, init: T, fold_op: F) -> FoldWith<Self, T, F>
+    where
+        F: Fn(T, Self::Item) -> T + Sync + Send,
+        T: Send + Clone,
+    {
+        FoldWith::new(self, init, fold_op)
+    }
+
+    /// Performs a fallible parallel fold.
+    ///
+    /// This is a variation of [`fold()`] for operations which can fail with
+    /// `Option::None` or `Result::Err`.  The first such failure stops
+    /// processing the local set of items, without affecting other folds in the
+    /// iterator's subdivisions.
+    ///
+    /// Often, `try_fold()` will be followed by [`try_reduce()`]
+    /// for a final reduction and global short-circuiting effect.
+    ///
+    /// [`fold()`]: #method.fold
+    /// [`try_reduce()`]: #method.try_reduce
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .try_fold(|| 0_u32, |a: u32, b: u8| a.checked_add(b as u32))
+    ///                .try_reduce(|| 0, u32::checked_add);
+    ///
+    /// assert_eq!(sum, Some((0..22).sum())); // compare to sequential
+    /// ```
+    fn try_fold<T, R, ID, F>(self, identity: ID, fold_op: F) -> TryFold<Self, R, ID, F>
+    where
+        F: Fn(T, Self::Item) -> R + Sync + Send,
+        ID: Fn() -> T + Sync + Send,
+        R: Try<Ok = T> + Send,
+    {
+        TryFold::new(self, identity, fold_op)
+    }
+
+    /// Performs a fallible parallel fold with a cloneable `init` value.
+    ///
+    /// This combines the `init` semantics of [`fold_with()`] and the failure
+    /// semantics of [`try_fold()`].
+    ///
+    /// [`fold_with()`]: #method.fold_with
+    /// [`try_fold()`]: #method.try_fold
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .try_fold_with(0_u32, |a: u32, b: u8| a.checked_add(b as u32))
+    ///                .try_reduce(|| 0, u32::checked_add);
+    ///
+    /// assert_eq!(sum, Some((0..22).sum())); // compare to sequential
+    /// ```
+    fn try_fold_with<F, T, R>(self, init: T, fold_op: F) -> TryFoldWith<Self, R, F>
+    where
+        F: Fn(T, Self::Item) -> R + Sync + Send,
+        R: Try<Ok = T> + Send,
+        T: Clone + Send,
+    {
+        TryFoldWith::new(self, init, fold_op)
+    }
+
+    /// Sums up the items in the iterator.
+    ///
+    /// Note that the order in which the items will be reduced is not specified,
+    /// so if the `+` operator is not truly [associative] \(as is the
+    /// case for floating point numbers), then the results are not
+    /// fully deterministic.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    ///
+    /// Basically equivalent to `self.reduce(|| 0, |a, b| a + b)`,
+    /// except that the type of `0` and the `+` operation may vary
+    /// depending on the type of value being produced.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 5, 7];
+    ///
+    /// let sum: i32 = a.par_iter().sum();
+    ///
+    /// assert_eq!(sum, 13);
+    /// ```
+    fn sum<S>(self) -> S
+    where
+        S: Send + Sum<Self::Item> + Sum<S>,
+    {
+        sum::sum(self)
+    }
+
+    /// Multiplies all the items in the iterator.
+    ///
+    /// Note that the order in which the items will be reduced is not specified,
+    /// so if the `*` operator is not truly [associative] \(as is the
+    /// case for floating point numbers), then the results are not
+    /// fully deterministic.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    ///
+    /// Basically equivalent to `self.reduce(|| 1, |a, b| a * b)`,
+    /// except that the type of `1` and the `*` operation may vary
+    /// depending on the type of value being produced.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// fn factorial(n: u32) -> u32 {
+    ///    (1..n+1).into_par_iter().product()
+    /// }
+    ///
+    /// assert_eq!(factorial(0), 1);
+    /// assert_eq!(factorial(1), 1);
+    /// assert_eq!(factorial(5), 120);
+    /// ```
+    fn product<P>(self) -> P
+    where
+        P: Send + Product<Self::Item> + Product<P>,
+    {
+        product::product(self)
+    }
+
+    /// Computes the minimum of all the items in the iterator. If the
+    /// iterator is empty, `None` is returned; otherwise, `Some(min)`
+    /// is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// Basically equivalent to `self.reduce_with(|a, b| cmp::min(a, b))`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [45, 74, 32];
+    ///
+    /// assert_eq!(a.par_iter().min(), Some(&32));
+    ///
+    /// let b: [i32; 0] = [];
+    ///
+    /// assert_eq!(b.par_iter().min(), None);
+    /// ```
+    fn min(self) -> Option<Self::Item>
+    where
+        Self::Item: Ord,
+    {
+        self.reduce_with(cmp::min)
+    }
+
+    /// Computes the minimum of all the items in the iterator with respect to
+    /// the given comparison function. If the iterator is empty, `None` is
+    /// returned; otherwise, `Some(min)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the comparison function is not associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 77, 53, 240, -1];
+    ///
+    /// assert_eq!(a.par_iter().min_by(|x, y| x.cmp(y)), Some(&-3));
+    /// ```
+    fn min_by<F>(self, f: F) -> Option<Self::Item>
+    where
+        F: Sync + Send + Fn(&Self::Item, &Self::Item) -> Ordering,
+    {
+        fn min<T>(f: impl Fn(&T, &T) -> Ordering) -> impl Fn(T, T) -> T {
+            move |a, b| match f(&a, &b) {
+                Ordering::Greater => b,
+                _ => a,
+            }
+        }
+
+        self.reduce_with(min(f))
+    }
+
+    /// Computes the item that yields the minimum value for the given
+    /// function. If the iterator is empty, `None` is returned;
+    /// otherwise, `Some(item)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 34, 2, 5, -10, -3, -23];
+    ///
+    /// assert_eq!(a.par_iter().min_by_key(|x| x.abs()), Some(&2));
+    /// ```
+    fn min_by_key<K, F>(self, f: F) -> Option<Self::Item>
+    where
+        K: Ord + Send,
+        F: Sync + Send + Fn(&Self::Item) -> K,
+    {
+        fn key<T, K>(f: impl Fn(&T) -> K) -> impl Fn(T) -> (K, T) {
+            move |x| (f(&x), x)
+        }
+
+        fn min_key<T, K: Ord>(a: (K, T), b: (K, T)) -> (K, T) {
+            match (a.0).cmp(&b.0) {
+                Ordering::Greater => b,
+                _ => a,
+            }
+        }
+
+        let (_, x) = self.map(key(f)).reduce_with(min_key)?;
+        Some(x)
+    }
+
+    /// Computes the maximum of all the items in the iterator. If the
+    /// iterator is empty, `None` is returned; otherwise, `Some(max)`
+    /// is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// Basically equivalent to `self.reduce_with(|a, b| cmp::max(a, b))`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [45, 74, 32];
+    ///
+    /// assert_eq!(a.par_iter().max(), Some(&74));
+    ///
+    /// let b: [i32; 0] = [];
+    ///
+    /// assert_eq!(b.par_iter().max(), None);
+    /// ```
+    fn max(self) -> Option<Self::Item>
+    where
+        Self::Item: Ord,
+    {
+        self.reduce_with(cmp::max)
+    }
+
+    /// Computes the maximum of all the items in the iterator with respect to
+    /// the given comparison function. If the iterator is empty, `None` is
+    /// returned; otherwise, `Some(max)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the comparison function is not associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 77, 53, 240, -1];
+    ///
+    /// assert_eq!(a.par_iter().max_by(|x, y| x.abs().cmp(&y.abs())), Some(&240));
+    /// ```
+    fn max_by<F>(self, f: F) -> Option<Self::Item>
+    where
+        F: Sync + Send + Fn(&Self::Item, &Self::Item) -> Ordering,
+    {
+        fn max<T>(f: impl Fn(&T, &T) -> Ordering) -> impl Fn(T, T) -> T {
+            move |a, b| match f(&a, &b) {
+                Ordering::Greater => a,
+                _ => b,
+            }
+        }
+
+        self.reduce_with(max(f))
+    }
+
+    /// Computes the item that yields the maximum value for the given
+    /// function. If the iterator is empty, `None` is returned;
+    /// otherwise, `Some(item)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 34, 2, 5, -10, -3, -23];
+    ///
+    /// assert_eq!(a.par_iter().max_by_key(|x| x.abs()), Some(&34));
+    /// ```
+    fn max_by_key<K, F>(self, f: F) -> Option<Self::Item>
+    where
+        K: Ord + Send,
+        F: Sync + Send + Fn(&Self::Item) -> K,
+    {
+        fn key<T, K>(f: impl Fn(&T) -> K) -> impl Fn(T) -> (K, T) {
+            move |x| (f(&x), x)
+        }
+
+        fn max_key<T, K: Ord>(a: (K, T), b: (K, T)) -> (K, T) {
+            match (a.0).cmp(&b.0) {
+                Ordering::Greater => a,
+                _ => b,
+            }
+        }
+
+        let (_, x) = self.map(key(f)).reduce_with(max_key)?;
+        Some(x)
+    }
+
+    /// Takes two iterators and creates a new iterator over both.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [0, 1, 2];
+    /// let b = [9, 8, 7];
+    ///
+    /// let par_iter = a.par_iter().chain(b.par_iter());
+    ///
+    /// let chained: Vec<_> = par_iter.cloned().collect();
+    ///
+    /// assert_eq!(&chained[..], &[0, 1, 2, 9, 8, 7]);
+    /// ```
+    fn chain<C>(self, chain: C) -> Chain<Self, C::Iter>
+    where
+        C: IntoParallelIterator<Item = Self::Item>,
+    {
+        Chain::new(self, chain.into_par_iter())
+    }
+
+    /// Searches for **some** item in the parallel iterator that
+    /// matches the given predicate and returns it. This operation
+    /// is similar to [`find` on sequential iterators][find] but
+    /// the item returned may not be the **first** one in the parallel
+    /// sequence which matches, since we search the entire sequence in parallel.
+    ///
+    /// Once a match is found, we will attempt to stop processing
+    /// the rest of the items in the iterator as soon as possible
+    /// (just as `find` stops iterating once a match is found).
+    ///
+    /// [find]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.find
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().find_any(|&&x| x == 3), Some(&3));
+    ///
+    /// assert_eq!(a.par_iter().find_any(|&&x| x == 100), None);
+    /// ```
+    fn find_any<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        find::find(self, predicate)
+    }
+
+    /// Searches for the sequentially **first** item in the parallel iterator
+    /// that matches the given predicate and returns it.
+    ///
+    /// Once a match is found, all attempts to the right of the match
+    /// will be stopped, while attempts to the left must continue in case
+    /// an earlier match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous.  If you
+    /// just want the first match discovered anywhere in the iterator,
+    /// `find_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().find_first(|&&x| x == 3), Some(&3));
+    ///
+    /// assert_eq!(a.par_iter().find_first(|&&x| x == 100), None);
+    /// ```
+    fn find_first<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        find_first_last::find_first(self, predicate)
+    }
+
+    /// Searches for the sequentially **last** item in the parallel iterator
+    /// that matches the given predicate and returns it.
+    ///
+    /// Once a match is found, all attempts to the left of the match
+    /// will be stopped, while attempts to the right must continue in case
+    /// a later match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "last" may be nebulous.  When the
+    /// order doesn't actually matter to you, `find_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().find_last(|&&x| x == 3), Some(&3));
+    ///
+    /// assert_eq!(a.par_iter().find_last(|&&x| x == 100), None);
+    /// ```
+    fn find_last<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        find_first_last::find_last(self, predicate)
+    }
+
+    /// Applies the given predicate to the items in the parallel iterator
+    /// and returns **any** non-None result of the map operation.
+    ///
+    /// Once a non-None value is produced from the map operation, we will
+    /// attempt to stop processing the rest of the items in the iterator
+    /// as soon as possible.
+    ///
+    /// Note that this method only returns **some** item in the parallel
+    /// iterator that is not None from the map predicate. The item returned
+    /// may not be the **first** non-None value produced in the parallel
+    /// sequence, since the entire sequence is mapped over in parallel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let c = ["lol", "NaN", "5", "5"];
+    ///
+    /// let found_number = c.par_iter().find_map_any(|s| s.parse().ok());
+    ///
+    /// assert_eq!(found_number, Some(5));
+    /// ```
+    fn find_map_any<P, R>(self, predicate: P) -> Option<R>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        fn yes<T>(_: &T) -> bool {
+            true
+        }
+        self.filter_map(predicate).find_any(yes)
+    }
+
+    /// Applies the given predicate to the items in the parallel iterator and
+    /// returns the sequentially **first** non-None result of the map operation.
+    ///
+    /// Once a non-None value is produced from the map operation, all attempts
+    /// to the right of the match will be stopped, while attempts to the left
+    /// must continue in case an earlier match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous. If you
+    /// just want the first non-None value discovered anywhere in the iterator,
+    /// `find_map_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let c = ["lol", "NaN", "2", "5"];
+    ///
+    /// let first_number = c.par_iter().find_map_first(|s| s.parse().ok());
+    ///
+    /// assert_eq!(first_number, Some(2));
+    /// ```
+    fn find_map_first<P, R>(self, predicate: P) -> Option<R>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        fn yes<T>(_: &T) -> bool {
+            true
+        }
+        self.filter_map(predicate).find_first(yes)
+    }
+
+    /// Applies the given predicate to the items in the parallel iterator and
+    /// returns the sequentially **last** non-None result of the map operation.
+    ///
+    /// Once a non-None value is produced from the map operation, all attempts
+    /// to the left of the match will be stopped, while attempts to the right
+    /// must continue in case a later match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous. If you
+    /// just want the first non-None value discovered anywhere in the iterator,
+    /// `find_map_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let c = ["lol", "NaN", "2", "5"];
+    ///
+    /// let last_number = c.par_iter().find_map_last(|s| s.parse().ok());
+    ///
+    /// assert_eq!(last_number, Some(5));
+    /// ```
+    fn find_map_last<P, R>(self, predicate: P) -> Option<R>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        fn yes<T>(_: &T) -> bool {
+            true
+        }
+        self.filter_map(predicate).find_last(yes)
+    }
+
+    #[doc(hidden)]
+    #[deprecated(note = "parallel `find` does not search in order -- use `find_any`, \\
+                         `find_first`, or `find_last`")]
+    fn find<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        self.find_any(predicate)
+    }
+
+    /// Searches for **some** item in the parallel iterator that
+    /// matches the given predicate, and if so returns true.  Once
+    /// a match is found, we'll attempt to stop processing the rest
+    /// of the items.  Proving that there's no match, returning false,
+    /// does require visiting every item.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [0, 12, 3, 4, 0, 23, 0];
+    ///
+    /// let is_valid = a.par_iter().any(|&x| x > 10);
+    ///
+    /// assert!(is_valid);
+    /// ```
+    fn any<P>(self, predicate: P) -> bool
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        self.map(predicate).find_any(bool::clone).is_some()
+    }
+
+    /// Tests that every item in the parallel iterator matches the given
+    /// predicate, and if so returns true.  If a counter-example is found,
+    /// we'll attempt to stop processing more items, then return false.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [0, 12, 3, 4, 0, 23, 0];
+    ///
+    /// let is_valid = a.par_iter().all(|&x| x > 10);
+    ///
+    /// assert!(!is_valid);
+    /// ```
+    fn all<P>(self, predicate: P) -> bool
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn is_false(x: &bool) -> bool {
+            !x
+        }
+
+        self.map(predicate).find_any(is_false).is_none()
+    }
+
+    /// Creates an iterator over the `Some` items of this iterator, halting
+    /// as soon as any `None` is found.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::sync::atomic::{AtomicUsize, Ordering};
+    ///
+    /// let counter = AtomicUsize::new(0);
+    /// let value = (0_i32..2048)
+    ///     .into_par_iter()
+    ///     .map(|x| {
+    ///              counter.fetch_add(1, Ordering::SeqCst);
+    ///              if x < 1024 { Some(x) } else { None }
+    ///          })
+    ///     .while_some()
+    ///     .max();
+    ///
+    /// assert!(value < Some(1024));
+    /// assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+    /// ```
+    fn while_some<T>(self) -> WhileSome<Self>
+    where
+        Self: ParallelIterator<Item = Option<T>>,
+        T: Send,
+    {
+        WhileSome::new(self)
+    }
+
+    /// Wraps an iterator with a fuse in case of panics, to halt all threads
+    /// as soon as possible.
+    ///
+    /// Panics within parallel iterators are always propagated to the caller,
+    /// but they don't always halt the rest of the iterator right away, due to
+    /// the internal semantics of [`join`]. This adaptor makes a greater effort
+    /// to stop processing other items sooner, with the cost of additional
+    /// synchronization overhead, which may also inhibit some optimizations.
+    ///
+    /// [`join`]: ../fn.join.html#panics
+    ///
+    /// # Examples
+    ///
+    /// If this code didn't use `panic_fuse()`, it would continue processing
+    /// many more items in other threads (with long sleep delays) before the
+    /// panic is finally propagated.
+    ///
+    /// ```should_panic
+    /// use rayon::prelude::*;
+    /// use std::{thread, time};
+    ///
+    /// (0..1_000_000)
+    ///     .into_par_iter()
+    ///     .panic_fuse()
+    ///     .for_each(|i| {
+    ///         // simulate some work
+    ///         thread::sleep(time::Duration::from_secs(1));
+    ///         assert!(i > 0); // oops!
+    ///     });
+    /// ```
+    fn panic_fuse(self) -> PanicFuse<Self> {
+        PanicFuse::new(self)
+    }
+
+    /// Creates a fresh collection containing all the elements produced
+    /// by this parallel iterator.
+    ///
+    /// You may prefer [`collect_into_vec()`] implemented on
+    /// [`IndexedParallelIterator`], if your underlying iterator also implements
+    /// it. [`collect_into_vec()`] allocates efficiently with precise knowledge
+    /// of how many elements the iterator contains, and even allows you to reuse
+    /// an existing vector's backing store rather than allocating a fresh vector.
+    ///
+    /// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+    /// [`collect_into_vec()`]:
+    ///     trait.IndexedParallelIterator.html#method.collect_into_vec
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let sync_vec: Vec<_> = (0..100).into_iter().collect();
+    ///
+    /// let async_vec: Vec<_> = (0..100).into_par_iter().collect();
+    ///
+    /// assert_eq!(sync_vec, async_vec);
+    /// ```
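+    ///
+    /// A rough sketch of the `collect_into_vec()` alternative mentioned above,
+    /// reusing an existing vector's backing store:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut reused = Vec::with_capacity(100);
+    /// (0..100).into_par_iter().collect_into_vec(&mut reused);
+    ///
+    /// assert_eq!(reused, (0..100).collect::<Vec<_>>());
+    /// ```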
+    fn collect<C>(self) -> C
+    where
+        C: FromParallelIterator<Self::Item>,
+    {
+        C::from_par_iter(self)
+    }
+
+    /// Unzips the items of a parallel iterator into a pair of arbitrary
+    /// `ParallelExtend` containers.
+    ///
+    /// You may prefer to use `unzip_into_vecs()`, which allocates more
+    /// efficiently with precise knowledge of how many elements the
+    /// iterator contains, and even allows you to reuse existing
+    /// vectors' backing stores rather than allocating fresh vectors.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [(0, 1), (1, 2), (2, 3), (3, 4)];
+    ///
+    /// let (left, right): (Vec<_>, Vec<_>) = a.par_iter().cloned().unzip();
+    ///
+    /// assert_eq!(left, [0, 1, 2, 3]);
+    /// assert_eq!(right, [1, 2, 3, 4]);
+    /// ```
+    ///
+    /// Nested pairs can be unzipped too.
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let (values, (squares, cubes)): (Vec<_>, (Vec<_>, Vec<_>)) = (0..4).into_par_iter()
+    ///     .map(|i| (i, (i * i, i * i * i)))
+    ///     .unzip();
+    ///
+    /// assert_eq!(values, [0, 1, 2, 3]);
+    /// assert_eq!(squares, [0, 1, 4, 9]);
+    /// assert_eq!(cubes, [0, 1, 8, 27]);
+    /// ```
+    fn unzip<A, B, FromA, FromB>(self) -> (FromA, FromB)
+    where
+        Self: ParallelIterator<Item = (A, B)>,
+        FromA: Default + Send + ParallelExtend<A>,
+        FromB: Default + Send + ParallelExtend<B>,
+        A: Send,
+        B: Send,
+    {
+        unzip::unzip(self)
+    }
+
+    /// Partitions the items of a parallel iterator into a pair of arbitrary
+    /// `ParallelExtend` containers.  Items for which the `predicate` returns
+    /// true go into the first container, and the rest go into the second.
+    ///
+    /// Note: unlike the standard `Iterator::partition`, this allows distinct
+    /// collection types for the left and right items.  This is more flexible,
+    /// but may require new type annotations when converting sequential code
+    /// that used type inference assuming the two were the same.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let (left, right): (Vec<_>, Vec<_>) = (0..8).into_par_iter().partition(|x| x % 2 == 0);
+    ///
+    /// assert_eq!(left, [0, 2, 4, 6]);
+    /// assert_eq!(right, [1, 3, 5, 7]);
+    /// ```
+    fn partition<A, B, P>(self, predicate: P) -> (A, B)
+    where
+        A: Default + Send + ParallelExtend<Self::Item>,
+        B: Default + Send + ParallelExtend<Self::Item>,
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        unzip::partition(self, predicate)
+    }
+
+    /// Partitions and maps the items of a parallel iterator into a pair of
+    /// arbitrary `ParallelExtend` containers.  `Either::Left` items go into
+    /// the first container, and `Either::Right` items go into the second.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use rayon::iter::Either;
+    ///
+    /// let (left, right): (Vec<_>, Vec<_>) = (0..8).into_par_iter()
+    ///     .partition_map(|x| {
+    ///         if x % 2 == 0 {
+    ///             Either::Left(x * 4)
+    ///         } else {
+    ///             Either::Right(x * 3)
+    ///         }
+    ///     });
+    ///
+    /// assert_eq!(left, [0, 8, 16, 24]);
+    /// assert_eq!(right, [3, 9, 15, 21]);
+    /// ```
+    ///
+    /// Nested `Either` enums can be split as well.
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use rayon::iter::Either::*;
+    ///
+    /// let ((fizzbuzz, fizz), (buzz, other)): ((Vec<_>, Vec<_>), (Vec<_>, Vec<_>)) = (1..20)
+    ///     .into_par_iter()
+    ///     .partition_map(|x| match (x % 3, x % 5) {
+    ///         (0, 0) => Left(Left(x)),
+    ///         (0, _) => Left(Right(x)),
+    ///         (_, 0) => Right(Left(x)),
+    ///         (_, _) => Right(Right(x)),
+    ///     });
+    ///
+    /// assert_eq!(fizzbuzz, [15]);
+    /// assert_eq!(fizz, [3, 6, 9, 12, 18]);
+    /// assert_eq!(buzz, [5, 10]);
+    /// assert_eq!(other, [1, 2, 4, 7, 8, 11, 13, 14, 16, 17, 19]);
+    /// ```
+    fn partition_map<A, B, P, L, R>(self, predicate: P) -> (A, B)
+    where
+        A: Default + Send + ParallelExtend<L>,
+        B: Default + Send + ParallelExtend<R>,
+        P: Fn(Self::Item) -> Either<L, R> + Sync + Send,
+        L: Send,
+        R: Send,
+    {
+        unzip::partition_map(self, predicate)
+    }
+
+    /// Intersperses clones of an element between items of this iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x = vec![1, 2, 3];
+    /// let r: Vec<_> = x.into_par_iter().intersperse(-1).collect();
+    ///
+    /// assert_eq!(r, vec![1, -1, 2, -1, 3]);
+    /// ```
+    fn intersperse(self, element: Self::Item) -> Intersperse<Self>
+    where
+        Self::Item: Clone,
+    {
+        Intersperse::new(self, element)
+    }
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// This method causes the iterator `self` to start producing
+    /// items and to feed them to the consumer `consumer` one by one.
+    /// It may split the consumer before doing so to create the
+    /// opportunity to produce in parallel.
+    ///
+    /// See the [README] for more details on the internals of parallel
+    /// iterators.
+    ///
+    /// [README]: README.md
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>;
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// Returns the number of items produced by this iterator, if known
+    /// statically. This can be used by consumers to trigger special fast
+    /// paths. Therefore, if `Some(_)` is returned, this iterator must only
+    /// use the (indexed) `Consumer` methods when driving a consumer, such
+    /// as `split_at()`. Calling `UnindexedConsumer::split_off_left()` or
+    /// other `UnindexedConsumer` methods -- or returning an inaccurate
+    /// value -- may result in panics.
+    ///
+    /// This method is currently used to optimize `collect` for want
+    /// of true Rust specialization; it may be removed when
+    /// specialization is stable.
+    fn opt_len(&self) -> Option<usize> {
+        None
+    }
+}
+
+impl<T: ParallelIterator> IntoParallelIterator for T {
+    type Iter = T;
+    type Item = T::Item;
+
+    fn into_par_iter(self) -> T {
+        self
+    }
+}
+
+/// An iterator that supports "random access" to its data, meaning
+/// that you can split it at arbitrary indices and draw data from
+/// those points.
+///
+/// **Note:** Not implemented for `u64`, `i64`, `u128`, or `i128` ranges
+pub trait IndexedParallelIterator: ParallelIterator {
+    /// Collects the results of the iterator into the specified
+    /// vector. The vector is always truncated before execution
+    /// begins. If possible, reusing the vector across calls can lead
+    /// to better performance since it reuses the same backing buffer.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// // any prior data will be truncated
+    /// let mut vec = vec![-1, -2, -3];
+    ///
+    /// (0..5).into_par_iter()
+    ///     .collect_into_vec(&mut vec);
+    ///
+    /// assert_eq!(vec, [0, 1, 2, 3, 4]);
+    /// ```
+    fn collect_into_vec(self, target: &mut Vec<Self::Item>) {
+        collect::collect_into_vec(self, target);
+    }
+
+    /// Unzips the results of the iterator into the specified
+    /// vectors. The vectors are always truncated before execution
+    /// begins. If possible, reusing the vectors across calls can lead
+    /// to better performance since they reuse the same backing buffer.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// // any prior data will be truncated
+    /// let mut left = vec![42; 10];
+    /// let mut right = vec![-1; 10];
+    ///
+    /// (10..15).into_par_iter()
+    ///     .enumerate()
+    ///     .unzip_into_vecs(&mut left, &mut right);
+    ///
+    /// assert_eq!(left, [0, 1, 2, 3, 4]);
+    /// assert_eq!(right, [10, 11, 12, 13, 14]);
+    /// ```
+    fn unzip_into_vecs<A, B>(self, left: &mut Vec<A>, right: &mut Vec<B>)
+    where
+        Self: IndexedParallelIterator<Item = (A, B)>,
+        A: Send,
+        B: Send,
+    {
+        collect::unzip_into_vecs(self, left, right);
+    }
+
+    /// Iterates over tuples `(A, B)`, where the items `A` are from
+    /// this iterator and `B` are from the iterator given as argument.
+    /// Like the `zip` method on ordinary iterators, if the two
+    /// iterators are of unequal length, you only get the items they
+    /// have in common.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (1..4)
+    ///     .into_par_iter()
+    ///     .zip(vec!['a', 'b', 'c'])
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [(1, 'a'), (2, 'b'), (3, 'c')]);
+    /// ```
+    fn zip<Z>(self, zip_op: Z) -> Zip<Self, Z::Iter>
+    where
+        Z: IntoParallelIterator,
+        Z::Iter: IndexedParallelIterator,
+    {
+        Zip::new(self, zip_op.into_par_iter())
+    }
+
+    /// The same as `zip()`, but requires that both iterators have the same length.
+    ///
+    /// # Panics
+    /// Will panic if `self` and `zip_op` are not the same length.
+    ///
+    /// ```should_panic
+    /// use rayon::prelude::*;
+    ///
+    /// let one = [1u8];
+    /// let two = [2u8, 2];
+    /// let one_iter = one.par_iter();
+    /// let two_iter = two.par_iter();
+    ///
+    /// // this will panic
+    /// let zipped: Vec<(&u8, &u8)> = one_iter.zip_eq(two_iter).collect();
+    ///
+    /// // we should never get here
+    /// assert_eq!(1, zipped.len());
+    /// ```
+    fn zip_eq<Z>(self, zip_op: Z) -> ZipEq<Self, Z::Iter>
+    where
+        Z: IntoParallelIterator,
+        Z::Iter: IndexedParallelIterator,
+    {
+        let zip_op_iter = zip_op.into_par_iter();
+        assert_eq!(self.len(), zip_op_iter.len());
+        ZipEq::new(self, zip_op_iter)
+    }
+
+    /// Interleaves elements of this iterator and the other given
+    /// iterator. Alternately yields elements from this iterator and
+    /// the given iterator, until both are exhausted. If one iterator
+    /// is exhausted before the other, the remaining elements are taken
+    /// from the other.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let (x, y) = (vec![1, 2], vec![3, 4, 5, 6]);
+    /// let r: Vec<i32> = x.into_par_iter().interleave(y).collect();
+    /// assert_eq!(r, vec![1, 3, 2, 4, 5, 6]);
+    /// ```
+    fn interleave<I>(self, other: I) -> Interleave<Self, I::Iter>
+    where
+        I: IntoParallelIterator<Item = Self::Item>,
+        I::Iter: IndexedParallelIterator<Item = Self::Item>,
+    {
+        Interleave::new(self, other.into_par_iter())
+    }
+
+    /// Interleaves elements of this iterator and the other given
+    /// iterator, until one is exhausted.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let (x, y) = (vec![1, 2, 3, 4], vec![5, 6]);
+    /// let r: Vec<i32> = x.into_par_iter().interleave_shortest(y).collect();
+    /// assert_eq!(r, vec![1, 5, 2, 6, 3]);
+    /// ```
+    fn interleave_shortest<I>(self, other: I) -> InterleaveShortest<Self, I::Iter>
+    where
+        I: IntoParallelIterator<Item = Self::Item>,
+        I::Iter: IndexedParallelIterator<Item = Self::Item>,
+    {
+        InterleaveShortest::new(self, other.into_par_iter())
+    }
+
+    /// Splits an iterator up into fixed-size chunks.
+    ///
+    /// Returns an iterator that returns `Vec`s of the given number of elements.
+    /// If the number of elements in the iterator is not divisible by `chunk_size`,
+    /// the last chunk may be shorter than `chunk_size`.
+    ///
+    /// See also [`par_chunks()`] and [`par_chunks_mut()`] for similar behavior on
+    /// slices, without having to allocate intermediate `Vec`s for the chunks.
+    ///
+    /// [`par_chunks()`]: ../slice/trait.ParallelSlice.html#method.par_chunks
+    /// [`par_chunks_mut()`]: ../slice/trait.ParallelSliceMut.html#method.par_chunks_mut
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    /// let r: Vec<Vec<i32>> = a.into_par_iter().chunks(3).collect();
+    /// assert_eq!(r, vec![vec![1,2,3], vec![4,5,6], vec![7,8,9], vec![10]]);
+    /// ```
+    fn chunks(self, chunk_size: usize) -> Chunks<Self> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        Chunks::new(self, chunk_size)
+    }
+
+    /// Lexicographically compares the elements of this `ParallelIterator` with those of
+    /// another.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::cmp::Ordering::*;
+    ///
+    /// let x = vec![1, 2, 3];
+    /// assert_eq!(x.par_iter().cmp(&vec![1, 3, 0]), Less);
+    /// assert_eq!(x.par_iter().cmp(&vec![1, 2, 3]), Equal);
+    /// assert_eq!(x.par_iter().cmp(&vec![1, 2]), Greater);
+    /// ```
+    fn cmp<I>(self, other: I) -> Ordering
+    where
+        I: IntoParallelIterator<Item = Self::Item>,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: Ord,
+    {
+        #[inline]
+        fn ordering<T: Ord>((x, y): (T, T)) -> Ordering {
+            Ord::cmp(&x, &y)
+        }
+
+        #[inline]
+        fn inequal(&ord: &Ordering) -> bool {
+            ord != Ordering::Equal
+        }
+
+        let other = other.into_par_iter();
+        let ord_len = self.len().cmp(&other.len());
+        self.zip(other)
+            .map(ordering)
+            .find_first(inequal)
+            .unwrap_or(ord_len)
+    }
+
+    /// Lexicographically compares the elements of this `ParallelIterator` with those of
+    /// another.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::cmp::Ordering::*;
+    /// use std::f64::NAN;
+    ///
+    /// let x = vec![1.0, 2.0, 3.0];
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 3.0, 0.0]), Some(Less));
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 2.0, 3.0]), Some(Equal));
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 2.0]), Some(Greater));
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, NAN]), None);
+    /// ```
+    fn partial_cmp<I>(self, other: I) -> Option<Ordering>
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        #[inline]
+        fn ordering<T: PartialOrd<U>, U>((x, y): (T, U)) -> Option<Ordering> {
+            PartialOrd::partial_cmp(&x, &y)
+        }
+
+        #[inline]
+        fn inequal(&ord: &Option<Ordering>) -> bool {
+            ord != Some(Ordering::Equal)
+        }
+
+        let other = other.into_par_iter();
+        let ord_len = self.len().cmp(&other.len());
+        self.zip(other)
+            .map(ordering)
+            .find_first(inequal)
+            .unwrap_or(Some(ord_len))
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are equal to those of another.
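+    ///
+    /// # Examples
+    ///
+    /// A minimal example:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x = vec![1, 2, 3];
+    /// assert!(x.par_iter().eq(&vec![1, 2, 3]));
+    /// assert!(!x.par_iter().eq(&vec![1, 2, 4]));
+    /// assert!(!x.par_iter().eq(&vec![1, 2])); // unequal lengths are never equal
+    /// ```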
+    fn eq<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialEq<I::Item>,
+    {
+        #[inline]
+        fn eq<T: PartialEq<U>, U>((x, y): (T, U)) -> bool {
+            PartialEq::eq(&x, &y)
+        }
+
+        let other = other.into_par_iter();
+        self.len() == other.len() && self.zip(other).all(eq)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are unequal to those of another
+    fn ne<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialEq<I::Item>,
+    {
+        !self.eq(other)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are lexicographically less than those of another.
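+    ///
+    /// # Examples
+    ///
+    /// A minimal example:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x = vec![1, 2, 3];
+    /// assert!(x.par_iter().lt(&vec![1, 2, 4]));
+    /// assert!(!x.par_iter().lt(&vec![1, 2, 3]));
+    /// ```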
+    fn lt<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        self.partial_cmp(other) == Some(Ordering::Less)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are lexicographically less than or equal to those of another.
+    fn le<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        let ord = self.partial_cmp(other);
+        ord == Some(Ordering::Equal) || ord == Some(Ordering::Less)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are lexicographically greater than those of another.
+    fn gt<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        self.partial_cmp(other) == Some(Ordering::Greater)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are lexicographically greater than or equal to those of another.
+    fn ge<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        let ord = self.partial_cmp(other);
+        ord == Some(Ordering::Equal) || ord == Some(Ordering::Greater)
+    }
+
+    /// Yields an index along with each item.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let chars = vec!['a', 'b', 'c'];
+    /// let result: Vec<_> = chars
+    ///     .into_par_iter()
+    ///     .enumerate()
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [(0, 'a'), (1, 'b'), (2, 'c')]);
+    /// ```
+    fn enumerate(self) -> Enumerate<Self> {
+        Enumerate::new(self)
+    }
+
+    /// Creates an iterator that steps by the given amount.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let range = 3..10;
+    /// let result: Vec<i32> = range
+    ///     .into_par_iter()
+    ///     .step_by(3)
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [3, 6, 9]);
+    /// ```
+    ///
+    /// # Compatibility
+    ///
+    /// This method is only available on Rust 1.38 or greater.
+    #[cfg(step_by)]
+    fn step_by(self, step: usize) -> StepBy<Self> {
+        StepBy::new(self, step)
+    }
+
+    /// Creates an iterator that skips the first `n` elements.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (0..100)
+    ///     .into_par_iter()
+    ///     .skip(95)
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [95, 96, 97, 98, 99]);
+    /// ```
+    fn skip(self, n: usize) -> Skip<Self> {
+        Skip::new(self, n)
+    }
+
+    /// Creates an iterator that yields the first `n` elements.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (0..100)
+    ///     .into_par_iter()
+    ///     .take(5)
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [0, 1, 2, 3, 4]);
+    /// ```
+    fn take(self, n: usize) -> Take<Self> {
+        Take::new(self, n)
+    }
+
+    /// Searches for **some** item in the parallel iterator that
+    /// matches the given predicate, and returns its index.  Like
+    /// `ParallelIterator::find_any`, the parallel search will not
+    /// necessarily find the **first** match, and once a match is
+    /// found we'll attempt to stop processing any more.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// let i = a.par_iter().position_any(|&x| x == 3).expect("found");
+    /// assert!(i == 2 || i == 3);
+    ///
+    /// assert_eq!(a.par_iter().position_any(|&x| x == 100), None);
+    /// ```
+    fn position_any<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn check(&(_, p): &(usize, bool)) -> bool {
+            p
+        }
+
+        let (i, _) = self.map(predicate).enumerate().find_any(check)?;
+        Some(i)
+    }
+
+    /// Searches for the sequentially **first** item in the parallel iterator
+    /// that matches the given predicate, and returns its index.
+    ///
+    /// Like `ParallelIterator::find_first`, once a match is found,
+    /// all attempts to the right of the match will be stopped, while
+    /// attempts to the left must continue in case an earlier match
+    /// is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous.  If you
+    /// just want the first match discovered anywhere in the iterator,
+    /// `position_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().position_first(|&x| x == 3), Some(2));
+    ///
+    /// assert_eq!(a.par_iter().position_first(|&x| x == 100), None);
+    /// ```
+    fn position_first<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn check(&(_, p): &(usize, bool)) -> bool {
+            p
+        }
+
+        let (i, _) = self.map(predicate).enumerate().find_first(check)?;
+        Some(i)
+    }
+
+    /// Searches for the sequentially **last** item in the parallel iterator
+    /// that matches the given predicate, and returns its index.
+    ///
+    /// Like `ParallelIterator::find_last`, once a match is found,
+    /// all attempts to the left of the match will be stopped, while
+    /// attempts to the right must continue in case a later match
+    /// is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "last" may be nebulous.  When the
+    /// order doesn't actually matter to you, `position_any` is a better
+    /// choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().position_last(|&x| x == 3), Some(3));
+    ///
+    /// assert_eq!(a.par_iter().position_last(|&x| x == 100), None);
+    /// ```
+    fn position_last<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn check(&(_, p): &(usize, bool)) -> bool {
+            p
+        }
+
+        let (i, _) = self.map(predicate).enumerate().find_last(check)?;
+        Some(i)
+    }
+
+    #[doc(hidden)]
+    #[deprecated(
+        note = "parallel `position` does not search in order -- use `position_any`, \
+                `position_first`, or `position_last`"
+    )]
+    fn position<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        self.position_any(predicate)
+    }
+
+    /// Searches for items in the parallel iterator that match the given
+    /// predicate, and returns their indices.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let primes = vec![2, 3, 5, 7, 11, 13, 17, 19, 23, 29];
+    ///
+    /// // Find the positions of primes congruent to 1 modulo 6
+    /// let p1mod6: Vec<_> = primes.par_iter().positions(|&p| p % 6 == 1).collect();
+    /// assert_eq!(p1mod6, [3, 5, 7]); // primes 7, 13, and 19
+    ///
+    /// // Find the positions of primes congruent to 5 modulo 6
+    /// let p5mod6: Vec<_> = primes.par_iter().positions(|&p| p % 6 == 5).collect();
+    /// assert_eq!(p5mod6, [2, 4, 6, 8, 9]); // primes 5, 11, 17, 23, and 29
+    /// ```
+    fn positions<P>(self, predicate: P) -> Positions<Self, P>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        Positions::new(self, predicate)
+    }
+
+    /// Produces a new iterator with the elements of this iterator in
+    /// reverse order.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (0..5)
+    ///     .into_par_iter()
+    ///     .rev()
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [4, 3, 2, 1, 0]);
+    /// ```
+    fn rev(self) -> Rev<Self> {
+        Rev::new(self)
+    }
+
+    /// Sets the minimum length of iterators desired to process in each
+    /// thread.  Rayon will not split any smaller than this length, but
+    /// of course an iterator could already be smaller to begin with.
+    ///
+    /// Producers like `zip` and `interleave` will use the greater of the two
+    /// minimums.
+    /// Chained iterators and iterators inside `flat_map` may each use
+    /// their own minimum length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let min = (0..1_000_000)
+    ///     .into_par_iter()
+    ///     .with_min_len(1234)
+    ///     .fold(|| 0, |acc, _| acc + 1) // count how many are in this segment
+    ///     .min().unwrap();
+    ///
+    /// assert!(min >= 1234);
+    /// ```
+    fn with_min_len(self, min: usize) -> MinLen<Self> {
+        MinLen::new(self, min)
+    }
+
+    /// Sets the maximum length of iterators desired to process in each
+    /// thread.  Rayon will try to split at least below this length,
+    /// unless that would put it below the length from `with_min_len()`.
+    /// For example, given min=10 and max=15, a length of 16 will not be
+    /// split any further.
+    ///
+    /// Producers like `zip` and `interleave` will use the lesser of the two
+    /// maximums.
+    /// Chained iterators and iterators inside `flat_map` may each use
+    /// their own maximum length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let max = (0..1_000_000)
+    ///     .into_par_iter()
+    ///     .with_max_len(1234)
+    ///     .fold(|| 0, |acc, _| acc + 1) // count how many are in this segment
+    ///     .max().unwrap();
+    ///
+    /// assert!(max <= 1234);
+    /// ```
+    fn with_max_len(self, max: usize) -> MaxLen<Self> {
+        MaxLen::new(self, max)
+    }
+
+    /// Produces an exact count of how many items this iterator will
+    /// produce, presuming no panic occurs.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let par_iter = (0..100).into_par_iter().zip(vec![0; 10]);
+    /// assert_eq!(par_iter.len(), 10);
+    ///
+    /// let vec: Vec<_> = par_iter.collect();
+    /// assert_eq!(vec.len(), 10);
+    /// ```
+    fn len(&self) -> usize;
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// This method causes the iterator `self` to start producing
+    /// items and to feed them to the consumer `consumer` one by one.
+    /// It may split the consumer before doing so to create the
+    /// opportunity to produce in parallel. If a split does happen, it
+    /// will inform the consumer of the index where the split should
+    /// occur (unlike `ParallelIterator::drive_unindexed()`).
+    ///
+    /// See the [README] for more details on the internals of parallel
+    /// iterators.
+    ///
+    /// [README]: README.md
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result;
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// This method converts the iterator into a producer P and then
+    /// invokes `callback.callback()` with P. Note that the type of
+    /// this producer is not defined as part of the API, since
+    /// `callback` must be defined generically for all producers. This
+    /// allows the producer type to contain references; it also means
+    /// that parallel iterators can adjust that type without causing a
+    /// breaking change.
+    ///
+    /// See the [README] for more details on the internals of parallel
+    /// iterators.
+    ///
+    /// [README]: README.md
+    fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output;
+}
+
+/// `FromParallelIterator` implements the creation of a collection
+/// from a [`ParallelIterator`]. By implementing
+/// `FromParallelIterator` for a given type, you define how it will be
+/// created from an iterator.
+///
+/// `FromParallelIterator` is used through [`ParallelIterator`]'s [`collect()`] method.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`collect()`]: trait.ParallelIterator.html#method.collect
+///
+/// # Examples
+///
+/// Implementing `FromParallelIterator` for your type:
+///
+/// ```
+/// use rayon::prelude::*;
+/// use std::mem;
+///
+/// struct BlackHole {
+///     mass: usize,
+/// }
+///
+/// impl<T: Send> FromParallelIterator<T> for BlackHole {
+///     fn from_par_iter<I>(par_iter: I) -> Self
+///         where I: IntoParallelIterator<Item = T>
+///     {
+///         let par_iter = par_iter.into_par_iter();
+///         BlackHole {
+///             mass: par_iter.count() * mem::size_of::<T>(),
+///         }
+///     }
+/// }
+///
+/// let bh: BlackHole = (0i32..1000).into_par_iter().collect();
+/// assert_eq!(bh.mass, 4000);
+/// ```
+pub trait FromParallelIterator<T>
+where
+    T: Send,
+{
+    /// Creates an instance of the collection from the parallel iterator `par_iter`.
+    ///
+    /// If your collection is not naturally parallel, the easiest (and
+    /// fastest) way to do this is often to collect `par_iter` into a
+    /// [`LinkedList`] or other intermediate data structure and then
+    /// sequentially extend your collection. However, a more 'native'
+    /// technique is to use the [`par_iter.fold`] or
+    /// [`par_iter.fold_with`] methods to create the collection.
+    /// Alternatively, if your collection is 'natively' parallel, you
+    /// can use [`par_iter.for_each`] to process each element in turn.
+    ///
+    /// [`LinkedList`]: https://doc.rust-lang.org/std/collections/struct.LinkedList.html
+    /// [`par_iter.fold`]: trait.ParallelIterator.html#method.fold
+    /// [`par_iter.fold_with`]: trait.ParallelIterator.html#method.fold_with
+    /// [`par_iter.for_each`]: trait.ParallelIterator.html#method.for_each
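+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch of the "collect into an intermediate, then extend"
+    /// approach described above, shown for a hypothetical wrapper type `Bag`:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::collections::LinkedList;
+    ///
+    /// struct Bag<T> {
+    ///     items: Vec<T>,
+    /// }
+    ///
+    /// impl<T: Send> FromParallelIterator<T> for Bag<T> {
+    ///     fn from_par_iter<I>(par_iter: I) -> Self
+    ///         where I: IntoParallelIterator<Item = T>
+    ///     {
+    ///         // Collect in parallel into a LinkedList, then extend sequentially.
+    ///         let list: LinkedList<T> = par_iter.into_par_iter().collect();
+    ///         let mut items = Vec::with_capacity(list.len());
+    ///         items.extend(list);
+    ///         Bag { items }
+    ///     }
+    /// }
+    ///
+    /// let bag: Bag<i32> = (0..100).into_par_iter().collect();
+    /// assert_eq!(bag.items.len(), 100);
+    /// ```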
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>;
+}
+
+/// `ParallelExtend` extends an existing collection with items from a [`ParallelIterator`].
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+///
+/// # Examples
+///
+/// Implementing `ParallelExtend` for your type:
+///
+/// ```
+/// use rayon::prelude::*;
+/// use std::mem;
+///
+/// struct BlackHole {
+///     mass: usize,
+/// }
+///
+/// impl<T: Send> ParallelExtend<T> for BlackHole {
+///     fn par_extend<I>(&mut self, par_iter: I)
+///         where I: IntoParallelIterator<Item = T>
+///     {
+///         let par_iter = par_iter.into_par_iter();
+///         self.mass += par_iter.count() * mem::size_of::<T>();
+///     }
+/// }
+///
+/// let mut bh = BlackHole { mass: 0 };
+/// bh.par_extend(0i32..1000);
+/// assert_eq!(bh.mass, 4000);
+/// bh.par_extend(0i64..10);
+/// assert_eq!(bh.mass, 4080);
+/// ```
+pub trait ParallelExtend<T>
+where
+    T: Send,
+{
+    /// Extends an instance of the collection with the elements drawn
+    /// from the parallel iterator `par_iter`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut vec = vec![];
+    /// vec.par_extend(0..5);
+    /// vec.par_extend((0..5).into_par_iter().map(|i| i * i));
+    /// assert_eq!(vec, [0, 1, 2, 3, 4, 0, 1, 4, 9, 16]);
+    /// ```
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>;
+}
+
+/// `ParallelDrainFull` creates a parallel iterator that moves all items
+/// from a collection while retaining the original capacity.
+///
+/// Types which are indexable typically implement [`ParallelDrainRange`]
+/// instead, where you can drain fully with `par_drain(..)`.
+///
+/// [`ParallelDrainRange`]: trait.ParallelDrainRange.html
+pub trait ParallelDrainFull {
+    /// The draining parallel iterator type that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    /// This is usually the same as `IntoParallelIterator::Item`.
+    type Item: Send;
+
+    /// Returns a draining parallel iterator over an entire collection.
+    ///
+    /// When the iterator is dropped, all items are removed, even if the
+    /// iterator was not fully consumed. If the iterator is leaked, for example
+    /// using `std::mem::forget`, it is unspecified how many items are removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::collections::{BinaryHeap, HashSet};
+    ///
+    /// let squares: HashSet<i32> = (0..10).map(|x| x * x).collect();
+    ///
+    /// let mut heap: BinaryHeap<_> = squares.iter().copied().collect();
+    /// assert_eq!(
+    ///     // heaps are drained in arbitrary order
+    ///     heap.par_drain()
+    ///         .inspect(|x| assert!(squares.contains(x)))
+    ///         .count(),
+    ///     squares.len(),
+    /// );
+    /// assert!(heap.is_empty());
+    /// assert!(heap.capacity() >= squares.len());
+    /// ```
+    fn par_drain(self) -> Self::Iter;
+}
+
+/// `ParallelDrainRange` creates a parallel iterator that moves a range of items
+/// from a collection while retaining the original capacity.
+///
+/// Types which are not indexable may implement [`ParallelDrainFull`] instead.
+///
+/// [`ParallelDrainFull`]: trait.ParallelDrainFull.html
+pub trait ParallelDrainRange<Idx = usize> {
+    /// The draining parallel iterator type that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    /// This is usually the same as `IntoParallelIterator::Item`.
+    type Item: Send;
+
+    /// Returns a draining parallel iterator over a range of the collection.
+    ///
+    /// When the iterator is dropped, all items in the range are removed, even
+    /// if the iterator was not fully consumed. If the iterator is leaked, for
+    /// example using `std::mem::forget`, it is unspecified how many items are
+    /// removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let squares: Vec<i32> = (0..10).map(|x| x * x).collect();
+    ///
+    /// println!("RangeFull");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(..)
+    ///            .eq(squares.par_iter().copied()));
+    /// assert!(vec.is_empty());
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeFrom");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(5..)
+    ///            .eq(squares[5..].par_iter().copied()));
+    /// assert_eq!(&vec[..], &squares[..5]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeTo");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(..5)
+    ///            .eq(squares[..5].par_iter().copied()));
+    /// assert_eq!(&vec[..], &squares[5..]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeToInclusive");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(..=5)
+    ///            .eq(squares[..=5].par_iter().copied()));
+    /// assert_eq!(&vec[..], &squares[6..]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("Range");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(3..7)
+    ///            .eq(squares[3..7].par_iter().copied()));
+    /// assert_eq!(&vec[..3], &squares[..3]);
+    /// assert_eq!(&vec[3..], &squares[7..]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeInclusive");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(3..=7)
+    ///            .eq(squares[3..=7].par_iter().copied()));
+    /// assert_eq!(&vec[..3], &squares[..3]);
+    /// assert_eq!(&vec[3..], &squares[8..]);
+    /// assert!(vec.capacity() >= squares.len());
+    /// ```
+    fn par_drain<R: RangeBounds<Idx>>(self, range: R) -> Self::Iter;
+}
+
+/// We hide the `Try` trait in a private module, as it's only meant to be a
+/// stable clone of the standard library's `Try` trait, as yet unstable.
+mod private {
+    /// Clone of `std::ops::Try`.
+    ///
+    /// Implementing this trait is not permitted outside of `rayon`.
+    pub trait Try {
+        private_decl! {}
+
+        type Ok;
+        type Error;
+        fn into_result(self) -> Result<Self::Ok, Self::Error>;
+        fn from_ok(v: Self::Ok) -> Self;
+        fn from_error(v: Self::Error) -> Self;
+    }
+
+    impl<T> Try for Option<T> {
+        private_impl! {}
+
+        type Ok = T;
+        type Error = ();
+
+        fn into_result(self) -> Result<T, ()> {
+            self.ok_or(())
+        }
+        fn from_ok(v: T) -> Self {
+            Some(v)
+        }
+        fn from_error(_: ()) -> Self {
+            None
+        }
+    }
+
+    impl<T, E> Try for Result<T, E> {
+        private_impl! {}
+
+        type Ok = T;
+        type Error = E;
+
+        fn into_result(self) -> Result<T, E> {
+            self
+        }
+        fn from_ok(v: T) -> Self {
+            Ok(v)
+        }
+        fn from_error(v: E) -> Self {
+            Err(v)
+        }
+    }
+}
diff --git a/src/iter/multizip.rs b/src/iter/multizip.rs
new file mode 100644
index 0000000..8e36d08
--- /dev/null
+++ b/src/iter/multizip.rs
@@ -0,0 +1,338 @@
+use super::plumbing::*;
+use super::*;
+
+/// `MultiZip` is an iterator that zips up a tuple of parallel iterators to
+/// produce tuples of their items.
+///
+/// It is created by calling `into_par_iter()` on a tuple of types that
+/// implement `IntoParallelIterator`, or `par_iter()`/`par_iter_mut()` with
+/// types that are iterable by reference.
+///
+/// The implementation currently supports tuples up to length 12.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// // This will iterate `r` by mutable reference, like `par_iter_mut()`, while
+/// // ranges are all iterated by value like `into_par_iter()`.
+/// // Note that the zipped iterator is only as long as the shortest input.
+/// let mut r = vec![0; 3];
+/// (&mut r, 1..10, 10..100, 100..1000).into_par_iter()
+///     .for_each(|(r, x, y, z)| *r = x * y + z);
+///
+/// assert_eq!(&r, &[1 * 10 + 100, 2 * 11 + 101, 3 * 12 + 102]);
+/// ```
+///
+/// For a group that should all be iterated by reference, you can use a tuple reference.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let xs: Vec<_> = (1..10).collect();
+/// let ys: Vec<_> = (10..100).collect();
+/// let zs: Vec<_> = (100..1000).collect();
+///
+/// // Reference each input separately with `IntoParallelIterator`:
+/// let r1: Vec<_> = (&xs, &ys, &zs).into_par_iter()
+///     .map(|(x, y, z)| x * y + z)
+///     .collect();
+///
+/// // Reference them all together with `IntoParallelRefIterator`:
+/// let r2: Vec<_> = (xs, ys, zs).par_iter()
+///     .map(|(x, y, z)| x * y + z)
+///     .collect();
+///
+/// assert_eq!(r1, r2);
+/// ```
+///
+/// Mutable references to a tuple will work similarly.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let mut xs: Vec<_> = (1..4).collect();
+/// let mut ys: Vec<_> = (-4..-1).collect();
+/// let mut zs = vec![0; 3];
+///
+/// // Mutably reference each input separately with `IntoParallelIterator`:
+/// (&mut xs, &mut ys, &mut zs).into_par_iter().for_each(|(x, y, z)| {
+///     *z += *x + *y;
+///     std::mem::swap(x, y);
+/// });
+///
+/// assert_eq!(xs, (vec![-4, -3, -2]));
+/// assert_eq!(ys, (vec![1, 2, 3]));
+/// assert_eq!(zs, (vec![-3, -1, 1]));
+///
+/// // Mutably reference them all together with `IntoParallelRefMutIterator`:
+/// let mut tuple = (xs, ys, zs);
+/// tuple.par_iter_mut().for_each(|(x, y, z)| {
+///     *z += *x + *y;
+///     std::mem::swap(x, y);
+/// });
+///
+/// assert_eq!(tuple, (vec![1, 2, 3], vec![-4, -3, -2], vec![-6, -2, 2]));
+/// ```
+#[derive(Debug, Clone)]
+pub struct MultiZip<T> {
+    tuple: T,
+}
+
+// These macros greedily consume 4 or 2 items first to achieve log2 nesting depth.
+// For example, 5 => 4,1 => (2,2),1.
+//
+// The tuples go up to 12, so we might want to greedily consume 8 too, but
+// the depth works out the same if we let that expand on the right:
+//      9 => 4,5 => (2,2),(4,1) => (2,2),((2,2),1)
+//     12 => 4,8 => (2,2),(4,4) => (2,2),((2,2),(2,2))
+//
+// But if we ever increase to 13, we would want to split 8,5 rather than 4,9.
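+//
+// As an illustration, zipping five indexed iterators via
+// `reduce!(a, b, c, d, e => IndexedParallelIterator::zip)` expands to
+// `zip(zip(zip(a, b), zip(c, d)), e)`, keeping the nesting depth logarithmic.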
+
+macro_rules! reduce {
+    ($a:expr, $b:expr, $c:expr, $d:expr, $( $x:expr ),+ => $fn:path) => {
+        reduce!(reduce!($a, $b, $c, $d => $fn),
+                reduce!($( $x ),+ => $fn)
+                => $fn)
+    };
+    ($a:expr, $b:expr, $( $x:expr ),+ => $fn:path) => {
+        reduce!(reduce!($a, $b => $fn),
+                reduce!($( $x ),+ => $fn)
+                => $fn)
+    };
+    ($a:expr, $b:expr => $fn:path) => { $fn($a, $b) };
+    ($a:expr => $fn:path) => { $a };
+}
+
+macro_rules! nest {
+    ($A:tt, $B:tt, $C:tt, $D:tt, $( $X:tt ),+) => {
+        (nest!($A, $B, $C, $D), nest!($( $X ),+))
+    };
+    ($A:tt, $B:tt, $( $X:tt ),+) => {
+        (($A, $B), nest!($( $X ),+))
+    };
+    ($A:tt, $B:tt) => { ($A, $B) };
+    ($A:tt) => { $A };
+}
+
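+// `flatten!` expands to a helper function whose argument pattern and type are
+// both the nested tuple produced by `nest!`, destructuring it back into a flat
+// tuple; the bindings reuse the type parameter names, hence `non_snake_case`.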
+macro_rules! flatten {
+    ($( $T:ident ),+) => {{
+        #[allow(non_snake_case)]
+        fn flatten<$( $T ),+>(nest!($( $T ),+) : nest!($( $T ),+)) -> ($( $T, )+) {
+            ($( $T, )+)
+        }
+        flatten
+    }};
+}
+
+macro_rules! multizip_impls {
+    ($(
+        $Tuple:ident {
+            $(($idx:tt) -> $T:ident)+
+        }
+    )+) => {
+        $(
+            impl<$( $T, )+> IntoParallelIterator for ($( $T, )+)
+            where
+                $(
+                    $T: IntoParallelIterator,
+                    $T::Iter: IndexedParallelIterator,
+                )+
+            {
+                type Item = ($( $T::Item, )+);
+                type Iter = MultiZip<($( $T::Iter, )+)>;
+
+                fn into_par_iter(self) -> Self::Iter {
+                    MultiZip {
+                        tuple: ( $( self.$idx.into_par_iter(), )+ ),
+                    }
+                }
+            }
+
+            impl<'a, $( $T, )+> IntoParallelIterator for &'a ($( $T, )+)
+            where
+                $(
+                    $T: IntoParallelRefIterator<'a>,
+                    $T::Iter: IndexedParallelIterator,
+                )+
+            {
+                type Item = ($( $T::Item, )+);
+                type Iter = MultiZip<($( $T::Iter, )+)>;
+
+                fn into_par_iter(self) -> Self::Iter {
+                    MultiZip {
+                        tuple: ( $( self.$idx.par_iter(), )+ ),
+                    }
+                }
+            }
+
+            impl<'a, $( $T, )+> IntoParallelIterator for &'a mut ($( $T, )+)
+            where
+                $(
+                    $T: IntoParallelRefMutIterator<'a>,
+                    $T::Iter: IndexedParallelIterator,
+                )+
+            {
+                type Item = ($( $T::Item, )+);
+                type Iter = MultiZip<($( $T::Iter, )+)>;
+
+                fn into_par_iter(self) -> Self::Iter {
+                    MultiZip {
+                        tuple: ( $( self.$idx.par_iter_mut(), )+ ),
+                    }
+                }
+            }
+
+            impl<$( $T, )+> ParallelIterator for MultiZip<($( $T, )+)>
+            where
+                $( $T: IndexedParallelIterator, )+
+            {
+                type Item = ($( $T::Item, )+);
+
+                fn drive_unindexed<CONSUMER>(self, consumer: CONSUMER) -> CONSUMER::Result
+                where
+                    CONSUMER: UnindexedConsumer<Self::Item>,
+                {
+                    self.drive(consumer)
+                }
+
+                fn opt_len(&self) -> Option<usize> {
+                    Some(self.len())
+                }
+            }
+
+            impl<$( $T, )+> IndexedParallelIterator for MultiZip<($( $T, )+)>
+            where
+                $( $T: IndexedParallelIterator, )+
+            {
+                fn drive<CONSUMER>(self, consumer: CONSUMER) -> CONSUMER::Result
+                where
+                    CONSUMER: Consumer<Self::Item>,
+                {
+                    reduce!($( self.tuple.$idx ),+ => IndexedParallelIterator::zip)
+                        .map(flatten!($( $T ),+))
+                        .drive(consumer)
+                }
+
+                fn len(&self) -> usize {
+                    reduce!($( self.tuple.$idx.len() ),+ => Ord::min)
+                }
+
+                fn with_producer<CB>(self, callback: CB) -> CB::Output
+                where
+                    CB: ProducerCallback<Self::Item>,
+                {
+                    reduce!($( self.tuple.$idx ),+ => IndexedParallelIterator::zip)
+                        .map(flatten!($( $T ),+))
+                        .with_producer(callback)
+                }
+            }
+        )+
+    }
+}
+
+multizip_impls! {
+    Tuple1 {
+        (0) -> A
+    }
+    Tuple2 {
+        (0) -> A
+        (1) -> B
+    }
+    Tuple3 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+    }
+    Tuple4 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+    }
+    Tuple5 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+    }
+    Tuple6 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+    }
+    Tuple7 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+    }
+    Tuple8 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+    }
+    Tuple9 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+    }
+    Tuple10 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+        (9) -> J
+    }
+    Tuple11 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+        (9) -> J
+        (10) -> K
+    }
+    Tuple12 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+        (9) -> J
+        (10) -> K
+        (11) -> L
+    }
+}
diff --git a/src/iter/noop.rs b/src/iter/noop.rs
new file mode 100644
index 0000000..1e55ecb
--- /dev/null
+++ b/src/iter/noop.rs
@@ -0,0 +1,59 @@
+use super::plumbing::*;
+
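+/// A consumer and folder that simply discards every item it is given,
+/// producing `()` as its result.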
+pub(super) struct NoopConsumer;
+
+impl<T> Consumer<T> for NoopConsumer {
+    type Folder = NoopConsumer;
+    type Reducer = NoopReducer;
+    type Result = ();
+
+    fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) {
+        (NoopConsumer, NoopConsumer, NoopReducer)
+    }
+
+    fn into_folder(self) -> Self {
+        self
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<T> Folder<T> for NoopConsumer {
+    type Result = ();
+
+    fn consume(self, _item: T) -> Self {
+        self
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        iter.into_iter().for_each(drop);
+        self
+    }
+
+    fn complete(self) {}
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<T> UnindexedConsumer<T> for NoopConsumer {
+    fn split_off_left(&self) -> Self {
+        NoopConsumer
+    }
+
+    fn to_reducer(&self) -> NoopReducer {
+        NoopReducer
+    }
+}
+
+pub(super) struct NoopReducer;
+
+impl Reducer<()> for NoopReducer {
+    fn reduce(self, _left: (), _right: ()) {}
+}
diff --git a/src/iter/once.rs b/src/iter/once.rs
new file mode 100644
index 0000000..5140b6b
--- /dev/null
+++ b/src/iter/once.rs
@@ -0,0 +1,68 @@
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+/// Creates a parallel iterator that produces an element exactly once.
+///
+/// This admits no parallelism on its own, but it could be chained to existing
+/// parallel iterators to extend their contents, or otherwise used for any code
+/// that deals with generic parallel iterators.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::once;
+///
+/// let pi = (0..1234).into_par_iter()
+///     .chain(once(-1))
+///     .chain(1234..10_000);
+///
+/// assert_eq!(pi.clone().count(), 10_001);
+/// assert_eq!(pi.clone().filter(|&x| x < 0).count(), 1);
+/// assert_eq!(pi.position_any(|x| x < 0), Some(1234));
+/// ```
+pub fn once<T: Send>(item: T) -> Once<T> {
+    Once { item }
+}
+
+/// Iterator adaptor for [the `once()` function](fn.once.html).
+#[derive(Clone, Debug)]
+pub struct Once<T: Send> {
+    item: T,
+}
+
+impl<T: Send> ParallelIterator for Once<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.drive(consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(1)
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for Once<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        consumer.into_folder().consume(self.item).complete()
+    }
+
+    fn len(&self) -> usize {
+        1
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // Let `OptionProducer` handle it.
+        Some(self.item).into_par_iter().with_producer(callback)
+    }
+}
diff --git a/src/iter/panic_fuse.rs b/src/iter/panic_fuse.rs
new file mode 100644
index 0000000..7487230
--- /dev/null
+++ b/src/iter/panic_fuse.rs
@@ -0,0 +1,342 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::thread;
+
+/// `PanicFuse` is an adaptor that wraps an iterator with a fuse in case
+/// of panics, to halt all threads as soon as possible.
+///
+/// This struct is created by the [`panic_fuse()`] method on [`ParallelIterator`]
+///
+/// [`panic_fuse()`]: trait.ParallelIterator.html#method.panic_fuse
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct PanicFuse<I: ParallelIterator> {
+    base: I,
+}
+
+/// Helper that sets a bool to `true` if dropped while unwinding.
+#[derive(Clone)]
+struct Fuse<'a>(&'a AtomicBool);
+
+impl<'a> Drop for Fuse<'a> {
+    #[inline]
+    fn drop(&mut self) {
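+        // `thread::panicking()` is true only while unwinding, so a panic in any
+        // clone of this fuse trips the shared flag for all of them.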
+        if thread::panicking() {
+            self.0.store(true, Ordering::Relaxed);
+        }
+    }
+}
+
+impl<'a> Fuse<'a> {
+    #[inline]
+    fn panicked(&self) -> bool {
+        self.0.load(Ordering::Relaxed)
+    }
+}
+
+impl<I> PanicFuse<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `PanicFuse` iterator.
+    pub(super) fn new(base: I) -> PanicFuse<I> {
+        PanicFuse { base }
+    }
+}
+
+impl<I> ParallelIterator for PanicFuse<I>
+where
+    I: ParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let panicked = AtomicBool::new(false);
+        let consumer1 = PanicFuseConsumer {
+            base: consumer,
+            fuse: Fuse(&panicked),
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I> IndexedParallelIterator for PanicFuse<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let panicked = AtomicBool::new(false);
+        let consumer1 = PanicFuseConsumer {
+            base: consumer,
+            fuse: Fuse(&panicked),
+        };
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let panicked = AtomicBool::new(false);
+                let producer = PanicFuseProducer {
+                    base,
+                    fuse: Fuse(&panicked),
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Producer implementation
+
+struct PanicFuseProducer<'a, P> {
+    base: P,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, P> Producer for PanicFuseProducer<'a, P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = PanicFuseIter<'a, P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        PanicFuseIter {
+            base: self.base.into_iter(),
+            fuse: self.fuse,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            PanicFuseProducer {
+                base: left,
+                fuse: self.fuse.clone(),
+            },
+            PanicFuseProducer {
+                base: right,
+                fuse: self.fuse,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = PanicFuseFolder {
+            base: folder,
+            fuse: self.fuse,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+struct PanicFuseIter<'a, I> {
+    base: I,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, I> Iterator for PanicFuseIter<'a, I>
+where
+    I: Iterator,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.fuse.panicked() {
+            None
+        } else {
+            self.base.next()
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.base.size_hint()
+    }
+}
+
+impl<'a, I> DoubleEndedIterator for PanicFuseIter<'a, I>
+where
+    I: DoubleEndedIterator,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.fuse.panicked() {
+            None
+        } else {
+            self.base.next_back()
+        }
+    }
+}
+
+impl<'a, I> ExactSizeIterator for PanicFuseIter<'a, I>
+where
+    I: ExactSizeIterator,
+{
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct PanicFuseConsumer<'a, C> {
+    base: C,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, T, C> Consumer<T> for PanicFuseConsumer<'a, C>
+where
+    C: Consumer<T>,
+{
+    type Folder = PanicFuseFolder<'a, C::Folder>;
+    type Reducer = PanicFuseReducer<'a, C::Reducer>;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            PanicFuseConsumer {
+                base: left,
+                fuse: self.fuse.clone(),
+            },
+            PanicFuseConsumer {
+                base: right,
+                fuse: self.fuse.clone(),
+            },
+            PanicFuseReducer {
+                base: reducer,
+                _fuse: self.fuse,
+            },
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        PanicFuseFolder {
+            base: self.base.into_folder(),
+            fuse: self.fuse,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.fuse.panicked() || self.base.full()
+    }
+}
+
+impl<'a, T, C> UnindexedConsumer<T> for PanicFuseConsumer<'a, C>
+where
+    C: UnindexedConsumer<T>,
+{
+    fn split_off_left(&self) -> Self {
+        PanicFuseConsumer {
+            base: self.base.split_off_left(),
+            fuse: self.fuse.clone(),
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        PanicFuseReducer {
+            base: self.base.to_reducer(),
+            _fuse: self.fuse.clone(),
+        }
+    }
+}
+
+struct PanicFuseFolder<'a, C> {
+    base: C,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, T, C> Folder<T> for PanicFuseFolder<'a, C>
+where
+    C: Folder<T>,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        self.base = self.base.consume(item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
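+        // Helper: the `take_while` below keeps consuming items only while
+        // no panic has been observed on the shared fuse.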
+        fn cool<'a, T>(fuse: &'a Fuse<'_>) -> impl Fn(&T) -> bool + 'a {
+            move |_| !fuse.panicked()
+        }
+
+        self.base = {
+            let fuse = &self.fuse;
+            let iter = iter.into_iter().take_while(cool(fuse));
+            self.base.consume_iter(iter)
+        };
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.fuse.panicked() || self.base.full()
+    }
+}
+
+struct PanicFuseReducer<'a, C> {
+    base: C,
+    _fuse: Fuse<'a>,
+}
+
+impl<'a, T, C> Reducer<T> for PanicFuseReducer<'a, C>
+where
+    C: Reducer<T>,
+{
+    fn reduce(self, left: T, right: T) -> T {
+        self.base.reduce(left, right)
+    }
+}
diff --git a/src/iter/par_bridge.rs b/src/iter/par_bridge.rs
new file mode 100644
index 0000000..4c2b96e
--- /dev/null
+++ b/src/iter/par_bridge.rs
@@ -0,0 +1,201 @@
+use crossbeam_deque::{Steal, Stealer, Worker};
+
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::sync::{Mutex, TryLockError};
+use std::thread::yield_now;
+
+use crate::current_num_threads;
+use crate::iter::plumbing::{bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer};
+use crate::iter::ParallelIterator;
+
+/// Conversion trait to convert an `Iterator` to a `ParallelIterator`.
+///
+/// This creates a "bridge" from a sequential iterator to a parallel one, by distributing its items
+/// across the Rayon thread pool. This has the advantage of being able to parallelize just about
+/// anything, but the resulting `ParallelIterator` can be less efficient than if you started with
+/// `par_iter` instead. However, it can still be useful for iterators that are difficult to
+/// parallelize by other means, like channels or file or network I/O.
+///
+/// The resulting iterator is not guaranteed to keep the order of the original iterator.
+///
+/// # Examples
+///
+/// To use this trait, take an existing `Iterator` and call `par_bridge` on it. After that, you can
+/// use any of the `ParallelIterator` methods:
+///
+/// ```
+/// use rayon::iter::ParallelBridge;
+/// use rayon::prelude::ParallelIterator;
+/// use std::sync::mpsc::channel;
+///
+/// let rx = {
+///     let (tx, rx) = channel();
+///
+///     tx.send("one!");
+///     tx.send("two!");
+///     tx.send("three!");
+///
+///     rx
+/// };
+///
+/// let mut output: Vec<&'static str> = rx.into_iter().par_bridge().collect();
+/// output.sort_unstable();
+///
+/// assert_eq!(&*output, &["one!", "three!", "two!"]);
+/// ```
+pub trait ParallelBridge: Sized {
+    /// Creates a bridge from this type to a `ParallelIterator`.
+    fn par_bridge(self) -> IterBridge<Self>;
+}
+
+impl<T: Iterator + Send> ParallelBridge for T
+where
+    T::Item: Send,
+{
+    fn par_bridge(self) -> IterBridge<Self> {
+        IterBridge { iter: self }
+    }
+}
+
+/// `IterBridge` is a parallel iterator that wraps a sequential iterator.
+///
+/// This type is created when using the `par_bridge` method on `ParallelBridge`. See the
+/// [`ParallelBridge`] documentation for details.
+///
+/// [`ParallelBridge`]: trait.ParallelBridge.html
+#[derive(Debug, Clone)]
+pub struct IterBridge<Iter> {
+    iter: Iter,
+}
+
+impl<Iter: Iterator + Send> ParallelIterator for IterBridge<Iter>
+where
+    Iter::Item: Send,
+{
+    type Item = Iter::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let split_count = AtomicUsize::new(current_num_threads());
+        let worker = Worker::new_fifo();
+        let stealer = worker.stealer();
+        let done = AtomicBool::new(false);
+        let iter = Mutex::new((self.iter, worker));
+
+        bridge_unindexed(
+            IterParallelProducer {
+                split_count: &split_count,
+                done: &done,
+                iter: &iter,
+                items: stealer,
+            },
+            consumer,
+        )
+    }
+}
+
+struct IterParallelProducer<'a, Iter: Iterator> {
+    split_count: &'a AtomicUsize,
+    done: &'a AtomicBool,
+    iter: &'a Mutex<(Iter, Worker<Iter::Item>)>,
+    items: Stealer<Iter::Item>,
+}
+
+// manual Clone impl because `Iter` doesn't need to be `Clone`, but the derive would require it
+impl<'a, Iter: Iterator + 'a> Clone for IterParallelProducer<'a, Iter> {
+    fn clone(&self) -> Self {
+        IterParallelProducer {
+            split_count: self.split_count,
+            done: self.done,
+            iter: self.iter,
+            items: self.items.clone(),
+        }
+    }
+}
+
+impl<'a, Iter: Iterator + Send + 'a> UnindexedProducer for IterParallelProducer<'a, Iter>
+where
+    Iter::Item: Send,
+{
+    type Item = Iter::Item;
+
+    fn split(self) -> (Self, Option<Self>) {
+        let mut count = self.split_count.load(Ordering::SeqCst);
+
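+        // `split_count` is a shared budget of how many more times we are
+        // willing to split; each successful split decrements it via CAS, and
+        // once it reaches zero (or the iterator is exhausted) we stop.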
+        loop {
+            let done = self.done.load(Ordering::SeqCst);
+            match count.checked_sub(1) {
+                Some(new_count) if !done => {
+                    let last_count =
+                        self.split_count
+                            .compare_and_swap(count, new_count, Ordering::SeqCst);
+                    if last_count == count {
+                        return (self.clone(), Some(self));
+                    } else {
+                        count = last_count;
+                    }
+                }
+                _ => {
+                    return (self, None);
+                }
+            }
+        }
+    }
+
+    fn fold_with<F>(self, mut folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        loop {
+            match self.items.steal() {
+                Steal::Success(it) => {
+                    folder = folder.consume(it);
+                    if folder.full() {
+                        return folder;
+                    }
+                }
+                Steal::Empty => {
+                    if self.done.load(Ordering::SeqCst) {
+                        // the iterator is out of items, no use in continuing
+                        return folder;
+                    } else {
+                        // our cache is out of items, time to load more from the iterator
+                        match self.iter.try_lock() {
+                            Ok(mut guard) => {
+                                let count = current_num_threads();
+                                let count = (count * count) * 2;
+
+                                let (ref mut iter, ref worker) = *guard;
+
+                                // while worker.len() < count {
+                                // FIXME the new deque doesn't let us count items.  We can just
+                                // push a number of items, but that doesn't consider active
+                                // stealers elsewhere.
+                                for _ in 0..count {
+                                    if let Some(it) = iter.next() {
+                                        worker.push(it);
+                                    } else {
+                                        self.done.store(true, Ordering::SeqCst);
+                                        break;
+                                    }
+                                }
+                            }
+                            Err(TryLockError::WouldBlock) => {
+                                // someone else has the mutex, just sit tight until it's ready
+                                yield_now(); //TODO: use a thread-pool-aware yield? (#548)
+                            }
+                            Err(TryLockError::Poisoned(_)) => {
+                                // any panics from other threads will have been caught by the pool,
+                                // and will be re-thrown when joined - just exit
+                                return folder;
+                            }
+                        }
+                    }
+                }
+                Steal::Retry => (),
+            }
+        }
+    }
+}
diff --git a/src/iter/plumbing/README.md b/src/iter/plumbing/README.md
new file mode 100644
index 0000000..cd94eae
--- /dev/null
+++ b/src/iter/plumbing/README.md
@@ -0,0 +1,315 @@
+# Parallel Iterators
+
+These are some notes on the design of the parallel iterator traits.
+This file does not describe how to **use** parallel iterators.
+
+## The challenge
+
+Parallel iterators are more complicated than sequential iterators.
+The reason is that they have to be able to split themselves up and
+operate in parallel across the two halves.
+
+The current design for parallel iterators has two distinct modes in
+which they can be used; as we will see, not all iterators support both
+modes (which is why there are two):
+
+- **Pull mode** (the `Producer` and `UnindexedProducer` traits): in this mode,
+  the iterator is asked to produce the next item using a call to `next`. This
+  is basically like a normal iterator, but with a twist: you can split the
+  iterator in half to produce disjoint items in separate threads.
+  - in the `Producer` trait, splitting is done with `split_at`, which accepts
+    an index where the split should be performed. Only indexed iterators can
+    work in this mode, as they know exactly how much data they will produce,
+    and how to locate the requested index.
+  - in the `UnindexedProducer` trait, splitting is done with `split`, which
+    simply requests that the producer divide itself *approximately* in half.
+    This is useful when the exact length and/or layout is unknown, as with
+    `String` characters, or when the length might exceed `usize`, as with
+    `Range<u64>` on 32-bit platforms.
+    - In theory, any `Producer` could act unindexed, but we don't currently
+      use that possibility.  When you know the exact length, a `split` can
+      simply be implemented as `split_at(length/2)`.
+- **Push mode** (the `Consumer` and `UnindexedConsumer` traits): in
+  this mode, the iterator instead is *given* each item in turn, which
+  is then processed. This is the opposite of a normal iterator. It's
+  more like a `for_each` call: each time a new item is produced, the
+  `consume` method is called with that item. (The traits themselves are
+  a bit more complex, as they support state that can be threaded
+  through and ultimately reduced.) Unlike producers, there are two
+  variants of consumers. The difference is how the split is performed:
+  - in the `Consumer` trait, splitting is done with `split_at`, which
+    accepts an index where the split should be performed. All
+    iterators can work in this mode. The resulting halves thus have an
+    idea about how much data they expect to consume.
+  - in the `UnindexedConsumer` trait, splitting is done with
+    `split_off_left`.  There is no index: the resulting halves must be
+    prepared to process any amount of data, and they don't know where that
+    data falls in the overall stream.
+    - Not all consumers can operate in this mode. It works for
+      `for_each` and `reduce`, for example, but it does not work for
+      `collect_into_vec`, since in that case the position of each item is
+      important for knowing where it ends up in the target collection.
+
+## How iterator execution proceeds
+
+We'll walk through this example iterator chain to start. This chain
+demonstrates more-or-less the full complexity of what can happen.
+
+```rust
+vec1.par_iter()
+    .zip(vec2.par_iter())
+    .flat_map(some_function)
+    .for_each(some_other_function)
+```
+
+To handle an iterator chain, we start by creating consumers. This
+works from the end. So in this case, the call to `for_each` is the
+final step, so it will create a `ForEachConsumer` that, given an item,
+just calls `some_other_function` with that item. (`ForEachConsumer` is
+a very simple consumer because it doesn't need to thread any state
+between items at all.)
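+
+To make that concrete, here is a rough sketch of what such a consumer can
+look like, written against the `Consumer`/`Folder`/`Reducer` traits from
+the `plumbing` module. The names (`SketchForEach`, `NoopReducer`) are
+illustrative rather than the exact rayon internals, but the real consumer
+behind `for_each` has essentially this shape:
+
+```rust
+struct NoopReducer;
+
+impl Reducer<()> for NoopReducer {
+    fn reduce(self, _left: (), _right: ()) {}
+}
+
+struct SketchForEach<'f, F> {
+    op: &'f F,
+}
+
+impl<'f, T, F> Consumer<T> for SketchForEach<'f, F>
+    where F: Fn(T) + Sync,
+{
+    type Folder = SketchForEach<'f, F>;
+    type Reducer = NoopReducer;
+    type Result = ();
+
+    fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) {
+        // There is no per-consumer state to divide; both halves just
+        // share the same borrowed closure.
+        (SketchForEach { op: self.op }, self, NoopReducer)
+    }
+
+    fn into_folder(self) -> Self {
+        self
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'f, T, F> Folder<T> for SketchForEach<'f, F>
+    where F: Fn(T) + Sync,
+{
+    type Result = ();
+
+    fn consume(self, item: T) -> Self {
+        // Given an item, just call the closure with that item.
+        (self.op)(item);
+        self
+    }
+
+    fn complete(self) {}
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+```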
+
+Now, the `for_each` call will pass this consumer to the base iterator,
+which is the `flat_map`. It will do this by calling the `drive_unindexed`
+method on the `ParallelIterator` trait. `drive_unindexed` basically
+says "produce items for this iterator and feed them to this consumer";
+it only works for unindexed consumers.
+
+(As an aside, it is interesting that only some consumers can work in
+unindexed mode, but all producers can *drive* an unindexed consumer.
+In contrast, only some producers can drive an *indexed* consumer, but
+all consumers can be supplied indexes. Isn't variance neat.)
+
+As it happens, `FlatMap` only works with unindexed consumers anyway.
+This is because flat-map basically has no idea how many items it will
+produce. If you ask flat-map to produce the 22nd item, it can't do it,
+at least not without some intermediate state. It doesn't know whether
+processing the first item will create 1 item, 3 items, or 100;
+therefore, to produce an arbitrary item, it would basically just have
+to start at the beginning and execute sequentially, which is not what
+we want. But for unindexed consumers, this doesn't matter, since they
+don't need to know how much data they will get.
+
+Therefore, `FlatMap` can wrap the `ForEachConsumer` with a
+`FlatMapConsumer` that feeds to it. This `FlatMapConsumer` will be
+given one item. It will then invoke `some_function` to get a parallel
+iterator out. It will then ask this new parallel iterator to drive the
+`ForEachConsumer`. The `drive_unindexed` method on `flat_map` can then
+pass the `FlatMapConsumer` up the chain to the previous item, which is
+`zip`. At this point, something interesting happens.
+
+## Switching from push to pull mode
+
+If you think about `zip`, it can't really be implemented as a
+consumer, at least not without an intermediate thread and some
+channels or something (or maybe coroutines). The problem is that it
+has to walk two iterators *in lockstep*. Basically, it can't call two
+`drive` methods simultaneously, it can only call one at a time. So at
+this point, the `zip` iterator needs to switch from *push mode* into
+*pull mode*.
+
+You'll note that `Zip` is only usable if its inputs implement
+`IndexedParallelIterator`, meaning that they can produce data starting
+at random points in the stream. This need to switch to pull mode is
+exactly why. If we want to split a zip iterator at position 22, we
+need to be able to start zipping items from index 22 right away,
+without having to start from index 0.
+
+Anyway, so at this point, the `drive_unindexed` method for `Zip` stops
+creating consumers. Instead, it creates a *producer*, a `ZipProducer`,
+to be exact, and calls the `bridge` function in the `plumbing`
+module. Creating a `ZipProducer` will in turn create producers for
+the two iterators being zipped. This is possible because they both
+implement `IndexedParallelIterator`.
+
+The `bridge` function will then connect the consumer, which is
+handling the `flat_map` and `for_each`, with the producer, which is
+handling the `zip` and its predecessors. It will split down until the
+chunks seem reasonably small, then pull items from the producer and
+feed them to the consumer.
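+
+In code, the heart of `bridge` is a recursion along these lines. This is
+only a sketch -- the real version (see `plumbing/mod.rs`) uses
+`join_context` plus an adaptive `LengthSplitter` to decide when to stop
+splitting, and also bails out early if the consumer reports it is full:
+
+```rust
+fn bridge_sketch<P, C>(len: usize, producer: P, consumer: C) -> C::Result
+    where P: Producer,
+          C: Consumer<P::Item>,
+{
+    if len > 1 {
+        // Split both producer and consumer at the same index.
+        let mid = len / 2;
+        let (left_p, right_p) = producer.split_at(mid);
+        let (left_c, right_c, reducer) = consumer.split_at(mid);
+
+        // Recurse on both halves in parallel (via rayon's join primitive),
+        // then combine the two results with the reducer.
+        let (left_r, right_r) = join(
+            || bridge_sketch(mid, left_p, left_c),
+            || bridge_sketch(len - mid, right_p, right_c),
+        );
+        reducer.reduce(left_r, right_r)
+    } else {
+        // Sequential base case: feed the producer's items to a folder.
+        producer.fold_with(consumer.into_folder()).complete()
+    }
+}
+```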
+
+## The base case
+
+The other time that `bridge` gets used is when we bottom out in an
+indexed producer, such as a slice or range.  There is also a
+`bridge_unindexed` equivalent for - you guessed it - unindexed producers,
+such as string characters.
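+
+For instance, a producer over a slice is little more than a wrapper whose
+`split_at` defers to `slice::split_at`. The following is a simplified
+sketch (the real producers for slices live in rayon's `slice` module):
+
+```rust
+struct SliceProducer<'data, T: Sync> {
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for SliceProducer<'data, T> {
+    type Item = &'data T;
+    type IntoIter = std::slice::Iter<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.iter()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        // Splitting the producer is just splitting the slice.
+        let (left, right) = self.slice.split_at(index);
+        (SliceProducer { slice: left }, SliceProducer { slice: right })
+    }
+}
+```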
+
+<a name="producer-callback">
+
+## What on earth is `ProducerCallback`?
+
+We saw that when you call a parallel action method like
+`par_iter.reduce()`, that will create a "reducing" consumer and then
+invoke `par_iter.drive_unindexed()` (or `par_iter.drive()`) as
+appropriate. This may create yet more consumers as we proceed up the
+parallel iterator chain. But at some point we're going to get to the
+start of the chain, or to a parallel iterator (like `zip()`) that has
+to coordinate multiple inputs. At that point, we need to start
+converting parallel iterators into producers.
+
+The way we do this is by invoking the method `with_producer()`, defined on
+`IndexedParallelIterator`. This is a callback scheme. In an ideal world,
+it would work like this:
+
+```rust
+base_iter.with_producer(|base_producer| {
+    // here, `base_producer` is the producer for `base_iter`
+});
+```
+
+In that case, we could implement a combinator like `map()` by getting
+the producer for the base iterator, wrapping it to make our own
+`MapProducer`, and then passing that to the callback. Something like
+this:
+
+```rust
+struct MapProducer<'f, P, F: 'f> {
+    base: P,
+    map_op: &'f F,
+}
+
+impl<I, F> IndexedParallelIterator for Map<I, F>
+    where I: IndexedParallelIterator,
+          F: MapOp<I::Item>,
+{
+    fn with_producer<CB>(self, callback: CB) -> CB::Output {
+        let map_op = &self.map_op;
+        self.base_iter.with_producer(|base_producer| {
+            // Here `base_producer` is the producer for `self.base_iter`.
+            // Wrap that to make a `MapProducer`
+            let map_producer = MapProducer {
+                base: base_producer,
+                map_op: map_op
+            };
+
+            // invoke the callback with the wrapped version
+            callback(map_producer)
+        })
+    }
+}
+```
+
+This example demonstrates some of the power of the callback scheme.
+It winds up being a very flexible setup. For one thing, it means we
+can take ownership of `par_iter`; we can then in turn give away
+ownership of its bits and pieces to the producer (this is very useful if
+the iterator owns an `&mut` slice, for example), or create shared
+references and put *those* in the producer. In the case of map, for
+example, the parallel iterator owns the `map_op`, and we borrow
+references to it which we then put into the `MapProducer` (this means
+the `MapProducer` can easily split itself and share those references).
+The `with_producer` method can also create resources that are needed
+during the parallel execution, since the producer does not have to be
+returned.
+
+Unfortunately there is a catch. We can't actually use closures the way
+I showed you. To see why, think about the type that `map_producer`
+would have to have. If we were going to write the `with_producer`
+method using a closure, it would have to look something like this:
+
+```rust
+pub trait IndexedParallelIterator: ParallelIterator {
+    type Producer;
+    fn with_producer<CB, R>(self, callback: CB) -> R
+        where CB: FnOnce(Self::Producer) -> R;
+    ...
+}
+```
+
+Note that we had to add this associated type `Producer` so that
+we could specify the argument of the callback to be `Self::Producer`.
+Now, imagine trying to write that `MapProducer` impl using this style:
+
+```rust
+impl<I, F> IndexedParallelIterator for Map<I, F>
+    where I: IndexedParallelIterator,
+          F: MapOp<I::Item>,
+{
+    type Producer = MapProducer<'f, I::Producer, F>;
+    //                          ^^ wait, what is this `'f`?
+
+    fn with_producer<CB, R>(self, callback: CB) -> R
+        where CB: FnOnce(Self::Producer) -> R
+    {
+        let map_op = &self.map_op;
+        //  ^^^^^^ `'f` is (conceptually) the lifetime of this reference,
+        //         so it will be different for each call to `with_producer`!
+    }
+}
+```
+
+This may look familiar to you: it's the same problem that we have
+trying to define an `Iterable` trait. Basically, the producer type
+needs to include a lifetime (here, `'f`) that refers to the body of
+`with_producer` and hence is not in scope at the impl level.
+
+If we had [associated type constructors][1598], we could solve this
+problem that way. But there is another solution. We can use a
+dedicated callback trait like `ProducerCallback`, instead of `FnOnce`:
+
+[1598]: https://github.com/rust-lang/rfcs/pull/1598
+
+```rust
+pub trait ProducerCallback<T> {
+    type Output;
+    fn callback<P>(self, producer: P) -> Self::Output
+        where P: Producer<Item=T>;
+}
+```
+
+Using this trait, the signature of `with_producer()` looks like this:
+
+```rust
+fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output;
+```
+
+Notice that this signature **never has to name the producer type** --
+there is no associated type `Producer` anymore. This is because the
+`callback()` method is generic over **all** producers `P`.
+
+The problem is that now the `||` sugar doesn't work anymore. So we
+have to manually create the callback struct, which is a mite tedious.
+So our `MapProducer` code looks like this:
+
+```rust
+impl<I, F> IndexedParallelIterator for Map<I, F>
+    where I: IndexedParallelIterator,
+          F: MapOp<I::Item>,
+{
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+        where CB: ProducerCallback<Self::Item>
+    {
+        return self.base.with_producer(Callback { callback: callback, map_op: self.map_op });
+        //                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+        //                             Manual version of the closure sugar: create an instance
+        //                             of a struct that implements `ProducerCallback`.
+
+        // The struct declaration. Each field is something that we need to capture from the
+        // creating scope.
+        struct Callback<CB, F> {
+            callback: CB,
+            map_op: F,
+        }
+
+        // Implement the `ProducerCallback` trait. This is pure boilerplate.
+        impl<T, F, CB> ProducerCallback<T> for Callback<CB, F>
+            where F: MapOp<T>,
+                  CB: ProducerCallback<F::Output>
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+                where P: Producer<Item=T>
+            {
+                // The body of the closure is here:
+                let producer = MapProducer { base: base,
+                                             map_op: &self.map_op };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+```
+
+OK, a bit tedious, but it works!
diff --git a/src/iter/plumbing/mod.rs b/src/iter/plumbing/mod.rs
new file mode 100644
index 0000000..71d4fb4
--- /dev/null
+++ b/src/iter/plumbing/mod.rs
@@ -0,0 +1,484 @@
+//! Traits and functions used to implement parallel iteration.  These are
+//! low-level details -- users of parallel iterators should not need to
+//! interact with them directly.  See [the `plumbing` README][r] for a general overview.
+//!
+//! [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+
+use crate::join_context;
+
+use super::IndexedParallelIterator;
+
+use std::cmp;
+use std::usize;
+
+/// The `ProducerCallback` trait is a kind of generic closure,
+/// [analogous to `FnOnce`][FnOnce]. See [the corresponding section in
+/// the plumbing README][r] for more details.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md#producer-callback
+/// [FnOnce]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html
+pub trait ProducerCallback<T> {
+    /// The type of value returned by this callback. Analogous to
+    /// [`Output` from the `FnOnce` trait][Output].
+    ///
+    /// [Output]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html#associatedtype.Output
+    type Output;
+
+    /// Invokes the callback with the given producer as argument. The
+    /// key point of this trait is that this method is generic over
+    /// `P`, and hence implementors must be able to handle any producer.
+    fn callback<P>(self, producer: P) -> Self::Output
+    where
+        P: Producer<Item = T>;
+}
+
+/// A `Producer` is effectively a "splittable `IntoIterator`". That
+/// is, a producer is a value which can be converted into an iterator
+/// at any time: at that point, it simply produces items on demand,
+/// like any iterator. But what makes a `Producer` special is that,
+/// *before* we convert to an iterator, we can also **split** it at a
+/// particular point using the `split_at` method. This will yield up
+/// two producers, one producing the items before that point, and one
+/// producing the items after that point (these two producers can then
+/// independently be split further, or be converted into iterators).
+/// In Rayon, this splitting is used to divide between threads.
+/// See [the `plumbing` README][r] for further details.
+///
+/// Note that each producer will always produce a fixed number of
+/// items N. However, this number N is not queryable through the API;
+/// the consumer is expected to track it.
+///
+/// NB. You might expect `Producer` to extend the `IntoIterator`
+/// trait.  However, [rust-lang/rust#20671][20671] prevents us from
+/// declaring the DoubleEndedIterator and ExactSizeIterator
+/// constraints on a required IntoIterator trait, so we inline
+/// IntoIterator here until that issue is fixed.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+/// [20671]: https://github.com/rust-lang/rust/issues/20671
+pub trait Producer: Send + Sized {
+    /// The type of item that will be produced by this producer once
+    /// it is converted into an iterator.
+    type Item;
+
+    /// The type of iterator we will become.
+    type IntoIter: Iterator<Item = Self::Item> + DoubleEndedIterator + ExactSizeIterator;
+
+    /// Convert `self` into an iterator; at this point, no more parallel splits
+    /// are possible.
+    fn into_iter(self) -> Self::IntoIter;
+
+    /// The minimum number of items that we will process
+    /// sequentially. Defaults to 1, which means that we will split
+    /// all the way down to a single item. This can be raised higher
+    /// using the [`with_min_len`] method, which will force us to
+    /// create sequential tasks at a larger granularity. Note that
+    /// Rayon normally attempts to adjust the size of parallel splits
+    /// automatically to reduce overhead, so this should not be
+    /// needed.
+    ///
+    /// [`with_min_len`]: ../trait.IndexedParallelIterator.html#method.with_min_len
+    fn min_len(&self) -> usize {
+        1
+    }
+
+    /// The maximum number of items that we will process
+    /// sequentially. Defaults to MAX, which means that we can choose
+    /// not to split at all. This can be lowered using the
+    /// [`with_max_len`] method, which will force us to create more
+    /// parallel tasks. Note that Rayon normally attempts to adjust
+    /// the size of parallel splits automatically to reduce overhead,
+    /// so this should not be needed.
+    ///
+    /// [`with_max_len`]: ../trait.IndexedParallelIterator.html#method.with_max_len
+    fn max_len(&self) -> usize {
+        usize::MAX
+    }
+
+    /// Split into two producers; one produces items `0..index`, the
+    /// other `index..N`. Index must be less than or equal to `N`.
+    fn split_at(self, index: usize) -> (Self, Self);
+
+    /// Iterate the producer, feeding each element to `folder`, and
+    /// stop when the folder is full (or all elements have been consumed).
+    ///
+    /// The provided implementation is sufficient for most iterables.
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.into_iter())
+    }
+}
+
+/// A consumer is effectively a [generalized "fold" operation][fold],
+/// and in fact each consumer will eventually be converted into a
+/// [`Folder`]. What makes a consumer special is that, like a
+/// [`Producer`], it can be **split** into multiple consumers using
+/// the `split_at` method. When a consumer is split, it produces two
+/// consumers, as well as a **reducer**. The two consumers can be fed
+/// items independently, and when they are done the reducer is used to
+/// combine their two results into one. See [the `plumbing`
+/// README][r] for further details.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+/// [fold]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.fold
+/// [`Folder`]: trait.Folder.html
+/// [`Producer`]: trait.Producer.html
+pub trait Consumer<Item>: Send + Sized {
+    /// The type of folder that this consumer can be converted into.
+    type Folder: Folder<Item, Result = Self::Result>;
+
+    /// The type of reducer that is produced if this consumer is split.
+    type Reducer: Reducer<Self::Result>;
+
+    /// The type of result that this consumer will ultimately produce.
+    type Result: Send;
+
+    /// Divide the consumer into two consumers, one processing items
+    /// `0..index` and one processing items from `index..`. Also
+    /// produces a reducer that can be used to reduce the results at
+    /// the end.
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer);
+
+    /// Convert the consumer into a folder that can consume items
+    /// sequentially, eventually producing a final result.
+    fn into_folder(self) -> Self::Folder;
+
+    /// Hint whether this `Consumer` would like to stop processing
+    /// further items, e.g. if a search has been completed.
+    fn full(&self) -> bool;
+}
+
+/// The `Folder` trait encapsulates [the standard fold
+/// operation][fold].  It can be fed many items using the `consume`
+/// method. At the end, once all items have been consumed, it can then
+/// be converted (using `complete`) into a final value.
+///
+/// [fold]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.fold
+pub trait Folder<Item>: Sized {
+    /// The type of result that will ultimately be produced by the folder.
+    type Result;
+
+    /// Consume next item and return new sequential state.
+    fn consume(self, item: Item) -> Self;
+
+    /// Consume items from the iterator until full, and return new sequential state.
+    ///
+    /// This method is **optional**. The default simply iterates over
+    /// `iter`, invoking `consume` on each item and stopping early if
+    /// `full` returns true.
+    ///
+    /// The main reason to override it is if you can provide a more
+    /// specialized, efficient implementation.
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = Item>,
+    {
+        for item in iter {
+            self = self.consume(item);
+            if self.full() {
+                break;
+            }
+        }
+        self
+    }
+
+    /// Finish consuming items, produce final result.
+    fn complete(self) -> Self::Result;
+
+    /// Hint whether this `Folder` would like to stop processing
+    /// further items, e.g. if a search has been completed.
+    fn full(&self) -> bool;
+}
+
+/// The reducer is the final step of a `Consumer` -- after a consumer
+/// has been split into two parts, and each of those parts has been
+/// fully processed, we are left with two results. The reducer is then
+/// used to combine those two results into one. See [the `plumbing`
+/// README][r] for further details.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+pub trait Reducer<Result> {
+    /// Reduce two final results into one; this is executed after a
+    /// split.
+    fn reduce(self, left: Result, right: Result) -> Result;
+}
+
+/// A stateless consumer can be freely copied. These consumers can be
+/// used like regular consumers, but they also support a
+/// `split_off_left` method that does not take an index to split, but
+/// simply splits at some arbitrary point (`for_each`, for example,
+/// produces an unindexed consumer).
+pub trait UnindexedConsumer<I>: Consumer<I> {
+    /// Splits off a "left" consumer and returns it. The `self`
+    /// consumer should then be used to consume the "right" portion of
+    /// the data. (The ordering matters for methods like find_first --
+    /// values produced by the returned value are given precedence
+    /// over values produced by `self`.) Once the left and right
+    /// halves have been fully consumed, you should reduce the results
+    /// with the result of `to_reducer`.
+    fn split_off_left(&self) -> Self;
+
+    /// Creates a reducer that can be used to combine the results from
+    /// a split consumer.
+    fn to_reducer(&self) -> Self::Reducer;
+}
+
+/// A variant on `Producer` which does not know its exact length or
+/// cannot represent it in a `usize`. These producers act like
+/// ordinary producers except that they cannot be told to split at a
+/// particular point. Instead, you just ask them to split 'somewhere'.
+///
+/// (In principle, `Producer` could extend this trait; however, it
+/// does not because to do so would require producers to carry their
+/// own length with them.)
+pub trait UnindexedProducer: Send + Sized {
+    /// The type of item returned by this producer.
+    type Item;
+
+    /// Split midway into a new producer if possible, otherwise return `None`.
+    fn split(self) -> (Self, Option<Self>);
+
+    /// Iterate the producer, feeding each element to `folder`, and
+    /// stop when the folder is full (or all elements have been consumed).
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>;
+}
+
+/// A splitter controls the policy for splitting into smaller work items.
+///
+/// Thief-splitting is an adaptive policy that starts by splitting into
+/// enough jobs for every worker thread, and then resets itself whenever a
+/// job is actually stolen into a different thread.
+#[derive(Clone, Copy)]
+struct Splitter {
+    /// The `splits` tell us approximately how many remaining times we'd
+    /// like to split this job.  We always just divide it by two though, so
+    /// the effective number of pieces will be `next_power_of_two()`.
+    splits: usize,
+}
+
+impl Splitter {
+    #[inline]
+    fn new() -> Splitter {
+        Splitter {
+            splits: crate::current_num_threads(),
+        }
+    }
+
+    #[inline]
+    fn try_split(&mut self, stolen: bool) -> bool {
+        let Splitter { splits } = *self;
+
+        if stolen {
+            // This job was stolen!  Reset the number of desired splits to the
+            // thread count, if that's more than we had remaining anyway.
+            self.splits = cmp::max(crate::current_num_threads(), self.splits / 2);
+            true
+        } else if splits > 0 {
+            // We have splits remaining, make it so.
+            self.splits /= 2;
+            true
+        } else {
+            // Not stolen, and no more splits -- we're done!
+            false
+        }
+    }
+}
+
+/// The length splitter is built on thief-splitting, but additionally takes
+/// into account the remaining length of the iterator.
+#[derive(Clone, Copy)]
+struct LengthSplitter {
+    inner: Splitter,
+
+    /// The smallest we're willing to divide into.  Usually this is just 1,
+    /// but you can choose a larger working size with `with_min_len()`.
+    min: usize,
+}
+
+impl LengthSplitter {
+    /// Creates a new splitter based on lengths.
+    ///
+    /// The `min` is a hard lower bound.  We'll never split below that, but
+    /// of course an iterator might start out smaller already.
+    ///
+    /// The `max` is an upper bound on the working size, used to determine
+    /// the minimum number of times we need to split to get under that limit.
+    /// The adaptive algorithm may very well split even further, but never
+    /// smaller than the `min`.
+    #[inline]
+    fn new(min: usize, max: usize, len: usize) -> LengthSplitter {
+        let mut splitter = LengthSplitter {
+            inner: Splitter::new(),
+            min: cmp::max(min, 1),
+        };
+
+        // Divide the given length by the max working length to get the minimum
+        // number of splits we need to get under that max.  This rounds down,
+        // but the splitter actually gives `next_power_of_two()` pieces anyway.
+        // e.g. len 12345 / max 100 = 123 min_splits -> 128 pieces.
+        let min_splits = len / cmp::max(max, 1);
+
+        // Only update the value if it's not splitting enough already.
+        if min_splits > splitter.inner.splits {
+            splitter.inner.splits = min_splits;
+        }
+
+        splitter
+    }
+
+    #[inline]
+    fn try_split(&mut self, len: usize, stolen: bool) -> bool {
+        // If splitting wouldn't make us too small, try the inner splitter.
+        len / 2 >= self.min && self.inner.try_split(stolen)
+    }
+}
+
+/// This helper function is used to "connect" a parallel iterator to a
+/// consumer. It will convert the `par_iter` into a producer P and
+/// then pull items from P and feed them to `consumer`, splitting and
+/// creating parallel tasks as needed.
+///
+/// This is useful when you are implementing your own parallel
+/// iterators: it is often used as the definition of the
+/// [`drive_unindexed`] or [`drive`] methods.
+///
+/// [`drive_unindexed`]: ../trait.ParallelIterator.html#tymethod.drive_unindexed
+/// [`drive`]: ../trait.IndexedParallelIterator.html#tymethod.drive
+pub fn bridge<I, C>(par_iter: I, consumer: C) -> C::Result
+where
+    I: IndexedParallelIterator,
+    C: Consumer<I::Item>,
+{
+    let len = par_iter.len();
+    return par_iter.with_producer(Callback { len, consumer });
+
+    struct Callback<C> {
+        len: usize,
+        consumer: C,
+    }
+
+    impl<C, I> ProducerCallback<I> for Callback<C>
+    where
+        C: Consumer<I>,
+    {
+        type Output = C::Result;
+        fn callback<P>(self, producer: P) -> C::Result
+        where
+            P: Producer<Item = I>,
+        {
+            bridge_producer_consumer(self.len, producer, self.consumer)
+        }
+    }
+}
+
+/// This helper function is used to "connect" a producer and a
+/// consumer. You may prefer to call [`bridge`], which wraps this
+/// function. This function will draw items from `producer` and feed
+/// them to `consumer`, splitting and creating parallel tasks when
+/// needed.
+///
+/// This is useful when you are implementing your own parallel
+/// iterators: it is often used as the definition of the
+/// [`drive_unindexed`] or [`drive`] methods.
+///
+/// [`bridge`]: fn.bridge.html
+/// [`drive_unindexed`]: ../trait.ParallelIterator.html#tymethod.drive_unindexed
+/// [`drive`]: ../trait.IndexedParallelIterator.html#tymethod.drive
+pub fn bridge_producer_consumer<P, C>(len: usize, producer: P, consumer: C) -> C::Result
+where
+    P: Producer,
+    C: Consumer<P::Item>,
+{
+    let splitter = LengthSplitter::new(producer.min_len(), producer.max_len(), len);
+    return helper(len, false, splitter, producer, consumer);
+
+    fn helper<P, C>(
+        len: usize,
+        migrated: bool,
+        mut splitter: LengthSplitter,
+        producer: P,
+        consumer: C,
+    ) -> C::Result
+    where
+        P: Producer,
+        C: Consumer<P::Item>,
+    {
+        if consumer.full() {
+            consumer.into_folder().complete()
+        } else if splitter.try_split(len, migrated) {
+            let mid = len / 2;
+            let (left_producer, right_producer) = producer.split_at(mid);
+            let (left_consumer, right_consumer, reducer) = consumer.split_at(mid);
+            let (left_result, right_result) = join_context(
+                |context| {
+                    helper(
+                        mid,
+                        context.migrated(),
+                        splitter,
+                        left_producer,
+                        left_consumer,
+                    )
+                },
+                |context| {
+                    helper(
+                        len - mid,
+                        context.migrated(),
+                        splitter,
+                        right_producer,
+                        right_consumer,
+                    )
+                },
+            );
+            reducer.reduce(left_result, right_result)
+        } else {
+            producer.fold_with(consumer.into_folder()).complete()
+        }
+    }
+}
+
+/// A variant of [`bridge_producer_consumer`] where the producer is an unindexed producer.
+///
+/// [`bridge_producer_consumer`]: fn.bridge_producer_consumer.html
+pub fn bridge_unindexed<P, C>(producer: P, consumer: C) -> C::Result
+where
+    P: UnindexedProducer,
+    C: UnindexedConsumer<P::Item>,
+{
+    let splitter = Splitter::new();
+    bridge_unindexed_producer_consumer(false, splitter, producer, consumer)
+}
+
+fn bridge_unindexed_producer_consumer<P, C>(
+    migrated: bool,
+    mut splitter: Splitter,
+    producer: P,
+    consumer: C,
+) -> C::Result
+where
+    P: UnindexedProducer,
+    C: UnindexedConsumer<P::Item>,
+{
+    if consumer.full() {
+        consumer.into_folder().complete()
+    } else if splitter.try_split(migrated) {
+        match producer.split() {
+            (left_producer, Some(right_producer)) => {
+                let (reducer, left_consumer, right_consumer) =
+                    (consumer.to_reducer(), consumer.split_off_left(), consumer);
+                let bridge = bridge_unindexed_producer_consumer;
+                let (left_result, right_result) = join_context(
+                    |context| bridge(context.migrated(), splitter, left_producer, left_consumer),
+                    |context| bridge(context.migrated(), splitter, right_producer, right_consumer),
+                );
+                reducer.reduce(left_result, right_result)
+            }
+            (producer, None) => producer.fold_with(consumer.into_folder()).complete(),
+        }
+    } else {
+        producer.fold_with(consumer.into_folder()).complete()
+    }
+}
diff --git a/src/iter/positions.rs b/src/iter/positions.rs
new file mode 100644
index 0000000..f584bb2
--- /dev/null
+++ b/src/iter/positions.rs
@@ -0,0 +1,137 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `Positions` takes a predicate `predicate` and yields the index of each
+/// element that matches it.
+///
+/// This struct is created by the [`positions()`] method on [`IndexedParallelIterator`]
+///
+/// [`positions()`]: trait.IndexedParallelIterator.html#method.positions
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
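+///
+/// # Examples
+///
+/// A small usage sketch:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let data = vec![1, 2, 3, 3, 4, 6];
+/// let multiples_of_three: Vec<usize> =
+///     data.into_par_iter().positions(|x| x % 3 == 0).collect();
+/// assert_eq!(multiples_of_three, [2, 3, 5]);
+/// ```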
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Positions<I: IndexedParallelIterator, P> {
+    base: I,
+    predicate: P,
+}
+
+impl<I: IndexedParallelIterator + Debug, P> Debug for Positions<I, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Positions")
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl<I, P> Positions<I, P>
+where
+    I: IndexedParallelIterator,
+{
+    /// Create a new `Positions` iterator.
+    pub(super) fn new(base: I, predicate: P) -> Self {
+        Positions { base, predicate }
+    }
+}
+
+impl<I, P> ParallelIterator for Positions<I, P>
+where
+    I: IndexedParallelIterator,
+    P: Fn(I::Item) -> bool + Sync + Send,
+{
+    type Item = usize;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = PositionsConsumer::new(consumer, &self.predicate, 0);
+        self.base.drive(consumer1)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct PositionsConsumer<'p, C, P> {
+    base: C,
+    predicate: &'p P,
+    offset: usize,
+}
+
+impl<'p, C, P> PositionsConsumer<'p, C, P> {
+    fn new(base: C, predicate: &'p P, offset: usize) -> Self {
+        PositionsConsumer {
+            base,
+            predicate,
+            offset,
+        }
+    }
+}
+
+impl<'p, T, C, P> Consumer<T> for PositionsConsumer<'p, C, P>
+where
+    C: Consumer<usize>,
+    P: Fn(T) -> bool + Sync,
+{
+    type Folder = PositionsFolder<'p, C::Folder, P>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            PositionsConsumer::new(left, self.predicate, self.offset),
+            PositionsConsumer::new(right, self.predicate, self.offset + index),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        PositionsFolder {
+            base: self.base.into_folder(),
+            predicate: self.predicate,
+            offset: self.offset,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+struct PositionsFolder<'p, F, P> {
+    base: F,
+    predicate: &'p P,
+    offset: usize,
+}
+
+impl<F, P, T> Folder<T> for PositionsFolder<'_, F, P>
+where
+    F: Folder<usize>,
+    P: Fn(T) -> bool,
+{
+    type Result = F::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        let index = self.offset;
+        self.offset += 1;
+        if (self.predicate)(item) {
+            self.base = self.base.consume(index);
+        }
+        self
+    }
+
+    // This cannot easily specialize `consume_iter` to be better than
+    // the default, because that requires checking `self.base.full()`
+    // during a call to `self.base.consume_iter()`. (#632)
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/product.rs b/src/iter/product.rs
new file mode 100644
index 0000000..a3d0727
--- /dev/null
+++ b/src/iter/product.rs
@@ -0,0 +1,114 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use std::iter::{self, Product};
+use std::marker::PhantomData;
+
+pub(super) fn product<PI, P>(pi: PI) -> P
+where
+    PI: ParallelIterator,
+    P: Send + Product<PI::Item> + Product,
+{
+    pi.drive_unindexed(ProductConsumer::new())
+}
+
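+// Combine two values through their `Product` implementation (rather than
+// requiring a `Mul` bound): the product of the two-element iterator
+// `[left, right]` is `left * right`.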
+fn mul<T: Product>(left: T, right: T) -> T {
+    iter::once(left).chain(iter::once(right)).product()
+}
+
+struct ProductConsumer<P: Send> {
+    _marker: PhantomData<*const P>,
+}
+
+unsafe impl<P: Send> Send for ProductConsumer<P> {}
+
+impl<P: Send> ProductConsumer<P> {
+    fn new() -> ProductConsumer<P> {
+        ProductConsumer {
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<P, T> Consumer<T> for ProductConsumer<P>
+where
+    P: Send + Product<T> + Product,
+{
+    type Folder = ProductFolder<P>;
+    type Reducer = Self;
+    type Result = P;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (
+            ProductConsumer::new(),
+            ProductConsumer::new(),
+            ProductConsumer::new(),
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ProductFolder {
+            product: iter::empty::<T>().product(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<P, T> UnindexedConsumer<T> for ProductConsumer<P>
+where
+    P: Send + Product<T> + Product,
+{
+    fn split_off_left(&self) -> Self {
+        ProductConsumer::new()
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        ProductConsumer::new()
+    }
+}
+
+impl<P> Reducer<P> for ProductConsumer<P>
+where
+    P: Send + Product,
+{
+    fn reduce(self, left: P, right: P) -> P {
+        mul(left, right)
+    }
+}
+
+struct ProductFolder<P> {
+    product: P,
+}
+
+impl<P, T> Folder<T> for ProductFolder<P>
+where
+    P: Product<T> + Product,
+{
+    type Result = P;
+
+    fn consume(self, item: T) -> Self {
+        ProductFolder {
+            product: mul(self.product, iter::once(item).product()),
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        ProductFolder {
+            product: mul(self.product, iter.into_iter().product()),
+        }
+    }
+
+    fn complete(self) -> P {
+        self.product
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
diff --git a/src/iter/reduce.rs b/src/iter/reduce.rs
new file mode 100644
index 0000000..321b5dd
--- /dev/null
+++ b/src/iter/reduce.rs
@@ -0,0 +1,116 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+pub(super) fn reduce<PI, R, ID, T>(pi: PI, identity: ID, reduce_op: R) -> T
+where
+    PI: ParallelIterator<Item = T>,
+    R: Fn(T, T) -> T + Sync,
+    ID: Fn() -> T + Sync,
+    T: Send,
+{
+    let consumer = ReduceConsumer {
+        identity: &identity,
+        reduce_op: &reduce_op,
+    };
+    pi.drive_unindexed(consumer)
+}
+
+struct ReduceConsumer<'r, R, ID> {
+    identity: &'r ID,
+    reduce_op: &'r R,
+}
+
+impl<'r, R, ID> Copy for ReduceConsumer<'r, R, ID> {}
+
+impl<'r, R, ID> Clone for ReduceConsumer<'r, R, ID> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Consumer<T> for ReduceConsumer<'r, R, ID>
+where
+    R: Fn(T, T) -> T + Sync,
+    ID: Fn() -> T + Sync,
+    T: Send,
+{
+    type Folder = ReduceFolder<'r, R, T>;
+    type Reducer = Self;
+    type Result = T;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (self, self, self)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ReduceFolder {
+            reduce_op: self.reduce_op,
+            item: (self.identity)(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'r, R, ID, T> UnindexedConsumer<T> for ReduceConsumer<'r, R, ID>
+where
+    R: Fn(T, T) -> T + Sync,
+    ID: Fn() -> T + Sync,
+    T: Send,
+{
+    fn split_off_left(&self) -> Self {
+        *self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Reducer<T> for ReduceConsumer<'r, R, ID>
+where
+    R: Fn(T, T) -> T + Sync,
+{
+    fn reduce(self, left: T, right: T) -> T {
+        (self.reduce_op)(left, right)
+    }
+}
+
+struct ReduceFolder<'r, R, T> {
+    reduce_op: &'r R,
+    item: T,
+}
+
+impl<'r, R, T> Folder<T> for ReduceFolder<'r, R, T>
+where
+    R: Fn(T, T) -> T,
+{
+    type Result = T;
+
+    fn consume(self, item: T) -> Self {
+        ReduceFolder {
+            reduce_op: self.reduce_op,
+            item: (self.reduce_op)(self.item, item),
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        ReduceFolder {
+            reduce_op: self.reduce_op,
+            item: iter.into_iter().fold(self.item, self.reduce_op),
+        }
+    }
+
+    fn complete(self) -> T {
+        self.item
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
diff --git a/src/iter/repeat.rs b/src/iter/repeat.rs
new file mode 100644
index 0000000..f84a6fe
--- /dev/null
+++ b/src/iter/repeat.rs
@@ -0,0 +1,241 @@
+use super::plumbing::*;
+use super::*;
+use std::iter;
+use std::usize;
+
+/// Iterator adaptor for [the `repeat()` function](fn.repeat.html).
+#[derive(Debug, Clone)]
+pub struct Repeat<T: Clone + Send> {
+    element: T,
+}
+
+/// Creates a parallel iterator that endlessly repeats `elt` (by
+/// cloning it). Note that this iterator has "infinite" length, so
+/// typically you would want to use `zip` or `take` or some other
+/// means to shorten it, or consider using
+/// [the `repeatn()` function](fn.repeatn.html) instead.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::repeat;
+/// let x: Vec<(i32, i32)> = repeat(22).zip(0..3).collect();
+/// assert_eq!(x, vec![(22, 0), (22, 1), (22, 2)]);
+/// ```
+pub fn repeat<T: Clone + Send>(elt: T) -> Repeat<T> {
+    Repeat { element: elt }
+}
+
+impl<T> Repeat<T>
+where
+    T: Clone + Send,
+{
+    /// Takes only `n` repeats of the element, similar to the general
+    /// [`take()`](trait.IndexedParallelIterator.html#method.take).
+    ///
+    /// The resulting `RepeatN` is an `IndexedParallelIterator`, allowing
+    /// more functionality than `Repeat` alone.
+    pub fn take(self, n: usize) -> RepeatN<T> {
+        repeatn(self.element, n)
+    }
+
+    /// Iterates tuples, repeating the element with items from another
+    /// iterator, similar to the general
+    /// [`zip()`](trait.IndexedParallelIterator.html#method.zip).
+    pub fn zip<Z>(self, zip_op: Z) -> Zip<RepeatN<T>, Z::Iter>
+    where
+        Z: IntoParallelIterator,
+        Z::Iter: IndexedParallelIterator,
+    {
+        let z = zip_op.into_par_iter();
+        let n = z.len();
+        self.take(n).zip(z)
+    }
+}
+
+impl<T> ParallelIterator for Repeat<T>
+where
+    T: Clone + Send,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = RepeatProducer {
+            element: self.element,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Unindexed producer for `Repeat`.
+struct RepeatProducer<T: Clone + Send> {
+    element: T,
+}
+
+impl<T: Clone + Send> UnindexedProducer for RepeatProducer<T> {
+    type Item = T;
+
+    fn split(self) -> (Self, Option<Self>) {
+        (
+            RepeatProducer {
+                element: self.element.clone(),
+            },
+            Some(RepeatProducer {
+                element: self.element,
+            }),
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<T>,
+    {
+        folder.consume_iter(iter::repeat(self.element))
+    }
+}
+
+/// Iterator adaptor for [the `repeatn()` function](fn.repeatn.html).
+#[derive(Debug, Clone)]
+pub struct RepeatN<T: Clone + Send> {
+    element: T,
+    count: usize,
+}
+
+/// Creates a parallel iterator that produces `n` repeats of `elt`
+/// (by cloning it).
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::repeatn;
+/// let x: Vec<(i32, i32)> = repeatn(22, 3).zip(0..3).collect();
+/// assert_eq!(x, vec![(22, 0), (22, 1), (22, 2)]);
+/// ```
+pub fn repeatn<T: Clone + Send>(elt: T, n: usize) -> RepeatN<T> {
+    RepeatN {
+        element: elt,
+        count: n,
+    }
+}
+
+impl<T> ParallelIterator for RepeatN<T>
+where
+    T: Clone + Send,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.count)
+    }
+}
+
+impl<T> IndexedParallelIterator for RepeatN<T>
+where
+    T: Clone + Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(RepeatNProducer {
+            element: self.element,
+            count: self.count,
+        })
+    }
+
+    fn len(&self) -> usize {
+        self.count
+    }
+}
+
+/// Producer for `RepeatN`.
+struct RepeatNProducer<T: Clone + Send> {
+    element: T,
+    count: usize,
+}
+
+impl<T: Clone + Send> Producer for RepeatNProducer<T> {
+    type Item = T;
+    type IntoIter = Iter<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        Iter {
+            element: self.element,
+            count: self.count,
+        }
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        (
+            RepeatNProducer {
+                element: self.element.clone(),
+                count: index,
+            },
+            RepeatNProducer {
+                element: self.element,
+                count: self.count - index,
+            },
+        )
+    }
+}
+
+/// Iterator for `RepeatN`.
+///
+/// This is conceptually like `std::iter::Take<std::iter::Repeat<T>>`, but
+/// we need `DoubleEndedIterator` and unconditional `ExactSizeIterator`.
+struct Iter<T: Clone> {
+    element: T,
+    count: usize,
+}
+
+impl<T: Clone> Iterator for Iter<T> {
+    type Item = T;
+
+    #[inline]
+    fn next(&mut self) -> Option<T> {
+        if self.count > 0 {
+            self.count -= 1;
+            Some(self.element.clone())
+        } else {
+            None
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.count, Some(self.count))
+    }
+}
+
+impl<T: Clone> DoubleEndedIterator for Iter<T> {
+    #[inline]
+    fn next_back(&mut self) -> Option<T> {
+        self.next()
+    }
+}
+
+impl<T: Clone> ExactSizeIterator for Iter<T> {
+    #[inline]
+    fn len(&self) -> usize {
+        self.count
+    }
+}
diff --git a/src/iter/rev.rs b/src/iter/rev.rs
new file mode 100644
index 0000000..a4c3b7c
--- /dev/null
+++ b/src/iter/rev.rs
@@ -0,0 +1,123 @@
+use super::plumbing::*;
+use super::*;
+use std::iter;
+
+/// `Rev` is an iterator that produces elements in reverse order. This struct
+/// is created by the [`rev()`] method on [`IndexedParallelIterator`].
+///
+/// [`rev()`]: trait.IndexedParallelIterator.html#method.rev
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
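+///
+/// # Examples
+///
+/// A minimal usage sketch:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let v: Vec<i32> = (0..5).into_par_iter().rev().collect();
+/// assert_eq!(v, [4, 3, 2, 1, 0]);
+/// ```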
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Rev<I: IndexedParallelIterator> {
+    base: I,
+}
+
+impl<I> Rev<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Rev` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Rev { base }
+    }
+}
+
+impl<I> ParallelIterator for Rev<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Rev<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.base.len();
+        return self.base.with_producer(Callback { callback, len });
+
+        struct Callback<CB> {
+            callback: CB,
+            len: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = RevProducer {
+                    base,
+                    len: self.len,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+struct RevProducer<P> {
+    base: P,
+    len: usize,
+}
+
+impl<P> Producer for RevProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = iter::Rev<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().rev()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
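+        // Split the base at the mirrored position and swap the halves: the
+        // last `index` base elements form the left half of the reversed view.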
+        let (left, right) = self.base.split_at(self.len - index);
+        (
+            RevProducer {
+                base: right,
+                len: index,
+            },
+            RevProducer {
+                base: left,
+                len: self.len - index,
+            },
+        )
+    }
+}
diff --git a/src/iter/skip.rs b/src/iter/skip.rs
new file mode 100644
index 0000000..9983f16
--- /dev/null
+++ b/src/iter/skip.rs
@@ -0,0 +1,88 @@
+use super::noop::NoopConsumer;
+use super::plumbing::*;
+use super::*;
+use std::cmp::min;
+
+/// `Skip` is an iterator that skips over the first `n` elements.
+/// This struct is created by the [`skip()`] method on [`IndexedParallelIterator`].
+///
+/// [`skip()`]: trait.IndexedParallelIterator.html#method.skip
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
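+///
+/// # Examples
+///
+/// A minimal usage sketch:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let v: Vec<i32> = (0..10).into_par_iter().skip(7).collect();
+/// assert_eq!(v, [7, 8, 9]);
+/// ```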
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Skip<I> {
+    base: I,
+    n: usize,
+}
+
+impl<I> Skip<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Skip` iterator.
+    pub(super) fn new(base: I, n: usize) -> Self {
+        let n = min(base.len(), n);
+        Skip { base, n }
+    }
+}
+
+impl<I> ParallelIterator for Skip<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Skip<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn len(&self) -> usize {
+        self.base.len() - self.n
+    }
+
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            n: self.n,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            n: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let (before_skip, after_skip) = base.split_at(self.n);
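+                // Drive the skipped prefix into a no-op consumer so that any
+                // side effects of producing those elements still happen.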
+                bridge_producer_consumer(self.n, before_skip, NoopConsumer);
+                self.callback.callback(after_skip)
+            }
+        }
+    }
+}
diff --git a/src/iter/splitter.rs b/src/iter/splitter.rs
new file mode 100644
index 0000000..40935ac
--- /dev/null
+++ b/src/iter/splitter.rs
@@ -0,0 +1,174 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// The `split` function takes arbitrary data and a closure that knows how to
+/// split it, and turns this into a `ParallelIterator`.
+///
+/// # Examples
+///
+/// As a simple example, Rayon can recursively split ranges of indices
+///
+/// ```
+/// use rayon::iter;
+/// use rayon::prelude::*;
+/// use std::ops::Range;
+///
+///
+/// // We define a range of indices as follows
+/// type Range1D = Range<usize>;
+///
+/// // Splitting it in two can be done like this
+/// fn split_range1(r: Range1D) -> (Range1D, Option<Range1D>) {
+///     // We are mathematically unable to split the range if there is only
+///     // one point inside of it, but we could stop splitting before that.
+///     if r.end - r.start <= 1 { return (r, None); }
+///
+///     // Here, our range is considered large enough to be splittable
+///     let midpoint = r.start + (r.end - r.start) / 2;
+///     (r.start..midpoint, Some(midpoint..r.end))
+/// }
+///
+/// // By using iter::split, Rayon will split the range until it has enough work
+/// // to feed the CPU cores, then give us the resulting sub-ranges
+/// iter::split(0..4096, split_range1).for_each(|sub_range| {
+///     // As our initial range had a power-of-two size, the final sub-ranges
+///     // should have power-of-two sizes too
+///     assert!((sub_range.end - sub_range.start).is_power_of_two());
+/// });
+/// ```
+///
+/// This recursive splitting can be extended to two or three dimensions,
+/// to reproduce a classic "block-wise" parallelization scheme of graphics and
+/// numerical simulations:
+///
+/// ```
+/// # use rayon::iter;
+/// # use rayon::prelude::*;
+/// # use std::ops::Range;
+/// # type Range1D = Range<usize>;
+/// # fn split_range1(r: Range1D) -> (Range1D, Option<Range1D>) {
+/// #     if r.end - r.start <= 1 { return (r, None); }
+/// #     let midpoint = r.start + (r.end - r.start) / 2;
+/// #     (r.start..midpoint, Some(midpoint..r.end))
+/// # }
+/// #
+/// // A two-dimensional range of indices can be built out of two 1D ones
+/// struct Range2D {
+///     // Range of horizontal indices
+///     pub rx: Range1D,
+///
+///     // Range of vertical indices
+///     pub ry: Range1D,
+/// }
+///
+/// // We want to recursively split them by the largest dimension until we have
+/// // enough sub-ranges to feed our mighty multi-core CPU. This function
+/// // carries out one such split.
+/// fn split_range2(r2: Range2D) -> (Range2D, Option<Range2D>) {
+///     // Decide on which axis (horizontal/vertical) the range should be split
+///     let width = r2.rx.end - r2.rx.start;
+///     let height = r2.ry.end - r2.ry.start;
+///     if width >= height {
+///         // This is a wide range, split it on the horizontal axis
+///         let (split_rx, ry) = (split_range1(r2.rx), r2.ry);
+///         let out1 = Range2D {
+///             rx: split_rx.0,
+///             ry: ry.clone(),
+///         };
+///         let out2 = split_rx.1.map(|rx| Range2D { rx, ry });
+///         (out1, out2)
+///     } else {
+///         // This is a tall range, split it on the vertical axis
+///         let (rx, split_ry) = (r2.rx, split_range1(r2.ry));
+///         let out1 = Range2D {
+///             rx: rx.clone(),
+///             ry: split_ry.0,
+///         };
+///         let out2 = split_ry.1.map(|ry| Range2D { rx, ry, });
+///         (out1, out2)
+///     }
+/// }
+///
+/// // Again, rayon can handle the recursive splitting for us
+/// let range = Range2D { rx: 0..800, ry: 0..600 };
+/// iter::split(range, split_range2).for_each(|sub_range| {
+///     // If the sub-ranges were indeed split along their largest dimension,
+///     // and no dimension was initially more than twice as large as the
+///     // other, then this property also holds for the final sub-ranges.
+///     let width = sub_range.rx.end - sub_range.rx.start;
+///     let height = sub_range.ry.end - sub_range.ry.start;
+///     assert!((width / 2 <= height) && (height / 2 <= width));
+/// });
+/// ```
+///
+pub fn split<D, S>(data: D, splitter: S) -> Split<D, S>
+where
+    D: Send,
+    S: Fn(D) -> (D, Option<D>) + Sync,
+{
+    Split { data, splitter }
+}
+
+/// `Split` is a parallel iterator using arbitrary data and a splitting function.
+/// This struct is created by the [`split()`] function.
+///
+/// [`split()`]: fn.split.html
+#[derive(Clone)]
+pub struct Split<D, S> {
+    data: D,
+    splitter: S,
+}
+
+impl<D: Debug, S> Debug for Split<D, S> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Split").field("data", &self.data).finish()
+    }
+}
+
+impl<D, S> ParallelIterator for Split<D, S>
+where
+    D: Send,
+    S: Fn(D) -> (D, Option<D>) + Sync + Send,
+{
+    type Item = D;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer {
+            data: self.data,
+            splitter: &self.splitter,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+struct SplitProducer<'a, D, S> {
+    data: D,
+    splitter: &'a S,
+}
+
+impl<'a, D, S> UnindexedProducer for SplitProducer<'a, D, S>
+where
+    D: Send,
+    S: Fn(D) -> (D, Option<D>) + Sync,
+{
+    type Item = D;
+
+    fn split(mut self) -> (Self, Option<Self>) {
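+        // Apply the user-provided splitter; keep the left piece in `self` and
+        // wrap an optional right piece in a new producer sharing the splitter.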
+        let splitter = self.splitter;
+        let (left, right) = splitter(self.data);
+        self.data = left;
+        (self, right.map(|data| SplitProducer { data, splitter }))
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume(self.data)
+    }
+}
diff --git a/src/iter/step_by.rs b/src/iter/step_by.rs
new file mode 100644
index 0000000..2002f42
--- /dev/null
+++ b/src/iter/step_by.rs
@@ -0,0 +1,144 @@
+#![cfg(step_by)]
+use std::cmp::min;
+
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+use std::iter;
+use std::usize;
+
+/// `StepBy` is an iterator that yields every `n`-th element, where `n` is the given step.
+/// This struct is created by the [`step_by()`] method on [`IndexedParallelIterator`].
+///
+/// [`step_by()`]: trait.IndexedParallelIterator.html#method.step_by
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
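+///
+/// # Examples
+///
+/// A minimal usage sketch (note that this module is only compiled when the
+/// `step_by` cfg is set):
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let v: Vec<i32> = (0..10).into_par_iter().step_by(3).collect();
+/// assert_eq!(v, [0, 3, 6, 9]);
+/// ```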
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct StepBy<I: IndexedParallelIterator> {
+    base: I,
+    step: usize,
+}
+
+impl<I> StepBy<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `StepBy` iterator.
+    pub(super) fn new(base: I, step: usize) -> Self {
+        StepBy { base, step }
+    }
+}
+
+impl<I> ParallelIterator for StepBy<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for StepBy<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.base.len(), self.step)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.base.len();
+        return self.base.with_producer(Callback {
+            callback,
+            step: self.step,
+            len,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            step: usize,
+            len: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = StepByProducer {
+                    base,
+                    step: self.step,
+                    len: self.len,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Producer implementation
+
+struct StepByProducer<P> {
+    base: P,
+    step: usize,
+    len: usize,
+}
+
+impl<P> Producer for StepByProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = iter::StepBy<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().step_by(self.step)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
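+        // `index` counts stepped items; convert it to an element index within
+        // the base producer, clamped to the remaining length.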
+        let elem_index = min(index * self.step, self.len);
+
+        let (left, right) = self.base.split_at(elem_index);
+        (
+            StepByProducer {
+                base: left,
+                step: self.step,
+                len: elem_index,
+            },
+            StepByProducer {
+                base: right,
+                step: self.step,
+                len: self.len - elem_index,
+            },
+        )
+    }
+
+    fn min_len(&self) -> usize {
+        div_round_up(self.base.min_len(), self.step)
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len() / self.step
+    }
+}
diff --git a/src/iter/sum.rs b/src/iter/sum.rs
new file mode 100644
index 0000000..a73e0bf
--- /dev/null
+++ b/src/iter/sum.rs
@@ -0,0 +1,110 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use std::iter::{self, Sum};
+use std::marker::PhantomData;
+
+pub(super) fn sum<PI, S>(pi: PI) -> S
+where
+    PI: ParallelIterator,
+    S: Send + Sum<PI::Item> + Sum,
+{
+    pi.drive_unindexed(SumConsumer::new())
+}
+
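+// Combine two partial sums using only the `Sum` trait: summing a two-element
+// iterator avoids requiring a separate `Add` bound.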
+fn add<T: Sum>(left: T, right: T) -> T {
+    iter::once(left).chain(iter::once(right)).sum()
+}
+
+struct SumConsumer<S: Send> {
+    _marker: PhantomData<*const S>,
+}
+
+unsafe impl<S: Send> Send for SumConsumer<S> {}
+
+impl<S: Send> SumConsumer<S> {
+    fn new() -> SumConsumer<S> {
+        SumConsumer {
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<S, T> Consumer<T> for SumConsumer<S>
+where
+    S: Send + Sum<T> + Sum,
+{
+    type Folder = SumFolder<S>;
+    type Reducer = Self;
+    type Result = S;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (SumConsumer::new(), SumConsumer::new(), SumConsumer::new())
+    }
+
+    fn into_folder(self) -> Self::Folder {
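+        // Start from the additive identity, obtained by summing an empty iterator.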
+        SumFolder {
+            sum: iter::empty::<T>().sum(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<S, T> UnindexedConsumer<T> for SumConsumer<S>
+where
+    S: Send + Sum<T> + Sum,
+{
+    fn split_off_left(&self) -> Self {
+        SumConsumer::new()
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        SumConsumer::new()
+    }
+}
+
+impl<S> Reducer<S> for SumConsumer<S>
+where
+    S: Send + Sum,
+{
+    fn reduce(self, left: S, right: S) -> S {
+        add(left, right)
+    }
+}
+
+struct SumFolder<S> {
+    sum: S,
+}
+
+impl<S, T> Folder<T> for SumFolder<S>
+where
+    S: Sum<T> + Sum,
+{
+    type Result = S;
+
+    fn consume(self, item: T) -> Self {
+        SumFolder {
+            sum: add(self.sum, iter::once(item).sum()),
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        SumFolder {
+            sum: add(self.sum, iter.into_iter().sum()),
+        }
+    }
+
+    fn complete(self) -> S {
+        self.sum
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
diff --git a/src/iter/take.rs b/src/iter/take.rs
new file mode 100644
index 0000000..52d15d8
--- /dev/null
+++ b/src/iter/take.rs
@@ -0,0 +1,86 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp::min;
+
+/// `Take` is an iterator that iterates over the first `n` elements.
+/// This struct is created by the [`take()`] method on [`IndexedParallelIterator`].
+///
+/// [`take()`]: trait.IndexedParallelIterator.html#method.take
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
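+///
+/// # Examples
+///
+/// A minimal usage sketch:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let v: Vec<i32> = (0..100).into_par_iter().take(3).collect();
+/// assert_eq!(v, [0, 1, 2]);
+/// ```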
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Take<I> {
+    base: I,
+    n: usize,
+}
+
+impl<I> Take<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Take` iterator.
+    pub(super) fn new(base: I, n: usize) -> Self {
+        let n = min(base.len(), n);
+        Take { base, n }
+    }
+}
+
+impl<I> ParallelIterator for Take<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Take<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn len(&self) -> usize {
+        self.n
+    }
+
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            n: self.n,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            n: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
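+                // Keep only the producer for the first `n` elements; the
+                // remainder is dropped without ever being driven.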
+                let (producer, _) = base.split_at(self.n);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
diff --git a/src/iter/test.rs b/src/iter/test.rs
new file mode 100644
index 0000000..bc5106b
--- /dev/null
+++ b/src/iter/test.rs
@@ -0,0 +1,2188 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use super::*;
+use crate::prelude::*;
+use rayon_core::*;
+
+use rand::distributions::Standard;
+use rand::{Rng, SeedableRng};
+use rand_xorshift::XorShiftRng;
+use std::collections::LinkedList;
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BinaryHeap, VecDeque};
+use std::f64;
+use std::fmt::Debug;
+use std::sync::mpsc;
+use std::usize;
+
+fn is_indexed<T: IndexedParallelIterator>(_: T) {}
+
+fn seeded_rng() -> XorShiftRng {
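+    // Use a fixed, deterministic seed so the randomized comparison tests are reproducible.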
+    let mut seed = <XorShiftRng as SeedableRng>::Seed::default();
+    (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i);
+    XorShiftRng::from_seed(seed)
+}
+
+#[test]
+fn execute() {
+    let a: Vec<i32> = (0..1024).collect();
+    let mut b = vec![];
+    a.par_iter().map(|&i| i + 1).collect_into_vec(&mut b);
+    let c: Vec<i32> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_cloned() {
+    let a: Vec<i32> = (0..1024).collect();
+    let mut b: Vec<i32> = vec![];
+    a.par_iter().cloned().collect_into_vec(&mut b);
+    let c: Vec<i32> = (0..1024).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_range() {
+    let a = 0i32..1024;
+    let mut b = vec![];
+    a.into_par_iter().map(|i| i + 1).collect_into_vec(&mut b);
+    let c: Vec<i32> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_unindexed_range() {
+    let a = 0i64..1024;
+    let b: LinkedList<i64> = a.into_par_iter().map(|i| i + 1).collect();
+    let c: LinkedList<i64> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_pseudo_indexed_range() {
+    use std::i128::MAX;
+    let range = MAX - 1024..MAX;
+
+    // Given `Some` length, collecting `Vec` will try to act indexed.
+    let a = range.clone().into_par_iter();
+    assert_eq!(a.opt_len(), Some(1024));
+
+    let b: Vec<i128> = a.map(|i| i + 1).collect();
+    let c: Vec<i128> = range.map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn check_map_indexed() {
+    let a = [1, 2, 3];
+    is_indexed(a.par_iter().map(|x| x));
+}
+
+#[test]
+fn map_sum() {
+    let a: Vec<i32> = (0..1024).collect();
+    let r1: i32 = a.par_iter().map(|&i| i + 1).sum();
+    let r2 = a.iter().map(|&i| i + 1).sum();
+    assert_eq!(r1, r2);
+}
+
+#[test]
+fn map_reduce() {
+    let a: Vec<i32> = (0..1024).collect();
+    let r1 = a.par_iter().map(|&i| i + 1).reduce(|| 0, |i, j| i + j);
+    let r2 = a.iter().map(|&i| i + 1).sum();
+    assert_eq!(r1, r2);
+}
+
+#[test]
+fn map_reduce_with() {
+    let a: Vec<i32> = (0..1024).collect();
+    let r1 = a.par_iter().map(|&i| i + 1).reduce_with(|i, j| i + j);
+    let r2 = a.iter().map(|&i| i + 1).sum();
+    assert_eq!(r1, Some(r2));
+}
+
+#[test]
+fn fold_map_reduce() {
+    // Kind of a weird test, but it demonstrates various
+    // transformations that are taking place. Relies on
+    // `with_max_len(1).fold()` being equivalent to `map()`.
+    //
+    // Take each number from 0 to 32 and fold them by appending to a
+    // vector.  Because of `with_max_len(1)`, this will produce 32 vectors,
+    // each with one item.  We then collect all of these into an
+    // individual vector by mapping each into their own vector (so we
+    // have Vec<Vec<i32>>) and then reducing those into a single
+    // vector.
+    let r1 = (0_i32..32)
+        .into_par_iter()
+        .with_max_len(1)
+        .fold(
+            || vec![],
+            |mut v, e| {
+                v.push(e);
+                v
+            },
+        )
+        .map(|v| vec![v])
+        .reduce_with(|mut v_a, v_b| {
+            v_a.extend(v_b);
+            v_a
+        });
+    assert_eq!(
+        r1,
+        Some(vec![
+            vec![0],
+            vec![1],
+            vec![2],
+            vec![3],
+            vec![4],
+            vec![5],
+            vec![6],
+            vec![7],
+            vec![8],
+            vec![9],
+            vec![10],
+            vec![11],
+            vec![12],
+            vec![13],
+            vec![14],
+            vec![15],
+            vec![16],
+            vec![17],
+            vec![18],
+            vec![19],
+            vec![20],
+            vec![21],
+            vec![22],
+            vec![23],
+            vec![24],
+            vec![25],
+            vec![26],
+            vec![27],
+            vec![28],
+            vec![29],
+            vec![30],
+            vec![31]
+        ])
+    );
+}
+
+#[test]
+fn fold_is_full() {
+    let counter = AtomicUsize::new(0);
+    let a = (0_i32..2048)
+        .into_par_iter()
+        .inspect(|_| {
+            counter.fetch_add(1, Ordering::SeqCst);
+        })
+        .fold(|| 0, |a, b| a + b)
+        .find_any(|_| true);
+    assert!(a.is_some());
+    assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+}
+
+#[test]
+fn check_step_by() {
+    let a: Vec<i32> = (0..1024).step_by(2).collect();
+    let b: Vec<i32> = (0..1024).into_par_iter().step_by(2).collect();
+
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_step_by_unaligned() {
+    let a: Vec<i32> = (0..1029).step_by(10).collect();
+    let b: Vec<i32> = (0..1029).into_par_iter().step_by(10).collect();
+
+    assert_eq!(a, b)
+}
+
+#[test]
+fn check_step_by_rev() {
+    let a: Vec<i32> = (0..1024).step_by(2).rev().collect();
+    let b: Vec<i32> = (0..1024).into_par_iter().step_by(2).rev().collect();
+
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_enumerate() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+
+    let mut b = vec![];
+    a.par_iter()
+        .enumerate()
+        .map(|(i, &x)| i + x)
+        .collect_into_vec(&mut b);
+    assert!(b.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_enumerate_rev() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+
+    let mut b = vec![];
+    a.par_iter()
+        .enumerate()
+        .rev()
+        .map(|(i, &x)| i + x)
+        .collect_into_vec(&mut b);
+    assert!(b.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_indices_after_enumerate_split() {
+    let a: Vec<i32> = (0..1024).collect();
+    a.par_iter().enumerate().with_producer(WithProducer);
+
+    struct WithProducer;
+    impl<'a> ProducerCallback<(usize, &'a i32)> for WithProducer {
+        type Output = ();
+        fn callback<P>(self, producer: P)
+        where
+            P: Producer<Item = (usize, &'a i32)>,
+        {
+            let (a, b) = producer.split_at(512);
+            for ((index, value), trusted_index) in a.into_iter().zip(0..) {
+                assert_eq!(index, trusted_index);
+                assert_eq!(index, *value as usize);
+            }
+            for ((index, value), trusted_index) in b.into_iter().zip(512..) {
+                assert_eq!(index, trusted_index);
+                assert_eq!(index, *value as usize);
+            }
+        }
+    }
+}
+
+#[test]
+fn check_increment() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+
+    a.par_iter_mut().enumerate().for_each(|(i, v)| *v += i);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_skip() {
+    let a: Vec<usize> = (0..1024).collect();
+
+    let mut v1 = Vec::new();
+    a.par_iter().skip(16).collect_into_vec(&mut v1);
+    let v2 = a.iter().skip(16).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().skip(2048).collect_into_vec(&mut v1);
+    let v2 = a.iter().skip(2048).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().skip(0).collect_into_vec(&mut v1);
+    let v2 = a.iter().skip(0).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    // Check that the skipped elements' side effects are executed
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    let num = AtomicUsize::new(0);
+    a.par_iter()
+        .map(|&n| num.fetch_add(n, Ordering::Relaxed))
+        .skip(512)
+        .count();
+    assert_eq!(num.load(Ordering::Relaxed), a.iter().sum::<usize>());
+}
+
+#[test]
+fn check_take() {
+    let a: Vec<usize> = (0..1024).collect();
+
+    let mut v1 = Vec::new();
+    a.par_iter().take(16).collect_into_vec(&mut v1);
+    let v2 = a.iter().take(16).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().take(2048).collect_into_vec(&mut v1);
+    let v2 = a.iter().take(2048).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().take(0).collect_into_vec(&mut v1);
+    let v2 = a.iter().take(0).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+}
+
+#[test]
+fn check_inspect() {
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    let a = AtomicUsize::new(0);
+    let b: usize = (0_usize..1024)
+        .into_par_iter()
+        .inspect(|&i| {
+            a.fetch_add(i, Ordering::Relaxed);
+        })
+        .sum();
+
+    assert_eq!(a.load(Ordering::Relaxed), b);
+}
+
+#[test]
+fn check_move() {
+    let a = vec![vec![1, 2, 3]];
+    let ptr = a[0].as_ptr();
+
+    let mut b = vec![];
+    a.into_par_iter().collect_into_vec(&mut b);
+
+    // a simple move means the inner vec will be completely unchanged
+    assert_eq!(ptr, b[0].as_ptr());
+}
+
+#[test]
+fn check_drops() {
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    let c = AtomicUsize::new(0);
+    let a = vec![DropCounter(&c); 10];
+
+    let mut b = vec![];
+    a.clone().into_par_iter().collect_into_vec(&mut b);
+    assert_eq!(c.load(Ordering::Relaxed), 0);
+
+    b.into_par_iter();
+    assert_eq!(c.load(Ordering::Relaxed), 10);
+
+    a.into_par_iter().with_producer(Partial);
+    assert_eq!(c.load(Ordering::Relaxed), 20);
+
+    #[derive(Clone)]
+    struct DropCounter<'a>(&'a AtomicUsize);
+    impl<'a> Drop for DropCounter<'a> {
+        fn drop(&mut self) {
+            self.0.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+
+    struct Partial;
+    impl<'a> ProducerCallback<DropCounter<'a>> for Partial {
+        type Output = ();
+        fn callback<P>(self, producer: P)
+        where
+            P: Producer<Item = DropCounter<'a>>,
+        {
+            let (a, _) = producer.split_at(5);
+            a.into_iter().next();
+        }
+    }
+}
+
+#[test]
+fn check_slice_indexed() {
+    let a = vec![1, 2, 3];
+    is_indexed(a.par_iter());
+}
+
+#[test]
+fn check_slice_mut_indexed() {
+    let mut a = vec![1, 2, 3];
+    is_indexed(a.par_iter_mut());
+}
+
+#[test]
+fn check_vec_indexed() {
+    let a = vec![1, 2, 3];
+    is_indexed(a.clone().into_par_iter());
+}
+
+#[test]
+fn check_range_indexed() {
+    is_indexed((1..5).into_par_iter());
+}
+
+#[test]
+fn check_cmp_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.cmp(b);
+
+    assert!(result == ::std::cmp::Ordering::Equal);
+}
+
+#[test]
+fn check_cmp_to_seq() {
+    assert_eq!(
+        (0..1024).into_par_iter().cmp(0..1024),
+        (0..1024).cmp(0..1024)
+    );
+}
+
+#[test]
+fn check_cmp_rng_to_seq() {
+    let mut rng = seeded_rng();
+    let rng = &mut rng;
+    let a: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    let b: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    for i in 0..a.len() {
+        let par_result = a[i..].par_iter().cmp(b[i..].par_iter());
+        let seq_result = a[i..].iter().cmp(b[i..].iter());
+
+        assert_eq!(par_result, seq_result);
+    }
+}
+
+#[test]
+fn check_cmp_lt_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (1..1024).into_par_iter();
+
+    let result = a.cmp(b);
+
+    assert!(result == ::std::cmp::Ordering::Less);
+}
+
+#[test]
+fn check_cmp_lt_to_seq() {
+    assert_eq!(
+        (0..1024).into_par_iter().cmp(1..1024),
+        (0..1024).cmp(1..1024)
+    )
+}
+
+#[test]
+fn check_cmp_gt_direct() {
+    let a = (1..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.cmp(b);
+
+    assert!(result == ::std::cmp::Ordering::Greater);
+}
+
+#[test]
+fn check_cmp_gt_to_seq() {
+    assert_eq!(
+        (1..1024).into_par_iter().cmp(0..1024),
+        (1..1024).cmp(0..1024)
+    )
+}
+
+#[test]
+fn check_cmp_short_circuit() {
+    // We only use a single thread in order to make the short-circuit behavior deterministic.
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let a = vec![0; 1024];
+    let mut b = a.clone();
+    b[42] = 1;
+
+    pool.install(|| {
+        let expected = ::std::cmp::Ordering::Less;
+        assert_eq!(a.par_iter().cmp(&b), expected);
+
+        for len in 1..10 {
+            let counter = AtomicUsize::new(0);
+            let result = a
+                .par_iter()
+                .with_max_len(len)
+                .inspect(|_| {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                })
+                .cmp(&b);
+            assert_eq!(result, expected);
+            // should not have visited every single one
+            assert!(counter.into_inner() < a.len());
+        }
+    });
+}
+
+#[test]
+fn check_partial_cmp_short_circuit() {
+    // We only use a single thread to make the short-circuit behavior deterministic.
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let a = vec![0; 1024];
+    let mut b = a.clone();
+    b[42] = 1;
+
+    pool.install(|| {
+        let expected = Some(::std::cmp::Ordering::Less);
+        assert_eq!(a.par_iter().partial_cmp(&b), expected);
+
+        for len in 1..10 {
+            let counter = AtomicUsize::new(0);
+            let result = a
+                .par_iter()
+                .with_max_len(len)
+                .inspect(|_| {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                })
+                .partial_cmp(&b);
+            assert_eq!(result, expected);
+            // should not have visited every single one
+            assert!(counter.into_inner() < a.len());
+        }
+    });
+}
+
+#[test]
+fn check_partial_cmp_nan_short_circuit() {
+    // We only use a single thread to make the short-circuit behavior deterministic.
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let a = vec![0.0; 1024];
+    let mut b = a.clone();
+    b[42] = f64::NAN;
+
+    pool.install(|| {
+        let expected = None;
+        assert_eq!(a.par_iter().partial_cmp(&b), expected);
+
+        for len in 1..10 {
+            let counter = AtomicUsize::new(0);
+            let result = a
+                .par_iter()
+                .with_max_len(len)
+                .inspect(|_| {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                })
+                .partial_cmp(&b);
+            assert_eq!(result, expected);
+            // should not have visited every single one
+            assert!(counter.into_inner() < a.len());
+        }
+    });
+}
+
+#[test]
+fn check_partial_cmp_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.partial_cmp(b);
+
+    assert!(result == Some(::std::cmp::Ordering::Equal));
+}
+
+#[test]
+fn check_partial_cmp_to_seq() {
+    let par_result = (0..1024).into_par_iter().partial_cmp(0..1024);
+    let seq_result = (0..1024).partial_cmp(0..1024);
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_rng_to_seq() {
+    let mut rng = seeded_rng();
+    let rng = &mut rng;
+    let a: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    let b: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    for i in 0..a.len() {
+        let par_result = a[i..].par_iter().partial_cmp(b[i..].par_iter());
+        let seq_result = a[i..].iter().partial_cmp(b[i..].iter());
+
+        assert_eq!(par_result, seq_result);
+    }
+}
+
+#[test]
+fn check_partial_cmp_lt_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (1..1024).into_par_iter();
+
+    let result = a.partial_cmp(b);
+
+    assert!(result == Some(::std::cmp::Ordering::Less));
+}
+
+#[test]
+fn check_partial_cmp_lt_to_seq() {
+    let par_result = (0..1024).into_par_iter().partial_cmp(1..1024);
+    let seq_result = (0..1024).partial_cmp(1..1024);
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_gt_direct() {
+    let a = (1..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.partial_cmp(b);
+
+    assert!(result == Some(::std::cmp::Ordering::Greater));
+}
+
+#[test]
+fn check_partial_cmp_gt_to_seq() {
+    let par_result = (1..1024).into_par_iter().partial_cmp(0..1024);
+    let seq_result = (1..1024).partial_cmp(0..1024);
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_none_direct() {
+    let a = vec![f64::NAN, 0.0];
+    let b = vec![0.0, 1.0];
+
+    let result = a.par_iter().partial_cmp(b.par_iter());
+
+    assert!(result == None);
+}
+
+#[test]
+fn check_partial_cmp_none_to_seq() {
+    let a = vec![f64::NAN, 0.0];
+    let b = vec![0.0, 1.0];
+
+    let par_result = a.par_iter().partial_cmp(b.par_iter());
+    let seq_result = a.iter().partial_cmp(b.iter());
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_late_nan_direct() {
+    let a = vec![0.0, f64::NAN];
+    let b = vec![1.0, 1.0];
+
+    let result = a.par_iter().partial_cmp(b.par_iter());
+
+    assert!(result == Some(::std::cmp::Ordering::Less));
+}
+
+#[test]
+fn check_partial_cmp_late_nan_to_seq() {
+    let a = vec![0.0, f64::NAN];
+    let b = vec![1.0, 1.0];
+
+    let par_result = a.par_iter().partial_cmp(b.par_iter());
+    let seq_result = a.iter().partial_cmp(b.iter());
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_cmp_lengths() {
+    // comparisons should consider length if they are otherwise equal
+    let a = vec![0; 1024];
+    let b = vec![0; 1025];
+
+    assert_eq!(a.par_iter().cmp(&b), a.iter().cmp(&b));
+    assert_eq!(a.par_iter().partial_cmp(&b), a.iter().partial_cmp(&b));
+}
+
+#[test]
+fn check_eq_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.eq(b);
+
+    assert!(result);
+}
+
+#[test]
+fn check_eq_to_seq() {
+    let par_result = (0..1024).into_par_iter().eq((0..1024).into_par_iter());
+    let seq_result = (0..1024).eq(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ne_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (1..1024).into_par_iter();
+
+    let result = a.ne(b);
+
+    assert!(result);
+}
+
+#[test]
+fn check_ne_to_seq() {
+    let par_result = (0..1024).into_par_iter().ne((1..1025).into_par_iter());
+    let seq_result = (0..1024).ne(1..1025);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ne_lengths() {
+    // equality should consider length too
+    let a = vec![0; 1024];
+    let b = vec![0; 1025];
+
+    assert_eq!(a.par_iter().eq(&b), a.iter().eq(&b));
+    assert_eq!(a.par_iter().ne(&b), a.iter().ne(&b));
+}
+
+#[test]
+fn check_lt_direct() {
+    assert!((0..1024).into_par_iter().lt(1..1024));
+    assert!(!(1..1024).into_par_iter().lt(0..1024));
+}
+
+#[test]
+fn check_lt_to_seq() {
+    let par_result = (0..1024).into_par_iter().lt((1..1024).into_par_iter());
+    let seq_result = (0..1024).lt(1..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_le_equal_direct() {
+    assert!((0..1024).into_par_iter().le((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_le_equal_to_seq() {
+    let par_result = (0..1024).into_par_iter().le((0..1024).into_par_iter());
+    let seq_result = (0..1024).le(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_le_less_direct() {
+    assert!((0..1024).into_par_iter().le((1..1024).into_par_iter()));
+}
+
+#[test]
+fn check_le_less_to_seq() {
+    let par_result = (0..1024).into_par_iter().le((1..1024).into_par_iter());
+    let seq_result = (0..1024).le(1..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_gt_direct() {
+    assert!((1..1024).into_par_iter().gt((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_gt_to_seq() {
+    let par_result = (1..1024).into_par_iter().gt((0..1024).into_par_iter());
+    let seq_result = (1..1024).gt(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ge_equal_direct() {
+    assert!((0..1024).into_par_iter().ge((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_ge_equal_to_seq() {
+    let par_result = (0..1024).into_par_iter().ge((0..1024).into_par_iter());
+    let seq_result = (0..1024).ge(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ge_greater_direct() {
+    assert!((1..1024).into_par_iter().ge((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_ge_greater_to_seq() {
+    let par_result = (1..1024).into_par_iter().ge((0..1024).into_par_iter());
+    let seq_result = (1..1024).ge(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_zip() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut().zip(&b[..]).for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_into_par_iter() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut()
+        .zip(&b) // here we rely on &b iterating over &usize
+        .for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_into_mut_par_iter() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+    let mut b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter().zip(&mut b).for_each(|(&a, b)| *b += a);
+
+    assert!(b.iter().all(|&x| x == b.len() - 1));
+}
+
+#[test]
+fn check_zip_range() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+
+    a.par_iter_mut()
+        .zip(0usize..1024)
+        .for_each(|(a, b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_eq() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut().zip_eq(&b[..]).for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_eq_into_par_iter() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut()
+        .zip_eq(&b) // here we rely on &b iterating over &usize
+        .for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_eq_into_mut_par_iter() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+    let mut b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter().zip_eq(&mut b).for_each(|(&a, b)| *b += a);
+
+    assert!(b.iter().all(|&x| x == b.len() - 1));
+}
+
+#[test]
+fn check_zip_eq_range() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+
+    a.par_iter_mut()
+        .zip_eq(0usize..1024)
+        .for_each(|(a, b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_sum_filtered_ints() {
+    let a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let par_sum_evens: i32 = a.par_iter().filter(|&x| (x & 1) == 0).sum();
+    let seq_sum_evens = a.iter().filter(|&x| (x & 1) == 0).sum();
+    assert_eq!(par_sum_evens, seq_sum_evens);
+}
+
+#[test]
+fn check_sum_filtermap_ints() {
+    let a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let par_sum_evens: u32 = a
+        .par_iter()
+        .filter_map(|&x| if (x & 1) == 0 { Some(x as u32) } else { None })
+        .sum();
+    let seq_sum_evens = a
+        .iter()
+        .filter_map(|&x| if (x & 1) == 0 { Some(x as u32) } else { None })
+        .sum();
+    assert_eq!(par_sum_evens, seq_sum_evens);
+}
+
+#[test]
+fn check_flat_map_nested_ranges() {
+    // FIXME -- why are precise type hints required on the integers here?
+
+    let v: i32 = (0_i32..10)
+        .into_par_iter()
+        .flat_map(|i| (0_i32..10).into_par_iter().map(move |j| (i, j)))
+        .map(|(i, j)| i * j)
+        .sum();
+
+    let w = (0_i32..10)
+        .flat_map(|i| (0_i32..10).map(move |j| (i, j)))
+        .map(|(i, j)| i * j)
+        .sum();
+
+    assert_eq!(v, w);
+}
+
+#[test]
+fn check_empty_flat_map_sum() {
+    let a: Vec<i32> = (0..1024).collect();
+    let empty = &a[..0];
+
+    // empty on the inside
+    let b: i32 = a.par_iter().flat_map(|_| empty).sum();
+    assert_eq!(b, 0);
+
+    // empty on the outside
+    let c: i32 = empty.par_iter().flat_map(|_| a.par_iter()).sum();
+    assert_eq!(c, 0);
+}
+
+#[test]
+fn check_flatten_vec() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: Vec<Vec<i32>> = vec![a.clone(), a.clone(), a.clone(), a.clone()];
+    let c: Vec<i32> = b.par_iter().flatten().cloned().collect();
+    let mut d = a.clone();
+    d.extend(&a);
+    d.extend(&a);
+    d.extend(&a);
+
+    assert_eq!(d, c);
+}
+
+#[test]
+fn check_flatten_vec_empty() {
+    let a: Vec<Vec<i32>> = vec![vec![]];
+    let b: Vec<i32> = a.par_iter().flatten().cloned().collect();
+
+    assert_eq!(vec![] as Vec<i32>, b);
+}
+
+#[test]
+fn check_slice_split() {
+    let v: Vec<_> = (0..1000).collect();
+    for m in 1..100 {
+        let a: Vec<_> = v.split(|x| x % m == 0).collect();
+        let b: Vec<_> = v.par_split(|x| x % m == 0).collect();
+        assert_eq!(a, b);
+    }
+
+    // same as std::slice::split() examples
+    let slice = [10, 40, 33, 20];
+    let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect();
+    assert_eq!(v, &[&slice[..2], &slice[3..]]);
+
+    let slice = [10, 40, 33];
+    let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect();
+    assert_eq!(v, &[&slice[..2], &slice[..0]]);
+
+    let slice = [10, 6, 33, 20];
+    let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect();
+    assert_eq!(v, &[&slice[..1], &slice[..0], &slice[3..]]);
+}
+
+#[test]
+fn check_slice_split_mut() {
+    let mut v1: Vec<_> = (0..1000).collect();
+    let mut v2 = v1.clone();
+    for m in 1..100 {
+        let a: Vec<_> = v1.split_mut(|x| x % m == 0).collect();
+        let b: Vec<_> = v2.par_split_mut(|x| x % m == 0).collect();
+        assert_eq!(a, b);
+    }
+
+    // same as std::slice::split_mut() example
+    let mut v = [10, 40, 30, 20, 60, 50];
+    v.par_split_mut(|num| num % 3 == 0).for_each(|group| {
+        group[0] = 1;
+    });
+    assert_eq!(v, [1, 40, 30, 1, 60, 1]);
+}
+
+#[test]
+fn check_chunks() {
+    let a: Vec<i32> = vec![1, 5, 10, 4, 100, 3, 1000, 2, 10000, 1];
+    let par_sum_product_pairs: i32 = a.par_chunks(2).map(|c| c.iter().product::<i32>()).sum();
+    let seq_sum_product_pairs = a.chunks(2).map(|c| c.iter().product::<i32>()).sum();
+    assert_eq!(par_sum_product_pairs, 12345);
+    assert_eq!(par_sum_product_pairs, seq_sum_product_pairs);
+
+    let par_sum_product_triples: i32 = a.par_chunks(3).map(|c| c.iter().product::<i32>()).sum();
+    let seq_sum_product_triples = a.chunks(3).map(|c| c.iter().product::<i32>()).sum();
+    assert_eq!(par_sum_product_triples, 5_0 + 12_00 + 20_000_000 + 1);
+    assert_eq!(par_sum_product_triples, seq_sum_product_triples);
+}
+
+#[test]
+fn check_chunks_mut() {
+    let mut a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let mut b: Vec<i32> = a.clone();
+    a.par_chunks_mut(2).for_each(|c| c[0] = c.iter().sum());
+    b.chunks_mut(2).for_each(|c| c[0] = c.iter().sum());
+    assert_eq!(a, &[3, 2, 7, 4, 11, 6, 15, 8, 19, 10]);
+    assert_eq!(a, b);
+
+    let mut a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let mut b: Vec<i32> = a.clone();
+    a.par_chunks_mut(3).for_each(|c| c[0] = c.iter().sum());
+    b.chunks_mut(3).for_each(|c| c[0] = c.iter().sum());
+    assert_eq!(a, &[6, 2, 3, 15, 5, 6, 24, 8, 9, 10]);
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_windows() {
+    let a: Vec<i32> = (0..1024).collect();
+    let par: Vec<_> = a.par_windows(2).collect();
+    let seq: Vec<_> = a.windows(2).collect();
+    assert_eq!(par, seq);
+
+    let par: Vec<_> = a.par_windows(100).collect();
+    let seq: Vec<_> = a.windows(100).collect();
+    assert_eq!(par, seq);
+
+    let par: Vec<_> = a.par_windows(1_000_000).collect();
+    let seq: Vec<_> = a.windows(1_000_000).collect();
+    assert_eq!(par, seq);
+
+    let par: Vec<_> = a
+        .par_windows(2)
+        .chain(a.par_windows(1_000_000))
+        .zip(a.par_windows(2))
+        .collect();
+    let seq: Vec<_> = a
+        .windows(2)
+        .chain(a.windows(1_000_000))
+        .zip(a.windows(2))
+        .collect();
+    assert_eq!(par, seq);
+}
+
+#[test]
+fn check_options() {
+    let mut a = vec![None, Some(1), None, None, Some(2), Some(4)];
+
+    assert_eq!(7, a.par_iter().flat_map(|opt| opt).sum::<i32>());
+    assert_eq!(7, a.par_iter().flat_map(|opt| opt).sum::<i32>());
+
+    a.par_iter_mut()
+        .flat_map(|opt| opt)
+        .for_each(|x| *x = *x * *x);
+
+    assert_eq!(21, a.into_par_iter().flat_map(|opt| opt).sum::<i32>());
+}
+
+#[test]
+fn check_results() {
+    let mut a = vec![Err(()), Ok(1i32), Err(()), Err(()), Ok(2), Ok(4)];
+
+    assert_eq!(7, a.par_iter().flat_map(|res| res).sum::<i32>());
+
+    assert_eq!(Err::<i32, ()>(()), a.par_iter().cloned().sum());
+    assert_eq!(Ok(7), a.par_iter().cloned().filter(Result::is_ok).sum());
+
+    assert_eq!(Err::<i32, ()>(()), a.par_iter().cloned().product());
+    assert_eq!(Ok(8), a.par_iter().cloned().filter(Result::is_ok).product());
+
+    a.par_iter_mut()
+        .flat_map(|res| res)
+        .for_each(|x| *x = *x * *x);
+
+    assert_eq!(21, a.into_par_iter().flat_map(|res| res).sum::<i32>());
+}
+
+#[test]
+fn check_binary_heap() {
+    use std::collections::BinaryHeap;
+
+    let a: BinaryHeap<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+    assert_eq!(45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_btree_map() {
+    use std::collections::BTreeMap;
+
+    let mut a: BTreeMap<i32, i32> = (0..10).map(|i| (i, -i)).collect();
+
+    assert_eq!(45, a.par_iter().map(|(&k, _)| k).sum::<i32>());
+    assert_eq!(-45, a.par_iter().map(|(_, &v)| v).sum::<i32>());
+
+    a.par_iter_mut().for_each(|(k, v)| *v += *k);
+
+    assert_eq!(0, a.into_par_iter().map(|(_, v)| v).sum::<i32>());
+}
+
+#[test]
+fn check_btree_set() {
+    use std::collections::BTreeSet;
+
+    let a: BTreeSet<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+    assert_eq!(45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_hash_map() {
+    use std::collections::HashMap;
+
+    let mut a: HashMap<i32, i32> = (0..10).map(|i| (i, -i)).collect();
+
+    assert_eq!(45, a.par_iter().map(|(&k, _)| k).sum::<i32>());
+    assert_eq!(-45, a.par_iter().map(|(_, &v)| v).sum::<i32>());
+
+    a.par_iter_mut().for_each(|(k, v)| *v += *k);
+
+    assert_eq!(0, a.into_par_iter().map(|(_, v)| v).sum::<i32>());
+}
+
+#[test]
+fn check_hash_set() {
+    use std::collections::HashSet;
+
+    let a: HashSet<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+    assert_eq!(45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_linked_list() {
+    use std::collections::LinkedList;
+
+    let mut a: LinkedList<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+
+    a.par_iter_mut().for_each(|x| *x = -*x);
+
+    assert_eq!(-45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_vec_deque() {
+    use std::collections::VecDeque;
+
+    let mut a: VecDeque<i32> = (0..10).collect();
+
+    // try to get it to wrap around
+    a.drain(..5);
+    a.extend(0..5);
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+
+    a.par_iter_mut().for_each(|x| *x = -*x);
+
+    assert_eq!(-45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_chain() {
+    let mut res = vec![];
+
+    // stays indexed in the face of madness
+    Some(0)
+        .into_par_iter()
+        .chain(Ok::<_, ()>(1))
+        .chain(1..4)
+        .chain(Err("huh?"))
+        .chain(None)
+        .chain(vec![5, 8, 13])
+        .map(|x| (x as u8 + b'a') as char)
+        .chain(vec!['x', 'y', 'z'])
+        .zip((0i32..1000).into_par_iter().map(|x| -x))
+        .enumerate()
+        .map(|(a, (b, c))| (a, b, c))
+        .chain(None)
+        .collect_into_vec(&mut res);
+
+    assert_eq!(
+        res,
+        vec![
+            (0, 'a', 0),
+            (1, 'b', -1),
+            (2, 'b', -2),
+            (3, 'c', -3),
+            (4, 'd', -4),
+            (5, 'f', -5),
+            (6, 'i', -6),
+            (7, 'n', -7),
+            (8, 'x', -8),
+            (9, 'y', -9),
+            (10, 'z', -10)
+        ]
+    );
+
+    // unindexed is ok too
+    let res: Vec<i32> = Some(1i32)
+        .into_par_iter()
+        .chain(
+            (2i32..4)
+                .into_par_iter()
+                .chain(vec![5, 6, 7, 8, 9])
+                .chain(Some((10, 100)).into_par_iter().flat_map(|(a, b)| a..b))
+                .filter(|x| x & 1 == 1),
+        )
+        .collect();
+    let other: Vec<i32> = (0..100).filter(|x| x & 1 == 1).collect();
+    assert_eq!(res, other);
+
+    // chain collect is ok with the "fake" specialization
+    let res: Vec<i32> = Some(1i32).into_par_iter().chain(None).collect();
+    assert_eq!(res, &[1]);
+}
+
+#[test]
+fn check_count() {
+    let c0 = (0_u32..24 * 1024).filter(|i| i % 2 == 0).count();
+    let c1 = (0_u32..24 * 1024)
+        .into_par_iter()
+        .filter(|i| i % 2 == 0)
+        .count();
+    assert_eq!(c0, c1);
+}
+
+#[test]
+fn find_any() {
+    let a: Vec<i32> = (0..1024).collect();
+
+    assert!(a.par_iter().find_any(|&&x| x % 42 == 41).is_some());
+    assert_eq!(
+        a.par_iter().find_any(|&&x| x % 19 == 1 && x % 53 == 0),
+        Some(&742_i32)
+    );
+    assert_eq!(a.par_iter().find_any(|&&x| x < 0), None);
+
+    assert!(a.par_iter().position_any(|&x| x % 42 == 41).is_some());
+    assert_eq!(
+        a.par_iter().position_any(|&x| x % 19 == 1 && x % 53 == 0),
+        Some(742_usize)
+    );
+    assert_eq!(a.par_iter().position_any(|&x| x < 0), None);
+
+    assert!(a.par_iter().any(|&x| x > 1000));
+    assert!(!a.par_iter().any(|&x| x < 0));
+
+    assert!(!a.par_iter().all(|&x| x > 1000));
+    assert!(a.par_iter().all(|&x| x >= 0));
+}
+
+#[test]
+fn find_first_or_last() {
+    let a: Vec<i32> = (0..1024).collect();
+
+    assert_eq!(a.par_iter().find_first(|&&x| x % 42 == 41), Some(&41_i32));
+    assert_eq!(
+        a.par_iter().find_first(|&&x| x % 19 == 1 && x % 53 == 0),
+        Some(&742_i32)
+    );
+    assert_eq!(a.par_iter().find_first(|&&x| x < 0), None);
+
+    assert_eq!(
+        a.par_iter().position_first(|&x| x % 42 == 41),
+        Some(41_usize)
+    );
+    assert_eq!(
+        a.par_iter().position_first(|&x| x % 19 == 1 && x % 53 == 0),
+        Some(742_usize)
+    );
+    assert_eq!(a.par_iter().position_first(|&x| x < 0), None);
+
+    assert_eq!(a.par_iter().find_last(|&&x| x % 42 == 41), Some(&1007_i32));
+    assert_eq!(
+        a.par_iter().find_last(|&&x| x % 19 == 1 && x % 53 == 0),
+        Some(&742_i32)
+    );
+    assert_eq!(a.par_iter().find_last(|&&x| x < 0), None);
+
+    assert_eq!(
+        a.par_iter().position_last(|&x| x % 42 == 41),
+        Some(1007_usize)
+    );
+    assert_eq!(
+        a.par_iter().position_last(|&x| x % 19 == 1 && x % 53 == 0),
+        Some(742_usize)
+    );
+    assert_eq!(a.par_iter().position_last(|&x| x < 0), None);
+}
+
+#[test]
+fn find_map_first_or_last_or_any() {
+    let mut a: Vec<i32> = vec![];
+
+    assert!(a.par_iter().find_map_any(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_first(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_last(half_if_positive).is_none());
+
+    a = (-1024..-3).collect();
+
+    assert!(a.par_iter().find_map_any(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_first(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_last(half_if_positive).is_none());
+
+    assert!(a.par_iter().find_map_any(half_if_negative).is_some());
+    assert_eq!(
+        a.par_iter().find_map_first(half_if_negative),
+        Some(-512_i32)
+    );
+    assert_eq!(a.par_iter().find_map_last(half_if_negative), Some(-2_i32));
+
+    a.append(&mut (2..1025).collect());
+
+    assert!(a.par_iter().find_map_any(half_if_positive).is_some());
+    assert_eq!(a.par_iter().find_map_first(half_if_positive), Some(1_i32));
+    assert_eq!(a.par_iter().find_map_last(half_if_positive), Some(512_i32));
+
+    fn half_if_positive(x: &i32) -> Option<i32> {
+        if *x > 0 {
+            Some(x / 2)
+        } else {
+            None
+        }
+    }
+
+    fn half_if_negative(x: &i32) -> Option<i32> {
+        if *x < 0 {
+            Some(x / 2)
+        } else {
+            None
+        }
+    }
+}
+
+#[test]
+fn check_find_not_present() {
+    let counter = AtomicUsize::new(0);
+    let value: Option<i32> = (0_i32..2048).into_par_iter().find_any(|&p| {
+        counter.fetch_add(1, Ordering::SeqCst);
+        p >= 2048
+    });
+    assert!(value.is_none());
+    assert!(counter.load(Ordering::SeqCst) == 2048); // should have visited every single one
+}
+
+#[test]
+fn check_find_is_present() {
+    let counter = AtomicUsize::new(0);
+    let value: Option<i32> = (0_i32..2048).into_par_iter().find_any(|&p| {
+        counter.fetch_add(1, Ordering::SeqCst);
+        p >= 1024 && p < 1096
+    });
+    let q = value.unwrap();
+    assert!(q >= 1024 && q < 1096);
+    assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+}
+
+#[test]
+fn check_while_some() {
+    let value = (0_i32..2048).into_par_iter().map(Some).while_some().max();
+    assert_eq!(value, Some(2047));
+
+    let counter = AtomicUsize::new(0);
+    let value = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| {
+            counter.fetch_add(1, Ordering::SeqCst);
+            if x < 1024 {
+                Some(x)
+            } else {
+                None
+            }
+        })
+        .while_some()
+        .max();
+    assert!(value < Some(1024));
+    assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+}
+
+#[test]
+fn par_iter_collect_option() {
+    let a: Option<Vec<_>> = (0_i32..2048).map(Some).collect();
+    let b: Option<Vec<_>> = (0_i32..2048).into_par_iter().map(Some).collect();
+    assert_eq!(a, b);
+
+    let c: Option<Vec<_>> = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| if x == 1234 { None } else { Some(x) })
+        .collect();
+    assert_eq!(c, None);
+}
+
+#[test]
+fn par_iter_collect_result() {
+    let a: Result<Vec<_>, ()> = (0_i32..2048).map(Ok).collect();
+    let b: Result<Vec<_>, ()> = (0_i32..2048).into_par_iter().map(Ok).collect();
+    assert_eq!(a, b);
+
+    let c: Result<Vec<_>, _> = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| if x == 1234 { Err(x) } else { Ok(x) })
+        .collect();
+    assert_eq!(c, Err(1234));
+
+    let d: Result<Vec<_>, _> = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| if x % 100 == 99 { Err(x) } else { Ok(x) })
+        .collect();
+    assert_eq!(d.map_err(|x| x % 100), Err(99));
+}
+
+#[test]
+fn par_iter_collect() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: Vec<i32> = a.par_iter().map(|&i| i + 1).collect();
+    let c: Vec<i32> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_vecdeque() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: VecDeque<i32> = a.par_iter().cloned().collect();
+    let c: VecDeque<i32> = a.iter().cloned().collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_binaryheap() {
+    let a: Vec<i32> = (0..1024).collect();
+    let mut b: BinaryHeap<i32> = a.par_iter().cloned().collect();
+    assert_eq!(b.peek(), Some(&1023));
+    assert_eq!(b.len(), 1024);
+    for n in (0..1024).rev() {
+        assert_eq!(b.pop(), Some(n));
+        assert_eq!(b.len() as i32, n);
+    }
+}
+
+#[test]
+fn par_iter_collect_hashmap() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: HashMap<i32, String> = a.par_iter().map(|&i| (i, format!("{}", i))).collect();
+    assert_eq!(&b[&3], "3");
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_hashset() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: HashSet<i32> = a.par_iter().cloned().collect();
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_btreemap() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: BTreeMap<i32, String> = a.par_iter().map(|&i| (i, format!("{}", i))).collect();
+    assert_eq!(&b[&3], "3");
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_btreeset() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: BTreeSet<i32> = a.par_iter().cloned().collect();
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_linked_list() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: LinkedList<_> = a.par_iter().map(|&i| (i, format!("{}", i))).collect();
+    let c: LinkedList<_> = a.iter().map(|&i| (i, format!("{}", i))).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_linked_list_flat_map_filter() {
+    let b: LinkedList<i32> = (0_i32..1024)
+        .into_par_iter()
+        .flat_map(|i| (0..i))
+        .filter(|&i| i % 2 == 0)
+        .collect();
+    let c: LinkedList<i32> = (0_i32..1024)
+        .flat_map(|i| (0..i))
+        .filter(|&i| i % 2 == 0)
+        .collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_cows() {
+    use std::borrow::Cow;
+
+    let s = "Fearless Concurrency with Rust";
+
+    // Collects `i32` into a `Vec`
+    let a: Cow<'_, [i32]> = (0..1024).collect();
+    let b: Cow<'_, [i32]> = a.par_iter().cloned().collect();
+    assert_eq!(a, b);
+
+    // Collects `char` into a `String`
+    let a: Cow<'_, str> = s.chars().collect();
+    let b: Cow<'_, str> = s.par_chars().collect();
+    assert_eq!(a, b);
+
+    // Collects `str` into a `String`
+    let a: Cow<'_, str> = s.split_whitespace().collect();
+    let b: Cow<'_, str> = s.par_split_whitespace().collect();
+    assert_eq!(a, b);
+
+    // Collects `String` into a `String`
+    let a: Cow<'_, str> = s.split_whitespace().map(str::to_owned).collect();
+    let b: Cow<'_, str> = s.par_split_whitespace().map(str::to_owned).collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn par_iter_unindexed_flat_map() {
+    let b: Vec<i64> = (0_i64..1024).into_par_iter().flat_map(Some).collect();
+    let c: Vec<i64> = (0_i64..1024).flat_map(Some).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn min_max() {
+    let rng = seeded_rng();
+    let a: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    for i in 0..=a.len() {
+        let slice = &a[..i];
+        assert_eq!(slice.par_iter().min(), slice.iter().min());
+        assert_eq!(slice.par_iter().max(), slice.iter().max());
+    }
+}
+
+#[test]
+fn min_max_by() {
+    let rng = seeded_rng();
+    // Make sure there are duplicate keys, for testing sort stability
+    let r: Vec<i32> = rng.sample_iter(&Standard).take(512).collect();
+    let a: Vec<(i32, u16)> = r.iter().chain(&r).cloned().zip(0..).collect();
+    for i in 0..=a.len() {
+        let slice = &a[..i];
+        assert_eq!(
+            slice.par_iter().min_by(|x, y| x.0.cmp(&y.0)),
+            slice.iter().min_by(|x, y| x.0.cmp(&y.0))
+        );
+        assert_eq!(
+            slice.par_iter().max_by(|x, y| x.0.cmp(&y.0)),
+            slice.iter().max_by(|x, y| x.0.cmp(&y.0))
+        );
+    }
+}
+
+#[test]
+fn min_max_by_key() {
+    let rng = seeded_rng();
+    // Make sure there are duplicate keys, for testing sort stability
+    let r: Vec<i32> = rng.sample_iter(&Standard).take(512).collect();
+    let a: Vec<(i32, u16)> = r.iter().chain(&r).cloned().zip(0..).collect();
+    for i in 0..=a.len() {
+        let slice = &a[..i];
+        assert_eq!(
+            slice.par_iter().min_by_key(|x| x.0),
+            slice.iter().min_by_key(|x| x.0)
+        );
+        assert_eq!(
+            slice.par_iter().max_by_key(|x| x.0),
+            slice.iter().max_by_key(|x| x.0)
+        );
+    }
+}
+
+#[test]
+fn check_rev() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    assert!(a.par_iter().rev().zip(b).all(|(&a, b)| a == b));
+}
+
+#[test]
+fn scope_mix() {
+    let counter_p = &AtomicUsize::new(0);
+    scope(|s| {
+        s.spawn(move |s| {
+            divide_and_conquer(s, counter_p, 1024);
+        });
+        s.spawn(move |_| {
+            let a: Vec<i32> = (0..1024).collect();
+            let r1 = a.par_iter().map(|&i| i + 1).reduce_with(|i, j| i + j);
+            let r2 = a.iter().map(|&i| i + 1).sum();
+            assert_eq!(r1.unwrap(), r2);
+        });
+    });
+}
+
+fn divide_and_conquer<'scope>(scope: &Scope<'scope>, counter: &'scope AtomicUsize, size: usize) {
+    if size > 1 {
+        scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2));
+        scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2));
+    } else {
+        // count the leaves
+        counter.fetch_add(1, Ordering::SeqCst);
+    }
+}
+
+#[test]
+fn check_split() {
+    use std::ops::Range;
+
+    let a = (0..1024).into_par_iter();
+
+    let b = split(0..1024, |Range { start, end }| {
+        let mid = (end - start) / 2;
+        if mid > start {
+            (start..mid, Some(mid..end))
+        } else {
+            (start..end, None)
+        }
+    })
+    .flat_map(|range| range);
+
+    assert_eq!(a.collect::<Vec<_>>(), b.collect::<Vec<_>>());
+}
+
+#[test]
+fn check_lengths() {
+    fn check(min: usize, max: usize) {
+        let range = 0..1024 * 1024;
+
+        // Check against normalized values.
+        let min_check = cmp::min(cmp::max(min, 1), range.len());
+        let max_check = cmp::max(max, min_check.saturating_add(min_check - 1));
+
+        assert!(
+            range
+                .into_par_iter()
+                .with_min_len(min)
+                .with_max_len(max)
+                .fold(|| 0, |count, _| count + 1)
+                .all(|c| c >= min_check && c <= max_check),
+            "check_lengths failed {:?} -> {:?} ",
+            (min, max),
+            (min_check, max_check)
+        );
+    }
+
+    let lengths = [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, usize::MAX];
+    for &min in &lengths {
+        for &max in &lengths {
+            check(min, max);
+        }
+    }
+}
+
+#[test]
+fn check_map_with() {
+    let (sender, receiver) = mpsc::channel();
+    let a: HashSet<_> = (0..1024).collect();
+
+    a.par_iter()
+        .cloned()
+        .map_with(sender, |s, i| s.send(i).unwrap())
+        .count();
+
+    let b: HashSet<_> = receiver.iter().collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_fold_with() {
+    let (sender, receiver) = mpsc::channel();
+    let a: HashSet<_> = (0..1024).collect();
+
+    a.par_iter()
+        .cloned()
+        .fold_with(sender, |s, i| {
+            s.send(i).unwrap();
+            s
+        })
+        .count();
+
+    let b: HashSet<_> = receiver.iter().collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_for_each_with() {
+    let (sender, receiver) = mpsc::channel();
+    let a: HashSet<_> = (0..1024).collect();
+
+    a.par_iter()
+        .cloned()
+        .for_each_with(sender, |s, i| s.send(i).unwrap());
+
+    let b: HashSet<_> = receiver.iter().collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_extend_items() {
+    fn check<C>()
+    where
+        C: Default
+            + Eq
+            + Debug
+            + Extend<i32>
+            + for<'a> Extend<&'a i32>
+            + ParallelExtend<i32>
+            + for<'a> ParallelExtend<&'a i32>,
+    {
+        let mut serial = C::default();
+        let mut parallel = C::default();
+
+        // extend with references
+        let v: Vec<_> = (0..128).collect();
+        serial.extend(&v);
+        parallel.par_extend(&v);
+        assert_eq!(serial, parallel);
+
+        // extend with values
+        serial.extend(-128..0);
+        parallel.par_extend(-128..0);
+        assert_eq!(serial, parallel);
+    }
+
+    check::<BTreeSet<_>>();
+    check::<HashSet<_>>();
+    check::<LinkedList<_>>();
+    check::<Vec<_>>();
+    check::<VecDeque<_>>();
+}
+
+#[test]
+fn check_extend_heap() {
+    let mut serial: BinaryHeap<_> = Default::default();
+    let mut parallel: BinaryHeap<_> = Default::default();
+
+    // extend with references
+    let v: Vec<_> = (0..128).collect();
+    serial.extend(&v);
+    parallel.par_extend(&v);
+    assert_eq!(
+        serial.clone().into_sorted_vec(),
+        parallel.clone().into_sorted_vec()
+    );
+
+    // extend with values
+    serial.extend(-128..0);
+    parallel.par_extend(-128..0);
+    assert_eq!(serial.into_sorted_vec(), parallel.into_sorted_vec());
+}
+
+#[test]
+fn check_extend_pairs() {
+    fn check<C>()
+    where
+        C: Default
+            + Eq
+            + Debug
+            + Extend<(usize, i32)>
+            + for<'a> Extend<(&'a usize, &'a i32)>
+            + ParallelExtend<(usize, i32)>
+            + for<'a> ParallelExtend<(&'a usize, &'a i32)>,
+    {
+        let mut serial = C::default();
+        let mut parallel = C::default();
+
+        // extend with references
+        let m: HashMap<_, _> = (0..128).enumerate().collect();
+        serial.extend(&m);
+        parallel.par_extend(&m);
+        assert_eq!(serial, parallel);
+
+        // extend with values
+        let v: Vec<(_, _)> = (-128..0).enumerate().collect();
+        serial.extend(v.clone());
+        parallel.par_extend(v);
+        assert_eq!(serial, parallel);
+    }
+
+    check::<BTreeMap<usize, i32>>();
+    check::<HashMap<usize, i32>>();
+}
+
+#[test]
+fn check_unzip_into_vecs() {
+    let mut a = vec![];
+    let mut b = vec![];
+    (0..1024)
+        .into_par_iter()
+        .map(|i| i * i)
+        .enumerate()
+        .unzip_into_vecs(&mut a, &mut b);
+
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+}
+
+#[test]
+fn check_unzip() {
+    // indexed, unindexed
+    let (a, b): (Vec<_>, HashSet<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip();
+    let (c, d): (Vec<_>, HashSet<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+
+    // unindexed, indexed
+    let (a, b): (HashSet<_>, Vec<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip();
+    let (c, d): (HashSet<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+
+    // indexed, indexed
+    let (a, b): (Vec<_>, Vec<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip();
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+
+    // unindexed producer
+    let (a, b): (Vec<_>, Vec<_>) = (0..1024)
+        .into_par_iter()
+        .filter_map(|i| Some((i, i * i)))
+        .unzip();
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| (i, i * i)).unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+}
+
+#[test]
+fn check_partition() {
+    let (a, b): (Vec<_>, Vec<_>) = (0..1024).into_par_iter().partition(|&i| i % 3 == 0);
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).partition(|&i| i % 3 == 0);
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+}
+
+#[test]
+fn check_partition_map() {
+    let input = "a b c 1 2 3 x y z";
+    let (a, b): (Vec<_>, String) =
+        input
+            .par_split_whitespace()
+            .partition_map(|s| match s.parse::<i32>() {
+                Ok(n) => Either::Left(n),
+                Err(_) => Either::Right(s),
+            });
+    assert_eq!(a, vec![1, 2, 3]);
+    assert_eq!(b, "abcxyz");
+}
+
+#[test]
+fn check_either() {
+    type I = crate::vec::IntoIter<i32>;
+    type E = Either<I, I>;
+
+    let v: Vec<i32> = (0..1024).collect();
+
+    // try iterating the left side
+    let left: E = Either::Left(v.clone().into_par_iter());
+    assert!(left.eq(v.clone()));
+
+    // try iterating the right side
+    let right: E = Either::Right(v.clone().into_par_iter());
+    assert!(right.eq(v.clone()));
+
+    // try an indexed iterator
+    let left: E = Either::Left(v.clone().into_par_iter());
+    assert!(left.enumerate().eq(v.clone().into_par_iter().enumerate()));
+}
+
+#[test]
+fn check_either_extend() {
+    type E = Either<Vec<i32>, HashSet<i32>>;
+
+    let v: Vec<i32> = (0..1024).collect();
+
+    // try extending the left side
+    let mut left: E = Either::Left(vec![]);
+    left.par_extend(v.clone());
+    assert_eq!(left.as_ref(), Either::Left(&v));
+
+    // try extending the right side
+    let mut right: E = Either::Right(HashSet::default());
+    right.par_extend(v.clone());
+    assert_eq!(right, Either::Right(v.iter().cloned().collect()));
+}
+
+#[test]
+fn check_interleave_eq() {
+    let xs: Vec<usize> = (0..10).collect();
+    let ys: Vec<usize> = (10..20).collect();
+
+    let mut actual = vec![];
+    xs.par_iter()
+        .interleave(&ys)
+        .map(|&i| i)
+        .collect_into_vec(&mut actual);
+
+    let expected: Vec<usize> = (0..10)
+        .zip(10..20)
+        .flat_map(|(i, j)| vec![i, j].into_iter())
+        .collect();
+    assert_eq!(expected, actual);
+}
+
+#[test]
+fn check_interleave_uneven() {
+    let cases: Vec<(Vec<usize>, Vec<usize>, Vec<usize>)> = vec![
+        (
+            (0..9).collect(),
+            vec![10],
+            vec![0, 10, 1, 2, 3, 4, 5, 6, 7, 8],
+        ),
+        (
+            vec![10],
+            (0..9).collect(),
+            vec![10, 0, 1, 2, 3, 4, 5, 6, 7, 8],
+        ),
+        (
+            (0..5).collect(),
+            (5..10).collect(),
+            (0..5)
+                .zip(5..10)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+        (vec![], (0..9).collect(), (0..9).collect()),
+        ((0..9).collect(), vec![], (0..9).collect()),
+        (
+            (0..50).collect(),
+            (50..100).collect(),
+            (0..50)
+                .zip(50..100)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+    ];
+
+    for (i, (xs, ys, expected)) in cases.into_iter().enumerate() {
+        let mut res = vec![];
+        xs.par_iter()
+            .interleave(&ys)
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(expected, res, "Case {} failed", i);
+
+        res.truncate(0);
+        xs.par_iter()
+            .interleave(&ys)
+            .rev()
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(
+            expected.into_iter().rev().collect::<Vec<usize>>(),
+            res,
+            "Case {} reversed failed",
+            i
+        );
+    }
+}
+
+#[test]
+fn check_interleave_shortest() {
+    let cases: Vec<(Vec<usize>, Vec<usize>, Vec<usize>)> = vec![
+        ((0..9).collect(), vec![10], vec![0, 10, 1]),
+        (vec![10], (0..9).collect(), vec![10, 0]),
+        (
+            (0..5).collect(),
+            (5..10).collect(),
+            (0..5)
+                .zip(5..10)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+        (vec![], (0..9).collect(), vec![]),
+        ((0..9).collect(), vec![], vec![0]),
+        (
+            (0..50).collect(),
+            (50..100).collect(),
+            (0..50)
+                .zip(50..100)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+    ];
+
+    for (i, (xs, ys, expected)) in cases.into_iter().enumerate() {
+        let mut res = vec![];
+        xs.par_iter()
+            .interleave_shortest(&ys)
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(expected, res, "Case {} failed", i);
+
+        res.truncate(0);
+        xs.par_iter()
+            .interleave_shortest(&ys)
+            .rev()
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(
+            expected.into_iter().rev().collect::<Vec<usize>>(),
+            res,
+            "Case {} reversed failed",
+            i
+        );
+    }
+}
+
+#[test]
+#[should_panic(expected = "chunk_size must not be zero")]
+fn check_chunks_zero_size() {
+    let _: Vec<Vec<i32>> = vec![1, 2, 3].into_par_iter().chunks(0).collect();
+}
+
+#[test]
+fn check_chunks_even_size() {
+    assert_eq!(
+        vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]],
+        (1..10).into_par_iter().chunks(3).collect::<Vec<Vec<i32>>>()
+    );
+}
+
+#[test]
+fn check_chunks_empty() {
+    let v: Vec<i32> = vec![];
+    let expected: Vec<Vec<i32>> = vec![];
+    assert_eq!(
+        expected,
+        v.into_par_iter().chunks(2).collect::<Vec<Vec<i32>>>()
+    );
+}
+
+#[test]
+fn check_chunks_len() {
+    assert_eq!(4, (0..8).into_par_iter().chunks(2).len());
+    assert_eq!(3, (0..9).into_par_iter().chunks(3).len());
+    assert_eq!(3, (0..8).into_par_iter().chunks(3).len());
+    assert_eq!(1, (&[1]).par_iter().chunks(3).len());
+    assert_eq!(0, (0..0).into_par_iter().chunks(3).len());
+}
+
+#[test]
+fn check_chunks_uneven() {
+    let cases: Vec<(Vec<u32>, usize, Vec<Vec<u32>>)> = vec![
+        ((0..5).collect(), 3, vec![vec![0, 1, 2], vec![3, 4]]),
+        (vec![1], 5, vec![vec![1]]),
+        ((0..4).collect(), 3, vec![vec![0, 1, 2], vec![3]]),
+    ];
+
+    for (i, (v, n, expected)) in cases.into_iter().enumerate() {
+        let mut res: Vec<Vec<u32>> = vec![];
+        v.par_iter()
+            .chunks(n)
+            .map(|v| v.into_iter().cloned().collect())
+            .collect_into_vec(&mut res);
+        assert_eq!(expected, res, "Case {} failed", i);
+
+        res.truncate(0);
+        v.into_par_iter().chunks(n).rev().collect_into_vec(&mut res);
+        assert_eq!(
+            expected.into_iter().rev().collect::<Vec<Vec<u32>>>(),
+            res,
+            "Case {} reversed failed",
+            i
+        );
+    }
+}
+
+#[test]
+#[ignore] // it's quick enough on optimized 32-bit platforms, but otherwise... ... ...
+#[should_panic(expected = "overflow")]
+#[cfg(debug_assertions)]
+fn check_repeat_unbounded() {
+    // use just one thread, so we don't get infinite adaptive splitting
+    // (forever stealing and re-splitting jobs that will panic on overflow)
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+    pool.install(|| {
+        println!("counted {} repeats", repeat(()).count());
+    });
+}
+
+#[test]
+fn check_repeat_find_any() {
+    let even = repeat(4).find_any(|&x| x % 2 == 0);
+    assert_eq!(even, Some(4));
+}
+
+#[test]
+fn check_repeat_take() {
+    let v: Vec<_> = repeat(4).take(4).collect();
+    assert_eq!(v, [4, 4, 4, 4]);
+}
+
+#[test]
+fn check_repeat_zip() {
+    let v = vec![4, 4, 4, 4];
+    let mut fours: Vec<_> = repeat(4).zip(v).collect();
+    assert_eq!(fours.len(), 4);
+    while let Some(item) = fours.pop() {
+        assert_eq!(item, (4, 4));
+    }
+}
+
+#[test]
+fn check_repeatn_zip_left() {
+    let v = vec![4, 4, 4, 4];
+    let mut fours: Vec<_> = repeatn(4, usize::MAX).zip(v).collect();
+    assert_eq!(fours.len(), 4);
+    while let Some(item) = fours.pop() {
+        assert_eq!(item, (4, 4));
+    }
+}
+
+#[test]
+fn check_repeatn_zip_right() {
+    let v = vec![4, 4, 4, 4];
+    let mut fours: Vec<_> = v.into_par_iter().zip(repeatn(4, usize::MAX)).collect();
+    assert_eq!(fours.len(), 4);
+    while let Some(item) = fours.pop() {
+        assert_eq!(item, (4, 4));
+    }
+}
+
+#[test]
+fn check_empty() {
+    // drive_unindexed
+    let mut v: Vec<i32> = empty().filter(|_| unreachable!()).collect();
+    assert!(v.is_empty());
+
+    // drive (indexed)
+    empty().collect_into_vec(&mut v);
+    assert!(v.is_empty());
+
+    // with_producer
+    let v: Vec<(i32, i32)> = empty().zip(1..10).collect();
+    assert!(v.is_empty());
+}
+
+#[test]
+fn check_once() {
+    // drive_unindexed
+    let mut v: Vec<i32> = once(42).filter(|_| true).collect();
+    assert_eq!(v, &[42]);
+
+    // drive (indexed)
+    once(42).collect_into_vec(&mut v);
+    assert_eq!(v, &[42]);
+
+    // with_producer
+    let v: Vec<(i32, i32)> = once(42).zip(1..10).collect();
+    assert_eq!(v, &[(42, 1)]);
+}
+
+#[test]
+fn check_update() {
+    let mut v: Vec<Vec<_>> = vec![vec![1], vec![3, 2, 1]];
+    v.par_iter_mut().update(|v| v.push(0)).for_each(|_| ());
+
+    assert_eq!(v, vec![vec![1, 0], vec![3, 2, 1, 0]]);
+}
diff --git a/src/iter/try_fold.rs b/src/iter/try_fold.rs
new file mode 100644
index 0000000..c1a57a4
--- /dev/null
+++ b/src/iter/try_fold.rs
@@ -0,0 +1,294 @@
+use super::plumbing::*;
+use super::*;
+
+use super::private::Try;
+use std::fmt::{self, Debug};
+use std::marker::PhantomData;
+
+impl<U, I, ID, F> TryFold<I, U, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U::Ok + Sync + Send,
+    U: Try + Send,
+{
+    pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self {
+        TryFold {
+            base,
+            identity,
+            fold_op,
+            marker: PhantomData,
+        }
+    }
+}
+
+/// `TryFold` is an iterator that applies a function over an iterator, producing a single value.
+/// This struct is created by the [`try_fold()`] method on [`ParallelIterator`].
+///
+/// [`try_fold()`]: trait.ParallelIterator.html#method.try_fold
+/// [`ParallelIterator`]: trait.ParallelIterator.html
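+///
+/// A minimal sketch of the adaptor in use, with arithmetic overflow standing in
+/// for the error case; the per-split partial sums are then combined with
+/// `try_reduce()`:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let sum = (1..=100u32)
+///     .into_par_iter()
+///     // fold within each split, failing on overflow
+///     .try_fold(|| 0u32, |acc, x| acc.checked_add(x).ok_or(()))
+///     // combine the partial sums, again failing on overflow
+///     .try_reduce(|| 0, |a, b| a.checked_add(b).ok_or(()));
+/// assert_eq!(sum, Ok(5050));
+/// ```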
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct TryFold<I, U, ID, F> {
+    base: I,
+    identity: ID,
+    fold_op: F,
+    marker: PhantomData<U>,
+}
+
+impl<U, I: ParallelIterator + Debug, ID, F> Debug for TryFold<I, U, ID, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TryFold").field("base", &self.base).finish()
+    }
+}
+
+impl<U, I, ID, F> ParallelIterator for TryFold<I, U, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U::Ok + Sync + Send,
+    U: Try + Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = TryFoldConsumer {
+            base: consumer,
+            identity: &self.identity,
+            fold_op: &self.fold_op,
+            marker: PhantomData,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct TryFoldConsumer<'c, U, C, ID, F> {
+    base: C,
+    identity: &'c ID,
+    fold_op: &'c F,
+    marker: PhantomData<U>,
+}
+
+impl<'r, U, T, C, ID, F> Consumer<T> for TryFoldConsumer<'r, U, C, ID, F>
+where
+    C: Consumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    ID: Fn() -> U::Ok + Sync,
+    U: Try + Send,
+{
+    type Folder = TryFoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            TryFoldConsumer { base: left, ..self },
+            TryFoldConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryFoldFolder {
+            base: self.base.into_folder(),
+            result: Ok((self.identity)()),
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, ID, F> UnindexedConsumer<T> for TryFoldConsumer<'r, U, C, ID, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    ID: Fn() -> U::Ok + Sync,
+    U: Try + Send,
+{
+    fn split_off_left(&self) -> Self {
+        TryFoldConsumer {
+            base: self.base.split_off_left(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct TryFoldFolder<'r, C, U: Try, F> {
+    base: C,
+    fold_op: &'r F,
+    result: Result<U::Ok, U::Error>,
+}
+
+impl<'r, C, U, F, T> Folder<T> for TryFoldFolder<'r, C, U, F>
+where
+    C: Folder<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    U: Try,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        let fold_op = self.fold_op;
+        if let Ok(acc) = self.result {
+            self.result = fold_op(acc, item).into_result();
+        }
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        let item = match self.result {
+            Ok(ok) => U::from_ok(ok),
+            Err(error) => U::from_error(error),
+        };
+        self.base.consume(item).complete()
+    }
+
+    fn full(&self) -> bool {
+        self.result.is_err() || self.base.full()
+    }
+}
+
+// ///////////////////////////////////////////////////////////////////////////
+
+impl<U, I, F> TryFoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    pub(super) fn new(base: I, item: U::Ok, fold_op: F) -> Self {
+        TryFoldWith {
+            base,
+            item,
+            fold_op,
+        }
+    }
+}
+
+/// `TryFoldWith` is an iterator that applies a function over an iterator, producing a single value.
+/// This struct is created by the [`try_fold_with()`] method on [`ParallelIterator`].
+///
+/// [`try_fold_with()`]: trait.ParallelIterator.html#method.try_fold_with
+/// [`ParallelIterator`]: trait.ParallelIterator.html
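+///
+/// A minimal sketch of the adaptor in use, seeding each split with a clone of the
+/// initial value and treating arithmetic overflow as the error case:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let product = (1..=10u64)
+///     .into_par_iter()
+///     // fold within each split, starting from a clone of `1u64`
+///     .try_fold_with(1u64, |acc, x| acc.checked_mul(x).ok_or(()))
+///     // combine the partial products
+///     .try_reduce(|| 1, |a, b| a.checked_mul(b).ok_or(()));
+/// assert_eq!(product, Ok(3_628_800));
+/// ```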
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct TryFoldWith<I, U: Try, F> {
+    base: I,
+    item: U::Ok,
+    fold_op: F,
+}
+
+impl<I: ParallelIterator + Debug, U: Try, F> Debug for TryFoldWith<I, U, F>
+where
+    U::Ok: Debug,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TryFoldWith")
+            .field("base", &self.base)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<U, I, F> ParallelIterator for TryFoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = TryFoldWithConsumer {
+            base: consumer,
+            item: self.item,
+            fold_op: &self.fold_op,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct TryFoldWithConsumer<'c, C, U: Try, F> {
+    base: C,
+    item: U::Ok,
+    fold_op: &'c F,
+}
+
+impl<'r, U, T, C, F> Consumer<T> for TryFoldWithConsumer<'r, C, U, F>
+where
+    C: Consumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    type Folder = TryFoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            TryFoldWithConsumer {
+                base: left,
+                item: self.item.clone(),
+                ..self
+            },
+            TryFoldWithConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryFoldFolder {
+            base: self.base.into_folder(),
+            result: Ok(self.item),
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, F> UnindexedConsumer<T> for TryFoldWithConsumer<'r, C, U, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    fn split_off_left(&self) -> Self {
+        TryFoldWithConsumer {
+            base: self.base.split_off_left(),
+            item: self.item.clone(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
diff --git a/src/iter/try_reduce.rs b/src/iter/try_reduce.rs
new file mode 100644
index 0000000..76b3850
--- /dev/null
+++ b/src/iter/try_reduce.rs
@@ -0,0 +1,129 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use super::private::Try;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+pub(super) fn try_reduce<PI, R, ID, T>(pi: PI, identity: ID, reduce_op: R) -> T
+where
+    PI: ParallelIterator<Item = T>,
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    ID: Fn() -> T::Ok + Sync,
+    T: Try + Send,
+{
+    let full = AtomicBool::new(false);
+    let consumer = TryReduceConsumer {
+        identity: &identity,
+        reduce_op: &reduce_op,
+        full: &full,
+    };
+    pi.drive_unindexed(consumer)
+}
+
+struct TryReduceConsumer<'r, R, ID> {
+    identity: &'r ID,
+    reduce_op: &'r R,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R, ID> Copy for TryReduceConsumer<'r, R, ID> {}
+
+impl<'r, R, ID> Clone for TryReduceConsumer<'r, R, ID> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Consumer<T> for TryReduceConsumer<'r, R, ID>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    ID: Fn() -> T::Ok + Sync,
+    T: Try + Send,
+{
+    type Folder = TryReduceFolder<'r, R, T>;
+    type Reducer = Self;
+    type Result = T;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (self, self, self)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryReduceFolder {
+            reduce_op: self.reduce_op,
+            result: Ok((self.identity)()),
+            full: self.full,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
+
+impl<'r, R, ID, T> UnindexedConsumer<T> for TryReduceConsumer<'r, R, ID>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    ID: Fn() -> T::Ok + Sync,
+    T: Try + Send,
+{
+    fn split_off_left(&self) -> Self {
+        *self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Reducer<T> for TryReduceConsumer<'r, R, ID>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try,
+{
+    fn reduce(self, left: T, right: T) -> T {
+        match (left.into_result(), right.into_result()) {
+            (Ok(left), Ok(right)) => (self.reduce_op)(left, right),
+            (Err(e), _) | (_, Err(e)) => T::from_error(e),
+        }
+    }
+}
+
+struct TryReduceFolder<'r, R, T: Try> {
+    reduce_op: &'r R,
+    result: Result<T::Ok, T::Error>,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R, T> Folder<T> for TryReduceFolder<'r, R, T>
+where
+    R: Fn(T::Ok, T::Ok) -> T,
+    T: Try,
+{
+    type Result = T;
+
+    fn consume(mut self, item: T) -> Self {
+        let reduce_op = self.reduce_op;
+        if let Ok(left) = self.result {
+            self.result = match item.into_result() {
+                Ok(right) => reduce_op(left, right).into_result(),
+                Err(error) => Err(error),
+            };
+        }
+        if self.result.is_err() {
+            self.full.store(true, Ordering::Relaxed)
+        }
+        self
+    }
+
+    fn complete(self) -> T {
+        match self.result {
+            Ok(ok) => T::from_ok(ok),
+            Err(error) => T::from_error(error),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
diff --git a/src/iter/try_reduce_with.rs b/src/iter/try_reduce_with.rs
new file mode 100644
index 0000000..6be3100
--- /dev/null
+++ b/src/iter/try_reduce_with.rs
@@ -0,0 +1,134 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use super::private::Try;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+pub(super) fn try_reduce_with<PI, R, T>(pi: PI, reduce_op: R) -> Option<T>
+where
+    PI: ParallelIterator<Item = T>,
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try + Send,
+{
+    let full = AtomicBool::new(false);
+    let consumer = TryReduceWithConsumer {
+        reduce_op: &reduce_op,
+        full: &full,
+    };
+    pi.drive_unindexed(consumer)
+}
+
+struct TryReduceWithConsumer<'r, R> {
+    reduce_op: &'r R,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R> Copy for TryReduceWithConsumer<'r, R> {}
+
+impl<'r, R> Clone for TryReduceWithConsumer<'r, R> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'r, R, T> Consumer<T> for TryReduceWithConsumer<'r, R>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try + Send,
+{
+    type Folder = TryReduceWithFolder<'r, R, T>;
+    type Reducer = Self;
+    type Result = Option<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (self, self, self)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryReduceWithFolder {
+            reduce_op: self.reduce_op,
+            opt_result: None,
+            full: self.full,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
+
+impl<'r, R, T> UnindexedConsumer<T> for TryReduceWithConsumer<'r, R>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try + Send,
+{
+    fn split_off_left(&self) -> Self {
+        *self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        *self
+    }
+}
+
+impl<'r, R, T> Reducer<Option<T>> for TryReduceWithConsumer<'r, R>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try,
+{
+    fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
+        let reduce_op = self.reduce_op;
+        match (left, right) {
+            (None, x) | (x, None) => x,
+            (Some(a), Some(b)) => match (a.into_result(), b.into_result()) {
+                (Ok(a), Ok(b)) => Some(reduce_op(a, b)),
+                (Err(e), _) | (_, Err(e)) => Some(T::from_error(e)),
+            },
+        }
+    }
+}
+
+struct TryReduceWithFolder<'r, R, T: Try> {
+    reduce_op: &'r R,
+    opt_result: Option<Result<T::Ok, T::Error>>,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R, T> Folder<T> for TryReduceWithFolder<'r, R, T>
+where
+    R: Fn(T::Ok, T::Ok) -> T,
+    T: Try,
+{
+    type Result = Option<T>;
+
+    fn consume(self, item: T) -> Self {
+        let reduce_op = self.reduce_op;
+        let result = match self.opt_result {
+            None => item.into_result(),
+            Some(Ok(a)) => match item.into_result() {
+                Ok(b) => reduce_op(a, b).into_result(),
+                Err(e) => Err(e),
+            },
+            Some(Err(e)) => Err(e),
+        };
+        if result.is_err() {
+            self.full.store(true, Ordering::Relaxed)
+        }
+        TryReduceWithFolder {
+            opt_result: Some(result),
+            ..self
+        }
+    }
+
+    fn complete(self) -> Option<T> {
+        let result = self.opt_result?;
+        Some(match result {
+            Ok(ok) => T::from_ok(ok),
+            Err(error) => T::from_error(error),
+        })
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
diff --git a/src/iter/unzip.rs b/src/iter/unzip.rs
new file mode 100644
index 0000000..219b909
--- /dev/null
+++ b/src/iter/unzip.rs
@@ -0,0 +1,464 @@
+use super::plumbing::*;
+use super::*;
+
+/// This trait abstracts the different ways we can "unzip" one parallel
+/// iterator into two distinct consumers, which we can handle almost
+/// identically apart from how to process the individual items.
+trait UnzipOp<T>: Sync + Send {
+    /// The type of item expected by the left consumer.
+    type Left: Send;
+
+    /// The type of item expected by the right consumer.
+    type Right: Send;
+
+    /// Consumes one item and feeds it to one or both of the underlying folders.
+    fn consume<FA, FB>(&self, item: T, left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<Self::Left>,
+        FB: Folder<Self::Right>;
+
+    /// Reports whether this op may support indexed consumers.
+    /// - e.g. true for `unzip`, where the item count passes through directly.
+    /// - e.g. false for `partition`, where the split between the two sides is not known in advance.
+    fn indexable() -> bool {
+        false
+    }
+}
+
+/// Runs an unzip-like operation into default `ParallelExtend` collections.
+fn execute<I, OP, FromA, FromB>(pi: I, op: OP) -> (FromA, FromB)
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    FromA: Default + Send + ParallelExtend<OP::Left>,
+    FromB: Default + Send + ParallelExtend<OP::Right>,
+{
+    let mut a = FromA::default();
+    let mut b = FromB::default();
+    execute_into(&mut a, &mut b, pi, op);
+    (a, b)
+}
+
+/// Runs an unzip-like operation into `ParallelExtend` collections.
+fn execute_into<I, OP, FromA, FromB>(a: &mut FromA, b: &mut FromB, pi: I, op: OP)
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    FromA: Send + ParallelExtend<OP::Left>,
+    FromB: Send + ParallelExtend<OP::Right>,
+{
+    // We have no idea what the consumers will look like for these
+    // collections' `par_extend`, but we can intercept them in our own
+    // `drive_unindexed`.  Start with the left side, type `A`:
+    let iter = UnzipA { base: pi, op, b };
+    a.par_extend(iter);
+}
+
+/// Unzips the items of a parallel iterator into a pair of arbitrary
+/// `ParallelExtend` containers.
+///
+/// This is called by `ParallelIterator::unzip`.
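+///
+/// A brief sketch of the public call this backs; with an indexed source, each
+/// `Vec` comes back in the original order:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let (squares, cubes): (Vec<i32>, Vec<i32>) = (0..4)
+///     .into_par_iter()
+///     .map(|i| (i * i, i * i * i))
+///     .unzip();
+/// assert_eq!(squares, [0, 1, 4, 9]);
+/// assert_eq!(cubes, [0, 1, 8, 27]);
+/// ```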
+pub(super) fn unzip<I, A, B, FromA, FromB>(pi: I) -> (FromA, FromB)
+where
+    I: ParallelIterator<Item = (A, B)>,
+    FromA: Default + Send + ParallelExtend<A>,
+    FromB: Default + Send + ParallelExtend<B>,
+    A: Send,
+    B: Send,
+{
+    execute(pi, Unzip)
+}
+
+/// Unzips an `IndexedParallelIterator` into two arbitrary `Consumer`s.
+///
+/// This is called by `super::collect::unzip_into_vecs`.
+pub(super) fn unzip_indexed<I, A, B, CA, CB>(pi: I, left: CA, right: CB) -> (CA::Result, CB::Result)
+where
+    I: IndexedParallelIterator<Item = (A, B)>,
+    CA: Consumer<A>,
+    CB: Consumer<B>,
+    A: Send,
+    B: Send,
+{
+    let consumer = UnzipConsumer {
+        op: &Unzip,
+        left,
+        right,
+    };
+    pi.drive(consumer)
+}
+
+/// An `UnzipOp` that splits a tuple directly into the two consumers.
+struct Unzip;
+
+impl<A: Send, B: Send> UnzipOp<(A, B)> for Unzip {
+    type Left = A;
+    type Right = B;
+
+    fn consume<FA, FB>(&self, item: (A, B), left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<A>,
+        FB: Folder<B>,
+    {
+        (left.consume(item.0), right.consume(item.1))
+    }
+
+    fn indexable() -> bool {
+        true
+    }
+}
+
+/// Partitions the items of a parallel iterator into a pair of arbitrary
+/// `ParallelExtend` containers.
+///
+/// This is called by `ParallelIterator::partition`.
+pub(super) fn partition<I, A, B, P>(pi: I, predicate: P) -> (A, B)
+where
+    I: ParallelIterator,
+    A: Default + Send + ParallelExtend<I::Item>,
+    B: Default + Send + ParallelExtend<I::Item>,
+    P: Fn(&I::Item) -> bool + Sync + Send,
+{
+    execute(pi, Partition { predicate })
+}
+
+/// An `UnzipOp` that routes items depending on a predicate function.
+struct Partition<P> {
+    predicate: P,
+}
+
+impl<P, T> UnzipOp<T> for Partition<P>
+where
+    P: Fn(&T) -> bool + Sync + Send,
+    T: Send,
+{
+    type Left = T;
+    type Right = T;
+
+    fn consume<FA, FB>(&self, item: T, left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<T>,
+        FB: Folder<T>,
+    {
+        if (self.predicate)(&item) {
+            (left.consume(item), right)
+        } else {
+            (left, right.consume(item))
+        }
+    }
+}
+
+/// Partitions and maps the items of a parallel iterator into a pair of
+/// arbitrary `ParallelExtend` containers.
+///
+/// This is called by `ParallelIterator::partition_map`.
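+///
+/// A brief sketch of the public call this backs, splitting a range by parity:
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::Either;
+///
+/// let (evens, odds): (Vec<i32>, Vec<i32>) = (0..8)
+///     .into_par_iter()
+///     .partition_map(|x| {
+///         if x % 2 == 0 {
+///             Either::Left(x)
+///         } else {
+///             Either::Right(x)
+///         }
+///     });
+/// assert_eq!(evens, [0, 2, 4, 6]);
+/// assert_eq!(odds, [1, 3, 5, 7]);
+/// ```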
+pub(super) fn partition_map<I, A, B, P, L, R>(pi: I, predicate: P) -> (A, B)
+where
+    I: ParallelIterator,
+    A: Default + Send + ParallelExtend<L>,
+    B: Default + Send + ParallelExtend<R>,
+    P: Fn(I::Item) -> Either<L, R> + Sync + Send,
+    L: Send,
+    R: Send,
+{
+    execute(pi, PartitionMap { predicate })
+}
+
+/// An `UnzipOp` that routes items depending on how they are mapped into `Either`.
+struct PartitionMap<P> {
+    predicate: P,
+}
+
+impl<P, L, R, T> UnzipOp<T> for PartitionMap<P>
+where
+    P: Fn(T) -> Either<L, R> + Sync + Send,
+    L: Send,
+    R: Send,
+{
+    type Left = L;
+    type Right = R;
+
+    fn consume<FA, FB>(&self, item: T, left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<L>,
+        FB: Folder<R>,
+    {
+        match (self.predicate)(item) {
+            Either::Left(item) => (left.consume(item), right),
+            Either::Right(item) => (left, right.consume(item)),
+        }
+    }
+}
+
+/// A fake iterator to intercept the `Consumer` for type `A`.
+struct UnzipA<'b, I, OP, FromB> {
+    base: I,
+    op: OP,
+    b: &'b mut FromB,
+}
+
+impl<'b, I, OP, FromB> ParallelIterator for UnzipA<'b, I, OP, FromB>
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    FromB: Send + ParallelExtend<OP::Right>,
+{
+    type Item = OP::Left;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let mut result = None;
+        {
+            // Now it's time to find the consumer for type `B`
+            let iter = UnzipB {
+                base: self.base,
+                op: self.op,
+                left_consumer: consumer,
+                left_result: &mut result,
+            };
+            self.b.par_extend(iter);
+        }
+        // NB: If for some reason `b.par_extend` doesn't actually drive the
+        // iterator, then we won't have a result for the left side to return
+        // at all.  We can't fake an arbitrary consumer's result, so panic.
+        result.expect("unzip consumers didn't execute!")
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        if OP::indexable() {
+            self.base.opt_len()
+        } else {
+            None
+        }
+    }
+}
+
+/// A fake iterator to intercept the `Consumer` for type `B`.
+struct UnzipB<'r, I, OP, CA>
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    CA: UnindexedConsumer<OP::Left>,
+    CA::Result: 'r,
+{
+    base: I,
+    op: OP,
+    left_consumer: CA,
+    left_result: &'r mut Option<CA::Result>,
+}
+
+impl<'r, I, OP, CA> ParallelIterator for UnzipB<'r, I, OP, CA>
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    CA: UnindexedConsumer<OP::Left>,
+{
+    type Item = OP::Right;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        // Now that we have two consumers, we can unzip the real iterator.
+        let consumer = UnzipConsumer {
+            op: &self.op,
+            left: self.left_consumer,
+            right: consumer,
+        };
+
+        let result = self.base.drive_unindexed(consumer);
+        *self.left_result = Some(result.0);
+        result.1
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        if OP::indexable() {
+            self.base.opt_len()
+        } else {
+            None
+        }
+    }
+}
+
+/// `Consumer` that unzips into two other `Consumer`s
+struct UnzipConsumer<'a, OP, CA, CB> {
+    op: &'a OP,
+    left: CA,
+    right: CB,
+}
+
+impl<'a, T, OP, CA, CB> Consumer<T> for UnzipConsumer<'a, OP, CA, CB>
+where
+    OP: UnzipOp<T>,
+    CA: Consumer<OP::Left>,
+    CB: Consumer<OP::Right>,
+{
+    type Folder = UnzipFolder<'a, OP, CA::Folder, CB::Folder>;
+    type Reducer = UnzipReducer<CA::Reducer, CB::Reducer>;
+    type Result = (CA::Result, CB::Result);
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left1, left2, left_reducer) = self.left.split_at(index);
+        let (right1, right2, right_reducer) = self.right.split_at(index);
+
+        (
+            UnzipConsumer {
+                op: self.op,
+                left: left1,
+                right: right1,
+            },
+            UnzipConsumer {
+                op: self.op,
+                left: left2,
+                right: right2,
+            },
+            UnzipReducer {
+                left: left_reducer,
+                right: right_reducer,
+            },
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        UnzipFolder {
+            op: self.op,
+            left: self.left.into_folder(),
+            right: self.right.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        // don't stop until everyone is full
+        self.left.full() && self.right.full()
+    }
+}
+
+impl<'a, T, OP, CA, CB> UnindexedConsumer<T> for UnzipConsumer<'a, OP, CA, CB>
+where
+    OP: UnzipOp<T>,
+    CA: UnindexedConsumer<OP::Left>,
+    CB: UnindexedConsumer<OP::Right>,
+{
+    fn split_off_left(&self) -> Self {
+        UnzipConsumer {
+            op: self.op,
+            left: self.left.split_off_left(),
+            right: self.right.split_off_left(),
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        UnzipReducer {
+            left: self.left.to_reducer(),
+            right: self.right.to_reducer(),
+        }
+    }
+}
+
+/// `Folder` that unzips into two other `Folder`s
+struct UnzipFolder<'a, OP, FA, FB> {
+    op: &'a OP,
+    left: FA,
+    right: FB,
+}
+
+impl<'a, T, OP, FA, FB> Folder<T> for UnzipFolder<'a, OP, FA, FB>
+where
+    OP: UnzipOp<T>,
+    FA: Folder<OP::Left>,
+    FB: Folder<OP::Right>,
+{
+    type Result = (FA::Result, FB::Result);
+
+    fn consume(self, item: T) -> Self {
+        let (left, right) = self.op.consume(item, self.left, self.right);
+        UnzipFolder {
+            op: self.op,
+            left,
+            right,
+        }
+    }
+
+    fn complete(self) -> Self::Result {
+        (self.left.complete(), self.right.complete())
+    }
+
+    fn full(&self) -> bool {
+        // don't stop until everyone is full
+        self.left.full() && self.right.full()
+    }
+}
+
+/// `Reducer` that unzips into two other `Reducer`s
+struct UnzipReducer<RA, RB> {
+    left: RA,
+    right: RB,
+}
+
+impl<A, B, RA, RB> Reducer<(A, B)> for UnzipReducer<RA, RB>
+where
+    RA: Reducer<A>,
+    RB: Reducer<B>,
+{
+    fn reduce(self, left: (A, B), right: (A, B)) -> (A, B) {
+        (
+            self.left.reduce(left.0, right.0),
+            self.right.reduce(left.1, right.1),
+        )
+    }
+}
+
+impl<A, B, FromA, FromB> ParallelExtend<(A, B)> for (FromA, FromB)
+where
+    A: Send,
+    B: Send,
+    FromA: Send + ParallelExtend<A>,
+    FromB: Send + ParallelExtend<B>,
+{
+    fn par_extend<I>(&mut self, pi: I)
+    where
+        I: IntoParallelIterator<Item = (A, B)>,
+    {
+        execute_into(&mut self.0, &mut self.1, pi.into_par_iter(), Unzip);
+    }
+}
+
+impl<L, R, A, B> ParallelExtend<Either<L, R>> for (A, B)
+where
+    L: Send,
+    R: Send,
+    A: Send + ParallelExtend<L>,
+    B: Send + ParallelExtend<R>,
+{
+    fn par_extend<I>(&mut self, pi: I)
+    where
+        I: IntoParallelIterator<Item = Either<L, R>>,
+    {
+        execute_into(&mut self.0, &mut self.1, pi.into_par_iter(), UnEither);
+    }
+}
+
+/// An `UnzipOp` that routes items depending on their `Either` variant.
+struct UnEither;
+
+impl<L, R> UnzipOp<Either<L, R>> for UnEither
+where
+    L: Send,
+    R: Send,
+{
+    type Left = L;
+    type Right = R;
+
+    fn consume<FL, FR>(&self, item: Either<L, R>, left: FL, right: FR) -> (FL, FR)
+    where
+        FL: Folder<L>,
+        FR: Folder<R>,
+    {
+        match item {
+            Either::Left(item) => (left.consume(item), right),
+            Either::Right(item) => (left, right.consume(item)),
+        }
+    }
+}
diff --git a/src/iter/update.rs b/src/iter/update.rs
new file mode 100644
index 0000000..373a4d7
--- /dev/null
+++ b/src/iter/update.rs
@@ -0,0 +1,327 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `Update` is an iterator that mutates the elements of an
+/// underlying iterator before they are yielded.
+///
+/// This struct is created by the [`update()`] method on [`ParallelIterator`]
+///
+/// [`update()`]: trait.ParallelIterator.html#method.update
+/// [`ParallelIterator`]: trait.ParallelIterator.html
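+///
+/// A minimal sketch of the adaptor in use, doubling each item before it is
+/// collected:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let doubled: Vec<i32> = (0..4)
+///     .into_par_iter()
+///     .update(|x| *x *= 2)
+///     .collect();
+/// assert_eq!(doubled, [0, 2, 4, 6]);
+/// ```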
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Update<I: ParallelIterator, F> {
+    base: I,
+    update_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for Update<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Update").field("base", &self.base).finish()
+    }
+}
+
+impl<I, F> Update<I, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Update` iterator.
+    pub(super) fn new(base: I, update_op: F) -> Self {
+        Update { base, update_op }
+    }
+}
+
+impl<I, F> ParallelIterator for Update<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(&mut I::Item) + Send + Sync,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = UpdateConsumer::new(consumer, &self.update_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, F> IndexedParallelIterator for Update<I, F>
+where
+    I: IndexedParallelIterator,
+    F: Fn(&mut I::Item) + Send + Sync,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = UpdateConsumer::new(consumer, &self.update_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            update_op: self.update_op,
+        });
+
+        struct Callback<CB, F> {
+            callback: CB,
+            update_op: F,
+        }
+
+        impl<T, F, CB> ProducerCallback<T> for Callback<CB, F>
+        where
+            CB: ProducerCallback<T>,
+            F: Fn(&mut T) + Send + Sync,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = UpdateProducer {
+                    base,
+                    update_op: &self.update_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct UpdateProducer<'f, P, F> {
+    base: P,
+    update_op: &'f F,
+}
+
+impl<'f, P, F> Producer for UpdateProducer<'f, P, F>
+where
+    P: Producer,
+    F: Fn(&mut P::Item) + Send + Sync,
+{
+    type Item = P::Item;
+    type IntoIter = UpdateSeq<P::IntoIter, &'f F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        UpdateSeq {
+            base: self.base.into_iter(),
+            update_op: self.update_op,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            UpdateProducer {
+                base: left,
+                update_op: self.update_op,
+            },
+            UpdateProducer {
+                base: right,
+                update_op: self.update_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = UpdateFolder {
+            base: folder,
+            update_op: self.update_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct UpdateConsumer<'f, C, F> {
+    base: C,
+    update_op: &'f F,
+}
+
+impl<'f, C, F> UpdateConsumer<'f, C, F> {
+    fn new(base: C, update_op: &'f F) -> Self {
+        UpdateConsumer { base, update_op }
+    }
+}
+
+impl<'f, T, C, F> Consumer<T> for UpdateConsumer<'f, C, F>
+where
+    C: Consumer<T>,
+    F: Fn(&mut T) + Send + Sync,
+{
+    type Folder = UpdateFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            UpdateConsumer::new(left, self.update_op),
+            UpdateConsumer::new(right, self.update_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        UpdateFolder {
+            base: self.base.into_folder(),
+            update_op: self.update_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, C, F> UnindexedConsumer<T> for UpdateConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<T>,
+    F: Fn(&mut T) + Send + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        UpdateConsumer::new(self.base.split_off_left(), &self.update_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct UpdateFolder<'f, C, F> {
+    base: C,
+    update_op: &'f F,
+}
+
+fn apply<T>(update_op: impl Fn(&mut T)) -> impl Fn(T) -> T {
+    move |mut item| {
+        update_op(&mut item);
+        item
+    }
+}
+
+impl<'f, T, C, F> Folder<T> for UpdateFolder<'f, C, F>
+where
+    C: Folder<T>,
+    F: Fn(&mut T),
+{
+    type Result = C::Result;
+
+    fn consume(self, mut item: T) -> Self {
+        (self.update_op)(&mut item);
+
+        UpdateFolder {
+            base: self.base.consume(item),
+            update_op: self.update_op,
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let update_op = self.update_op;
+        self.base = self
+            .base
+            .consume_iter(iter.into_iter().map(apply(update_op)));
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+/// Standard Update adaptor, based on `itertools::adaptors::Update`
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+struct UpdateSeq<I, F> {
+    base: I,
+    update_op: F,
+}
+
+impl<I, F> Iterator for UpdateSeq<I, F>
+where
+    I: Iterator,
+    F: Fn(&mut I::Item),
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut v = self.base.next()?;
+        (self.update_op)(&mut v);
+        Some(v)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.base.size_hint()
+    }
+
+    fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
+    where
+        G: FnMut(Acc, Self::Item) -> Acc,
+    {
+        self.base.map(apply(self.update_op)).fold(init, g)
+    }
+
+    // if possible, re-use inner iterator specializations in collect
+    fn collect<C>(self) -> C
+    where
+        C: ::std::iter::FromIterator<Self::Item>,
+    {
+        self.base.map(apply(self.update_op)).collect()
+    }
+}
+
+impl<I, F> ExactSizeIterator for UpdateSeq<I, F>
+where
+    I: ExactSizeIterator,
+    F: Fn(&mut I::Item),
+{
+}
+
+impl<I, F> DoubleEndedIterator for UpdateSeq<I, F>
+where
+    I: DoubleEndedIterator,
+    F: Fn(&mut I::Item),
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let mut v = self.base.next_back()?;
+        (self.update_op)(&mut v);
+        Some(v)
+    }
+}
diff --git a/src/iter/while_some.rs b/src/iter/while_some.rs
new file mode 100644
index 0000000..215047b
--- /dev/null
+++ b/src/iter/while_some.rs
@@ -0,0 +1,154 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// `WhileSome` is an iterator that yields the `Some` elements of an iterator,
+/// halting as soon as any `None` is produced.
+///
+/// This struct is created by the [`while_some()`] method on [`ParallelIterator`]
+///
+/// [`while_some()`]: trait.ParallelIterator.html#method.while_some
+/// [`ParallelIterator`]: trait.ParallelIterator.html
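+///
+/// A minimal usage sketch (the closure and bounds here are illustrative only):
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let v: Vec<i32> = (0..100).into_par_iter()
+///     .map(|x| if x < 50 { Some(x) } else { None })
+///     .while_some()
+///     .collect();
+///
+/// // everything collected came from a `Some`, and nothing past the first `None`
+/// assert!(v.len() <= 50);
+/// assert!(v.iter().all(|&x| x < 50));
+/// ```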
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct WhileSome<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> WhileSome<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `WhileSome` iterator.
+    pub(super) fn new(base: I) -> Self {
+        WhileSome { base }
+    }
+}
+
+impl<I, T> ParallelIterator for WhileSome<I>
+where
+    I: ParallelIterator<Item = Option<T>>,
+    T: Send,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let full = AtomicBool::new(false);
+        let consumer1 = WhileSomeConsumer {
+            base: consumer,
+            full: &full,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct WhileSomeConsumer<'f, C> {
+    base: C,
+    full: &'f AtomicBool,
+}
+
+impl<'f, T, C> Consumer<Option<T>> for WhileSomeConsumer<'f, C>
+where
+    C: Consumer<T>,
+    T: Send,
+{
+    type Folder = WhileSomeFolder<'f, C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            WhileSomeConsumer { base: left, ..self },
+            WhileSomeConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        WhileSomeFolder {
+            base: self.base.into_folder(),
+            full: self.full,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed) || self.base.full()
+    }
+}
+
+impl<'f, T, C> UnindexedConsumer<Option<T>> for WhileSomeConsumer<'f, C>
+where
+    C: UnindexedConsumer<T>,
+    T: Send,
+{
+    fn split_off_left(&self) -> Self {
+        WhileSomeConsumer {
+            base: self.base.split_off_left(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct WhileSomeFolder<'f, C> {
+    base: C,
+    full: &'f AtomicBool,
+}
+
+impl<'f, T, C> Folder<Option<T>> for WhileSomeFolder<'f, C>
+where
+    C: Folder<T>,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: Option<T>) -> Self {
+        match item {
+            Some(item) => self.base = self.base.consume(item),
+            None => self.full.store(true, Ordering::Relaxed),
+        }
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = Option<T>>,
+    {
+        fn some<T>(full: &AtomicBool) -> impl Fn(&Option<T>) -> bool + '_ {
+            move |x| match *x {
+                Some(_) => !full.load(Ordering::Relaxed),
+                None => {
+                    full.store(true, Ordering::Relaxed);
+                    false
+                }
+            }
+        }
+
+        self.base = self.base.consume_iter(
+            iter.into_iter()
+                .take_while(some(self.full))
+                .map(Option::unwrap),
+        );
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed) || self.base.full()
+    }
+}
diff --git a/src/iter/zip.rs b/src/iter/zip.rs
new file mode 100644
index 0000000..33823db
--- /dev/null
+++ b/src/iter/zip.rs
@@ -0,0 +1,159 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp;
+use std::iter;
+
+/// `Zip` is an iterator that zips up `a` and `b` into a single iterator
+/// of pairs. This struct is created by the [`zip()`] method on
+/// [`IndexedParallelIterator`]
+///
+/// [`zip()`]: trait.IndexedParallelIterator.html#method.zip
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
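+///
+/// A small illustrative example:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let a = vec![1, 2, 3];
+/// let b = vec![10, 20, 30];
+///
+/// // pair up the two vectors and add the pairs element-wise
+/// let sums: Vec<i32> = a.par_iter().zip(b.par_iter()).map(|(x, y)| x + y).collect();
+/// assert_eq!(sums, vec![11, 22, 33]);
+/// ```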
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Zip<A: IndexedParallelIterator, B: IndexedParallelIterator> {
+    a: A,
+    b: B,
+}
+
+impl<A, B> Zip<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    /// Creates a new `Zip` iterator.
+    pub(super) fn new(a: A, b: B) -> Self {
+        Zip { a, b }
+    }
+}
+
+impl<A, B> ParallelIterator for Zip<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    type Item = (A::Item, B::Item);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<A, B> IndexedParallelIterator for Zip<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        cmp::min(self.a.len(), self.b.len())
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.a.with_producer(CallbackA {
+            callback,
+            b: self.b,
+        });
+
+        struct CallbackA<CB, B> {
+            callback: CB,
+            b: B,
+        }
+
+        impl<CB, ITEM, B> ProducerCallback<ITEM> for CallbackA<CB, B>
+        where
+            B: IndexedParallelIterator,
+            CB: ProducerCallback<(ITEM, B::Item)>,
+        {
+            type Output = CB::Output;
+
+            fn callback<A>(self, a_producer: A) -> Self::Output
+            where
+                A: Producer<Item = ITEM>,
+            {
+                self.b.with_producer(CallbackB {
+                    a_producer,
+                    callback: self.callback,
+                })
+            }
+        }
+
+        struct CallbackB<CB, A> {
+            a_producer: A,
+            callback: CB,
+        }
+
+        impl<CB, A, ITEM> ProducerCallback<ITEM> for CallbackB<CB, A>
+        where
+            A: Producer,
+            CB: ProducerCallback<(A::Item, ITEM)>,
+        {
+            type Output = CB::Output;
+
+            fn callback<B>(self, b_producer: B) -> Self::Output
+            where
+                B: Producer<Item = ITEM>,
+            {
+                self.callback.callback(ZipProducer {
+                    a: self.a_producer,
+                    b: b_producer,
+                })
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct ZipProducer<A: Producer, B: Producer> {
+    a: A,
+    b: B,
+}
+
+impl<A: Producer, B: Producer> Producer for ZipProducer<A, B> {
+    type Item = (A::Item, B::Item);
+    type IntoIter = iter::Zip<A::IntoIter, B::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.a.into_iter().zip(self.b.into_iter())
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.a.min_len(), self.b.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.a.max_len(), self.b.max_len())
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (a_left, a_right) = self.a.split_at(index);
+        let (b_left, b_right) = self.b.split_at(index);
+        (
+            ZipProducer {
+                a: a_left,
+                b: b_left,
+            },
+            ZipProducer {
+                a: a_right,
+                b: b_right,
+            },
+        )
+    }
+}
diff --git a/src/iter/zip_eq.rs b/src/iter/zip_eq.rs
new file mode 100644
index 0000000..4e64397
--- /dev/null
+++ b/src/iter/zip_eq.rs
@@ -0,0 +1,72 @@
+use super::plumbing::*;
+use super::*;
+
+/// An [`IndexedParallelIterator`] that iterates over two parallel iterators of equal
+/// length simultaneously.
+///
+/// This struct is created by the [`zip_eq`] method on [`IndexedParallelIterator`],
+/// see its documentation for more information.
+///
+/// [`zip_eq`]: trait.IndexedParallelIterator.html#method.zip_eq
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
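+///
+/// For example, a dot product over two equal-length vectors (illustrative):
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let a = vec![1, 2, 3];
+/// let b = vec![4, 5, 6];
+///
+/// // the two inputs are expected to have equal length
+/// let dot: i32 = a.par_iter().zip_eq(b.par_iter()).map(|(x, y)| x * y).sum();
+/// assert_eq!(dot, 32);
+/// ```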
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct ZipEq<A: IndexedParallelIterator, B: IndexedParallelIterator> {
+    zip: Zip<A, B>,
+}
+
+impl<A, B> ZipEq<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    /// Creates a new `ZipEq` iterator.
+    pub(super) fn new(a: A, b: B) -> Self {
+        ZipEq {
+            zip: super::Zip::new(a, b),
+        }
+    }
+}
+
+impl<A, B> ParallelIterator for ZipEq<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    type Item = (A::Item, B::Item);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self.zip, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.zip.len())
+    }
+}
+
+impl<A, B> IndexedParallelIterator for ZipEq<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self.zip, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.zip.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        self.zip.with_producer(callback)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..d5d0314
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,120 @@
+#![doc(html_root_url = "https://docs.rs/rayon/1.5")]
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(unreachable_pub)]
+#![warn(rust_2018_idioms)]
+
+//! Data-parallelism library that makes it easy to convert sequential
+//! computations into parallel ones
+//!
+//! Rayon is lightweight and convenient for introducing parallelism into existing
+//! code. It guarantees data-race-free execution and takes advantage of
+//! parallelism when sensible, based on the workload at runtime.
+//!
+//! # How to use Rayon
+//!
+//! There are two ways to use Rayon:
+//!
+//! - **High-level parallel constructs** are the simplest way to use Rayon and also
+//!   typically the most efficient.
+//!   - [Parallel iterators][iter module] make it easy to convert a sequential iterator to
+//!     execute in parallel.
+//!     - The [`ParallelIterator`] trait defines general methods for all parallel iterators.
+//!     - The [`IndexedParallelIterator`] trait adds methods for iterators that support random
+//!       access.
+//!   - The [`par_sort`] method sorts `&mut [T]` slices (or vectors) in parallel.
+//!   - [`par_extend`] can be used to efficiently grow collections with items produced
+//!     by a parallel iterator.
+//! - **Custom tasks** let you divide your work into parallel tasks yourself.
+//!   - [`join`] is used to subdivide a task into two pieces.
+//!   - [`scope`] creates a scope within which you can create any number of parallel tasks.
+//!   - [`ThreadPoolBuilder`] can be used to create your own thread pools or customize
+//!     the global one.
+//!
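+//! For example, [`join`] runs two closures, potentially in parallel, and
+//! returns both results (a minimal sketch):
+//!
+//! ```
+//! let (a, b) = rayon::join(|| (0..10).sum::<i32>(), || (10..20).sum::<i32>());
+//! assert_eq!((a, b), (45, 145));
+//! ```
+//!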
+//! [iter module]: iter/index.html
+//! [`join`]: fn.join.html
+//! [`scope`]: fn.scope.html
+//! [`par_sort`]: slice/trait.ParallelSliceMut.html#method.par_sort
+//! [`par_extend`]: iter/trait.ParallelExtend.html#tymethod.par_extend
+//! [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html
+//!
+//! # Basic usage and the Rayon prelude
+//!
+//! First, you will need to add `rayon` to your `Cargo.toml`.
+//!
+//! Next, to use parallel iterators or the other high-level methods,
+//! you need to import several traits. Those traits are bundled into
+//! the module [`rayon::prelude`]. It is recommended that you import
+//! all of these traits at once by adding `use rayon::prelude::*` at
+//! the top of each module that uses Rayon methods.
+//!
+//! These traits give you access to the `par_iter` method which provides
+//! parallel implementations of many iterative functions such as [`map`],
+//! [`for_each`], [`filter`], [`fold`], and [more].
+//!
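+//! For instance, a sketch of a parallel sum of squares (the function name is illustrative):
+//!
+//! ```
+//! use rayon::prelude::*;
+//!
+//! fn sum_of_squares(input: &[i32]) -> i32 {
+//!     // `par_iter` comes from the prelude traits imported above
+//!     input.par_iter().map(|&i| i * i).sum()
+//! }
+//!
+//! assert_eq!(sum_of_squares(&[1, 2, 3]), 14);
+//! ```
+//!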
+//! [`rayon::prelude`]: prelude/index.html
+//! [`map`]: iter/trait.ParallelIterator.html#method.map
+//! [`for_each`]: iter/trait.ParallelIterator.html#method.for_each
+//! [`filter`]: iter/trait.ParallelIterator.html#method.filter
+//! [`fold`]: iter/trait.ParallelIterator.html#method.fold
+//! [more]: iter/trait.ParallelIterator.html#provided-methods
+//! [`ParallelIterator`]: iter/trait.ParallelIterator.html
+//! [`IndexedParallelIterator`]: iter/trait.IndexedParallelIterator.html
+//!
+//! # Crate Layout
+//!
+//! Rayon extends many of the types found in the standard library with
+//! parallel iterator implementations. The modules in the `rayon`
+//! crate mirror [`std`] itself: so, e.g., the `option` module in
+//! Rayon contains parallel iterators for the `Option` type, which is
+//! found in [the `option` module of `std`]. Similarly, the
+//! `collections` module in Rayon offers parallel iterator types for
+//! [the `collections` from `std`]. You will rarely need to access
+//! these submodules unless you need to name iterator types
+//! explicitly.
+//!
+//! [the `option` module of `std`]: https://doc.rust-lang.org/std/option/index.html
+//! [the `collections` from `std`]: https://doc.rust-lang.org/std/collections/index.html
+//! [`std`]: https://doc.rust-lang.org/std/
+//!
+//! # Other questions?
+//!
+//! See [the Rayon FAQ][faq].
+//!
+//! [faq]: https://github.com/rayon-rs/rayon/blob/master/FAQ.md
+
+#[macro_use]
+mod delegate;
+
+#[macro_use]
+mod private;
+
+mod split_producer;
+
+pub mod collections;
+pub mod iter;
+pub mod option;
+pub mod prelude;
+pub mod range;
+pub mod range_inclusive;
+pub mod result;
+pub mod slice;
+pub mod str;
+pub mod string;
+pub mod vec;
+
+mod math;
+mod par_either;
+
+mod compile_fail;
+
+pub use rayon_core::FnContext;
+pub use rayon_core::ThreadBuilder;
+pub use rayon_core::ThreadPool;
+pub use rayon_core::ThreadPoolBuildError;
+pub use rayon_core::ThreadPoolBuilder;
+pub use rayon_core::{current_num_threads, current_thread_index};
+pub use rayon_core::{join, join_context};
+pub use rayon_core::{scope, Scope};
+pub use rayon_core::{scope_fifo, ScopeFifo};
+pub use rayon_core::{spawn, spawn_fifo};
diff --git a/src/math.rs b/src/math.rs
new file mode 100644
index 0000000..9de5889
--- /dev/null
+++ b/src/math.rs
@@ -0,0 +1,54 @@
+use std::ops::{Bound, Range, RangeBounds};
+
+/// Divide `n` by `divisor`, and round up to the nearest integer
+/// if not evenly divisible.
+#[inline]
+pub(super) fn div_round_up(n: usize, divisor: usize) -> usize {
+    debug_assert!(divisor != 0, "Division by zero!");
+    if n == 0 {
+        0
+    } else {
+        (n - 1) / divisor + 1
+    }
+}
+
+/// Normalize arbitrary `RangeBounds` to a `Range`
+pub(super) fn simplify_range(range: impl RangeBounds<usize>, len: usize) -> Range<usize> {
+    let start = match range.start_bound() {
+        Bound::Unbounded => 0,
+        Bound::Included(&i) if i <= len => i,
+        Bound::Excluded(&i) if i < len => i + 1,
+        bound => panic!("range start {:?} should be <= length {}", bound, len),
+    };
+    let end = match range.end_bound() {
+        Bound::Unbounded => len,
+        Bound::Excluded(&i) if i <= len => i,
+        Bound::Included(&i) if i < len => i + 1,
+        bound => panic!("range end {:?} should be <= length {}", bound, len),
+    };
+    if start > end {
+        panic!(
+            "range start {:?} should be <= range end {:?}",
+            range.start_bound(),
+            range.end_bound()
+        );
+    }
+    start..end
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn check_div_round_up() {
+        assert_eq!(0, div_round_up(0, 5));
+        assert_eq!(1, div_round_up(5, 5));
+        assert_eq!(1, div_round_up(1, 5));
+        assert_eq!(2, div_round_up(3, 2));
+        assert_eq!(
+            usize::max_value() / 2 + 1,
+            div_round_up(usize::max_value(), 2)
+        );
+    }
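+
+    // An illustrative check of `simplify_range` over a few representative bounds,
+    // mirroring the Included/Excluded/Unbounded arms handled above.
+    #[test]
+    fn check_simplify_range() {
+        assert_eq!(simplify_range(.., 10), 0..10);
+        assert_eq!(simplify_range(2..5, 10), 2..5);
+        assert_eq!(simplify_range(2..=5, 10), 2..6);
+        assert_eq!(simplify_range(..=5, 10), 0..6);
+    }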
+}
diff --git a/src/option.rs b/src/option.rs
new file mode 100644
index 0000000..0f56896
--- /dev/null
+++ b/src/option.rs
@@ -0,0 +1,203 @@
+//! Parallel iterator types for [options][std::option]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::option]: https://doc.rust-lang.org/stable/std/option/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// A parallel iterator over the value in the [`Some`] variant of an [`Option`].
+///
+/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none.
+///
+/// This `struct` is created by the [`into_par_iter`] function.
+///
+/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some
+/// [`into_par_iter`]: ../iter/trait.IntoParallelIterator.html#tymethod.into_par_iter
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    opt: Option<T>,
+}
+
+impl<T: Send> IntoParallelIterator for Option<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter { opt: self }
+    }
+}
+
+impl<T: Send> ParallelIterator for IntoIter<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.drive(consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for IntoIter<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let mut folder = consumer.into_folder();
+        if let Some(item) = self.opt {
+            folder = folder.consume(item);
+        }
+        folder.complete()
+    }
+
+    fn len(&self) -> usize {
+        match self.opt {
+            Some(_) => 1,
+            None => 0,
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(OptionProducer { opt: self.opt })
+    }
+}
+
+/// A parallel iterator over a reference to the [`Some`] variant of an [`Option`].
+///
+/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none.
+///
+/// This `struct` is created by the [`par_iter`] function.
+///
+/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some
+/// [`par_iter`]: ../iter/trait.IntoParallelRefIterator.html#tymethod.par_iter
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync> IntoParallelIterator for &'a Option<T> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter {
+            inner: self.as_ref().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// A parallel iterator over a mutable reference to the [`Some`] variant of an [`Option`].
+///
+/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none.
+///
+/// This `struct` is created by the [`par_iter_mut`] function.
+///
+/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some
+/// [`par_iter_mut`]: ../iter/trait.IntoParallelRefMutIterator.html#tymethod.par_iter_mut
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: IntoIter<&'a mut T>,
+}
+
+impl<'a, T: Send> IntoParallelIterator for &'a mut Option<T> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut {
+            inner: self.as_mut().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Private producer for an option
+struct OptionProducer<T: Send> {
+    opt: Option<T>,
+}
+
+impl<T: Send> Producer for OptionProducer<T> {
+    type Item = T;
+    type IntoIter = std::option::IntoIter<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.opt.into_iter()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        debug_assert!(index <= 1);
+        let none = OptionProducer { opt: None };
+        if index == 0 {
+            (none, self)
+        } else {
+            (self, none)
+        }
+    }
+}
+
+/// Collect an arbitrary `Option`-wrapped collection.
+///
+/// If any item is `None`, then all previous items collected are discarded,
+/// and it returns only `None`.
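+///
+/// A brief illustrative sketch:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// // every item is `Some`, so the whole collection is `Some`
+/// let all: Option<Vec<i32>> = (1..4).into_par_iter().map(Some).collect();
+/// assert_eq!(all, Some(vec![1, 2, 3]));
+///
+/// // a single `None` discards everything
+/// let none: Option<Vec<i32>> = (1..4).into_par_iter()
+///     .map(|x| if x == 2 { None } else { Some(x) })
+///     .collect();
+/// assert_eq!(none, None);
+/// ```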
+impl<C, T> FromParallelIterator<Option<T>> for Option<C>
+where
+    C: FromParallelIterator<T>,
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = Option<T>>,
+    {
+        fn check<T>(found_none: &AtomicBool) -> impl Fn(&Option<T>) + '_ {
+            move |item| {
+                if item.is_none() {
+                    found_none.store(true, Ordering::Relaxed);
+                }
+            }
+        }
+
+        let found_none = AtomicBool::new(false);
+        let collection = par_iter
+            .into_par_iter()
+            .inspect(check(&found_none))
+            .while_some()
+            .collect();
+
+        if found_none.load(Ordering::Relaxed) {
+            None
+        } else {
+            Some(collection)
+        }
+    }
+}
diff --git a/src/par_either.rs b/src/par_either.rs
new file mode 100644
index 0000000..a19ce53
--- /dev/null
+++ b/src/par_either.rs
@@ -0,0 +1,74 @@
+use crate::iter::plumbing::*;
+use crate::iter::Either::{Left, Right};
+use crate::iter::*;
+
+/// `Either<L, R>` is a parallel iterator if both `L` and `R` are parallel iterators.
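+///
+/// A minimal sketch, choosing a source at runtime (both arms must yield the same item type):
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::Either;
+///
+/// let use_range = true;
+/// let par_iter = if use_range {
+///     Either::Left((0..5).into_par_iter())
+/// } else {
+///     Either::Right(vec![10, 20].into_par_iter())
+/// };
+/// let sum: i32 = par_iter.sum();
+/// assert_eq!(sum, 10);
+/// ```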
+impl<L, R> ParallelIterator for Either<L, R>
+where
+    L: ParallelIterator,
+    R: ParallelIterator<Item = L::Item>,
+{
+    type Item = L::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        match self {
+            Left(iter) => iter.drive_unindexed(consumer),
+            Right(iter) => iter.drive_unindexed(consumer),
+        }
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.as_ref().either(L::opt_len, R::opt_len)
+    }
+}
+
+impl<L, R> IndexedParallelIterator for Either<L, R>
+where
+    L: IndexedParallelIterator,
+    R: IndexedParallelIterator<Item = L::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        match self {
+            Left(iter) => iter.drive(consumer),
+            Right(iter) => iter.drive(consumer),
+        }
+    }
+
+    fn len(&self) -> usize {
+        self.as_ref().either(L::len, R::len)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        match self {
+            Left(iter) => iter.with_producer(callback),
+            Right(iter) => iter.with_producer(callback),
+        }
+    }
+}
+
+/// `Either<L, R>` can be extended if both `L` and `R` are parallel extendable.
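+///
+/// A small sketch of extending whichever side is active (the collection types are illustrative):
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::Either;
+/// use std::collections::VecDeque;
+///
+/// let mut target: Either<Vec<i32>, VecDeque<i32>> = Either::Left(Vec::new());
+/// target.par_extend(0..3);
+/// assert_eq!(target, Either::Left(vec![0, 1, 2]));
+/// ```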
+impl<L, R, T> ParallelExtend<T> for Either<L, R>
+where
+    L: ParallelExtend<T>,
+    R: ParallelExtend<T>,
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        match self.as_mut() {
+            Left(collection) => collection.par_extend(par_iter),
+            Right(collection) => collection.par_extend(par_iter),
+        }
+    }
+}
diff --git a/src/prelude.rs b/src/prelude.rs
new file mode 100644
index 0000000..6eaca06
--- /dev/null
+++ b/src/prelude.rs
@@ -0,0 +1,17 @@
+//! The rayon prelude imports the various `ParallelIterator` traits.
+//! The intention is that you can include `use rayon::prelude::*` and
+//! have easy access to the various traits and methods you will need.
+
+pub use crate::iter::FromParallelIterator;
+pub use crate::iter::IndexedParallelIterator;
+pub use crate::iter::IntoParallelIterator;
+pub use crate::iter::IntoParallelRefIterator;
+pub use crate::iter::IntoParallelRefMutIterator;
+pub use crate::iter::ParallelBridge;
+pub use crate::iter::ParallelDrainFull;
+pub use crate::iter::ParallelDrainRange;
+pub use crate::iter::ParallelExtend;
+pub use crate::iter::ParallelIterator;
+pub use crate::slice::ParallelSlice;
+pub use crate::slice::ParallelSliceMut;
+pub use crate::str::ParallelString;
diff --git a/src/private.rs b/src/private.rs
new file mode 100644
index 0000000..c85e77b
--- /dev/null
+++ b/src/private.rs
@@ -0,0 +1,26 @@
+//! The public parts of this private module are used to create traits
+//! that cannot be implemented outside of our own crate.  This way we
+//! can feel free to extend those traits without worrying about it
+//! being a breaking change for other implementations.
+
+/// If this type is pub but not publicly reachable, third parties
+/// can't name it and can't implement traits using it.
+#[allow(missing_debug_implementations)]
+pub struct PrivateMarker;
+
+macro_rules! private_decl {
+    () => {
+        /// This trait is private; this method exists to make it
+        /// impossible to implement outside the crate.
+        #[doc(hidden)]
+        fn __rayon_private__(&self) -> crate::private::PrivateMarker;
+    };
+}
+
+macro_rules! private_impl {
+    () => {
+        fn __rayon_private__(&self) -> crate::private::PrivateMarker {
+            crate::private::PrivateMarker
+        }
+    };
+}
diff --git a/src/range.rs b/src/range.rs
new file mode 100644
index 0000000..09ba25e
--- /dev/null
+++ b/src/range.rs
@@ -0,0 +1,368 @@
+//! Parallel iterator types for [ranges][std::range],
+//! the type for values created by `a..b` expressions
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! ```
+//! use rayon::prelude::*;
+//!
+//! let r = (0..100u64).into_par_iter()
+//!                    .sum();
+//!
+//! // compare result with sequential calculation
+//! assert_eq!((0..100).sum::<u64>(), r);
+//! ```
+//!
+//! [std::range]: https://doc.rust-lang.org/core/ops/struct.Range.html
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::char;
+use std::ops::Range;
+use std::usize;
+
+/// Parallel iterator over a range, implemented for all integer types.
+///
+/// **Note:** The `zip` operation requires `IndexedParallelIterator`
+/// which is not implemented for `u64`, `i64`, `u128`, or `i128`.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let p = (0..25usize).into_par_iter()
+///                   .zip(0..25usize)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum::<usize>();
+///
+/// let s = (0..25usize).zip(0..25)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum();
+///
+/// assert_eq!(p, s);
+/// ```
+#[derive(Debug, Clone)]
+pub struct Iter<T> {
+    range: Range<T>,
+}
+
+impl<T> IntoParallelIterator for Range<T>
+where
+    Iter<T>: ParallelIterator,
+{
+    type Item = <Iter<T> as ParallelIterator>::Item;
+    type Iter = Iter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { range: self }
+    }
+}
+
+struct IterProducer<T> {
+    range: Range<T>,
+}
+
+impl<T> IntoIterator for IterProducer<T>
+where
+    Range<T>: Iterator,
+{
+    type Item = <Range<T> as Iterator>::Item;
+    type IntoIter = Range<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.range
+    }
+}
+
+macro_rules! indexed_range_impl {
+    ( $t:ty ) => {
+        impl ParallelIterator for Iter<$t> {
+            type Item = $t;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+            where
+                C: UnindexedConsumer<Self::Item>,
+            {
+                bridge(self, consumer)
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                Some(self.len())
+            }
+        }
+
+        impl IndexedParallelIterator for Iter<$t> {
+            fn drive<C>(self, consumer: C) -> C::Result
+            where
+                C: Consumer<Self::Item>,
+            {
+                bridge(self, consumer)
+            }
+
+            fn len(&self) -> usize {
+                self.range.len()
+            }
+
+            fn with_producer<CB>(self, callback: CB) -> CB::Output
+            where
+                CB: ProducerCallback<Self::Item>,
+            {
+                callback.callback(IterProducer { range: self.range })
+            }
+        }
+
+        impl Producer for IterProducer<$t> {
+            type Item = <Range<$t> as Iterator>::Item;
+            type IntoIter = Range<$t>;
+            fn into_iter(self) -> Self::IntoIter {
+                self.range
+            }
+
+            fn split_at(self, index: usize) -> (Self, Self) {
+                assert!(index <= self.range.len());
+                // For signed $t, the length and requested index could be greater than $t::MAX, and
+                // then `index as $t` could wrap to negative, so wrapping_add is necessary.
+                let mid = self.range.start.wrapping_add(index as $t);
+                let left = self.range.start..mid;
+                let right = mid..self.range.end;
+                (IterProducer { range: left }, IterProducer { range: right })
+            }
+        }
+    };
+}
+
+trait UnindexedRangeLen<L> {
+    fn len(&self) -> L;
+}
+
+macro_rules! unindexed_range_impl {
+    ( $t:ty, $len_t:ty ) => {
+        impl UnindexedRangeLen<$len_t> for Range<$t> {
+            fn len(&self) -> $len_t {
+                let &Range { start, end } = self;
+                if end > start {
+                    end.wrapping_sub(start) as $len_t
+                } else {
+                    0
+                }
+            }
+        }
+
+        impl ParallelIterator for Iter<$t> {
+            type Item = $t;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+            where
+                C: UnindexedConsumer<Self::Item>,
+            {
+                #[inline]
+                fn offset(start: $t) -> impl Fn(usize) -> $t {
+                    move |i| start.wrapping_add(i as $t)
+                }
+
+                if let Some(len) = self.opt_len() {
+                    // Drive this in indexed mode for better `collect`.
+                    (0..len)
+                        .into_par_iter()
+                        .map(offset(self.range.start))
+                        .drive(consumer)
+                } else {
+                    bridge_unindexed(IterProducer { range: self.range }, consumer)
+                }
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                let len = self.range.len();
+                if len <= usize::MAX as $len_t {
+                    Some(len as usize)
+                } else {
+                    None
+                }
+            }
+        }
+
+        impl UnindexedProducer for IterProducer<$t> {
+            type Item = $t;
+
+            fn split(mut self) -> (Self, Option<Self>) {
+                let index = self.range.len() / 2;
+                if index > 0 {
+                    let mid = self.range.start.wrapping_add(index as $t);
+                    let right = mid..self.range.end;
+                    self.range.end = mid;
+                    (self, Some(IterProducer { range: right }))
+                } else {
+                    (self, None)
+                }
+            }
+
+            fn fold_with<F>(self, folder: F) -> F
+            where
+                F: Folder<Self::Item>,
+            {
+                folder.consume_iter(self)
+            }
+        }
+    };
+}
+
+// all Range<T> with ExactSizeIterator
+indexed_range_impl! {u8}
+indexed_range_impl! {u16}
+indexed_range_impl! {u32}
+indexed_range_impl! {usize}
+indexed_range_impl! {i8}
+indexed_range_impl! {i16}
+indexed_range_impl! {i32}
+indexed_range_impl! {isize}
+
+// other Range<T> with just Iterator
+unindexed_range_impl! {u64, u64}
+unindexed_range_impl! {i64, u64}
+unindexed_range_impl! {u128, u128}
+unindexed_range_impl! {i128, u128}
+
+// char is special because of the surrogate range hole
+macro_rules! convert_char {
+    ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => {{
+        let start = $self.range.start as u32;
+        let end = $self.range.end as u32;
+        if start < 0xD800 && 0xE000 < end {
+            // chain the before and after surrogate range fragments
+            (start..0xD800)
+                .into_par_iter()
+                .chain(0xE000..end)
+                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                .$method($( $arg ),*)
+        } else {
+            // no surrogate range to worry about
+            (start..end)
+                .into_par_iter()
+                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                .$method($( $arg ),*)
+        }
+    }};
+}
+
+impl ParallelIterator for Iter<char> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl IndexedParallelIterator for Iter<char> {
+    // Split at the surrogate range first if we're allowed to
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn len(&self) -> usize {
+        // Taken from <char as Step>::steps_between
+        let start = self.range.start as u32;
+        let end = self.range.end as u32;
+        if start < end {
+            let mut count = end - start;
+            if start < 0xD800 && 0xE000 <= end {
+                count -= 0x800
+            }
+            count as usize
+        } else {
+            0
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        convert_char!(self.with_producer(callback))
+    }
+}
+
+#[test]
+fn check_range_split_at_overflow() {
+    // Note, this split index overflows i8!
+    let producer = IterProducer { range: -100i8..100 };
+    let (left, right) = producer.split_at(150);
+    let r1: i32 = left.range.map(i32::from).sum();
+    let r2: i32 = right.range.map(i32::from).sum();
+    assert_eq!(r1 + r2, -100);
+}
+
+#[test]
+fn test_i128_len_doesnt_overflow() {
+    use std::{i128, u128};
+
+    // Using parse because some versions of rust don't allow long literals
+    let octillion: i128 = "1000000000000000000000000000".parse().unwrap();
+    let producer = IterProducer {
+        range: 0..octillion,
+    };
+
+    assert_eq!(octillion as u128, producer.range.len());
+    assert_eq!(octillion as u128, (0..octillion).len());
+    assert_eq!(2 * octillion as u128, (-octillion..octillion).len());
+
+    assert_eq!(u128::MAX, (i128::MIN..i128::MAX).len());
+}
+
+#[test]
+fn test_u64_opt_len() {
+    use std::{u64, usize};
+    assert_eq!(Some(100), (0..100u64).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..usize::MAX as u64).into_par_iter().opt_len()
+    );
+    if (usize::MAX as u64) < u64::MAX {
+        assert_eq!(
+            None,
+            (0..(usize::MAX as u64).wrapping_add(1))
+                .into_par_iter()
+                .opt_len()
+        );
+        assert_eq!(None, (0..u64::MAX).into_par_iter().opt_len());
+    }
+}
+
+#[test]
+fn test_u128_opt_len() {
+    use std::{u128, usize};
+    assert_eq!(Some(100), (0..100u128).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..usize::MAX as u128).into_par_iter().opt_len()
+    );
+    assert_eq!(None, (0..1 + usize::MAX as u128).into_par_iter().opt_len());
+    assert_eq!(None, (0..u128::MAX).into_par_iter().opt_len());
+}
+
+// `usize as i64` can overflow, so make sure to wrap it appropriately
+// when using the `opt_len` "indexed" mode.
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_usize_i64_overflow() {
+    use crate::ThreadPoolBuilder;
+    use std::i64;
+
+    let iter = (-2..i64::MAX).into_par_iter();
+    assert_eq!(iter.opt_len(), Some(i64::MAX as usize + 2));
+
+    // always run with multiple threads to split into, or this will take forever...
+    let pool = ThreadPoolBuilder::new().num_threads(8).build().unwrap();
+    pool.install(|| assert_eq!(iter.find_last(|_| true), Some(i64::MAX - 1)));
+}
diff --git a/src/range_inclusive.rs b/src/range_inclusive.rs
new file mode 100644
index 0000000..c802b6c
--- /dev/null
+++ b/src/range_inclusive.rs
@@ -0,0 +1,288 @@
+//! Parallel iterator types for [inclusive ranges][std::range],
+//! the type for values created by `a..=b` expressions
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! ```
+//! use rayon::prelude::*;
+//!
+//! let r = (0..=100u64).into_par_iter()
+//!                     .sum();
+//!
+//! // compare result with sequential calculation
+//! assert_eq!((0..=100).sum::<u64>(), r);
+//! ```
+//!
+//! [std::range]: https://doc.rust-lang.org/core/ops/struct.RangeInclusive.html
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::char;
+use std::ops::RangeInclusive;
+
+/// Parallel iterator over an inclusive range, implemented for all integer types.
+///
+/// **Note:** The `zip` operation requires `IndexedParallelIterator`
+/// which is only implemented for `u8`, `i8`, `u16`, and `i16`.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let p = (0..=25u16).into_par_iter()
+///                   .zip(0..=25u16)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum::<u16>();
+///
+/// let s = (0..=25u16).zip(0..=25u16)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum();
+///
+/// assert_eq!(p, s);
+/// ```
+#[derive(Debug, Clone)]
+pub struct Iter<T> {
+    range: RangeInclusive<T>,
+}
+
+impl<T> Iter<T>
+where
+    RangeInclusive<T>: Eq,
+    T: Ord + Copy,
+{
+    /// Returns `Some((start, end))` for `start..=end`, or `None` if it is exhausted.
+    ///
+    /// Note that `RangeInclusive` does not specify the bounds of an exhausted iterator,
+    /// so this is a way for us to figure out what we've got.  Thankfully, all of the
+    /// integer types we care about can be trivially cloned.
+    fn bounds(&self) -> Option<(T, T)> {
+        let start = *self.range.start();
+        let end = *self.range.end();
+        if start <= end && self.range == (start..=end) {
+            // If the range is still nonempty, this is obviously true
+            // If the range is exhausted, either start > end or
+            // the range does not equal start..=end.
+            Some((start, end))
+        } else {
+            None
+        }
+    }
+}
+
+impl<T> IntoParallelIterator for RangeInclusive<T>
+where
+    Iter<T>: ParallelIterator,
+{
+    type Item = <Iter<T> as ParallelIterator>::Item;
+    type Iter = Iter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { range: self }
+    }
+}
+
+macro_rules! convert {
+    ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => {
+        if let Some((start, end)) = $self.bounds() {
+            if let Some(end) = end.checked_add(1) {
+                (start..end).into_par_iter().$method($( $arg ),*)
+            } else {
+                (start..end).into_par_iter().chain(once(end)).$method($( $arg ),*)
+            }
+        } else {
+            empty::<Self::Item>().$method($( $arg ),*)
+        }
+    };
+}
+
+macro_rules! parallel_range_impl {
+    ( $t:ty ) => {
+        impl ParallelIterator for Iter<$t> {
+            type Item = $t;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+            where
+                C: UnindexedConsumer<Self::Item>,
+            {
+                convert!(self.drive_unindexed(consumer))
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                convert!(self.opt_len())
+            }
+        }
+    };
+}
+
+macro_rules! indexed_range_impl {
+    ( $t:ty ) => {
+        parallel_range_impl! { $t }
+
+        impl IndexedParallelIterator for Iter<$t> {
+            fn drive<C>(self, consumer: C) -> C::Result
+            where
+                C: Consumer<Self::Item>,
+            {
+                convert!(self.drive(consumer))
+            }
+
+            fn len(&self) -> usize {
+                self.range.len()
+            }
+
+            fn with_producer<CB>(self, callback: CB) -> CB::Output
+            where
+                CB: ProducerCallback<Self::Item>,
+            {
+                convert!(self.with_producer(callback))
+            }
+        }
+    };
+}
+
+// all RangeInclusive<T> with ExactSizeIterator
+indexed_range_impl! {u8}
+indexed_range_impl! {u16}
+indexed_range_impl! {i8}
+indexed_range_impl! {i16}
+
+// other RangeInclusive<T> with just Iterator
+parallel_range_impl! {usize}
+parallel_range_impl! {isize}
+parallel_range_impl! {u32}
+parallel_range_impl! {i32}
+parallel_range_impl! {u64}
+parallel_range_impl! {i64}
+parallel_range_impl! {u128}
+parallel_range_impl! {i128}
+
+// char is special
+macro_rules! convert_char {
+    ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => {
+        if let Some((start, end)) = $self.bounds() {
+            let start = start as u32;
+            let end = end as u32;
+            if start < 0xD800 && 0xE000 <= end {
+                // chain the before and after surrogate range fragments
+                (start..0xD800)
+                    .into_par_iter()
+                    .chain(0xE000..end + 1) // cannot use RangeInclusive, so add one to end
+                    .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                    .$method($( $arg ),*)
+            } else {
+                // no surrogate range to worry about
+                (start..end + 1) // cannot use RangeInclusive, so add one to end
+                    .into_par_iter()
+                    .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                    .$method($( $arg ),*)
+            }
+        } else {
+            empty().into_par_iter().$method($( $arg ),*)
+        }
+    };
+}
+
+impl ParallelIterator for Iter<char> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+// Range<u32> is already broken on 16-bit platforms, so we may as well benefit from it here
+impl IndexedParallelIterator for Iter<char> {
+    // Split at the surrogate range first if we're allowed to
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn len(&self) -> usize {
+        if let Some((start, end)) = self.bounds() {
+            // Taken from <char as Step>::steps_between
+            let start = start as u32;
+            let end = end as u32;
+            let mut count = end - start;
+            if start < 0xD800 && 0xE000 <= end {
+                count -= 0x800
+            }
+            (count + 1) as usize // add one for inclusive
+        } else {
+            0
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        convert_char!(self.with_producer(callback))
+    }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_u32_opt_len() {
+    use std::u32;
+    assert_eq!(Some(101), (0..=100u32).into_par_iter().opt_len());
+    assert_eq!(
+        Some(u32::MAX as usize),
+        (0..=u32::MAX - 1).into_par_iter().opt_len()
+    );
+    assert_eq!(
+        Some(u32::MAX as usize + 1),
+        (0..=u32::MAX).into_par_iter().opt_len()
+    );
+}
+
+#[test]
+fn test_u64_opt_len() {
+    use std::{u64, usize};
+    assert_eq!(Some(101), (0..=100u64).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..=usize::MAX as u64 - 1).into_par_iter().opt_len()
+    );
+    assert_eq!(None, (0..=usize::MAX as u64).into_par_iter().opt_len());
+    assert_eq!(None, (0..=u64::MAX).into_par_iter().opt_len());
+}
+
+#[test]
+fn test_u128_opt_len() {
+    use std::{u128, usize};
+    assert_eq!(Some(101), (0..=100u128).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..=usize::MAX as u128 - 1).into_par_iter().opt_len()
+    );
+    assert_eq!(None, (0..=usize::MAX as u128).into_par_iter().opt_len());
+    assert_eq!(None, (0..=u128::MAX).into_par_iter().opt_len());
+}
+
+// `usize as i64` can overflow, so make sure to wrap it appropriately
+// when using the `opt_len` "indexed" mode.
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_usize_i64_overflow() {
+    use crate::ThreadPoolBuilder;
+    use std::i64;
+
+    let iter = (-2..=i64::MAX).into_par_iter();
+    assert_eq!(iter.opt_len(), Some(i64::MAX as usize + 3));
+
+    // always run with multiple threads to split into, or this will take forever...
+    let pool = ThreadPoolBuilder::new().num_threads(8).build().unwrap();
+    pool.install(|| assert_eq!(iter.find_last(|_| true), Some(i64::MAX)));
+}
diff --git a/src/result.rs b/src/result.rs
new file mode 100644
index 0000000..43685ca
--- /dev/null
+++ b/src/result.rs
@@ -0,0 +1,132 @@
+//! Parallel iterator types for [results][std::result]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::result]: https://doc.rust-lang.org/stable/std/result/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::sync::Mutex;
+
+use crate::option;
+
+/// Parallel iterator over a result
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: option::IntoIter<T>,
+}
+
+impl<T: Send, E> IntoParallelIterator for Result<T, E> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter {
+            inner: self.ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a result
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: option::IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync, E> IntoParallelIterator for &'a Result<T, E> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter {
+            inner: self.as_ref().ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a result
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: option::IntoIter<&'a mut T>,
+}
+
+impl<'a, T: Send, E> IntoParallelIterator for &'a mut Result<T, E> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut {
+            inner: self.as_mut().ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Collect an arbitrary `Result`-wrapped collection.
+///
+/// If any item is `Err`, then all previous `Ok` items collected are
+/// discarded, and it returns that error.  If there are multiple errors, the
+/// one returned is not deterministic.
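+///
+/// An illustrative sketch:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// // all `Ok` items collect into `Ok`
+/// let ok: Result<Vec<i32>, ()> = (0..3).into_par_iter().map(Ok).collect();
+/// assert_eq!(ok, Ok(vec![0, 1, 2]));
+///
+/// // a single `Err` discards the collected items and returns that error
+/// let err: Result<Vec<i32>, i32> = (0..100).into_par_iter()
+///     .map(|x| if x == 50 { Err(x) } else { Ok(x) })
+///     .collect();
+/// assert_eq!(err, Err(50));
+/// ```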
+impl<C, T, E> FromParallelIterator<Result<T, E>> for Result<C, E>
+where
+    C: FromParallelIterator<T>,
+    T: Send,
+    E: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = Result<T, E>>,
+    {
+        fn ok<T, E>(saved: &Mutex<Option<E>>) -> impl Fn(Result<T, E>) -> Option<T> + '_ {
+            move |item| match item {
+                Ok(item) => Some(item),
+                Err(error) => {
+                    // We don't need a blocking `lock()`, as anybody
+                    // else holding the lock will also be writing
+                    // `Some(error)`, and then ours is irrelevant.
+                    if let Ok(mut guard) = saved.try_lock() {
+                        if guard.is_none() {
+                            *guard = Some(error);
+                        }
+                    }
+                    None
+                }
+            }
+        }
+
+        let saved_error = Mutex::new(None);
+        let collection = par_iter
+            .into_par_iter()
+            .map(ok(&saved_error))
+            .while_some()
+            .collect();
+
+        match saved_error.into_inner().unwrap() {
+            Some(error) => Err(error),
+            None => Ok(collection),
+        }
+    }
+}
diff --git a/src/slice/mergesort.rs b/src/slice/mergesort.rs
new file mode 100644
index 0000000..a007cae
--- /dev/null
+++ b/src/slice/mergesort.rs
@@ -0,0 +1,763 @@
+//! Parallel merge sort.
+//!
+//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
+//! The only difference from the original is that the sequential `mergesort` returns
+//! `MergesortResult` and leaves descending arrays intact.
+
+use crate::iter::*;
+use crate::slice::ParallelSliceMut;
+use std::mem;
+use std::mem::size_of;
+use std::ptr;
+use std::slice;
+
+unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
+    let old = *ptr;
+    *ptr = ptr.offset(1);
+    old
+}
+
+unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
+    *ptr = ptr.offset(-1);
+    *ptr
+}
+
+/// When dropped, copies a sequence of length `len` from `src` into `dest`.
+struct CopyOnDrop<T> {
+    src: *mut T,
+    dest: *mut T,
+    len: usize,
+}
+
+impl<T> Drop for CopyOnDrop<T> {
+    fn drop(&mut self) {
+        unsafe {
+            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
+        }
+    }
+}
+
+/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
+///
+/// This is the integral subroutine of insertion sort.
+fn insert_head<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    if v.len() >= 2 && is_less(&v[1], &v[0]) {
+        unsafe {
+            // There are three ways to implement insertion here:
+            //
+            // 1. Swap adjacent elements until the first one gets to its final destination.
+            //    However, this way we copy data around more than is necessary. If elements are big
+            //    structures (costly to copy), this method will be slow.
+            //
+            // 2. Iterate until the right place for the first element is found. Then shift the
+            //    elements succeeding it to make room for it and finally place it into the
+            //    remaining hole. This is a good method.
+            //
+            // 3. Copy the first element into a temporary variable. Iterate until the right place
+            //    for it is found. As we go along, copy every traversed element into the slot
+            //    preceding it. Finally, copy data from the temporary variable into the remaining
+            //    hole. This method is very good. Benchmarks demonstrated slightly better
+            //    performance than with the 2nd method.
+            //
+            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
+            let mut tmp = NoDrop {
+                value: Some(ptr::read(&v[0])),
+            };
+
+            // Intermediate state of the insertion process is always tracked by `hole`, which
+            // serves two purposes:
+            // 1. Protects integrity of `v` from panics in `is_less`.
+            // 2. Fills the remaining hole in `v` in the end.
+            //
+            // Panic safety:
+            //
+            // If `is_less` panics at any point during the process, `hole` will get dropped and
+            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
+            // initially held exactly once.
+            let mut hole = InsertionHole {
+                src: tmp.value.as_mut().unwrap(),
+                dest: &mut v[1],
+            };
+            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
+
+            for i in 2..v.len() {
+                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
+                    break;
+                }
+                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
+                hole.dest = &mut v[i];
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+
+    // Holds a value, but never drops it.
+    struct NoDrop<T> {
+        value: Option<T>,
+    }
+
+    impl<T> Drop for NoDrop<T> {
+        fn drop(&mut self) {
+            mem::forget(self.value.take());
+        }
+    }
+
+    // When dropped, copies from `src` into `dest`.
+    struct InsertionHole<T> {
+        src: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for InsertionHole<T> {
+        fn drop(&mut self) {
+            unsafe {
+                ptr::copy_nonoverlapping(self.src, self.dest, 1);
+            }
+        }
+    }
+}
+
+/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
+/// stores the result into `v[..]`.
+///
+/// # Safety
+///
+/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
+/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
+unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let len = v.len();
+    let v = v.as_mut_ptr();
+    let v_mid = v.add(mid);
+    let v_end = v.add(len);
+
+    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
+    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
+    // copying the lesser (or greater) one into `v`.
+    //
+    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
+    // consumed first, then we must copy whatever is left of the shorter run into the remaining
+    // hole in `v`.
+    //
+    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
+    // 1. Protects integrity of `v` from panics in `is_less`.
+    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
+    //
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
+    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
+    // object it initially held exactly once.
+    let mut hole;
+
+    if mid <= len - mid {
+        // The left run is shorter.
+        ptr::copy_nonoverlapping(v, buf, mid);
+        hole = MergeHole {
+            start: buf,
+            end: buf.add(mid),
+            dest: v,
+        };
+
+        // Initially, these pointers point to the beginnings of their arrays.
+        let left = &mut hole.start;
+        let mut right = v_mid;
+        let out = &mut hole.dest;
+
+        while *left < hole.end && right < v_end {
+            // Consume the lesser side.
+            // If equal, prefer the left run to maintain stability.
+            let to_copy = if is_less(&*right, &**left) {
+                get_and_increment(&mut right)
+            } else {
+                get_and_increment(left)
+            };
+            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
+        }
+    } else {
+        // The right run is shorter.
+        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
+        hole = MergeHole {
+            start: buf,
+            end: buf.add(len - mid),
+            dest: v_mid,
+        };
+
+        // Initially, these pointers point past the ends of their arrays.
+        let left = &mut hole.dest;
+        let right = &mut hole.end;
+        let mut out = v_end;
+
+        while v < *left && buf < *right {
+            // Consume the greater side.
+            // If equal, prefer the right run to maintain stability.
+            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
+                decrement_and_get(left)
+            } else {
+                decrement_and_get(right)
+            };
+            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
+        }
+    }
+    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
+    // it will now be copied into the hole in `v`.
+
+    // When dropped, copies the range `start..end` into `dest..`.
+    struct MergeHole<T> {
+        start: *mut T,
+        end: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for MergeHole<T> {
+        fn drop(&mut self) {
+            // `T` is not a zero-sized type, so it's okay to divide by its size.
+            let len = (self.end as usize - self.start as usize) / size_of::<T>();
+            unsafe {
+                ptr::copy_nonoverlapping(self.start, self.dest, len);
+            }
+        }
+    }
+}
+
+/// The result of merge sort.
+#[must_use]
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum MergesortResult {
+    /// The slice has already been sorted.
+    NonDescending,
+    /// The slice was descending and was therefore left intact.
+    Descending,
+    /// The slice was sorted.
+    Sorted,
+}
+
+/// A sorted run that starts at index `start` and is of length `len`.
+#[derive(Clone, Copy)]
+struct Run {
+    start: usize,
+    len: usize,
+}
+
+/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
+/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
+/// algorithm should continue building a new run instead, `None` is returned.
+///
+/// TimSort is infamous for its buggy implementations, as described here:
+/// http://envisage-project.eu/timsort-specification-and-verification/
+///
+/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
+/// Enforcing them on just top three is not sufficient to ensure that the invariants will still
+/// hold for *all* runs in the stack.
+///
+/// This function correctly checks invariants for the top four runs. Additionally, if the top
+/// run starts at index 0, it will always demand a merge operation until the stack is fully
+/// collapsed, in order to complete the sort.
+#[inline]
+fn collapse(runs: &[Run]) -> Option<usize> {
+    let n = runs.len();
+
+    if n >= 2
+        && (runs[n - 1].start == 0
+            || runs[n - 2].len <= runs[n - 1].len
+            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
+            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
+    {
+        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
+            Some(n - 3)
+        } else {
+            Some(n - 2)
+        }
+    } else {
+        None
+    }
+}
+
+/// Sorts a slice using merge sort, unless it is already in descending order.
+///
+/// This function doesn't modify the slice if it is already non-descending or descending.
+/// Otherwise, it sorts the slice into non-descending order.
+///
+/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
+/// [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt).
+///
+/// The algorithm identifies strictly descending and non-descending subsequences, which are called
+/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
+/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
+/// satisfied:
+///
+/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
+/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
+///
+/// The invariants ensure that the total running time is `O(n log n)` worst-case.
+///
+/// # Safety
+///
+/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
+unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Very short runs are extended using insertion sort to span at least this many elements.
+    const MIN_RUN: usize = 10;
+
+    let len = v.len();
+
+    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
+    // strange decision, but consider the fact that merges more often go in the opposite direction
+    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
+    // backwards. To conclude, identifying runs by traversing backwards improves performance.
+    let mut runs = vec![];
+    let mut end = len;
+    while end > 0 {
+        // Find the next natural run, and reverse it if it's strictly descending.
+        let mut start = end - 1;
+
+        if start > 0 {
+            start -= 1;
+
+            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
+                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                    start -= 1;
+                }
+
+                // If this descending run covers the whole slice, return immediately.
+                if start == 0 && end == len {
+                    return MergesortResult::Descending;
+                } else {
+                    v[start..end].reverse();
+                }
+            } else {
+                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                    start -= 1;
+                }
+
+                // If this non-descending run covers the whole slice, return immediately.
+                if end - start == len {
+                    return MergesortResult::NonDescending;
+                }
+            }
+        }
+
+        // Insert some more elements into the run if it's too short. Insertion sort is faster than
+        // merge sort on short sequences, so this significantly improves performance.
+        while start > 0 && end - start < MIN_RUN {
+            start -= 1;
+            insert_head(&mut v[start..end], &is_less);
+        }
+
+        // Push this run onto the stack.
+        runs.push(Run {
+            start,
+            len: end - start,
+        });
+        end = start;
+
+        // Merge some pairs of adjacent runs to satisfy the invariants.
+        while let Some(r) = collapse(&runs) {
+            let left = runs[r + 1];
+            let right = runs[r];
+            merge(
+                &mut v[left.start..right.start + right.len],
+                left.len,
+                buf,
+                &is_less,
+            );
+
+            runs[r] = Run {
+                start: left.start,
+                len: left.len + right.len,
+            };
+            runs.remove(r + 1);
+        }
+    }
+
+    // Finally, exactly one run must remain in the stack.
+    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+
+    // The original order of the slice was neither non-descending nor descending.
+    MergesortResult::Sorted
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
+// Everything below this line is parallelization.
+////////////////////////////////////////////////////////////////////////////
+
+/// Splits two sorted slices so that they can be merged in parallel.
+///
+/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
+/// `left[a..]` and `right[b..]`.
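+///
+/// For example (an illustrative case, not taken from the tests below): splitting
+/// `left = [1, 3, 5, 7]` and `right = [2, 4, 6]` with `<` as `is_less` yields `(2, 2)`,
+/// since `left[..2] = [1, 3]` and `right[..2] = [2, 4]` all come before
+/// `left[2..] = [5, 7]` and `right[2..] = [6]`.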
+fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let left_len = left.len();
+    let right_len = right.len();
+
+    if left_len >= right_len {
+        let left_mid = left_len / 2;
+
+        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
+        let mut a = 0;
+        let mut b = right_len;
+        while a < b {
+            let m = a + (b - a) / 2;
+            if is_less(&right[m], &left[left_mid]) {
+                a = m + 1;
+            } else {
+                b = m;
+            }
+        }
+
+        (left_mid, a)
+    } else {
+        let right_mid = right_len / 2;
+
+        // Find the first element in `left` that is greater than `right[right_mid]`.
+        let mut a = 0;
+        let mut b = left_len;
+        while a < b {
+            let m = a + (b - a) / 2;
+            if is_less(&right[right_mid], &left[m]) {
+                b = m;
+            } else {
+                a = m + 1;
+            }
+        }
+
+        (a, right_mid)
+    }
+}
+
+/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
+///
+/// # Safety
+///
+/// The `dest` pointer must have enough space to store the result.
+///
+/// Even if `is_less` panics at any point during the merge process, this function will fully copy
+/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
+unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
+    // larger than `CHUNK_LENGTH` because merging is faster than merge sorting, so merging needs a
+    // somewhat coarser granularity in order to hide the overhead of Rayon's task scheduling.
+    const MAX_SEQUENTIAL: usize = 5000;
+
+    let left_len = left.len();
+    let right_len = right.len();
+
+    // Intermediate state of the merge process, which serves two purposes:
+    // 1. Protects integrity of `dest` from panics in `is_less`.
+    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
+    //
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
+    // remaining parts of `left` and `right` into `dest`.
+    let mut s = State {
+        left_start: left.as_mut_ptr(),
+        left_end: left.as_mut_ptr().add(left_len),
+        right_start: right.as_mut_ptr(),
+        right_end: right.as_mut_ptr().add(right_len),
+        dest,
+    };
+
+    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
+        while s.left_start < s.left_end && s.right_start < s.right_end {
+            // Consume the lesser side.
+            // If equal, prefer the left run to maintain stability.
+            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
+                get_and_increment(&mut s.right_start)
+            } else {
+                get_and_increment(&mut s.left_start)
+            };
+            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
+        }
+    } else {
+        // Function `split_for_merge` might panic. If that happens, `s` will be dropped, copying
+        // the whole of `left` and `right` into `dest`.
+        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
+        let (left_l, left_r) = left.split_at_mut(left_mid);
+        let (right_l, right_r) = right.split_at_mut(right_mid);
+
+        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
+        // `par_merge` happen. If one of the two calls panics, they will ensure that elements still
+        // get copied into `dest_l` and `dest_r`.
+        mem::forget(s);
+
+        // Convert the pointers to `usize` because `*mut T` is not `Send`.
+        let dest_l = dest as usize;
+        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
+        rayon_core::join(
+            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
+            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
+        );
+    }
+    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
+    // all at once.
+
+    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
+    // in that order.
+    struct State<T> {
+        left_start: *mut T,
+        left_end: *mut T,
+        right_start: *mut T,
+        right_end: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for State<T> {
+        fn drop(&mut self) {
+            let size = size_of::<T>();
+            let left_len = (self.left_end as usize - self.left_start as usize) / size;
+            let right_len = (self.right_end as usize - self.right_start as usize) / size;
+
+            // Copy array `left`, followed by `right`.
+            unsafe {
+                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
+                self.dest = self.dest.add(left_len);
+                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
+            }
+        }
+    }
+}
+
+/// Recursively merges pre-sorted chunks inside `v`.
+///
+/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
+/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
+/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
+///
+/// # Safety
+///
+/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
+/// must equal the left bound of the following chunk.
+///
+/// The buffer must be at least as long as `v`.
+unsafe fn recurse<T, F>(
+    v: *mut T,
+    buf: *mut T,
+    chunks: &[(usize, usize)],
+    into_buf: bool,
+    is_less: &F,
+) where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    let len = chunks.len();
+    debug_assert!(len > 0);
+
+    // Base case of the algorithm.
+    // If only one chunk is remaining, there's no more work to split and merge.
+    if len == 1 {
+        if into_buf {
+            // Copy the chunk from `v` into `buf`.
+            let (start, end) = chunks[0];
+            let src = v.add(start);
+            let dest = buf.add(start);
+            ptr::copy_nonoverlapping(src, dest, end - start);
+        }
+        return;
+    }
+
+    // Split the chunks into two halves.
+    let (start, _) = chunks[0];
+    let (mid, _) = chunks[len / 2];
+    let (_, end) = chunks[len - 1];
+    let (left, right) = chunks.split_at(len / 2);
+
+    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
+    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
+    // merge chunks from `v` into `buf`, and vice versa.
+    //
+    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
+    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
+    // level etc.
+    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };
+
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
+    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
+    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
+    let guard = CopyOnDrop {
+        src: src.add(start),
+        dest: dest.add(start),
+        len: end - start,
+    };
+
+    // Convert the pointers to `usize` because `*mut T` is not `Send`.
+    let v = v as usize;
+    let buf = buf as usize;
+    rayon_core::join(
+        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
+        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
+    );
+
+    // Everything went all right - recursive calls didn't panic.
+    // Forget the guard in order to prevent its destructor from running.
+    mem::forget(guard);
+
+    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
+    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
+    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
+    par_merge(src_left, src_right, dest.add(start), is_less);
+}
+
+/// Sorts `v` using merge sort in parallel.
+///
+/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case.
+/// The allocated temporary buffer is of the same length as is `v`.
+pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
+    // buffer allocation.
+    const MAX_INSERTION: usize = 20;
+    // The length of the initial chunks. This number is chosen to be as small as possible while
+    // keeping the overhead of Rayon's task scheduling negligible.
+    const CHUNK_LENGTH: usize = 2000;
+
+    // Sorting has no meaningful behavior on zero-sized types.
+    if size_of::<T>() == 0 {
+        return;
+    }
+
+    let len = v.len();
+
+    // Short slices get sorted in-place via insertion sort to avoid allocations.
+    if len <= MAX_INSERTION {
+        if len >= 2 {
+            for i in (0..len - 1).rev() {
+                insert_head(&mut v[i..], &is_less);
+            }
+        }
+        return;
+    }
+
+    // Allocate a buffer to use as scratch memory. We keep the length at 0 so that it can hold
+    // shallow copies of the contents of `v` without risking the destructors running on those
+    // copies if `is_less` panics.
+    let mut buf = Vec::<T>::with_capacity(len);
+    let buf = buf.as_mut_ptr();
+
+    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
+    if len <= CHUNK_LENGTH {
+        let res = unsafe { mergesort(v, buf, &is_less) };
+        if res == MergesortResult::Descending {
+            v.reverse();
+        }
+        return;
+    }
+
+    // Split the slice into chunks and merge sort them in parallel.
+    // However, descending chunks will not be sorted - they will be simply left intact.
+    let mut iter = {
+        // Convert the pointer to `usize` because `*mut T` is not `Send`.
+        let buf = buf as usize;
+
+        v.par_chunks_mut(CHUNK_LENGTH)
+            .with_max_len(1)
+            .enumerate()
+            .map(|(i, chunk)| {
+                let l = CHUNK_LENGTH * i;
+                let r = l + chunk.len();
+                unsafe {
+                    let buf = (buf as *mut T).add(l);
+                    (l, r, mergesort(chunk, buf, &is_less))
+                }
+            })
+            .collect::<Vec<_>>()
+            .into_iter()
+            .peekable()
+    };
+
+    // Now attempt to concatenate adjacent chunks that were left intact.
+    let mut chunks = Vec::with_capacity(iter.len());
+
+    while let Some((a, mut b, res)) = iter.next() {
+        // If this chunk was not modified by the sort procedure...
+        if res != MergesortResult::Sorted {
+            while let Some(&(x, y, r)) = iter.peek() {
+                // If the following chunk is of the same type and can be concatenated...
+                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
+                    // Concatenate them.
+                    b = y;
+                    iter.next();
+                } else {
+                    break;
+                }
+            }
+        }
+
+        // Descending chunks must be reversed.
+        if res == MergesortResult::Descending {
+            v[a..b].reverse();
+        }
+
+        chunks.push((a, b));
+    }
+
+    // All chunks are properly sorted.
+    // Now we just have to merge them together.
+    unsafe {
+        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::split_for_merge;
+    use rand::distributions::Uniform;
+    use rand::{thread_rng, Rng};
+
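+    // The two tests below are small illustrative sketches added alongside the
+    // original `test_split_for_merge`; they exercise the private helpers
+    // `insert_head` and `collapse` on hand-built inputs.
+    #[test]
+    fn test_insert_head() {
+        use super::insert_head;
+
+        // `v[1..]` is already sorted; `insert_head` moves `v[0]` into place.
+        let mut v = [3, 1, 2, 4, 5];
+        insert_head(&mut v, &|a, b| a < b);
+        assert_eq!(v, [1, 2, 3, 4, 5]);
+    }
+
+    #[test]
+    fn test_collapse() {
+        use super::{collapse, Run};
+
+        let run = |start, len| Run { start, len };
+
+        // Both invariants hold and the top run does not start at index 0,
+        // so no merge is demanded yet.
+        assert_eq!(collapse(&[run(30, 40), run(10, 20), run(3, 7)]), None);
+
+        // `runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len` (30 <= 20 + 10),
+        // so the two topmost runs must be merged.
+        assert_eq!(collapse(&[run(33, 30), run(13, 20), run(3, 10)]), Some(1));
+
+        // The top run starts at index 0, which forces merging until the stack
+        // is fully collapsed.
+        assert_eq!(collapse(&[run(10, 30), run(0, 10)]), Some(0));
+    }
+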
+    #[test]
+    fn test_split_for_merge() {
+        fn check(left: &[u32], right: &[u32]) {
+            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
+            assert!(left[..l]
+                .iter()
+                .all(|&x| right[r..].iter().all(|&y| x <= y)));
+            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
+        }
+
+        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
+        check(&[1, 2, 2, 2, 2, 3], &[]);
+        check(&[], &[1, 2, 2, 2, 2, 3]);
+
+        let mut rng = thread_rng();
+
+        for _ in 0..100 {
+            let limit: u32 = rng.gen_range(1, 21);
+            let left_len: usize = rng.gen_range(0, 20);
+            let right_len: usize = rng.gen_range(0, 20);
+
+            let mut left = rng
+                .sample_iter(&Uniform::new(0, limit))
+                .take(left_len)
+                .collect::<Vec<_>>();
+            let mut right = rng
+                .sample_iter(&Uniform::new(0, limit))
+                .take(right_len)
+                .collect::<Vec<_>>();
+
+            left.sort();
+            right.sort();
+            check(&left, &right);
+        }
+    }
+}
diff --git a/src/slice/mod.rs b/src/slice/mod.rs
new file mode 100644
index 0000000..b80125f
--- /dev/null
+++ b/src/slice/mod.rs
@@ -0,0 +1,1203 @@
+//! Parallel iterator types for [slices][std::slice]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
+
+mod mergesort;
+mod quicksort;
+
+mod test;
+
+use self::mergesort::par_mergesort;
+use self::quicksort::par_quicksort;
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::split_producer::*;
+use std::cmp;
+use std::cmp::Ordering;
+use std::fmt::{self, Debug};
+
+use super::math::div_round_up;
+
+/// Parallel extensions for slices.
+pub trait ParallelSlice<T: Sync> {
+    /// Returns a plain slice, which is used to implement the rest of the
+    /// parallel methods.
+    fn as_parallel_slice(&self) -> &[T];
+
+    /// Returns a parallel iterator over subslices separated by elements that
+    /// match the separator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let smallest = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
+    ///     .par_split(|i| *i == 0)
+    ///     .map(|numbers| numbers.iter().min().unwrap())
+    ///     .min();
+    /// assert_eq!(Some(&1), smallest);
+    /// ```
+    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
+    where
+        P: Fn(&T) -> bool + Sync + Send,
+    {
+        Split {
+            slice: self.as_parallel_slice(),
+            separator,
+        }
+    }
+
+    /// Returns a parallel iterator over all contiguous windows of length
+    /// `window_size`. The windows overlap.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
+    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
+    /// ```
+    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
+        Windows {
+            window_size,
+            slice: self.as_parallel_slice(),
+        }
+    }
+
+    /// Returns a parallel iterator over at most `chunk_size` elements of
+    /// `self` at a time. The chunks do not overlap.
+    ///
+    /// If the number of elements in the iterator is not divisible by
+    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
+    /// other chunks will have that exact length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
+    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
+    /// ```
+    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        Chunks {
+            chunk_size,
+            slice: self.as_parallel_slice(),
+        }
+    }
+
+    /// Returns a parallel iterator over `chunk_size` elements of
+    /// `self` at a time. The chunks do not overlap.
+    ///
+    /// If `chunk_size` does not divide the length of the slice, then the
+    /// last up to `chunk_size-1` elements will be omitted and can be
+    /// retrieved from the remainder function of the iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
+    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
+    /// ```
+    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        let slice = self.as_parallel_slice();
+        let rem = slice.len() % chunk_size;
+        let len = slice.len() - rem;
+        let (fst, snd) = slice.split_at(len);
+        ChunksExact {
+            chunk_size,
+            slice: fst,
+            rem: snd,
+        }
+    }
+}
+
+impl<T: Sync> ParallelSlice<T> for [T] {
+    #[inline]
+    fn as_parallel_slice(&self) -> &[T] {
+        self
+    }
+}
+
+/// Parallel extensions for mutable slices.
+pub trait ParallelSliceMut<T: Send> {
+    /// Returns a plain mutable slice, which is used to implement the rest of
+    /// the parallel methods.
+    fn as_parallel_slice_mut(&mut self) -> &mut [T];
+
+    /// Returns a parallel iterator over mutable subslices separated by
+    /// elements that match the separator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
+    /// array.par_split_mut(|i| *i == 0)
+    ///      .for_each(|slice| slice.reverse());
+    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
+    /// ```
+    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
+    where
+        P: Fn(&T) -> bool + Sync + Send,
+    {
+        SplitMut {
+            slice: self.as_parallel_slice_mut(),
+            separator,
+        }
+    }
+
+    /// Returns a parallel iterator over at most `chunk_size` elements of
+    /// `self` at a time. The chunks are mutable and do not overlap.
+    ///
+    /// If the number of elements in the iterator is not divisible by
+    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
+    /// other chunks will have that exact length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut array = [1, 2, 3, 4, 5];
+    /// array.par_chunks_mut(2)
+    ///      .for_each(|slice| slice.reverse());
+    /// assert_eq!(array, [2, 1, 4, 3, 5]);
+    /// ```
+    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        ChunksMut {
+            chunk_size,
+            slice: self.as_parallel_slice_mut(),
+        }
+    }
+
+    /// Returns a parallel iterator over `chunk_size` elements of
+    /// `self` at a time. The chunks are mutable and do not overlap.
+    ///
+    /// If `chunk_size` does not divide the length of the slice, then the
+    /// last up to `chunk_size-1` elements will be omitted and can be
+    /// retrieved from the remainder function of the iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut array = [1, 2, 3, 4, 5];
+    /// array.par_chunks_exact_mut(3)
+    ///      .for_each(|slice| slice.reverse());
+    /// assert_eq!(array, [3, 2, 1, 4, 5]);
+    /// ```
+    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        let slice = self.as_parallel_slice_mut();
+        let rem = slice.len() % chunk_size;
+        let len = slice.len() - rem;
+        let (fst, snd) = slice.split_at_mut(len);
+        ChunksExactMut {
+            chunk_size,
+            slice: fst,
+            rem: snd,
+        }
+    }
+
+    /// Sorts the slice in parallel.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    ///
+    /// When applicable, unstable sorting is preferred because it is generally faster than stable
+    /// sorting and it doesn't allocate auxiliary memory.
+    /// See [`par_sort_unstable`](#method.par_sort_unstable).
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is an adaptive merge sort inspired by
+    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
+    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
+    /// two or more sorted sequences concatenated one after another.
+    ///
+    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
+    /// non-allocating insertion sort is used instead.
+    ///
+    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
+    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
+    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
+    /// parallel subdivision of chunks and a parallel merge operation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5, 4, 1, -3, 2];
+    ///
+    /// v.par_sort();
+    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
+    /// ```
+    fn par_sort(&mut self)
+    where
+        T: Ord,
+    {
+        par_mergesort(self.as_parallel_slice_mut(), T::lt);
+    }
+
+    /// Sorts the slice in parallel with a comparator function.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    ///
+    /// When applicable, unstable sorting is preferred because it is generally faster than stable
+    /// sorting and it doesn't allocate auxiliary memory.
+    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is an adaptive merge sort inspired by
+    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
+    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
+    /// two or more sorted sequences concatenated one after another.
+    ///
+    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
+    /// non-allocating insertion sort is used instead.
+    ///
+    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
+    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
+    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
+    /// parallel subdivision of chunks and a parallel merge operation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [5, 4, 1, 3, 2];
+    /// v.par_sort_by(|a, b| a.cmp(b));
+    /// assert_eq!(v, [1, 2, 3, 4, 5]);
+    ///
+    /// // reverse sorting
+    /// v.par_sort_by(|a, b| b.cmp(a));
+    /// assert_eq!(v, [5, 4, 3, 2, 1]);
+    /// ```
+    fn par_sort_by<F>(&mut self, compare: F)
+    where
+        F: Fn(&T, &T) -> Ordering + Sync,
+    {
+        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
+            compare(a, b) == Ordering::Less
+        });
+    }
+
+    /// Sorts the slice in parallel with a key extraction function.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    ///
+    /// When applicable, unstable sorting is preferred because it is generally faster than stable
+    /// sorting and it doesn't allocate auxiliary memory.
+    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is an adaptive merge sort inspired by
+    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
+    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
+    /// two or more sorted sequences concatenated one after another.
+    ///
+    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
+    /// non-allocating insertion sort is used instead.
+    ///
+    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
+    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
+    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
+    /// parallel subdivision of chunks and a parallel merge operation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5i32, 4, 1, -3, 2];
+    ///
+    /// v.par_sort_by_key(|k| k.abs());
+    /// assert_eq!(v, [1, 2, -3, 4, -5]);
+    /// ```
+    fn par_sort_by_key<B, F>(&mut self, f: F)
+    where
+        B: Ord,
+        F: Fn(&T) -> B + Sync,
+    {
+        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
+    }
+
+    /// Sorts the slice in parallel, but may not preserve the order of equal elements.
+    ///
+    /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
+    /// and `O(n log n)` worst-case.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
+    /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
+    /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
+    /// heapsort on degenerate inputs.
+    ///
+    /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+    /// slice consists of several concatenated sorted sequences.
+    ///
+    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+    /// parallel.
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5, 4, 1, -3, 2];
+    ///
+    /// v.par_sort_unstable();
+    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
+    /// ```
+    fn par_sort_unstable(&mut self)
+    where
+        T: Ord,
+    {
+        par_quicksort(self.as_parallel_slice_mut(), T::lt);
+    }
+
+    /// Sorts the slice in parallel with a comparator function, but may not preserve the order of
+    /// equal elements.
+    ///
+    /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
+    /// and `O(n log n)` worst-case.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
+    /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
+    /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
+    /// heapsort on degenerate inputs.
+    ///
+    /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+    /// slice consists of several concatenated sorted sequences.
+    ///
+    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+    /// parallel.
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [5, 4, 1, 3, 2];
+    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
+    /// assert_eq!(v, [1, 2, 3, 4, 5]);
+    ///
+    /// // reverse sorting
+    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
+    /// assert_eq!(v, [5, 4, 3, 2, 1]);
+    /// ```
+    fn par_sort_unstable_by<F>(&mut self, compare: F)
+    where
+        F: Fn(&T, &T) -> Ordering + Sync,
+    {
+        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
+            compare(a, b) == Ordering::Less
+        });
+    }
+
+    /// Sorts the slice in parallel with a key extraction function, but may not preserve the order
+    /// of equal elements.
+    ///
+    /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
+    /// and `O(n log n)` worst-case.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
+    /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
+    /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
+    /// heapsort on degenerate inputs.
+    ///
+    /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+    /// slice consists of several concatenated sorted sequences.
+    ///
+    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+    /// parallel.
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5i32, 4, 1, -3, 2];
+    ///
+    /// v.par_sort_unstable_by_key(|k| k.abs());
+    /// assert_eq!(v, [1, 2, -3, 4, -5]);
+    /// ```
+    fn par_sort_unstable_by_key<B, F>(&mut self, f: F)
+    where
+        B: Ord,
+        F: Fn(&T) -> B + Sync,
+    {
+        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
+    }
+}
+
+impl<T: Send> ParallelSliceMut<T> for [T] {
+    #[inline]
+    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
+        self
+    }
+}
+
+impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
+    type Item = &'data T;
+    type Iter = Iter<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { slice: self }
+    }
+}
+
+impl<'data, T: Sync + 'data> IntoParallelIterator for &'data Vec<T> {
+    type Item = &'data T;
+    type Iter = Iter<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { slice: self }
+    }
+}
+
+impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
+    type Item = &'data mut T;
+    type Iter = IterMut<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut { slice: self }
+    }
+}
+
+impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut Vec<T> {
+    type Item = &'data mut T;
+    type Iter = IterMut<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut { slice: self }
+    }
+}
+
+/// Parallel iterator over immutable items in a slice
+#[derive(Debug)]
+pub struct Iter<'data, T: Sync> {
+    slice: &'data [T],
+}
+
+impl<'data, T: Sync> Clone for Iter<'data, T> {
+    fn clone(&self) -> Self {
+        Iter { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
+    type Item = &'data T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(IterProducer { slice: self.slice })
+    }
+}
+
+struct IterProducer<'data, T: Sync> {
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
+    type Item = &'data T;
+    type IntoIter = ::std::slice::Iter<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.iter()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.slice.split_at(index);
+        (IterProducer { slice: left }, IterProducer { slice: right })
+    }
+}
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct Chunks<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: Sync> Clone for Chunks<'data, T> {
+    fn clone(&self) -> Self {
+        Chunks { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Chunks<'data, T> {
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Chunks<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.slice.len(), self.chunk_size)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksProducer<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for ChunksProducer<'data, T> {
+    type Item = &'data [T];
+    type IntoIter = ::std::slice::Chunks<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
+        let (left, right) = self.slice.split_at(elem_index);
+        (
+            ChunksProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksExact<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+    rem: &'data [T],
+}
+
+impl<'data, T: Sync> ChunksExact<'data, T> {
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements.
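+    ///
+    /// A small illustrative example:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let v = [1, 2, 3, 4, 5];
+    /// let chunks = v.par_chunks_exact(2);
+    /// assert_eq!(chunks.remainder(), &[5]);
+    /// ```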
+    pub fn remainder(&self) -> &'data [T] {
+        self.rem
+    }
+}
+
+impl<'data, T: Sync> Clone for ChunksExact<'data, T> {
+    fn clone(&self) -> Self {
+        ChunksExact { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for ChunksExact<'data, T> {
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for ChunksExact<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len() / self.chunk_size
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksExactProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksExactProducer<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for ChunksExactProducer<'data, T> {
+    type Item = &'data [T];
+    type IntoIter = ::std::slice::ChunksExact<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks_exact(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = index * self.chunk_size;
+        let (left, right) = self.slice.split_at(elem_index);
+        (
+            ChunksExactProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksExactProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over immutable overlapping windows of a slice
+#[derive(Debug)]
+pub struct Windows<'data, T: Sync> {
+    window_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: Sync> Clone for Windows<'data, T> {
+    fn clone(&self) -> Self {
+        Windows { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        assert!(self.window_size >= 1);
+        self.slice.len().saturating_sub(self.window_size - 1)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(WindowsProducer {
+            window_size: self.window_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct WindowsProducer<'data, T: Sync> {
+    window_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
+    type Item = &'data [T];
+    type IntoIter = ::std::slice::Windows<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.windows(self.window_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let left_index = cmp::min(self.slice.len(), index + (self.window_size - 1));
+        let left = &self.slice[..left_index];
+        let right = &self.slice[index..];
+        (
+            WindowsProducer {
+                window_size: self.window_size,
+                slice: left,
+            },
+            WindowsProducer {
+                window_size: self.window_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over mutable items in a slice
+#[derive(Debug)]
+pub struct IterMut<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
+    type Item = &'data mut T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(IterMutProducer { slice: self.slice })
+    }
+}
+
+struct IterMutProducer<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
+    type Item = &'data mut T;
+    type IntoIter = ::std::slice::IterMut<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.iter_mut()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.slice.split_at_mut(index);
+        (
+            IterMutProducer { slice: left },
+            IterMutProducer { slice: right },
+        )
+    }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksMut<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for ChunksMut<'data, T> {
+    type Item = &'data mut [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksMut<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.slice.len(), self.chunk_size)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksMutProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksMutProducer<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for ChunksMutProducer<'data, T> {
+    type Item = &'data mut [T];
+    type IntoIter = ::std::slice::ChunksMut<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks_mut(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
+        let (left, right) = self.slice.split_at_mut(elem_index);
+        (
+            ChunksMutProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksMutProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksExactMut<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+    rem: &'data mut [T],
+}
+
+impl<'data, T: Send> ChunksExactMut<'data, T> {
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements.
+    ///
+    /// Note that this has to consume `self` to return the original lifetime of
+    /// the data, which prevents this from actually being used as a parallel
+    /// iterator since that also consumes. This method is provided for parity
+    /// with `std::slice::ChunksExactMut`, but consider calling `remainder()` or
+    /// `take_remainder()` as alternatives.
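+    ///
+    /// A small illustrative example:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut v = [1, 2, 3, 4, 5];
+    /// let rem = v.par_chunks_exact_mut(2).into_remainder();
+    /// rem[0] = 0;
+    /// assert_eq!(v, [1, 2, 3, 4, 0]);
+    /// ```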
+    pub fn into_remainder(self) -> &'data mut [T] {
+        self.rem
+    }
+
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements.
+    ///
+    /// Consider `take_remainder()` if you need access to the data with its
+    /// original lifetime, rather than borrowing through `&mut self` here.
+    pub fn remainder(&mut self) -> &mut [T] {
+        self.rem
+    }
+
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements. Subsequent calls will return an empty slice.
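+    ///
+    /// A small illustrative example:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut v = [1, 2, 3, 4, 5];
+    /// let mut chunks = v.par_chunks_exact_mut(2);
+    /// assert_eq!(chunks.take_remainder(), &[5]);
+    /// assert!(chunks.take_remainder().is_empty());
+    /// ```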
+    pub fn take_remainder(&mut self) -> &'data mut [T] {
+        std::mem::replace(&mut self.rem, &mut [])
+    }
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for ChunksExactMut<'data, T> {
+    type Item = &'data mut [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksExactMut<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len() / self.chunk_size
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksExactMutProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksExactMutProducer<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for ChunksExactMutProducer<'data, T> {
+    type Item = &'data mut [T];
+    type IntoIter = ::std::slice::ChunksExactMut<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks_exact_mut(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = index * self.chunk_size;
+        let (left, right) = self.slice.split_at_mut(elem_index);
+        (
+            ChunksExactMutProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksExactMutProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over slices separated by a predicate
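+///
+/// # Examples
+///
+/// A small example, assuming the usual `par_split` entry point on slices:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let products: Vec<i32> = [1, 2, 0, 3, 4, 0, 5]
+///     .par_split(|x| *x == 0)
+///     .map(|group| group.iter().product())
+///     .collect();
+/// assert_eq!(products, [2, 12, 5]);
+/// ```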
+pub struct Split<'data, T, P> {
+    slice: &'data [T],
+    separator: P,
+}
+
+impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
+    fn clone(&self) -> Self {
+        Split {
+            separator: self.separator.clone(),
+            ..*self
+        }
+    }
+}
+
+impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Split").field("slice", &self.slice).finish()
+    }
+}
+
+impl<'data, T, P> ParallelIterator for Split<'data, T, P>
+where
+    P: Fn(&T) -> bool + Sync + Send,
+    T: Sync,
+{
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer::new(self.slice, &self.separator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Implement support for `SplitProducer`.
+impl<'data, T, P> Fissile<P> for &'data [T]
+where
+    P: Fn(&T) -> bool,
+{
+    fn length(&self) -> usize {
+        self.len()
+    }
+
+    fn midpoint(&self, end: usize) -> usize {
+        end / 2
+    }
+
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
+        self[start..end].iter().position(separator)
+    }
+
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
+        self[..end].iter().rposition(separator)
+    }
+
+    fn split_once(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.split_at(index);
+        (left, &right[1..]) // skip the separator
+    }
+
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+        Self: Send,
+    {
+        let mut split = self.split(separator);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+}
+
+/// Parallel iterator over mutable slices separated by a predicate
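+///
+/// # Examples
+///
+/// A small example, assuming the usual `par_split_mut` entry point on mutable slices:
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let mut v = [1, 2, 3, 0, 4, 5, 0, 6];
+/// v.par_split_mut(|x| *x == 0)
+///     .for_each(|group| group.reverse());
+/// assert_eq!(v, [3, 2, 1, 0, 5, 4, 0, 6]);
+/// ```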
+pub struct SplitMut<'data, T, P> {
+    slice: &'data mut [T],
+    separator: P,
+}
+
+impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("SplitMut")
+            .field("slice", &self.slice)
+            .finish()
+    }
+}
+
+impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
+where
+    P: Fn(&T) -> bool + Sync + Send,
+    T: Send,
+{
+    type Item = &'data mut [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer::new(self.slice, &self.separator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Implement support for `SplitProducer`.
+impl<'data, T, P> Fissile<P> for &'data mut [T]
+where
+    P: Fn(&T) -> bool,
+{
+    fn length(&self) -> usize {
+        self.len()
+    }
+
+    fn midpoint(&self, end: usize) -> usize {
+        end / 2
+    }
+
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
+        self[start..end].iter().position(separator)
+    }
+
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
+        self[..end].iter().rposition(separator)
+    }
+
+    fn split_once(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.split_at_mut(index);
+        (left, &mut right[1..]) // skip the separator
+    }
+
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+        Self: Send,
+    {
+        let mut split = self.split_mut(separator);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+}
diff --git a/src/slice/quicksort.rs b/src/slice/quicksort.rs
new file mode 100644
index 0000000..b985073
--- /dev/null
+++ b/src/slice/quicksort.rs
@@ -0,0 +1,800 @@
+//! Parallel quicksort.
+//!
+//! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized.
+//! The only difference from the original is that calls to `recurse` are executed in parallel using
+//! `rayon_core::join`.
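+//!
+//! A minimal sketch of that recursion pattern (using a hypothetical
+//! `partition_index` helper; the real `recurse` below is considerably more
+//! involved):
+//!
+//! ```ignore
+//! fn sort<T: Ord + Send>(v: &mut [T]) {
+//!     if v.len() <= 1 {
+//!         return;
+//!     }
+//!     let mid = partition_index(v); // hypothetical: place a pivot and return its index
+//!     let (left, right) = v.split_at_mut(mid);
+//!     rayon_core::join(|| sort(left), || sort(right));
+//! }
+//! ```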
+
+use std::cmp;
+use std::mem;
+use std::ptr;
+
+/// When dropped, takes the value out of `Option` and writes it into `dest`.
+///
+/// This allows us to safely read the pivot into a stack-allocated variable for efficiency, and
+/// write it back into the slice after partitioning. This way we ensure that the write happens
+/// even if `is_less` panics in the meantime.
+struct WriteOnDrop<T> {
+    value: Option<T>,
+    dest: *mut T,
+}
+
+impl<T> Drop for WriteOnDrop<T> {
+    fn drop(&mut self) {
+        unsafe {
+            ptr::write(self.dest, self.value.take().unwrap());
+        }
+    }
+}
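+
+// A rough illustration of how this guard is used further below (`slot` and
+// `may_panic` are placeholders, not items defined in this file):
+//
+//     let guard = WriteOnDrop { value: Some(unsafe { ptr::read(slot) }), dest: slot };
+//     let pivot = guard.value.as_ref().unwrap();
+//     may_panic(pivot);   // even if this unwinds...
+//     drop(guard);        // ...`Drop` writes the pivot back into `*slot`.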
+
+/// Holds a value, but never drops it.
+struct NoDrop<T> {
+    value: Option<T>,
+}
+
+impl<T> Drop for NoDrop<T> {
+    fn drop(&mut self) {
+        mem::forget(self.value.take());
+    }
+}
+
+/// When dropped, copies from `src` into `dest`.
+struct CopyOnDrop<T> {
+    src: *mut T,
+    dest: *mut T,
+}
+
+impl<T> Drop for CopyOnDrop<T> {
+    fn drop(&mut self) {
+        unsafe {
+            ptr::copy_nonoverlapping(self.src, self.dest, 1);
+        }
+    }
+}
+
+/// Shifts the first element to the right until it encounters a greater or equal element.
+fn shift_head<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let len = v.len();
+    unsafe {
+        // If the first two elements are out-of-order...
+        if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
+            // Read the first element into a stack-allocated variable. If a following comparison
+            // operation panics, `hole` will get dropped and automatically write the element back
+            // into the slice.
+            let mut tmp = NoDrop {
+                value: Some(ptr::read(v.get_unchecked(0))),
+            };
+            let mut hole = CopyOnDrop {
+                src: tmp.value.as_mut().unwrap(),
+                dest: v.get_unchecked_mut(1),
+            };
+            ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
+
+            for i in 2..len {
+                if !is_less(v.get_unchecked(i), tmp.value.as_ref().unwrap()) {
+                    break;
+                }
+
+                // Move `i`-th element one place to the left, thus shifting the hole to the right.
+                ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
+                hole.dest = v.get_unchecked_mut(i);
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+}
+
+/// Shifts the last element to the left until it encounters a smaller or equal element.
+fn shift_tail<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let len = v.len();
+    unsafe {
+        // If the last two elements are out-of-order...
+        if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
+            // Read the last element into a stack-allocated variable. If a following comparison
+            // operation panics, `hole` will get dropped and automatically write the element back
+            // into the slice.
+            let mut tmp = NoDrop {
+                value: Some(ptr::read(v.get_unchecked(len - 1))),
+            };
+            let mut hole = CopyOnDrop {
+                src: tmp.value.as_mut().unwrap(),
+                dest: v.get_unchecked_mut(len - 2),
+            };
+            ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
+
+            for i in (0..len - 2).rev() {
+                if !is_less(&tmp.value.as_ref().unwrap(), v.get_unchecked(i)) {
+                    break;
+                }
+
+                // Move `i`-th element one place to the right, thus shifting the hole to the left.
+                ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
+                hole.dest = v.get_unchecked_mut(i);
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+}
+
+/// Partially sorts a slice by shifting several out-of-order elements around.
+///
+/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
+#[cold]
+fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &F) -> bool
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Maximum number of adjacent out-of-order pairs that will get shifted.
+    const MAX_STEPS: usize = 5;
+    // If the slice is shorter than this, don't shift any elements.
+    const SHORTEST_SHIFTING: usize = 50;
+
+    let len = v.len();
+    let mut i = 1;
+
+    for _ in 0..MAX_STEPS {
+        unsafe {
+            // Find the next pair of adjacent out-of-order elements.
+            while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
+                i += 1;
+            }
+        }
+
+        // Are we done?
+        if i == len {
+            return true;
+        }
+
+        // Don't shift elements on short arrays; that has a performance cost.
+        if len < SHORTEST_SHIFTING {
+            return false;
+        }
+
+        // Swap the found pair of elements. This puts them in correct order.
+        v.swap(i - 1, i);
+
+        // Shift the smaller element to the left.
+        shift_tail(&mut v[..i], is_less);
+        // Shift the greater element to the right.
+        shift_head(&mut v[i..], is_less);
+    }
+
+    // Didn't manage to sort the slice in the limited number of steps.
+    false
+}
+
+/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
+fn insertion_sort<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    for i in 1..v.len() {
+        shift_tail(&mut v[..=i], is_less);
+    }
+}
+
+/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
+#[cold]
+fn heapsort<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // This binary heap respects the invariant `parent >= child`.
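+    // For example, sifting down node 0 of `[1, 5, 3]`: the greater child is 5,
+    // which is swapped with the root, leaving `[5, 1, 3]`.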
+    let sift_down = |v: &mut [T], mut node| {
+        loop {
+            // Children of `node`:
+            let left = 2 * node + 1;
+            let right = 2 * node + 2;
+
+            // Choose the greater child.
+            let greater = if right < v.len() && is_less(&v[left], &v[right]) {
+                right
+            } else {
+                left
+            };
+
+            // Stop if the invariant holds at `node`.
+            if greater >= v.len() || !is_less(&v[node], &v[greater]) {
+                break;
+            }
+
+            // Swap `node` with the greater child, move one step down, and continue sifting.
+            v.swap(node, greater);
+            node = greater;
+        }
+    };
+
+    // Build the heap in linear time.
+    for i in (0..v.len() / 2).rev() {
+        sift_down(v, i);
+    }
+
+    // Pop maximal elements from the heap.
+    for i in (1..v.len()).rev() {
+        v.swap(0, i);
+        sift_down(&mut v[..i], 0);
+    }
+}
+
+/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
+/// to `pivot`.
+///
+/// Returns the number of elements smaller than `pivot`.
+///
+/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
+/// This idea is presented in the [BlockQuicksort][pdf] paper.
+///
+/// [pdf]: http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
+fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &F) -> usize
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Number of elements in a typical block.
+    const BLOCK: usize = 128;
+
+    // The partitioning algorithm repeats the following steps until completion:
+    //
+    // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
+    // 2. Trace a block from the right side to identify elements smaller than the pivot.
+    // 3. Exchange the identified elements between the left and right side.
+    //
+    // We keep the following variables for a block of elements:
+    //
+    // 1. `block` - Number of elements in the block.
+    // 2. `start` - Start pointer into the `offsets` array.
+    // 3. `end` - End pointer into the `offsets` array.
+    // 4. `offsets` - Indices of out-of-order elements within the block.
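+    //
+    // For example, with a pivot of 5 and a left block `[7, 1, 9, 5]`, the left
+    // scan records `offsets = [0, 2, 3]` (positions of elements `>= 5`); the
+    // swapping step then exchanges those with small elements found by the
+    // right-side scan.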
+
+    // The current block on the left side (from `l` to `l.offset(block_l)`).
+    let mut l = v.as_mut_ptr();
+    let mut block_l = BLOCK;
+    let mut start_l = ptr::null_mut();
+    let mut end_l = ptr::null_mut();
+    let mut offsets_l = [0u8; BLOCK];
+
+    // The current block on the right side (from `r.offset(-block_r)` to `r`).
+    let mut r = unsafe { l.add(v.len()) };
+    let mut block_r = BLOCK;
+    let mut start_r = ptr::null_mut();
+    let mut end_r = ptr::null_mut();
+    let mut offsets_r = [0u8; BLOCK];
+
+    // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
+    fn width<T>(l: *mut T, r: *mut T) -> usize {
+        assert!(mem::size_of::<T>() > 0);
+        (r as usize - l as usize) / mem::size_of::<T>()
+    }
+
+    loop {
+        // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
+        // some patch-up work in order to partition the remaining elements in between.
+        let is_done = width(l, r) <= 2 * BLOCK;
+
+        if is_done {
+            // Number of remaining elements (still not compared to the pivot).
+            let mut rem = width(l, r);
+            if start_l < end_l || start_r < end_r {
+                rem -= BLOCK;
+            }
+
+            // Adjust block sizes so that the left and right block don't overlap, but get perfectly
+            // aligned to cover the whole remaining gap.
+            if start_l < end_l {
+                block_r = rem;
+            } else if start_r < end_r {
+                block_l = rem;
+            } else {
+                block_l = rem / 2;
+                block_r = rem - block_l;
+            }
+            debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
+            debug_assert!(width(l, r) == block_l + block_r);
+        }
+
+        if start_l == end_l {
+            // Trace `block_l` elements from the left side.
+            start_l = offsets_l.as_mut_ptr();
+            end_l = offsets_l.as_mut_ptr();
+            let mut elem = l;
+
+            for i in 0..block_l {
+                unsafe {
+                    // Branchless comparison.
+                    *end_l = i as u8;
+                    end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
+                    elem = elem.offset(1);
+                }
+            }
+        }
+
+        if start_r == end_r {
+            // Trace `block_r` elements from the right side.
+            start_r = offsets_r.as_mut_ptr();
+            end_r = offsets_r.as_mut_ptr();
+            let mut elem = r;
+
+            for i in 0..block_r {
+                unsafe {
+                    // Branchless comparison.
+                    elem = elem.offset(-1);
+                    *end_r = i as u8;
+                    end_r = end_r.offset(is_less(&*elem, pivot) as isize);
+                }
+            }
+        }
+
+        // Number of out-of-order elements to swap between the left and right side.
+        let count = cmp::min(width(start_l, end_l), width(start_r, end_r));
+
+        if count > 0 {
+            macro_rules! left {
+                () => {
+                    l.offset(*start_l as isize)
+                };
+            }
+            macro_rules! right {
+                () => {
+                    r.offset(-(*start_r as isize) - 1)
+                };
+            }
+
+            // Instead of swapping one pair at a time, it is more efficient to perform a cyclic
+            // permutation. This is not strictly equivalent to swapping, but produces a similar
+            // result using fewer memory operations.
+            unsafe {
+                let tmp = ptr::read(left!());
+                ptr::copy_nonoverlapping(right!(), left!(), 1);
+
+                for _ in 1..count {
+                    start_l = start_l.offset(1);
+                    ptr::copy_nonoverlapping(left!(), right!(), 1);
+                    start_r = start_r.offset(1);
+                    ptr::copy_nonoverlapping(right!(), left!(), 1);
+                }
+
+                ptr::copy_nonoverlapping(&tmp, right!(), 1);
+                mem::forget(tmp);
+                start_l = start_l.offset(1);
+                start_r = start_r.offset(1);
+            }
+        }
+
+        if start_l == end_l {
+            // All out-of-order elements in the left block were moved. Move to the next block.
+            l = unsafe { l.add(block_l) };
+        }
+
+        if start_r == end_r {
+            // All out-of-order elements in the right block were moved. Move to the previous block.
+            r = unsafe { r.sub(block_r) };
+        }
+
+        if is_done {
+            break;
+        }
+    }
+
+    // All that remains now is at most one block (either the left or the right) with out-of-order
+    // elements that need to be moved. Such remaining elements can be simply shifted to the end
+    // within their block.
+
+    if start_l < end_l {
+        // The left block remains.
+        // Move its remaining out-of-order elements to the far right.
+        debug_assert_eq!(width(l, r), block_l);
+        while start_l < end_l {
+            unsafe {
+                end_l = end_l.offset(-1);
+                ptr::swap(l.offset(*end_l as isize), r.offset(-1));
+                r = r.offset(-1);
+            }
+        }
+        width(v.as_mut_ptr(), r)
+    } else if start_r < end_r {
+        // The right block remains.
+        // Move its remaining out-of-order elements to the far left.
+        debug_assert_eq!(width(l, r), block_r);
+        while start_r < end_r {
+            unsafe {
+                end_r = end_r.offset(-1);
+                ptr::swap(l, r.offset(-(*end_r as isize) - 1));
+                l = l.offset(1);
+            }
+        }
+        width(v.as_mut_ptr(), l)
+    } else {
+        // Nothing else to do, we're done.
+        width(v.as_mut_ptr(), l)
+    }
+}
+
+/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
+/// equal to `v[pivot]`.
+///
+/// Returns a tuple of:
+///
+/// 1. Number of elements smaller than `v[pivot]`.
+/// 2. True if `v` was already partitioned.
+fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &F) -> (usize, bool)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let (mid, was_partitioned) = {
+        // Place the pivot at the beginning of the slice.
+        v.swap(0, pivot);
+        let (pivot, v) = v.split_at_mut(1);
+        let pivot = &mut pivot[0];
+
+        // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
+        // operation panics, the pivot will be automatically written back into the slice.
+        let write_on_drop = WriteOnDrop {
+            value: unsafe { Some(ptr::read(pivot)) },
+            dest: pivot,
+        };
+        let pivot = write_on_drop.value.as_ref().unwrap();
+
+        // Find the first pair of out-of-order elements.
+        let mut l = 0;
+        let mut r = v.len();
+        unsafe {
+            // Find the first element greater than or equal to the pivot.
+            while l < r && is_less(v.get_unchecked(l), pivot) {
+                l += 1;
+            }
+
+            // Find the last element smaller than the pivot.
+            while l < r && !is_less(v.get_unchecked(r - 1), pivot) {
+                r -= 1;
+            }
+        }
+
+        (
+            l + partition_in_blocks(&mut v[l..r], pivot, is_less),
+            l >= r,
+        )
+
+        // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated
+        // variable) back into the slice where it originally was. This step is critical in ensuring
+        // safety!
+    };
+
+    // Place the pivot between the two partitions.
+    v.swap(0, mid);
+
+    (mid, was_partitioned)
+}
+
+/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
+///
+/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
+/// elements smaller than the pivot.
+fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &F) -> usize
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Place the pivot at the beginning of the slice.
+    v.swap(0, pivot);
+    let (pivot, v) = v.split_at_mut(1);
+    let pivot = &mut pivot[0];
+
+    // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
+    // operation panics, the pivot will be automatically written back into the slice.
+    let write_on_drop = WriteOnDrop {
+        value: unsafe { Some(ptr::read(pivot)) },
+        dest: pivot,
+    };
+    let pivot = write_on_drop.value.as_ref().unwrap();
+
+    // Now partition the slice.
+    let mut l = 0;
+    let mut r = v.len();
+    loop {
+        unsafe {
+            // Find the first element greater than the pivot.
+            while l < r && !is_less(pivot, v.get_unchecked(l)) {
+                l += 1;
+            }
+
+            // Find the last element equal to the pivot.
+            while l < r && is_less(pivot, v.get_unchecked(r - 1)) {
+                r -= 1;
+            }
+
+            // Are we done?
+            if l >= r {
+                break;
+            }
+
+            // Swap the found pair of out-of-order elements.
+            r -= 1;
+            ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r));
+            l += 1;
+        }
+    }
+
+    // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
+    l + 1
+
+    // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated variable)
+    // back into the slice where it originally was. This step is critical in ensuring safety!
+}
+
+/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
+/// partitions in quicksort.
+#[cold]
+fn break_patterns<T>(v: &mut [T]) {
+    let len = v.len();
+    if len >= 8 {
+        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
+        let mut random = len as u32;
+        let mut gen_u32 = || {
+            random ^= random << 13;
+            random ^= random >> 17;
+            random ^= random << 5;
+            random
+        };
+        let mut gen_usize = || {
+            if mem::size_of::<usize>() <= 4 {
+                gen_u32() as usize
+            } else {
+                ((u64::from(gen_u32()) << 32) | u64::from(gen_u32())) as usize
+            }
+        };
+
+        // Take random numbers modulo this number.
+        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
+        let modulus = len.next_power_of_two();
+
+        // Some pivot candidates will be found near this index. Let's randomize them.
+        let pos = len / 4 * 2;
+
+        for i in 0..3 {
+            // Generate a random number modulo `len`. However, in order to avoid costly operations
+            // we first take it modulo a power of two, and then decrease by `len` until it fits
+            // into the range `[0, len - 1]`.
+            let mut other = gen_usize() & (modulus - 1);
+
+            // `other` is guaranteed to be less than `2 * len`.
+            if other >= len {
+                other -= len;
+            }
+
+            v.swap(pos - 1 + i, other);
+        }
+    }
+}
+
+/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
+///
+/// Elements in `v` might be reordered in the process.
+fn choose_pivot<T, F>(v: &mut [T], is_less: &F) -> (usize, bool)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Minimum length to choose the median-of-medians method.
+    // Shorter slices use the simple median-of-three method.
+    const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
+    // Maximum number of swaps that can be performed in this function.
+    const MAX_SWAPS: usize = 4 * 3;
+
+    let len = v.len();
+
+    // Three indices near which we are going to choose a pivot.
+    let mut a = len / 4 * 1;
+    let mut b = len / 4 * 2;
+    let mut c = len / 4 * 3;
+
+    // Counts the total number of swaps we are about to perform while sorting indices.
+    let mut swaps = 0;
+
+    if len >= 8 {
+        // Swaps indices so that `v[a] <= v[b]`.
+        let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
+            if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
+                ptr::swap(a, b);
+                swaps += 1;
+            }
+        };
+
+        // Swaps indices so that `v[a] <= v[b] <= v[c]`.
+        let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
+            sort2(a, b);
+            sort2(b, c);
+            sort2(a, b);
+        };
+
+        if len >= SHORTEST_MEDIAN_OF_MEDIANS {
+            // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
+            let mut sort_adjacent = |a: &mut usize| {
+                let tmp = *a;
+                sort3(&mut (tmp - 1), a, &mut (tmp + 1));
+            };
+
+            // Find medians in the neighborhoods of `a`, `b`, and `c`.
+            sort_adjacent(&mut a);
+            sort_adjacent(&mut b);
+            sort_adjacent(&mut c);
+        }
+
+        // Find the median among `a`, `b`, and `c`.
+        sort3(&mut a, &mut b, &mut c);
+    }
+
+    if swaps < MAX_SWAPS {
+        (b, swaps == 0)
+    } else {
+        // The maximum number of swaps was performed. Chances are the slice is descending or mostly
+        // descending, so reversing will probably help sort it faster.
+        v.reverse();
+        (len - 1 - b, true)
+    }
+}
+
+/// Sorts `v` recursively.
+///
+/// If the slice had a predecessor in the original array, it is specified as `pred`.
+///
+/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
+/// this function will immediately switch to heapsort.
+fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &F, mut pred: Option<&'a mut T>, mut limit: usize)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Slices of up to this length get sorted using insertion sort.
+    const MAX_INSERTION: usize = 20;
+    // If both partitions are up to this length, we continue sequentially. This number is as small
+    // as possible while keeping the overhead of Rayon's task scheduling negligible.
+    const MAX_SEQUENTIAL: usize = 2000;
+
+    // True if the last partitioning was reasonably balanced.
+    let mut was_balanced = true;
+    // True if the last partitioning didn't shuffle elements (the slice was already partitioned).
+    let mut was_partitioned = true;
+
+    loop {
+        let len = v.len();
+
+        // Very short slices get sorted using insertion sort.
+        if len <= MAX_INSERTION {
+            insertion_sort(v, is_less);
+            return;
+        }
+
+        // If too many bad pivot choices were made, simply fall back to heapsort in order to
+        // guarantee `O(n log n)` worst-case.
+        if limit == 0 {
+            heapsort(v, is_less);
+            return;
+        }
+
+        // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
+        // some elements around. Hopefully we'll choose a better pivot this time.
+        if !was_balanced {
+            break_patterns(v);
+            limit -= 1;
+        }
+
+        // Choose a pivot and try guessing whether the slice is already sorted.
+        let (pivot, likely_sorted) = choose_pivot(v, is_less);
+
+        // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
+        // selection predicts the slice is likely already sorted...
+        if was_balanced && was_partitioned && likely_sorted {
+            // Try identifying several out-of-order elements and shifting them to correct
+            // positions. If the slice ends up being completely sorted, we're done.
+            if partial_insertion_sort(v, is_less) {
+                return;
+            }
+        }
+
+        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
+        // slice. Partition the slice into elements equal to and elements greater than the pivot.
+        // This case is usually hit when the slice contains many duplicate elements.
+        if let Some(ref p) = pred {
+            if !is_less(p, &v[pivot]) {
+                let mid = partition_equal(v, pivot, is_less);
+
+                // Continue sorting elements greater than the pivot.
+                v = &mut { v }[mid..];
+                continue;
+            }
+        }
+
+        // Partition the slice.
+        let (mid, was_p) = partition(v, pivot, is_less);
+        was_balanced = cmp::min(mid, len - mid) >= len / 8;
+        was_partitioned = was_p;
+
+        // Split the slice into `left`, `pivot`, and `right`.
+        let (left, right) = { v }.split_at_mut(mid);
+        let (pivot, right) = right.split_at_mut(1);
+        let pivot = &mut pivot[0];
+
+        if cmp::max(left.len(), right.len()) <= MAX_SEQUENTIAL {
+            // Recurse into the shorter side only in order to minimize the total number of recursive
+            // calls and consume less stack space. Then just continue with the longer side (this is
+            // akin to tail recursion).
+            if left.len() < right.len() {
+                recurse(left, is_less, pred, limit);
+                v = right;
+                pred = Some(pivot);
+            } else {
+                recurse(right, is_less, Some(pivot), limit);
+                v = left;
+            }
+        } else {
+            // Sort the left and right half in parallel.
+            rayon_core::join(
+                || recurse(left, is_less, pred, limit),
+                || recurse(right, is_less, Some(pivot), limit),
+            );
+            break;
+        }
+    }
+}
+
+/// Sorts `v` using pattern-defeating quicksort in parallel.
+///
+/// The algorithm is unstable, in-place, and `O(n log n)` worst-case.
+pub(super) fn par_quicksort<T, F>(v: &mut [T], is_less: F)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Sorting has no meaningful behavior on zero-sized types.
+    if mem::size_of::<T>() == 0 {
+        return;
+    }
+
+    // Limit the number of imbalanced partitions to `floor(log2(len)) + 1`.
+    let limit = mem::size_of::<usize>() * 8 - v.len().leading_zeros() as usize;
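+    // (For example, a 64-bit target with `v.len() == 1000` gives `limit == 64 - 54 == 10`.)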
+
+    recurse(v, &is_less, None, limit);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::heapsort;
+    use rand::distributions::Uniform;
+    use rand::{thread_rng, Rng};
+
+    #[test]
+    fn test_heapsort() {
+        let rng = thread_rng();
+
+        for len in (0..25).chain(500..501) {
+            for &modulus in &[5, 10, 100] {
+                let dist = Uniform::new(0, modulus);
+                for _ in 0..100 {
+                    let v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                    // Test heapsort using `<` operator.
+                    let mut tmp = v.clone();
+                    heapsort(&mut tmp, &|a, b| a < b);
+                    assert!(tmp.windows(2).all(|w| w[0] <= w[1]));
+
+                    // Test heapsort using `>` operator.
+                    let mut tmp = v.clone();
+                    heapsort(&mut tmp, &|a, b| a > b);
+                    assert!(tmp.windows(2).all(|w| w[0] >= w[1]));
+                }
+            }
+        }
+
+        // Sort using a completely random comparison function.
+        // This will reorder the elements *somehow*, but won't panic.
+        let mut v: Vec<_> = (0..100).collect();
+        heapsort(&mut v, &|_, _| thread_rng().gen());
+        heapsort(&mut v, &|a, b| a < b);
+
+        for i in 0..v.len() {
+            assert_eq!(v[i], i);
+        }
+    }
+}
diff --git a/src/slice/test.rs b/src/slice/test.rs
new file mode 100644
index 0000000..97de7d8
--- /dev/null
+++ b/src/slice/test.rs
@@ -0,0 +1,148 @@
+#![cfg(test)]
+
+use crate::prelude::*;
+use rand::distributions::Uniform;
+use rand::seq::SliceRandom;
+use rand::{thread_rng, Rng};
+use std::cmp::Ordering::{Equal, Greater, Less};
+
+macro_rules! sort {
+    ($f:ident, $name:ident) => {
+        #[test]
+        fn $name() {
+            let mut rng = thread_rng();
+
+            for len in (0..25).chain(500..501) {
+                for &modulus in &[5, 10, 100] {
+                    let dist = Uniform::new(0, modulus);
+                    for _ in 0..100 {
+                        let v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                        // Test sort using `<` operator.
+                        let mut tmp = v.clone();
+                        tmp.$f(|a, b| a.cmp(b));
+                        assert!(tmp.windows(2).all(|w| w[0] <= w[1]));
+
+                        // Test sort using `>` operator.
+                        let mut tmp = v.clone();
+                        tmp.$f(|a, b| b.cmp(a));
+                        assert!(tmp.windows(2).all(|w| w[0] >= w[1]));
+                    }
+                }
+            }
+
+            // Test sort with many duplicates.
+            for &len in &[1_000, 10_000, 100_000] {
+                for &modulus in &[5, 10, 100, 10_000] {
+                    let dist = Uniform::new(0, modulus);
+                    let mut v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                    v.$f(|a, b| a.cmp(b));
+                    assert!(v.windows(2).all(|w| w[0] <= w[1]));
+                }
+            }
+
+            // Test sort with many pre-sorted runs.
+            for &len in &[1_000, 10_000, 100_000] {
+                let len_dist = Uniform::new(0, len);
+                for &modulus in &[5, 10, 1000, 50_000] {
+                    let dist = Uniform::new(0, modulus);
+                    let mut v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                    v.sort();
+                    v.reverse();
+
+                    for _ in 0..5 {
+                        let a = rng.sample(&len_dist);
+                        let b = rng.sample(&len_dist);
+                        if a < b {
+                            v[a..b].reverse();
+                        } else {
+                            v.swap(a, b);
+                        }
+                    }
+
+                    v.$f(|a, b| a.cmp(b));
+                    assert!(v.windows(2).all(|w| w[0] <= w[1]));
+                }
+            }
+
+            // Sort using a completely random comparison function.
+            // This will reorder the elements *somehow*, but won't panic.
+            let mut v: Vec<_> = (0..100).collect();
+            v.$f(|_, _| *[Less, Equal, Greater].choose(&mut thread_rng()).unwrap());
+            v.$f(|a, b| a.cmp(b));
+            for i in 0..v.len() {
+                assert_eq!(v[i], i);
+            }
+
+            // Should not panic.
+            [0i32; 0].$f(|a, b| a.cmp(b));
+            [(); 10].$f(|a, b| a.cmp(b));
+            [(); 100].$f(|a, b| a.cmp(b));
+
+            let mut v = [0xDEAD_BEEFu64];
+            v.$f(|a, b| a.cmp(b));
+            assert!(v == [0xDEAD_BEEF]);
+        }
+    };
+}
+
+sort!(par_sort_by, test_par_sort);
+sort!(par_sort_unstable_by, test_par_sort_unstable);
+
+#[test]
+fn test_par_sort_stability() {
+    for len in (2..25).chain(500..510).chain(50_000..50_010) {
+        for _ in 0..10 {
+            let mut counts = [0; 10];
+
+            // Create a vector like [(6, 1), (5, 1), (6, 2), ...],
+            // where the first item of each tuple is random, but
+            // the second item represents which occurrence of that
+            // number this element is, i.e. the second elements
+            // will occur in sorted order.
+            let mut rng = thread_rng();
+            let mut v: Vec<_> = (0..len)
+                .map(|_| {
+                    let n: usize = rng.gen_range(0, 10);
+                    counts[n] += 1;
+                    (n, counts[n])
+                })
+                .collect();
+
+            // Only sort on the first element, so an unstable sort
+            // may mix up the counts.
+            v.par_sort_by(|&(a, _), &(b, _)| a.cmp(&b));
+
+            // This comparison includes the count (the second item
+            // of the tuple), so elements with equal first items
+            // will need to be ordered with increasing
+            // counts... i.e. exactly asserting that this sort is
+            // stable.
+            assert!(v.windows(2).all(|w| w[0] <= w[1]));
+        }
+    }
+}
+
+#[test]
+fn test_par_chunks_exact_remainder() {
+    let v: &[i32] = &[0, 1, 2, 3, 4];
+    let c = v.par_chunks_exact(2);
+    assert_eq!(c.remainder(), &[4]);
+    assert_eq!(c.len(), 2);
+}
+
+#[test]
+fn test_par_chunks_exact_mut_remainder() {
+    let v: &mut [i32] = &mut [0, 1, 2, 3, 4];
+    let mut c = v.par_chunks_exact_mut(2);
+    assert_eq!(c.remainder(), &[4]);
+    assert_eq!(c.len(), 2);
+    assert_eq!(c.into_remainder(), &[4]);
+
+    let mut c = v.par_chunks_exact_mut(2);
+    assert_eq!(c.take_remainder(), &[4]);
+    assert_eq!(c.take_remainder(), &[]);
+    assert_eq!(c.len(), 2);
+}
diff --git a/src/split_producer.rs b/src/split_producer.rs
new file mode 100644
index 0000000..568657a
--- /dev/null
+++ b/src/split_producer.rs
@@ -0,0 +1,132 @@
+//! Common splitter for strings and slices
+//!
+//! This module is private, so these items are effectively `pub(super)`
+
+use crate::iter::plumbing::{Folder, UnindexedProducer};
+
+/// Common producer for splitting on a predicate.
+pub(super) struct SplitProducer<'p, P, V> {
+    data: V,
+    separator: &'p P,
+
+    /// Marks the endpoint beyond which we've already found no separators.
+    tail: usize,
+}
+
+/// Helper trait so `&str`, `&[T]`, and `&mut [T]` can share `SplitProducer`.
+pub(super) trait Fissile<P>: Sized {
+    fn length(&self) -> usize;
+    fn midpoint(&self, end: usize) -> usize;
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize>;
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize>;
+    fn split_once(self, index: usize) -> (Self, Self);
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+        Self: Send;
+}
+
+impl<'p, P, V> SplitProducer<'p, P, V>
+where
+    V: Fissile<P> + Send,
+{
+    pub(super) fn new(data: V, separator: &'p P) -> Self {
+        SplitProducer {
+            tail: data.length(),
+            data,
+            separator,
+        }
+    }
+
+    /// Common `fold_with` implementation, integrating `SplitTerminator`'s
+    /// need to sometimes skip its final empty item.
+    pub(super) fn fold_with<F>(self, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<V>,
+    {
+        let SplitProducer {
+            data,
+            separator,
+            tail,
+        } = self;
+
+        if tail == data.length() {
+            // No tail section, so just let `fold_splits` handle it.
+            data.fold_splits(separator, folder, skip_last)
+        } else if let Some(index) = data.rfind(separator, tail) {
+            // We found the last separator to complete the tail, so
+            // end with that slice after `fold_splits` finds the rest.
+            let (left, right) = data.split_once(index);
+            let folder = left.fold_splits(separator, folder, false);
+            if skip_last || folder.full() {
+                folder
+            } else {
+                folder.consume(right)
+            }
+        } else {
+            // We know there are no separators at all.  Return our whole data.
+            if skip_last {
+                folder
+            } else {
+                folder.consume(data)
+            }
+        }
+    }
+}
+
+impl<'p, P, V> UnindexedProducer for SplitProducer<'p, P, V>
+where
+    V: Fissile<P> + Send,
+    P: Sync,
+{
+    type Item = V;
+
+    fn split(self) -> (Self, Option<Self>) {
+        // Look forward for the separator, and failing that look backward.
+        let mid = self.data.midpoint(self.tail);
+        let index = match self.data.find(self.separator, mid, self.tail) {
+            Some(i) => Some(mid + i),
+            None => self.data.rfind(self.separator, mid),
+        };
+
+        if let Some(index) = index {
+            let len = self.data.length();
+            let (left, right) = self.data.split_once(index);
+
+            let (left_tail, right_tail) = if index < mid {
+                // If we scanned backwards to find the separator, everything in
+                // the right side is exhausted, with no separators left to find.
+                (index, 0)
+            } else {
+                let right_index = len - right.length();
+                (mid, self.tail - right_index)
+            };
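+            // For example, with `len == 10`, `self.tail == 10`, `mid == 5`, and a
+            // separator found at `index == 7`: the left half keeps `tail == 5` (up
+            // to `mid`), while the right half starts at element 8 and keeps `tail == 2`.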
+
+            // Create the left split before the separator.
+            let left = SplitProducer {
+                data: left,
+                tail: left_tail,
+                ..self
+            };
+
+            // Create the right split following the separator.
+            let right = SplitProducer {
+                data: right,
+                tail: right_tail,
+                ..self
+            };
+
+            (left, Some(right))
+        } else {
+            // The search is exhausted, no more separators...
+            (SplitProducer { tail: 0, ..self }, None)
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.fold_with(folder, false)
+    }
+}
diff --git a/src/str.rs b/src/str.rs
new file mode 100644
index 0000000..2fdaaa7
--- /dev/null
+++ b/src/str.rs
@@ -0,0 +1,874 @@
+//! Parallel iterator types for [strings][std::str]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
+//! reference a `Pattern` trait which is not visible outside this crate.
+//! This trait is intentionally kept private, for use only by Rayon itself.
+//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+//!
+//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
+//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
+//!
+//! [std::str]: https://doc.rust-lang.org/stable/std/str/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::split_producer::*;
+
+/// Test if a byte is the start of a UTF-8 character.
+/// (extracted from `str::is_char_boundary`)
+#[inline]
+fn is_char_boundary(b: u8) -> bool {
+    // This is bit magic equivalent to: b < 128 || b >= 192
+    (b as i8) >= -0x40
+}
+
+/// Find the index of a character boundary near the midpoint.
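+///
+/// For example, `"née"` is 4 bytes and its midpoint falls inside the 2-byte
+/// `é`, so this returns 3, the boundary just after `é`.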
+#[inline]
+fn find_char_midpoint(chars: &str) -> usize {
+    let mid = chars.len() / 2;
+
+    // We want to split near the midpoint, but we need to find an actual
+    // character boundary.  So we look at the raw bytes, first scanning
+    // forward from the midpoint for a boundary, then trying backward.
+    let (left, right) = chars.as_bytes().split_at(mid);
+    match right.iter().cloned().position(is_char_boundary) {
+        Some(i) => mid + i,
+        None => left
+            .iter()
+            .cloned()
+            .rposition(is_char_boundary)
+            .unwrap_or(0),
+    }
+}
+
+/// Try to split a string near the midpoint.
+#[inline]
+fn split(chars: &str) -> Option<(&str, &str)> {
+    let index = find_char_midpoint(chars);
+    if index > 0 {
+        Some(chars.split_at(index))
+    } else {
+        None
+    }
+}
+
+/// Parallel extensions for strings.
+pub trait ParallelString {
+    /// Returns a plain string slice, which is used to implement the rest of
+    /// the parallel methods.
+    fn as_parallel_string(&self) -> &str;
+
+    /// Returns a parallel iterator over the characters of a string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
+    /// assert_eq!(Some('o'), max);
+    /// ```
+    fn par_chars(&self) -> Chars<'_> {
+        Chars {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over the characters of a string, with their positions.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
+    /// assert_eq!(Some((1, 'e')), min);
+    /// ```
+    fn par_char_indices(&self) -> CharIndices<'_> {
+        CharIndices {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over the bytes of a string.
+    ///
+    /// Note that multi-byte sequences (for code points greater than `U+007F`)
+    /// are produced as separate items, but will not be split across threads.
+    /// If you would prefer an indexed iterator without that guarantee, consider
+    /// `string.as_bytes().par_iter().cloned()` instead.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let max = "hello".par_bytes().max();
+    /// assert_eq!(Some(b'o'), max);
+    /// ```
+    fn par_bytes(&self) -> Bytes<'_> {
+        Bytes {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over a string encoded as UTF-16.
+    ///
+    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
+    /// produced as separate items, but will not be split across threads.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let max = "hello".par_encode_utf16().max();
+    /// assert_eq!(Some(b'o' as u16), max);
+    ///
+    /// let text = "Zażółć gęślą jaźń";
+    /// let utf8_len = text.len();
+    /// let utf16_len = text.par_encode_utf16().count();
+    /// assert!(utf16_len <= utf8_len);
+    /// ```
+    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
+        EncodeUtf16 {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over substrings separated by a
+    /// given character or predicate, similar to `str::split`.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let total = "1, 2, buckle, 3, 4, door"
+    ///    .par_split(',')
+    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
+    ///    .sum();
+    /// assert_eq!(10, total);
+    /// ```
+    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
+        Split::new(self.as_parallel_string(), separator)
+    }
+
+    /// Returns a parallel iterator over substrings terminated by a
+    /// given character or predicate, similar to `str::split_terminator`.
+    /// It's equivalent to `par_split`, except it doesn't produce an empty
+    /// substring after a trailing terminator.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let parts: Vec<_> = "((1 + 3) * 2)"
+    ///     .par_split_terminator(|c| c == '(' || c == ')')
+    ///     .collect();
+    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
+    /// ```
+    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
+        SplitTerminator::new(self.as_parallel_string(), terminator)
+    }
+
+    /// Returns a parallel iterator over the lines of a string, where each line
+    /// ends with a newline (`\n`) optionally preceded by a carriage return
+    /// (`\r\n`). The final line ending is optional, and line endings are not
+    /// included in the output strings.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let lengths: Vec<_> = "hello world\nfizbuzz"
+    ///     .par_lines()
+    ///     .map(|l| l.len())
+    ///     .collect();
+    /// assert_eq!(vec![11, 7], lengths);
+    /// ```
+    fn par_lines(&self) -> Lines<'_> {
+        Lines(self.as_parallel_string())
+    }
+
+    /// Returns a parallel iterator over the sub-slices of a string that are
+    /// separated by any amount of whitespace.
+    ///
+    /// As with `str::split_whitespace`, 'whitespace' is defined according to
+    /// the terms of the Unicode Derived Core Property `White_Space`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let longest = "which is the longest word?"
+    ///     .par_split_whitespace()
+    ///     .max_by_key(|word| word.len());
+    /// assert_eq!(Some("longest"), longest);
+    /// ```
+    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
+        SplitWhitespace(self.as_parallel_string())
+    }
+
+    /// Returns a parallel iterator over substrings that match a
+    /// given character or predicate, similar to `str::matches`.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let total = "1, 2, buckle, 3, 4, door"
+    ///    .par_matches(char::is_numeric)
+    ///    .map(|s| s.parse::<i32>().expect("digit"))
+    ///    .sum();
+    /// assert_eq!(10, total);
+    /// ```
+    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
+        Matches {
+            chars: self.as_parallel_string(),
+            pattern,
+        }
+    }
+
+    /// Returns a parallel iterator over substrings that match a given character
+    /// or predicate, with their positions, similar to `str::match_indices`.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
+    ///    .par_match_indices(char::is_numeric)
+    ///    .collect();
+    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
+    /// ```
+    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
+        MatchIndices {
+            chars: self.as_parallel_string(),
+            pattern,
+        }
+    }
+}
+
+impl ParallelString for str {
+    #[inline]
+    fn as_parallel_string(&self) -> &str {
+        self
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// We hide the `Pattern` trait in a private module, as its API is not meant
+/// for general consumption.  If we could have privacy on trait items, then it
+/// would be nicer to have its basic existence and implementors public while
+/// keeping all of the methods private.
+mod private {
+    use crate::iter::plumbing::Folder;
+
+    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
+    /// `std::str::pattern::{Pattern, Searcher}`.
+    ///
+    /// Implementing this trait is not permitted outside of `rayon`.
+    pub trait Pattern: Sized + Sync + Send {
+        private_decl! {}
+        fn find_in(&self, haystack: &str) -> Option<usize>;
+        fn rfind_in(&self, haystack: &str) -> Option<usize>;
+        fn is_suffix_of(&self, haystack: &str) -> bool;
+        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
+        where
+            F: Folder<&'ch str>;
+        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
+        where
+            F: Folder<&'ch str>;
+        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
+        where
+            F: Folder<(usize, &'ch str)>;
+    }
+}
+use self::private::Pattern;
+
+#[inline]
+fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
+    move |(i, x)| (base + i, x)
+}
+
+impl Pattern for char {
+    private_impl! {}
+
+    #[inline]
+    fn find_in(&self, chars: &str) -> Option<usize> {
+        chars.find(*self)
+    }
+
+    #[inline]
+    fn rfind_in(&self, chars: &str) -> Option<usize> {
+        chars.rfind(*self)
+    }
+
+    #[inline]
+    fn is_suffix_of(&self, chars: &str) -> bool {
+        chars.ends_with(*self)
+    }
+
+    fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        let mut split = chars.split(*self);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+
+    fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        folder.consume_iter(chars.matches(*self))
+    }
+
+    fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
+    where
+        F: Folder<(usize, &'ch str)>,
+    {
+        folder.consume_iter(chars.match_indices(*self).map(offset(base)))
+    }
+}
+
+impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
+    private_impl! {}
+
+    fn find_in(&self, chars: &str) -> Option<usize> {
+        chars.find(self)
+    }
+
+    fn rfind_in(&self, chars: &str) -> Option<usize> {
+        chars.rfind(self)
+    }
+
+    fn is_suffix_of(&self, chars: &str) -> bool {
+        chars.ends_with(self)
+    }
+
+    fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        let mut split = chars.split(self);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+
+    fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        folder.consume_iter(chars.matches(self))
+    }
+
+    fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
+    where
+        F: Folder<(usize, &'ch str)>,
+    {
+        folder.consume_iter(chars.match_indices(self).map(offset(base)))
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over the characters of a string
+#[derive(Debug, Clone)]
+pub struct Chars<'ch> {
+    chars: &'ch str,
+}
+
+struct CharsProducer<'ch> {
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for Chars<'ch> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for CharsProducer<'ch> {
+    type Item = char;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                CharsProducer { chars: left },
+                Some(CharsProducer { chars: right }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.chars.chars())
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over the characters of a string, with their positions
+#[derive(Debug, Clone)]
+pub struct CharIndices<'ch> {
+    chars: &'ch str,
+}
+
+struct CharIndicesProducer<'ch> {
+    index: usize,
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for CharIndices<'ch> {
+    type Item = (usize, char);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = CharIndicesProducer {
+            index: 0,
+            chars: self.chars,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
+    type Item = (usize, char);
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                CharIndicesProducer {
+                    chars: left,
+                    ..self
+                },
+                Some(CharIndicesProducer {
+                    chars: right,
+                    index: self.index + left.len(),
+                }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        let base = self.index;
+        folder.consume_iter(self.chars.char_indices().map(offset(base)))
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over the bytes of a string
+#[derive(Debug, Clone)]
+pub struct Bytes<'ch> {
+    chars: &'ch str,
+}
+
+struct BytesProducer<'ch> {
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for Bytes<'ch> {
+    type Item = u8;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for BytesProducer<'ch> {
+    type Item = u8;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                BytesProducer { chars: left },
+                Some(BytesProducer { chars: right }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.chars.bytes())
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over a string encoded as UTF-16
+#[derive(Debug, Clone)]
+pub struct EncodeUtf16<'ch> {
+    chars: &'ch str,
+}
+
+struct EncodeUtf16Producer<'ch> {
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
+    type Item = u16;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
+    type Item = u16;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                EncodeUtf16Producer { chars: left },
+                Some(EncodeUtf16Producer { chars: right }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.chars.encode_utf16())
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings separated by a pattern
+#[derive(Debug, Clone)]
+pub struct Split<'ch, P: Pattern> {
+    chars: &'ch str,
+    separator: P,
+}
+
+impl<'ch, P: Pattern> Split<'ch, P> {
+    fn new(chars: &'ch str, separator: P) -> Self {
+        Split { chars, separator }
+    }
+}
+
+impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer::new(self.chars, &self.separator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Implement support for `SplitProducer`.
+impl<'ch, P: Pattern> Fissile<P> for &'ch str {
+    fn length(&self) -> usize {
+        self.len()
+    }
+
+    fn midpoint(&self, end: usize) -> usize {
+        // First find a suitable UTF-8 boundary.
+        find_char_midpoint(&self[..end])
+    }
+
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
+        separator.find_in(&self[start..end])
+    }
+
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
+        separator.rfind_in(&self[..end])
+    }
+
+    fn split_once(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.split_at(index);
+        let mut right_iter = right.chars();
+        right_iter.next(); // skip the separator
+        (left, right_iter.as_str())
+    }
+
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+    {
+        separator.fold_splits(self, folder, skip_last)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings separated by a terminator pattern
+#[derive(Debug, Clone)]
+pub struct SplitTerminator<'ch, P: Pattern> {
+    chars: &'ch str,
+    terminator: P,
+}
+
+struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
+    splitter: SplitProducer<'sep, P, &'ch str>,
+    skip_last: bool,
+}
+
+impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
+    fn new(chars: &'ch str, terminator: P) -> Self {
+        SplitTerminator { chars, terminator }
+    }
+}
+
+impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
+    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
+        SplitTerminatorProducer {
+            splitter: SplitProducer::new(chars, terminator),
+            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
+        }
+    }
+}
+
+impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
+    type Item = &'ch str;
+
+    fn split(mut self) -> (Self, Option<Self>) {
+        let (left, right) = self.splitter.split();
+        self.splitter = left;
+        let right = right.map(|right| {
+            let skip_last = self.skip_last;
+            self.skip_last = false;
+            SplitTerminatorProducer {
+                splitter: right,
+                skip_last,
+            }
+        });
+        (self, right)
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.splitter.fold_with(folder, self.skip_last)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over lines in a string
+#[derive(Debug, Clone)]
+pub struct Lines<'ch>(&'ch str);
+
+#[inline]
+fn no_carriage_return(line: &str) -> &str {
+    if line.ends_with('\r') {
+        &line[..line.len() - 1]
+    } else {
+        line
+    }
+}
+
+impl<'ch> ParallelIterator for Lines<'ch> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0
+            .par_split_terminator('\n')
+            .map(no_carriage_return)
+            .drive_unindexed(consumer)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings separated by whitespace
+#[derive(Debug, Clone)]
+pub struct SplitWhitespace<'ch>(&'ch str);
+
+#[inline]
+fn not_empty(s: &&str) -> bool {
+    !s.is_empty()
+}
+
+impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0
+            .par_split(char::is_whitespace)
+            .filter(not_empty)
+            .drive_unindexed(consumer)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings that match a pattern
+#[derive(Debug, Clone)]
+pub struct Matches<'ch, P: Pattern> {
+    chars: &'ch str,
+    pattern: P,
+}
+
+struct MatchesProducer<'ch, 'pat, P: Pattern> {
+    chars: &'ch str,
+    pattern: &'pat P,
+}
+
+impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = MatchesProducer {
+            chars: self.chars,
+            pattern: &self.pattern,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
+    type Item = &'ch str;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                MatchesProducer {
+                    chars: left,
+                    ..self
+                },
+                Some(MatchesProducer {
+                    chars: right,
+                    ..self
+                }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.pattern.fold_matches(self.chars, folder)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings that match a pattern, with their positions
+#[derive(Debug, Clone)]
+pub struct MatchIndices<'ch, P: Pattern> {
+    chars: &'ch str,
+    pattern: P,
+}
+
+struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
+    index: usize,
+    chars: &'ch str,
+    pattern: &'pat P,
+}
+
+impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
+    type Item = (usize, &'ch str);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = MatchIndicesProducer {
+            index: 0,
+            chars: self.chars,
+            pattern: &self.pattern,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
+    type Item = (usize, &'ch str);
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                MatchIndicesProducer {
+                    chars: left,
+                    ..self
+                },
+                Some(MatchIndicesProducer {
+                    chars: right,
+                    index: self.index + left.len(),
+                    ..self
+                }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.pattern
+            .fold_match_indices(self.chars, folder, self.index)
+    }
+}
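
As a quick orientation for the types above: the `&str` parallel iterators mirror their `std` counterparts, and the `Pattern` impls let `par_split`/`par_matches`/`par_match_indices` accept either a `char` or a `Fn(char) -> bool` closure. A minimal usage sketch (illustrative only, not part of this diff; it assumes the crate's `ParallelString` prelude methods):

    use rayon::prelude::*;

    fn demo() {
        let text = "alpha beta gamma";
        // Unindexed char iteration; splits happen on UTF-8 boundaries internally.
        let letters = text.par_chars().filter(|c| c.is_alphabetic()).count();
        // A closure pattern, as covered by the `Fn(char) -> bool` impl above.
        let words: Vec<&str> = text.par_split(char::is_whitespace).collect();
        // Match positions are offset back to the haystack via `offset(base)`.
        let spaces: Vec<(usize, &str)> = text.par_match_indices(' ').collect();
        assert_eq!(letters, 14);
        assert_eq!(words.len(), 3);
        assert_eq!(spaces, vec![(5, " "), (10, " ")]);
    }
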
diff --git a/src/string.rs b/src/string.rs
new file mode 100644
index 0000000..91e69f9
--- /dev/null
+++ b/src/string.rs
@@ -0,0 +1,48 @@
+//! This module contains the parallel iterator types for owned strings
+//! (`String`). You will rarely need to interact with it directly
+//! unless you need to name one of the iterator types.
+
+use crate::iter::plumbing::*;
+use crate::math::simplify_range;
+use crate::prelude::*;
+use std::ops::{Range, RangeBounds};
+
+impl<'a> ParallelDrainRange<usize> for &'a mut String {
+    type Iter = Drain<'a>;
+    type Item = char;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain {
+            range: simplify_range(range, self.len()),
+            string: self,
+        }
+    }
+}
+
+/// Draining parallel iterator that moves a range of characters out of a string,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a> {
+    string: &'a mut String,
+    range: Range<usize>,
+}
+
+impl<'a> ParallelIterator for Drain<'a> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.string[self.range.clone()]
+            .par_chars()
+            .drive_unindexed(consumer)
+    }
+}
+
+impl<'a> Drop for Drain<'a> {
+    fn drop(&mut self) {
+        // Remove the drained range.
+        self.string.drain(self.range.clone());
+    }
+}
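
A hypothetical sketch of how the `Drain` above behaves (illustrative, not part of this diff): the selected byte range is iterated in parallel as `char`s, and the `Drop` impl removes that range from the `String` afterwards, so the allocation's capacity is preserved.

    use rayon::prelude::*;

    fn demo() {
        let mut s = String::from("hello world");
        // The range must lie on char boundaries, as with `String::drain`.
        let drained: String = s.par_drain(0..6).collect();
        assert_eq!(drained, "hello ");
        assert_eq!(s, "world");
    }
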
diff --git a/src/vec.rs b/src/vec.rs
new file mode 100644
index 0000000..686673b
--- /dev/null
+++ b/src/vec.rs
@@ -0,0 +1,245 @@
+//! Parallel iterator types for [vectors][std::vec] (`Vec<T>`)
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::vec]: https://doc.rust-lang.org/stable/std/vec/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::math::simplify_range;
+use std::iter;
+use std::mem;
+use std::ops::{Range, RangeBounds};
+use std::ptr;
+use std::slice;
+
+/// Parallel iterator that moves out of a vector.
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    vec: Vec<T>,
+}
+
+impl<T: Send> IntoParallelIterator for Vec<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter { vec: self }
+    }
+}
+
+impl<T: Send> ParallelIterator for IntoIter<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for IntoIter<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.vec.len()
+    }
+
+    fn with_producer<CB>(mut self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // Drain every item, and then the vector only needs to free its buffer.
+        self.vec.par_drain(..).with_producer(callback)
+    }
+}
+
+impl<'data, T: Send> ParallelDrainRange<usize> for &'data mut Vec<T> {
+    type Iter = Drain<'data, T>;
+    type Item = T;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain {
+            orig_len: self.len(),
+            range: simplify_range(range, self.len()),
+            vec: self,
+        }
+    }
+}
+
+/// Draining parallel iterator that moves a range out of a vector, but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'data, T: Send> {
+    vec: &'data mut Vec<T>,
+    range: Range<usize>,
+    orig_len: usize,
+}
+
+impl<'data, T: Send> ParallelIterator for Drain<'data, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send> IndexedParallelIterator for Drain<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        unsafe {
+            // Make the vector forget about the drained items, and temporarily the tail too.
+            let start = self.range.start;
+            self.vec.set_len(start);
+
+            // Get a correct borrow lifetime, then extend it to the original length.
+            let mut slice = &mut self.vec[start..];
+            slice = slice::from_raw_parts_mut(slice.as_mut_ptr(), self.range.len());
+
+            // The producer will move or drop each item from the drained range.
+            callback.callback(DrainProducer::new(slice))
+        }
+    }
+}
+
+impl<'data, T: Send> Drop for Drain<'data, T> {
+    fn drop(&mut self) {
+        if self.range.len() > 0 {
+            let Range { start, end } = self.range;
+            if self.vec.len() != start {
+                // We must not have produced, so just call a normal drain to remove the items.
+                assert_eq!(self.vec.len(), self.orig_len);
+                self.vec.drain(start..end);
+            } else if end < self.orig_len {
+                // The producer was responsible for consuming the drained items.
+                // Move the tail items to their new place, then set the length to include them.
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr().add(start);
+                    let tail_ptr = self.vec.as_ptr().add(end);
+                    let tail_len = self.orig_len - end;
+                    ptr::copy(tail_ptr, ptr, tail_len);
+                    self.vec.set_len(start + tail_len);
+                }
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+pub(crate) struct DrainProducer<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> DrainProducer<'data, T> {
+    /// Creates a draining producer, which *moves* items from the slice.
+    ///
+    /// Unsafe because `!Copy` data must not be read after the borrow is released.
+    pub(crate) unsafe fn new(slice: &'data mut [T]) -> Self {
+        DrainProducer { slice }
+    }
+}
+
+impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
+    type Item = T;
+    type IntoIter = SliceDrain<'data, T>;
+
+    fn into_iter(mut self) -> Self::IntoIter {
+        // replace the slice so we don't drop it twice
+        let slice = mem::replace(&mut self.slice, &mut []);
+        SliceDrain {
+            iter: slice.iter_mut(),
+        }
+    }
+
+    fn split_at(mut self, index: usize) -> (Self, Self) {
+        // replace the slice so we don't drop it twice
+        let slice = mem::replace(&mut self.slice, &mut []);
+        let (left, right) = slice.split_at_mut(index);
+        unsafe { (DrainProducer::new(left), DrainProducer::new(right)) }
+    }
+}
+
+impl<'data, T: 'data + Send> Drop for DrainProducer<'data, T> {
+    fn drop(&mut self) {
+        // use `Drop for [T]`
+        unsafe { ptr::drop_in_place(self.slice) };
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+// like std::vec::Drain, without updating a source Vec
+pub(crate) struct SliceDrain<'data, T> {
+    iter: slice::IterMut<'data, T>,
+}
+
+impl<'data, T: 'data> Iterator for SliceDrain<'data, T> {
+    type Item = T;
+
+    fn next(&mut self) -> Option<T> {
+        let ptr = self.iter.next()?;
+        Some(unsafe { ptr::read(ptr) })
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+
+    fn count(self) -> usize {
+        self.iter.len()
+    }
+}
+
+impl<'data, T: 'data> DoubleEndedIterator for SliceDrain<'data, T> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let ptr = self.iter.next_back()?;
+        Some(unsafe { ptr::read(ptr) })
+    }
+}
+
+impl<'data, T: 'data> ExactSizeIterator for SliceDrain<'data, T> {
+    fn len(&self) -> usize {
+        self.iter.len()
+    }
+}
+
+impl<'data, T: 'data> iter::FusedIterator for SliceDrain<'data, T> {}
+
+impl<'data, T: 'data> Drop for SliceDrain<'data, T> {
+    fn drop(&mut self) {
+        // extract the iterator so we can use `Drop for [T]`
+        let iter = mem::replace(&mut self.iter, [].iter_mut());
+        unsafe { ptr::drop_in_place(iter.into_slice()) };
+    }
+}
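
A short sketch of the `Vec` iterators above (illustrative, not part of this diff): `par_drain` moves a sub-range out in parallel while keeping the buffer's capacity, and `into_par_iter` reuses the same machinery to drain the whole vector.

    use rayon::prelude::*;

    fn demo() {
        let mut v: Vec<i32> = (0..10).collect();
        let cap = v.capacity();
        let drained: Vec<i32> = v.par_drain(2..5).collect();
        assert_eq!(drained, vec![2, 3, 4]);
        // The tail was shifted down in `Drop`, and the capacity is unchanged.
        assert_eq!(v, vec![0, 1, 5, 6, 7, 8, 9]);
        assert_eq!(v.capacity(), cap);
        let sum: i32 = v.into_par_iter().sum();
        assert_eq!(sum, 36);
    }
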
diff --git a/tests/chars.rs b/tests/chars.rs
new file mode 100644
index 0000000..ac8e3f3
--- /dev/null
+++ b/tests/chars.rs
@@ -0,0 +1,39 @@
+use rayon::prelude::*;
+use std::char;
+
+#[test]
+fn half_open_correctness() {
+    let low = char::from_u32(0xD800 - 0x7).unwrap();
+    let high = char::from_u32(0xE000 + 0x7).unwrap();
+
+    let range = low..high;
+    let mut chars: Vec<char> = range.into_par_iter().collect();
+    chars.sort();
+
+    assert_eq!(
+        chars,
+        vec![
+            '\u{D7F9}', '\u{D7FA}', '\u{D7FB}', '\u{D7FC}', '\u{D7FD}', '\u{D7FE}', '\u{D7FF}',
+            '\u{E000}', '\u{E001}', '\u{E002}', '\u{E003}', '\u{E004}', '\u{E005}', '\u{E006}',
+        ]
+    );
+}
+
+#[test]
+fn closed_correctness() {
+    let low = char::from_u32(0xD800 - 0x7).unwrap();
+    let high = char::from_u32(0xE000 + 0x7).unwrap();
+
+    let range = low..=high;
+    let mut chars: Vec<char> = range.into_par_iter().collect();
+    chars.sort();
+
+    assert_eq!(
+        chars,
+        vec![
+            '\u{D7F9}', '\u{D7FA}', '\u{D7FB}', '\u{D7FC}', '\u{D7FD}', '\u{D7FE}', '\u{D7FF}',
+            '\u{E000}', '\u{E001}', '\u{E002}', '\u{E003}', '\u{E004}', '\u{E005}', '\u{E006}',
+            '\u{E007}',
+        ]
+    );
+}
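
For reference, the expected lengths follow from the UTF-16 surrogate gap: `char` skips U+D800..=U+DFFF, so the half-open range covers 7 values below the gap plus 7 at or above U+E000 (14 total), and the inclusive range adds one more (15).
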
diff --git a/tests/clones.rs b/tests/clones.rs
new file mode 100644
index 0000000..fa93f8a
--- /dev/null
+++ b/tests/clones.rs
@@ -0,0 +1,186 @@
+use rayon::prelude::*;
+
+fn check<I>(iter: I)
+where
+    I: ParallelIterator + Clone,
+    I::Item: std::fmt::Debug + PartialEq,
+{
+    let a: Vec<_> = iter.clone().collect();
+    let b: Vec<_> = iter.collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn clone_binary_heap() {
+    use std::collections::BinaryHeap;
+    let heap: BinaryHeap<_> = (0..1000).collect();
+    check(heap.par_iter());
+    check(heap.into_par_iter());
+}
+
+#[test]
+fn clone_btree_map() {
+    use std::collections::BTreeMap;
+    let map: BTreeMap<_, _> = (0..1000).enumerate().collect();
+    check(map.par_iter());
+}
+
+#[test]
+fn clone_btree_set() {
+    use std::collections::BTreeSet;
+    let set: BTreeSet<_> = (0..1000).collect();
+    check(set.par_iter());
+}
+
+#[test]
+fn clone_hash_map() {
+    use std::collections::HashMap;
+    let map: HashMap<_, _> = (0..1000).enumerate().collect();
+    check(map.par_iter());
+}
+
+#[test]
+fn clone_hash_set() {
+    use std::collections::HashSet;
+    let set: HashSet<_> = (0..1000).collect();
+    check(set.par_iter());
+}
+
+#[test]
+fn clone_linked_list() {
+    use std::collections::LinkedList;
+    let list: LinkedList<_> = (0..1000).collect();
+    check(list.par_iter());
+    check(list.into_par_iter());
+}
+
+#[test]
+fn clone_vec_deque() {
+    use std::collections::VecDeque;
+    let deque: VecDeque<_> = (0..1000).collect();
+    check(deque.par_iter());
+    check(deque.into_par_iter());
+}
+
+#[test]
+fn clone_option() {
+    let option = Some(0);
+    check(option.par_iter());
+    check(option.into_par_iter());
+}
+
+#[test]
+fn clone_result() {
+    let result = Ok::<_, ()>(0);
+    check(result.par_iter());
+    check(result.into_par_iter());
+}
+
+#[test]
+fn clone_range() {
+    check((0..1000).into_par_iter());
+}
+
+#[test]
+fn clone_range_inclusive() {
+    check((0..=1000).into_par_iter());
+}
+
+#[test]
+fn clone_str() {
+    let s = include_str!("clones.rs");
+    check(s.par_chars());
+    check(s.par_lines());
+    check(s.par_split('\n'));
+    check(s.par_split_terminator('\n'));
+    check(s.par_split_whitespace());
+}
+
+#[test]
+fn clone_vec() {
+    let v: Vec<_> = (0..1000).collect();
+    check(v.par_iter());
+    check(v.par_chunks(42));
+    check(v.par_chunks_exact(42));
+    check(v.par_windows(42));
+    check(v.par_split(|x| x % 3 == 0));
+    check(v.into_par_iter());
+}
+
+#[test]
+fn clone_adaptors() {
+    let v: Vec<_> = (0..1000).map(Some).collect();
+    check(v.par_iter().chain(&v));
+    check(v.par_iter().cloned());
+    check(v.par_iter().copied());
+    check(v.par_iter().enumerate());
+    check(v.par_iter().filter(|_| true));
+    check(v.par_iter().filter_map(|x| *x));
+    check(v.par_iter().flat_map(|x| *x));
+    check(v.par_iter().flat_map_iter(|x| *x));
+    check(v.par_iter().flatten());
+    check(v.par_iter().flatten_iter());
+    check(v.par_iter().with_max_len(1).fold(|| 0, |x, _| x));
+    check(v.par_iter().with_max_len(1).fold_with(0, |x, _| x));
+    check(v.par_iter().with_max_len(1).try_fold(|| 0, |_, &x| x));
+    check(v.par_iter().with_max_len(1).try_fold_with(0, |_, &x| x));
+    check(v.par_iter().inspect(|_| ()));
+    check(v.par_iter().update(|_| ()));
+    check(v.par_iter().interleave(&v));
+    check(v.par_iter().interleave_shortest(&v));
+    check(v.par_iter().intersperse(&None));
+    check(v.par_iter().chunks(3));
+    check(v.par_iter().map(|x| x));
+    check(v.par_iter().map_with(0, |_, x| x));
+    check(v.par_iter().map_init(|| 0, |_, x| x));
+    check(v.par_iter().panic_fuse());
+    check(v.par_iter().positions(|_| true));
+    check(v.par_iter().rev());
+    check(v.par_iter().skip(1));
+    check(v.par_iter().take(1));
+    check(v.par_iter().cloned().while_some());
+    check(v.par_iter().with_max_len(1));
+    check(v.par_iter().with_min_len(1));
+    check(v.par_iter().zip(&v));
+    check(v.par_iter().zip_eq(&v));
+    check(v.par_iter().step_by(2));
+}
+
+#[test]
+fn clone_empty() {
+    check(rayon::iter::empty::<i32>());
+}
+
+#[test]
+fn clone_once() {
+    check(rayon::iter::once(10));
+}
+
+#[test]
+fn clone_repeat() {
+    let x: Option<i32> = None;
+    check(rayon::iter::repeat(x).while_some());
+    check(rayon::iter::repeatn(x, 1000));
+}
+
+#[test]
+fn clone_splitter() {
+    check(rayon::iter::split(0..1000, |x| (x, None)));
+}
+
+#[test]
+fn clone_multizip() {
+    let v: &Vec<_> = &(0..1000).collect();
+    check((v,).into_par_iter());
+    check((v, v).into_par_iter());
+    check((v, v, v).into_par_iter());
+    check((v, v, v, v).into_par_iter());
+    check((v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+}
diff --git a/tests/collect.rs b/tests/collect.rs
new file mode 100644
index 0000000..48b80f6
--- /dev/null
+++ b/tests/collect.rs
@@ -0,0 +1,111 @@
+use rayon::prelude::*;
+
+use std::panic;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use std::sync::Mutex;
+
+#[test]
+fn collect_drop_on_unwind() {
+    struct Recorddrop<'a>(i64, &'a Mutex<Vec<i64>>);
+
+    impl<'a> Drop for Recorddrop<'a> {
+        fn drop(&mut self) {
+            self.1.lock().unwrap().push(self.0);
+        }
+    }
+
+    let test_collect_panic = |will_panic: bool| {
+        let test_vec_len = 1024;
+        let panic_point = 740;
+
+        let mut inserts = Mutex::new(Vec::new());
+        let mut drops = Mutex::new(Vec::new());
+
+        let mut a = (0..test_vec_len).collect::<Vec<_>>();
+        let b = (0..test_vec_len).collect::<Vec<_>>();
+
+        let _result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+            let mut result = Vec::new();
+            a.par_iter_mut()
+                .zip(&b)
+                .map(|(&mut a, &b)| {
+                    if a > panic_point && will_panic {
+                        panic!("unwinding for test");
+                    }
+                    let elt = a + b;
+                    inserts.lock().unwrap().push(elt);
+                    Recorddrop(elt, &drops)
+                })
+                .collect_into_vec(&mut result);
+
+            // If we reach this point, this must pass
+            assert_eq!(a.len(), result.len());
+        }));
+
+        let inserts = inserts.get_mut().unwrap();
+        let drops = drops.get_mut().unwrap();
+        println!("{:?}", inserts);
+        println!("{:?}", drops);
+
+        assert_eq!(inserts.len(), drops.len(), "Incorrect number of drops");
+        // sort to normalize order
+        inserts.sort();
+        drops.sort();
+        assert_eq!(inserts, drops, "Incorrect elements were dropped");
+    };
+
+    for &should_panic in &[true, false] {
+        test_collect_panic(should_panic);
+    }
+}
+
+#[test]
+fn collect_drop_on_unwind_zst() {
+    static INSERTS: AtomicUsize = AtomicUsize::new(0);
+    static DROPS: AtomicUsize = AtomicUsize::new(0);
+
+    struct RecorddropZst;
+
+    impl Drop for RecorddropZst {
+        fn drop(&mut self) {
+            DROPS.fetch_add(1, Ordering::SeqCst);
+        }
+    }
+
+    let test_collect_panic = |will_panic: bool| {
+        INSERTS.store(0, Ordering::SeqCst);
+        DROPS.store(0, Ordering::SeqCst);
+
+        let test_vec_len = 1024;
+        let panic_point = 740;
+
+        let a = (0..test_vec_len).collect::<Vec<_>>();
+
+        let _result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+            let mut result = Vec::new();
+            a.par_iter()
+                .map(|&a| {
+                    if a > panic_point && will_panic {
+                        panic!("unwinding for test");
+                    }
+                    INSERTS.fetch_add(1, Ordering::SeqCst);
+                    RecorddropZst
+                })
+                .collect_into_vec(&mut result);
+
+            // If we reach this point, this must pass
+            assert_eq!(a.len(), result.len());
+        }));
+
+        let inserts = INSERTS.load(Ordering::SeqCst);
+        let drops = DROPS.load(Ordering::SeqCst);
+
+        assert_eq!(inserts, drops, "Incorrect number of drops");
+        assert!(will_panic || drops == test_vec_len)
+    };
+
+    for &should_panic in &[true, false] {
+        test_collect_panic(should_panic);
+    }
+}
diff --git a/tests/cross-pool.rs b/tests/cross-pool.rs
new file mode 100644
index 0000000..f0a2128
--- /dev/null
+++ b/tests/cross-pool.rs
@@ -0,0 +1,21 @@
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+#[test]
+fn cross_pool_busy() {
+    let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+    let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let n: i32 = 100;
+    let sum: i32 = pool1.install(move || {
+        // Each item will block on pool2, but pool1 can continue processing other work from the
+        // parallel iterator in the meantime. There's a chance that pool1 will still be awake to
+        // see the latch set without being tickled, and then it will drop that stack job. The latch
+        // internals must not assume that the job will still be alive after it's set!
+        (1..=n)
+            .into_par_iter()
+            .map(|i| pool2.install(move || i))
+            .sum()
+    });
+    assert_eq!(sum, n * (n + 1) / 2);
+}
diff --git a/tests/debug.rs b/tests/debug.rs
new file mode 100644
index 0000000..fb11110
--- /dev/null
+++ b/tests/debug.rs
@@ -0,0 +1,209 @@
+use rayon::prelude::*;
+use std::fmt::Debug;
+
+fn check<I>(iter: I)
+where
+    I: ParallelIterator + Debug,
+{
+    println!("{:?}", iter);
+}
+
+#[test]
+fn debug_binary_heap() {
+    use std::collections::BinaryHeap;
+    let mut heap: BinaryHeap<_> = (0..10).collect();
+    check(heap.par_iter());
+    check(heap.par_drain());
+    check(heap.into_par_iter());
+}
+
+#[test]
+fn debug_btree_map() {
+    use std::collections::BTreeMap;
+    let mut map: BTreeMap<_, _> = (0..10).enumerate().collect();
+    check(map.par_iter());
+    check(map.par_iter_mut());
+    check(map.into_par_iter());
+}
+
+#[test]
+fn debug_btree_set() {
+    use std::collections::BTreeSet;
+    let set: BTreeSet<_> = (0..10).collect();
+    check(set.par_iter());
+    check(set.into_par_iter());
+}
+
+#[test]
+fn debug_hash_map() {
+    use std::collections::HashMap;
+    let mut map: HashMap<_, _> = (0..10).enumerate().collect();
+    check(map.par_iter());
+    check(map.par_iter_mut());
+    check(map.par_drain());
+    check(map.into_par_iter());
+}
+
+#[test]
+fn debug_hash_set() {
+    use std::collections::HashSet;
+    let mut set: HashSet<_> = (0..10).collect();
+    check(set.par_iter());
+    check(set.par_drain());
+    check(set.into_par_iter());
+}
+
+#[test]
+fn debug_linked_list() {
+    use std::collections::LinkedList;
+    let mut list: LinkedList<_> = (0..10).collect();
+    check(list.par_iter());
+    check(list.par_iter_mut());
+    check(list.into_par_iter());
+}
+
+#[test]
+fn debug_vec_deque() {
+    use std::collections::VecDeque;
+    let mut deque: VecDeque<_> = (0..10).collect();
+    check(deque.par_iter());
+    check(deque.par_iter_mut());
+    check(deque.par_drain(..));
+    check(deque.into_par_iter());
+}
+
+#[test]
+fn debug_option() {
+    let mut option = Some(0);
+    check(option.par_iter());
+    check(option.par_iter_mut());
+    check(option.into_par_iter());
+}
+
+#[test]
+fn debug_result() {
+    let mut result = Ok::<_, ()>(0);
+    check(result.par_iter());
+    check(result.par_iter_mut());
+    check(result.into_par_iter());
+}
+
+#[test]
+fn debug_range() {
+    check((0..10).into_par_iter());
+}
+
+#[test]
+fn debug_range_inclusive() {
+    check((0..=10).into_par_iter());
+}
+
+#[test]
+fn debug_str() {
+    let s = "a b c d\ne f g";
+    check(s.par_chars());
+    check(s.par_lines());
+    check(s.par_split('\n'));
+    check(s.par_split_terminator('\n'));
+    check(s.par_split_whitespace());
+}
+
+#[test]
+fn debug_string() {
+    let mut s = "a b c d\ne f g".to_string();
+    s.par_drain(..);
+}
+
+#[test]
+fn debug_vec() {
+    let mut v: Vec<_> = (0..10).collect();
+    check(v.par_iter());
+    check(v.par_iter_mut());
+    check(v.par_chunks(42));
+    check(v.par_chunks_exact(42));
+    check(v.par_chunks_mut(42));
+    check(v.par_chunks_exact_mut(42));
+    check(v.par_windows(42));
+    check(v.par_split(|x| x % 3 == 0));
+    check(v.par_split_mut(|x| x % 3 == 0));
+    check(v.par_drain(..));
+    check(v.into_par_iter());
+}
+
+#[test]
+fn debug_adaptors() {
+    let v: Vec<_> = (0..10).collect();
+    check(v.par_iter().chain(&v));
+    check(v.par_iter().cloned());
+    check(v.par_iter().copied());
+    check(v.par_iter().enumerate());
+    check(v.par_iter().filter(|_| true));
+    check(v.par_iter().filter_map(Some));
+    check(v.par_iter().flat_map(Some));
+    check(v.par_iter().flat_map_iter(Some));
+    check(v.par_iter().map(Some).flatten());
+    check(v.par_iter().map(Some).flatten_iter());
+    check(v.par_iter().fold(|| 0, |x, _| x));
+    check(v.par_iter().fold_with(0, |x, _| x));
+    check(v.par_iter().try_fold(|| 0, |x, _| Some(x)));
+    check(v.par_iter().try_fold_with(0, |x, _| Some(x)));
+    check(v.par_iter().inspect(|_| ()));
+    check(v.par_iter().update(|_| ()));
+    check(v.par_iter().interleave(&v));
+    check(v.par_iter().interleave_shortest(&v));
+    check(v.par_iter().intersperse(&-1));
+    check(v.par_iter().chunks(3));
+    check(v.par_iter().map(|x| x));
+    check(v.par_iter().map_with(0, |_, x| x));
+    check(v.par_iter().map_init(|| 0, |_, x| x));
+    check(v.par_iter().panic_fuse());
+    check(v.par_iter().positions(|_| true));
+    check(v.par_iter().rev());
+    check(v.par_iter().skip(1));
+    check(v.par_iter().take(1));
+    check(v.par_iter().map(Some).while_some());
+    check(v.par_iter().with_max_len(1));
+    check(v.par_iter().with_min_len(1));
+    check(v.par_iter().zip(&v));
+    check(v.par_iter().zip_eq(&v));
+    check(v.par_iter().step_by(2));
+}
+
+#[test]
+fn debug_empty() {
+    check(rayon::iter::empty::<i32>());
+}
+
+#[test]
+fn debug_once() {
+    check(rayon::iter::once(10));
+}
+
+#[test]
+fn debug_repeat() {
+    let x: Option<i32> = None;
+    check(rayon::iter::repeat(x));
+    check(rayon::iter::repeatn(x, 10));
+}
+
+#[test]
+fn debug_splitter() {
+    check(rayon::iter::split(0..10, |x| (x, None)));
+}
+
+#[test]
+fn debug_multizip() {
+    let v: &Vec<_> = &(0..10).collect();
+    check((v,).into_par_iter());
+    check((v, v).into_par_iter());
+    check((v, v, v).into_par_iter());
+    check((v, v, v, v).into_par_iter());
+    check((v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+}
diff --git a/tests/intersperse.rs b/tests/intersperse.rs
new file mode 100644
index 0000000..aaa5b65
--- /dev/null
+++ b/tests/intersperse.rs
@@ -0,0 +1,60 @@
+use rayon::prelude::*;
+
+#[test]
+fn check_intersperse() {
+    let v: Vec<_> = (0..1000).into_par_iter().intersperse(-1).collect();
+    assert_eq!(v.len(), 1999);
+    for (i, x) in v.into_iter().enumerate() {
+        assert_eq!(x, if i % 2 == 0 { i as i32 / 2 } else { -1 });
+    }
+}
+
+#[test]
+fn check_intersperse_again() {
+    let v: Vec<_> = (0..1000)
+        .into_par_iter()
+        .intersperse(-1)
+        .intersperse(-2)
+        .collect();
+    assert_eq!(v.len(), 3997);
+    for (i, x) in v.into_iter().enumerate() {
+        let y = match i % 4 {
+            0 => i as i32 / 4,
+            2 => -1,
+            _ => -2,
+        };
+        assert_eq!(x, y);
+    }
+}
+
+#[test]
+fn check_intersperse_unindexed() {
+    let v: Vec<_> = (0..1000).map(|i| i.to_string()).collect();
+    let s = v.join(",");
+    let s2 = v.join(";");
+    let par: String = s.par_split(',').intersperse(";").collect();
+    assert_eq!(par, s2);
+}
+
+#[test]
+fn check_intersperse_producer() {
+    (0..1000)
+        .into_par_iter()
+        .intersperse(-1)
+        .zip_eq(0..1999)
+        .for_each(|(x, i)| {
+            assert_eq!(x, if i % 2 == 0 { i / 2 } else { -1 });
+        });
+}
+
+#[test]
+fn check_intersperse_rev() {
+    (0..1000)
+        .into_par_iter()
+        .intersperse(-1)
+        .zip_eq(0..1999)
+        .rev()
+        .for_each(|(x, i)| {
+            assert_eq!(x, if i % 2 == 0 { i / 2 } else { -1 });
+        });
+}
diff --git a/tests/issue671-unzip.rs b/tests/issue671-unzip.rs
new file mode 100644
index 0000000..c9af7e6
--- /dev/null
+++ b/tests/issue671-unzip.rs
@@ -0,0 +1,17 @@
+#![type_length_limit = "10000"]
+
+use rayon::prelude::*;
+
+#[test]
+fn type_length_limit() {
+    let input = vec![1, 2, 3, 4, 5];
+    let (indexes, (squares, cubes)): (Vec<_>, (Vec<_>, Vec<_>)) = input
+        .par_iter()
+        .map(|x| (x * x, x * x * x))
+        .enumerate()
+        .unzip();
+
+    drop(indexes);
+    drop(squares);
+    drop(cubes);
+}
diff --git a/tests/issue671.rs b/tests/issue671.rs
new file mode 100644
index 0000000..9d0953f
--- /dev/null
+++ b/tests/issue671.rs
@@ -0,0 +1,16 @@
+#![type_length_limit = "500000"]
+
+use rayon::prelude::*;
+
+#[test]
+fn type_length_limit() {
+    let _ = Vec::<Result<(), ()>>::new()
+        .into_par_iter()
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .collect::<Result<(), ()>>();
+}
diff --git a/tests/iter_panic.rs b/tests/iter_panic.rs
new file mode 100644
index 0000000..4885a28
--- /dev/null
+++ b/tests/iter_panic.rs
@@ -0,0 +1,52 @@
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use std::ops::Range;
+use std::panic::{self, UnwindSafe};
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+const ITER: Range<i32> = 0..0x1_0000;
+const PANIC: i32 = 0xC000;
+
+fn check(&i: &i32) {
+    if i == PANIC {
+        panic!("boom")
+    }
+}
+
+#[test]
+#[should_panic(expected = "boom")]
+fn iter_panic() {
+    ITER.into_par_iter().for_each(|i| check(&i));
+}
+
+#[test]
+fn iter_panic_fuse() {
+    // We only use a single thread in order to make the behavior
+    // of 'panic_fuse' deterministic
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    pool.install(|| {
+        fn count(iter: impl ParallelIterator + UnwindSafe) -> usize {
+            let count = AtomicUsize::new(0);
+            let result = panic::catch_unwind(|| {
+                iter.for_each(|_| {
+                    count.fetch_add(1, Ordering::Relaxed);
+                });
+            });
+            assert!(result.is_err());
+            count.into_inner()
+        }
+
+        // Without `panic_fuse()`, we'll reach every item except the panicking one.
+        let expected = ITER.len() - 1;
+        let iter = ITER.into_par_iter().with_max_len(1);
+        assert_eq!(count(iter.clone().inspect(check)), expected);
+
+        // With `panic_fuse()` anywhere in the chain, we'll reach fewer items.
+        assert!(count(iter.clone().inspect(check).panic_fuse()) < expected);
+        assert!(count(iter.clone().panic_fuse().inspect(check)) < expected);
+
+        // Try in reverse to be sure we hit the producer case.
+        assert!(count(iter.clone().panic_fuse().inspect(check).rev()) < expected);
+    });
+}
diff --git a/tests/named-threads.rs b/tests/named-threads.rs
new file mode 100644
index 0000000..fd1b0be
--- /dev/null
+++ b/tests/named-threads.rs
@@ -0,0 +1,24 @@
+use std::collections::HashSet;
+
+use rayon::prelude::*;
+use rayon::*;
+
+#[test]
+fn named_threads() {
+    ThreadPoolBuilder::new()
+        .thread_name(|i| format!("hello-name-test-{}", i))
+        .build_global()
+        .unwrap();
+
+    const N: usize = 10000;
+
+    let thread_names = (0..N)
+        .into_par_iter()
+        .flat_map(|_| ::std::thread::current().name().map(str::to_owned))
+        .collect::<HashSet<String>>();
+
+    let all_contains_name = thread_names
+        .iter()
+        .all(|name| name.starts_with("hello-name-test-"));
+    assert!(all_contains_name);
+}
diff --git a/tests/octillion.rs b/tests/octillion.rs
new file mode 100644
index 0000000..cff2b11
--- /dev/null
+++ b/tests/octillion.rs
@@ -0,0 +1,130 @@
+use rayon::prelude::*;
+
+const OCTILLION: u128 = 1_000_000_000_000_000_000_000_000_000;
+
+/// Produce a parallel iterator for 0u128..10²⁷
+fn octillion() -> rayon::range::Iter<u128> {
+    (0..OCTILLION).into_par_iter()
+}
+
+/// Produce a parallel iterator for 0u128..=10²⁷
+fn octillion_inclusive() -> rayon::range_inclusive::Iter<u128> {
+    (0..=OCTILLION).into_par_iter()
+}
+
+/// Produce a parallel iterator for 0u128..10²⁷ using `flat_map`
+fn octillion_flat() -> impl ParallelIterator<Item = u128> {
+    (0u32..1_000_000_000)
+        .into_par_iter()
+        .with_max_len(1_000)
+        .map(|i| u64::from(i) * 1_000_000_000)
+        .flat_map(|i| {
+            (0u32..1_000_000_000)
+                .into_par_iter()
+                .with_max_len(1_000)
+                .map(move |j| i + u64::from(j))
+        })
+        .map(|i| u128::from(i) * 1_000_000_000)
+        .flat_map(|i| {
+            (0u32..1_000_000_000)
+                .into_par_iter()
+                .with_max_len(1_000)
+                .map(move |j| i + u128::from(j))
+        })
+}
+
+// NOTE: `find_first` and `find_last` currently take too long on 32-bit targets,
+// because the `AtomicUsize` match position has much too limited resolution.
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_first_octillion() {
+    let x = octillion().find_first(|_| true);
+    assert_eq!(x, Some(0));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_first_octillion_inclusive() {
+    let x = octillion_inclusive().find_first(|_| true);
+    assert_eq!(x, Some(0));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_first_octillion_flat() {
+    let x = octillion_flat().find_first(|_| true);
+    assert_eq!(x, Some(0));
+}
+
+fn two_threads<F: Send + FnOnce() -> R, R: Send>(f: F) -> R {
+    // FIXME: If we don't use at least two threads, then we end up walking
+    // through the entire iterator sequentially, without the benefit of any
+    // short-circuiting.  We probably don't want testing to wait that long. ;)
+    let builder = rayon::ThreadPoolBuilder::new().num_threads(2);
+    let pool = builder.build().unwrap();
+
+    pool.install(f)
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_last_octillion() {
+    // It would be nice if `find_last` could prioritize the later splits,
+    // basically flipping the `join` args, without needing indexed `rev`.
+    // (or could we have an unindexed `rev`?)
+    let x = two_threads(|| octillion().find_last(|_| true));
+    assert_eq!(x, Some(OCTILLION - 1));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_last_octillion_inclusive() {
+    let x = two_threads(|| octillion_inclusive().find_last(|_| true));
+    assert_eq!(x, Some(OCTILLION));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_last_octillion_flat() {
+    let x = two_threads(|| octillion_flat().find_last(|_| true));
+    assert_eq!(x, Some(OCTILLION - 1));
+}
+
+#[test]
+fn find_any_octillion() {
+    let x = two_threads(|| octillion().find_any(|x| *x > OCTILLION / 2));
+    assert!(x.is_some());
+}
+
+#[test]
+fn find_any_octillion_flat() {
+    let x = two_threads(|| octillion_flat().find_any(|x| *x > OCTILLION / 2));
+    assert!(x.is_some());
+}
+
+#[test]
+fn filter_find_any_octillion() {
+    let x = two_threads(|| {
+        octillion()
+            .filter(|x| *x > OCTILLION / 2)
+            .find_any(|_| true)
+    });
+    assert!(x.is_some());
+}
+
+#[test]
+fn filter_find_any_octillion_flat() {
+    let x = two_threads(|| {
+        octillion_flat()
+            .filter(|x| *x > OCTILLION / 2)
+            .find_any(|_| true)
+    });
+    assert!(x.is_some());
+}
+
+#[test]
+fn fold_find_any_octillion_flat() {
+    let x = two_threads(|| octillion_flat().fold(|| (), |_, _| ()).find_any(|_| true));
+    assert!(x.is_some());
+}
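
For reference, `octillion_flat` composes three factors of 10⁹ via nested `flat_map` (10⁹ · 10⁹ · 10⁹ = 10²⁷); the intermediate indices fit in `u64`, and only the final multiplication needs `u128`.
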
diff --git a/tests/producer_split_at.rs b/tests/producer_split_at.rs
new file mode 100644
index 0000000..752bc3e
--- /dev/null
+++ b/tests/producer_split_at.rs
@@ -0,0 +1,344 @@
+use rayon::iter::plumbing::*;
+use rayon::prelude::*;
+
+/// Stress-test indexes for `Producer::split_at`.
+fn check<F, I>(expected: &[I::Item], mut f: F)
+where
+    F: FnMut() -> I,
+    I: IntoParallelIterator,
+    I::Iter: IndexedParallelIterator,
+    I::Item: PartialEq + std::fmt::Debug,
+{
+    map_triples(expected.len() + 1, |i, j, k| {
+        Split::forward(f(), i, j, k, expected);
+        Split::reverse(f(), i, j, k, expected);
+    });
+}
+
+fn map_triples<F>(end: usize, mut f: F)
+where
+    F: FnMut(usize, usize, usize),
+{
+    for i in 0..end {
+        for j in i..end {
+            for k in j..end {
+                f(i, j, k);
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Split {
+    i: usize,
+    j: usize,
+    k: usize,
+    reverse: bool,
+}
+
+impl Split {
+    fn forward<I>(iter: I, i: usize, j: usize, k: usize, expected: &[I::Item])
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        I::Item: PartialEq + std::fmt::Debug,
+    {
+        let result = iter.into_par_iter().with_producer(Split {
+            i,
+            j,
+            k,
+            reverse: false,
+        });
+        assert_eq!(result, expected);
+    }
+
+    fn reverse<I>(iter: I, i: usize, j: usize, k: usize, expected: &[I::Item])
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        I::Item: PartialEq + std::fmt::Debug,
+    {
+        let result = iter.into_par_iter().with_producer(Split {
+            i,
+            j,
+            k,
+            reverse: true,
+        });
+        assert!(result.iter().eq(expected.iter().rev()));
+    }
+}
+
+impl<T> ProducerCallback<T> for Split {
+    type Output = Vec<T>;
+
+    fn callback<P>(self, producer: P) -> Self::Output
+    where
+        P: Producer<Item = T>,
+    {
+        println!("{:?}", self);
+
+        // Splitting the outer indexes first gets us an arbitrary mid section,
+        // which we then split further to get full test coverage.
+        let (left, d) = producer.split_at(self.k);
+        let (a, mid) = left.split_at(self.i);
+        let (b, c) = mid.split_at(self.j - self.i);
+
+        let a = a.into_iter();
+        let b = b.into_iter();
+        let c = c.into_iter();
+        let d = d.into_iter();
+
+        check_len(&a, self.i);
+        check_len(&b, self.j - self.i);
+        check_len(&c, self.k - self.j);
+
+        let chain = a.chain(b).chain(c).chain(d);
+        if self.reverse {
+            chain.rev().collect()
+        } else {
+            chain.collect()
+        }
+    }
+}
+
+fn check_len<I: ExactSizeIterator>(iter: &I, len: usize) {
+    assert_eq!(iter.size_hint(), (len, Some(len)));
+    assert_eq!(iter.len(), len);
+}
+
+// **** Base Producers ****
+
+#[test]
+fn empty() {
+    let v = vec![42];
+    check(&v[..0], rayon::iter::empty);
+}
+
+#[test]
+fn once() {
+    let v = vec![42];
+    check(&v, || rayon::iter::once(42));
+}
+
+#[test]
+fn option() {
+    let v = vec![42];
+    check(&v, || Some(42));
+}
+
+#[test]
+fn range() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || 0..10);
+}
+
+#[test]
+fn range_inclusive() {
+    let v: Vec<_> = (0u16..=10).collect();
+    check(&v, || 0u16..=10);
+}
+
+#[test]
+fn repeatn() {
+    let v: Vec<_> = std::iter::repeat(1).take(5).collect();
+    check(&v, || rayon::iter::repeatn(1, 5));
+}
+
+#[test]
+fn slice_iter() {
+    let s: Vec<_> = (0..10).collect();
+    let v: Vec<_> = s.iter().collect();
+    check(&v, || &s);
+}
+
+#[test]
+fn slice_iter_mut() {
+    let mut s: Vec<_> = (0..10).collect();
+    let mut v: Vec<_> = s.clone();
+    let expected: Vec<_> = v.iter_mut().collect();
+
+    map_triples(expected.len() + 1, |i, j, k| {
+        Split::forward(s.par_iter_mut(), i, j, k, &expected);
+        Split::reverse(s.par_iter_mut(), i, j, k, &expected);
+    });
+}
+
+#[test]
+fn slice_chunks() {
+    let s: Vec<_> = (0..10).collect();
+    for len in 1..s.len() + 2 {
+        let v: Vec<_> = s.chunks(len).collect();
+        check(&v, || s.par_chunks(len));
+    }
+}
+
+#[test]
+fn slice_chunks_exact() {
+    let s: Vec<_> = (0..10).collect();
+    for len in 1..s.len() + 2 {
+        let v: Vec<_> = s.chunks_exact(len).collect();
+        check(&v, || s.par_chunks_exact(len));
+    }
+}
+
+#[test]
+fn slice_chunks_mut() {
+    let mut s: Vec<_> = (0..10).collect();
+    let mut v: Vec<_> = s.clone();
+    for len in 1..s.len() + 2 {
+        let expected: Vec<_> = v.chunks_mut(len).collect();
+        map_triples(expected.len() + 1, |i, j, k| {
+            Split::forward(s.par_chunks_mut(len), i, j, k, &expected);
+            Split::reverse(s.par_chunks_mut(len), i, j, k, &expected);
+        });
+    }
+}
+
+#[test]
+fn slice_chunks_exact_mut() {
+    let mut s: Vec<_> = (0..10).collect();
+    let mut v: Vec<_> = s.clone();
+    for len in 1..s.len() + 2 {
+        let expected: Vec<_> = v.chunks_exact_mut(len).collect();
+        map_triples(expected.len() + 1, |i, j, k| {
+            Split::forward(s.par_chunks_exact_mut(len), i, j, k, &expected);
+            Split::reverse(s.par_chunks_exact_mut(len), i, j, k, &expected);
+        });
+    }
+}
+
+#[test]
+fn slice_windows() {
+    let s: Vec<_> = (0..10).collect();
+    let v: Vec<_> = s.windows(2).collect();
+    check(&v, || s.par_windows(2));
+}
+
+#[test]
+fn vec() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.clone());
+}
+
+// **** Adaptors ****
+
+#[test]
+fn chain() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..5).into_par_iter().chain(5..10));
+}
+
+#[test]
+fn cloned() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().cloned());
+}
+
+#[test]
+fn copied() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().copied());
+}
+
+#[test]
+fn enumerate() {
+    let v: Vec<_> = (0..10).enumerate().collect();
+    check(&v, || (0..10).into_par_iter().enumerate());
+}
+
+#[test]
+fn step_by() {
+    let v: Vec<_> = (0..10).step_by(2).collect();
+    check(&v, || (0..10).into_par_iter().step_by(2))
+}
+
+#[test]
+fn step_by_unaligned() {
+    let v: Vec<_> = (0..10).step_by(3).collect();
+    check(&v, || (0..10).into_par_iter().step_by(3))
+}
+
+#[test]
+fn inspect() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().inspect(|_| ()));
+}
+
+#[test]
+fn update() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().update(|_| ()));
+}
+
+#[test]
+fn interleave() {
+    let v = [0, 10, 1, 11, 2, 12, 3, 4];
+    check(&v, || (0..5).into_par_iter().interleave(10..13));
+    check(&v[..6], || (0..3).into_par_iter().interleave(10..13));
+
+    let v = [0, 10, 1, 11, 2, 12, 13, 14];
+    check(&v, || (0..3).into_par_iter().interleave(10..15));
+}
+
+#[test]
+fn intersperse() {
+    let v = [0, -1, 1, -1, 2, -1, 3, -1, 4];
+    check(&v, || (0..5).into_par_iter().intersperse(-1));
+}
+
+#[test]
+fn chunks() {
+    let s: Vec<_> = (0..10).collect();
+    let v: Vec<_> = s.chunks(2).map(|c| c.to_vec()).collect();
+    check(&v, || s.par_iter().cloned().chunks(2));
+}
+
+#[test]
+fn map() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().map(Clone::clone));
+}
+
+#[test]
+fn map_with() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().map_with(vec![0], |_, &x| x));
+}
+
+#[test]
+fn map_init() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().map_init(|| vec![0], |_, &x| x));
+}
+
+#[test]
+fn panic_fuse() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().panic_fuse());
+}
+
+#[test]
+fn rev() {
+    let v: Vec<_> = (0..10).rev().collect();
+    check(&v, || (0..10).into_par_iter().rev());
+}
+
+#[test]
+fn with_max_len() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().with_max_len(1));
+}
+
+#[test]
+fn with_min_len() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().with_min_len(1));
+}
+
+#[test]
+fn zip() {
+    let v: Vec<_> = (0..10).zip(10..20).collect();
+    check(&v, || (0..10).into_par_iter().zip(10..20));
+    check(&v[..5], || (0..5).into_par_iter().zip(10..20));
+    check(&v[..5], || (0..10).into_par_iter().zip(10..15));
+}
diff --git a/tests/sort-panic-safe.rs b/tests/sort-panic-safe.rs
new file mode 100644
index 0000000..7c50505
--- /dev/null
+++ b/tests/sort-panic-safe.rs
@@ -0,0 +1,162 @@
+use rand::distributions::Uniform;
+use rand::{thread_rng, Rng};
+use rayon::prelude::*;
+use std::cell::Cell;
+use std::cmp::{self, Ordering};
+use std::panic;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::Relaxed;
+use std::thread;
+
+static VERSIONS: AtomicUsize = AtomicUsize::new(0);
+
+lazy_static::lazy_static! {
+    static ref DROP_COUNTS: Vec<AtomicUsize> = (0..20_000).map(|_| AtomicUsize::new(0)).collect();
+}
+
+#[derive(Clone, Eq)]
+struct DropCounter {
+    x: u32,
+    id: usize,
+    version: Cell<usize>,
+}
+
+impl PartialEq for DropCounter {
+    fn eq(&self, other: &Self) -> bool {
+        self.partial_cmp(other) == Some(Ordering::Equal)
+    }
+}
+
+impl PartialOrd for DropCounter {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.version.set(self.version.get() + 1);
+        other.version.set(other.version.get() + 1);
+        VERSIONS.fetch_add(2, Relaxed);
+        self.x.partial_cmp(&other.x)
+    }
+}
+
+impl Ord for DropCounter {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.partial_cmp(other).unwrap()
+    }
+}
+
+impl Drop for DropCounter {
+    fn drop(&mut self) {
+        DROP_COUNTS[self.id].fetch_add(1, Relaxed);
+        VERSIONS.fetch_sub(self.version.get(), Relaxed);
+    }
+}
+
+macro_rules! test {
+    ($input:ident, $func:ident) => {
+        let len = $input.len();
+
+        // Work out the total number of comparisons required to sort
+        // this array...
+        let count = AtomicUsize::new(0);
+        $input.to_owned().$func(|a, b| {
+            count.fetch_add(1, Relaxed);
+            a.cmp(b)
+        });
+
+        let mut panic_countdown = count.load(Relaxed);
+        let step = if len <= 100 {
+            1
+        } else {
+            cmp::max(1, panic_countdown / 10)
+        };
+
+        // ... and then panic after each `step` comparisons.
+        loop {
+            // Refresh the counters.
+            VERSIONS.store(0, Relaxed);
+            for i in 0..len {
+                DROP_COUNTS[i].store(0, Relaxed);
+            }
+
+            let v = $input.to_owned();
+            let _ = thread::spawn(move || {
+                let mut v = v;
+                let panic_countdown = AtomicUsize::new(panic_countdown);
+                v.$func(|a, b| {
+                    if panic_countdown.fetch_sub(1, Relaxed) == 1 {
+                        SILENCE_PANIC.with(|s| s.set(true));
+                        panic!();
+                    }
+                    a.cmp(b)
+                })
+            })
+            .join();
+
+            // Check that the number of things dropped is exactly
+            // what we expect (i.e. the contents of `v`).
+            for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
+                let count = c.load(Relaxed);
+                assert!(
+                    count == 1,
+                    "found drop count == {} for i == {}, len == {}",
+                    count,
+                    i,
+                    len
+                );
+            }
+
+            // Check that the most recent versions of values were dropped.
+            assert_eq!(VERSIONS.load(Relaxed), 0);
+
+            if panic_countdown < step {
+                break;
+            }
+            panic_countdown -= step;
+        }
+    };
+}
+
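+// Set just before an injected panic so the custom panic hook installed in
+// `sort_panic_safe` stays quiet for expected panics.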
+thread_local!(static SILENCE_PANIC: Cell<bool> = Cell::new(false));
+
+#[test]
+fn sort_panic_safe() {
+    let prev = panic::take_hook();
+    panic::set_hook(Box::new(move |info| {
+        if !SILENCE_PANIC.with(Cell::get) {
+            prev(info);
+        }
+    }));
+
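+    // Exercise a range of lengths and value ranges (`modulus`), with and
+    // without partially sorted runs in the input.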
+    for &len in &[1, 2, 3, 4, 5, 10, 20, 100, 500, 5_000, 20_000] {
+        let len_dist = Uniform::new(0, len);
+        for &modulus in &[5, 30, 1_000, 20_000] {
+            for &has_runs in &[false, true] {
+                let mut rng = thread_rng();
+                let mut input = (0..len)
+                    .map(|id| DropCounter {
+                        x: rng.gen_range(0, modulus),
+                        id,
+                        version: Cell::new(0),
+                    })
+                    .collect::<Vec<_>>();
+
+                if has_runs {
+                    for c in &mut input {
+                        c.x = c.id as u32;
+                    }
+
+                    for _ in 0..5 {
+                        let a = rng.sample(&len_dist);
+                        let b = rng.sample(&len_dist);
+                        if a < b {
+                            input[a..b].reverse();
+                        } else {
+                            input.swap(a, b);
+                        }
+                    }
+                }
+
+                test!(input, par_sort_by);
+                test!(input, par_sort_unstable_by);
+            }
+        }
+    }
+}
diff --git a/tests/str.rs b/tests/str.rs
new file mode 100644
index 0000000..0e1e35e
--- /dev/null
+++ b/tests/str.rs
@@ -0,0 +1,116 @@
+use rand::distributions::Standard;
+use rand::{Rng, SeedableRng};
+use rand_xorshift::XorShiftRng;
+use rayon::prelude::*;
+
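+// Fixed-seed RNG so these tests are deterministic across runs.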
+fn seeded_rng() -> XorShiftRng {
+    let mut seed = <XorShiftRng as SeedableRng>::Seed::default();
+    (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i);
+    XorShiftRng::from_seed(seed)
+}
+
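+// Each parallel string iterator should yield the same items as its
+// sequential counterpart.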
+#[test]
+pub fn execute_strings() {
+    let rng = seeded_rng();
+    let s: String = rng.sample_iter::<char, _>(&Standard).take(1024).collect();
+
+    let par_chars: String = s.par_chars().collect();
+    assert_eq!(s, par_chars);
+
+    let par_even: String = s.par_chars().filter(|&c| (c as u32) & 1 == 0).collect();
+    let ser_even: String = s.chars().filter(|&c| (c as u32) & 1 == 0).collect();
+    assert_eq!(par_even, ser_even);
+
+    // test `FromParallelIterator<&char> for String`
+    let vchars: Vec<char> = s.par_chars().collect();
+    let par_chars: String = vchars.par_iter().collect();
+    assert_eq!(s, par_chars);
+
+    let par_bytes: Vec<u8> = s.par_bytes().collect();
+    assert_eq!(s.as_bytes(), &*par_bytes);
+
+    let par_utf16: Vec<u16> = s.par_encode_utf16().collect();
+    let ser_utf16: Vec<u16> = s.encode_utf16().collect();
+    assert_eq!(par_utf16, ser_utf16);
+
+    let par_charind: Vec<_> = s.par_char_indices().collect();
+    let ser_charind: Vec<_> = s.char_indices().collect();
+    assert_eq!(par_charind, ser_charind);
+}
+
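+// Every parallel split/match adaptor should agree with the corresponding
+// `str` method on each test case below.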
+#[test]
+pub fn execute_strings_split() {
+    // char test cases from the examples in `str::split` etc.,
+    // plus a large self-test for good measure.
+    let tests = vec![
+        ("Mary had a little lamb", ' '),
+        ("", 'X'),
+        ("lionXXtigerXleopard", 'X'),
+        ("||||a||b|c", '|'),
+        ("(///)", '/'),
+        ("010", '0'),
+        ("    a  b c", ' '),
+        ("A.B.", '.'),
+        ("A..B..", '.'),
+        ("foo\r\nbar\n\nbaz\n", '\n'),
+        ("foo\nbar\n\r\nbaz", '\n'),
+        ("A few words", ' '),
+        (" Mary   had\ta\u{2009}little  \n\t lamb", ' '),
+        (include_str!("str.rs"), ' '),
+    ];
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.split(separator).collect();
+        let parallel: Vec<_> = string.par_split(separator).collect();
+        assert_eq!(serial, parallel);
+
+        let serial_fn: Vec<_> = string.split(|c| c == separator).collect();
+        let parallel_fn: Vec<_> = string.par_split(|c| c == separator).collect();
+        assert_eq!(serial_fn, parallel_fn);
+    }
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.split_terminator(separator).collect();
+        let parallel: Vec<_> = string.par_split_terminator(separator).collect();
+        assert_eq!(serial, parallel);
+    }
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.split_terminator(|c| c == separator).collect();
+        let parallel: Vec<_> = string.par_split_terminator(|c| c == separator).collect();
+        assert_eq!(serial, parallel);
+    }
+
+    for &(string, _) in &tests {
+        let serial: Vec<_> = string.lines().collect();
+        let parallel: Vec<_> = string.par_lines().collect();
+        assert_eq!(serial, parallel);
+    }
+
+    for &(string, _) in &tests {
+        let serial: Vec<_> = string.split_whitespace().collect();
+        let parallel: Vec<_> = string.par_split_whitespace().collect();
+        assert_eq!(serial, parallel);
+    }
+
+    // try matching separators too!
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.matches(separator).collect();
+        let parallel: Vec<_> = string.par_matches(separator).collect();
+        assert_eq!(serial, parallel);
+
+        let serial_fn: Vec<_> = string.matches(|c| c == separator).collect();
+        let parallel_fn: Vec<_> = string.par_matches(|c| c == separator).collect();
+        assert_eq!(serial_fn, parallel_fn);
+    }
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.match_indices(separator).collect();
+        let parallel: Vec<_> = string.par_match_indices(separator).collect();
+        assert_eq!(serial, parallel);
+
+        let serial_fn: Vec<_> = string.match_indices(|c| c == separator).collect();
+        let parallel_fn: Vec<_> = string.par_match_indices(|c| c == separator).collect();
+        assert_eq!(serial_fn, parallel_fn);
+    }
+}