Blame - src/sparse.rs - platform/external/rust/crates/regex-automata

blob: d18024b344c0ad1b66b509d9b73974a93bb88316 [file] [log] [blame]

Jakub Kotur	3bceaeb	2020-12-21 17:28:16 +0100	[diff] [blame^]	1	#[cfg(feature = "std")]
				2	use core::fmt;
				3	#[cfg(feature = "std")]
				4	use core::iter;
				5	use core::marker::PhantomData;
				6	use core::mem::size_of;
				7	#[cfg(feature = "std")]
				8	use std::collections::HashMap;
				9
				10	#[cfg(feature = "std")]
				11	use byteorder::{BigEndian, LittleEndian};
				12	use byteorder::{ByteOrder, NativeEndian};
				13
				14	use classes::ByteClasses;
				15	use dense;
				16	use dfa::DFA;
				17	#[cfg(feature = "std")]
				18	use error::{Error, Result};
				19	#[cfg(feature = "std")]
				20	use state_id::{dead_id, usize_to_state_id, write_state_id_bytes, StateID};
				21	#[cfg(not(feature = "std"))]
				22	use state_id::{dead_id, StateID};
				23
				24	/// A sparse table-based deterministic finite automaton (DFA).
				25	///
				26	/// In contrast to a [dense DFA](enum.DenseDFA.html), a sparse DFA uses a
				27	/// more space efficient representation for its transition table. Consequently,
				28	/// sparse DFAs can use much less memory than dense DFAs, but this comes at a
				29	/// price. In particular, reading the more space efficient transitions takes
				30	/// more work, and consequently, searching using a sparse DFA is typically
				31	/// slower than a dense DFA.
				32	///
				33	/// A sparse DFA can be built using the default configuration via the
				34	/// [`SparseDFA::new`](enum.SparseDFA.html#method.new) constructor. Otherwise,
				35	/// one can configure various aspects of a dense DFA via
				36	/// [`dense::Builder`](dense/struct.Builder.html), and then convert a dense
				37	/// DFA to a sparse DFA using
				38	/// [`DenseDFA::to_sparse`](enum.DenseDFA.html#method.to_sparse).
				39	///
				40	/// In general, a sparse DFA supports all the same operations as a dense DFA.
				41	///
				42	/// Making the choice between a dense and sparse DFA depends on your specific
				43	/// work load. If you can sacrifice a bit of search time performance, then a
				44	/// sparse DFA might be the best choice. In particular, while sparse DFAs are
				45	/// probably always slower than dense DFAs, you may find that they are easily
				46	/// fast enough for your purposes!
				47	///
				48	/// # State size
				49	///
				50	/// A `SparseDFA` has two type parameters, `T` and `S`. `T` corresponds to
				51	/// the type of the DFA's transition table while `S` corresponds to the
				52	/// representation used for the DFA's state identifiers as described by the
				53	/// [`StateID`](trait.StateID.html) trait. This type parameter is typically
				54	/// `usize`, but other valid choices provided by this crate include `u8`,
				55	/// `u16`, `u32` and `u64`. The primary reason for choosing a different state
				56	/// identifier representation than the default is to reduce the amount of
				57	/// memory used by a DFA. Note though, that if the chosen representation cannot
				58	/// accommodate the size of your DFA, then building the DFA will fail and
				59	/// return an error.
				60	///
				61	/// While the reduction in heap memory used by a DFA is one reason for choosing
				62	/// a smaller state identifier representation, another possible reason is for
				63	/// decreasing the serialization size of a DFA, as returned by
				64	/// [`to_bytes_little_endian`](enum.SparseDFA.html#method.to_bytes_little_endian),
				65	/// [`to_bytes_big_endian`](enum.SparseDFA.html#method.to_bytes_big_endian)
				66	/// or
				67	/// [`to_bytes_native_endian`](enum.DenseDFA.html#method.to_bytes_native_endian).
				68	///
				69	/// The type of the transition table is typically either `Vec<u8>` or `&[u8]`,
				70	/// depending on where the transition table is stored. Note that this is
				71	/// different than a dense DFA, whose transition table is typically
				72	/// `Vec<S>` or `&[S]`. The reason for this is that a sparse DFA always reads
				73	/// its transition table from raw bytes because the table is compactly packed.
				74	///
				75	/// # Variants
				76	///
				77	/// This DFA is defined as a non-exhaustive enumeration of different types of
				78	/// dense DFAs. All of the variants use the same internal representation
				79	/// for the transition table, but they vary in how the transition table is
				80	/// read. A DFA's specific variant depends on the configuration options set via
				81	/// [`dense::Builder`](dense/struct.Builder.html). The default variant is
				82	/// `ByteClass`.
				83	///
				84	/// # The `DFA` trait
				85	///
				86	/// This type implements the [`DFA`](trait.DFA.html) trait, which means it
				87	/// can be used for searching. For example:
				88	///
				89	/// ```
				90	/// use regex_automata::{DFA, SparseDFA};
				91	///
				92	/// # fn example() -> Result<(), regex_automata::Error> {
				93	/// let dfa = SparseDFA::new("foo[0-9]+")?;
				94	/// assert_eq!(Some(8), dfa.find(b"foo12345"));
				95	/// # Ok(()) }; example().unwrap()
				96	/// ```
				97	///
				98	/// The `DFA` trait also provides an assortment of other lower level methods
				99	/// for DFAs, such as `start_state` and `next_state`. While these are correctly
				100	/// implemented, it is an anti-pattern to use them in performance sensitive
				101	/// code on the `SparseDFA` type directly. Namely, each implementation requires
				102	/// a branch to determine which type of sparse DFA is being used. Instead,
				103	/// this branch should be pushed up a layer in the code since walking the
				104	/// transitions of a DFA is usually a hot path. If you do need to use these
				105	/// lower level methods in performance critical code, then you should match on
				106	/// the variants of this DFA and use each variant's implementation of the `DFA`
				107	/// trait directly.
				108	#[derive(Clone, Debug)]
				109	pub enum SparseDFA<T: AsRef<[u8]>, S: StateID = usize> {
				110	/// A standard DFA that does not use byte classes.
				111	Standard(Standard<T, S>),
				112	/// A DFA that shrinks its alphabet to a set of equivalence classes instead
				113	/// of using all possible byte values. Any two bytes belong to the same
				114	/// equivalence class if and only if they can be used interchangeably
				115	/// anywhere in the DFA while never discriminating between a match and a
				116	/// non-match.
				117	///
				118	/// Unlike dense DFAs, sparse DFAs do not tend to benefit nearly as much
				119	/// from using byte classes. In some cases, using byte classes can even
				120	/// marginally increase the size of a sparse DFA's transition table. The
				121	/// reason for this is that a sparse DFA already compacts each state's
				122	/// transitions separate from whether byte classes are used.
				123	ByteClass(ByteClass<T, S>),
				124	/// Hints that destructuring should not be exhaustive.
				125	///
				126	/// This enum may grow additional variants, so this makes sure clients
				127	/// don't count on exhaustive matching. (Otherwise, adding a new variant
				128	/// could break existing code.)
				129	#[doc(hidden)]
				130	__Nonexhaustive,
				131	}
				132
				133	#[cfg(feature = "std")]
				134	impl SparseDFA<Vec<u8>, usize> {
				135	/// Parse the given regular expression using a default configuration and
				136	/// return the corresponding sparse DFA.
				137	///
				138	/// The default configuration uses `usize` for state IDs and reduces the
				139	/// alphabet size by splitting bytes into equivalence classes. The
				140	/// resulting DFA is not minimized.
				141	///
				142	/// If you want a non-default configuration, then use the
				143	/// [`dense::Builder`](dense/struct.Builder.html)
				144	/// to set your own configuration, and then call
				145	/// [`DenseDFA::to_sparse`](enum.DenseDFA.html#method.to_sparse)
				146	/// to create a sparse DFA.
				147	///
				148	/// # Example
				149	///
				150	/// ```
				151	/// use regex_automata::{DFA, SparseDFA};
				152	///
				153	/// # fn example() -> Result<(), regex_automata::Error> {
				154	/// let dfa = SparseDFA::new("foo[0-9]+bar")?;
				155	/// assert_eq!(Some(11), dfa.find(b"foo12345bar"));
				156	/// # Ok(()) }; example().unwrap()
				157	/// ```
				158	pub fn new(pattern: &str) -> Result<SparseDFA<Vec<u8>, usize>> {
				159	dense::Builder::new()
				160	.build(pattern)
				161	.and_then(\|dense\| dense.to_sparse())
				162	}
				163	}
				164
				165	#[cfg(feature = "std")]
				166	impl<S: StateID> SparseDFA<Vec<u8>, S> {
				167	/// Create a new empty sparse DFA that never matches any input.
				168	///
				169	/// # Example
				170	///
				171	/// In order to build an empty DFA, callers must provide a type hint
				172	/// indicating their choice of state identifier representation.
				173	///
				174	/// ```
				175	/// use regex_automata::{DFA, SparseDFA};
				176	///
				177	/// # fn example() -> Result<(), regex_automata::Error> {
				178	/// let dfa: SparseDFA<Vec<u8>, usize> = SparseDFA::empty();
				179	/// assert_eq!(None, dfa.find(b""));
				180	/// assert_eq!(None, dfa.find(b"foo"));
				181	/// # Ok(()) }; example().unwrap()
				182	/// ```
				183	pub fn empty() -> SparseDFA<Vec<u8>, S> {
				184	dense::DenseDFA::empty().to_sparse().unwrap()
				185	}
				186
				187	pub(crate) fn from_dense_sized<T: AsRef<[S]>, A: StateID>(
				188	dfa: &dense::Repr<T, S>,
				189	) -> Result<SparseDFA<Vec<u8>, A>> {
				190	Repr::from_dense_sized(dfa).map(\|r\| r.into_sparse_dfa())
				191	}
				192	}
				193
				194	impl<T: AsRef<[u8]>, S: StateID> SparseDFA<T, S> {
				195	/// Cheaply return a borrowed version of this sparse DFA. Specifically, the
				196	/// DFA returned always uses `&[u8]` for its transition table while keeping
				197	/// the same state identifier representation.
				198	pub fn as_ref<'a>(&'a self) -> SparseDFA<&'a [u8], S> {
				199	match *self {
				200	SparseDFA::Standard(Standard(ref r)) => {
				201	SparseDFA::Standard(Standard(r.as_ref()))
				202	}
				203	SparseDFA::ByteClass(ByteClass(ref r)) => {
				204	SparseDFA::ByteClass(ByteClass(r.as_ref()))
				205	}
				206	SparseDFA::__Nonexhaustive => unreachable!(),
				207	}
				208	}
				209
				210	/// Return an owned version of this sparse DFA. Specifically, the DFA
				211	/// returned always uses `Vec<u8>` for its transition table while keeping
				212	/// the same state identifier representation.
				213	///
				214	/// Effectively, this returns a sparse DFA whose transition table lives
				215	/// on the heap.
				216	#[cfg(feature = "std")]
				217	pub fn to_owned(&self) -> SparseDFA<Vec<u8>, S> {
				218	match *self {
				219	SparseDFA::Standard(Standard(ref r)) => {
				220	SparseDFA::Standard(Standard(r.to_owned()))
				221	}
				222	SparseDFA::ByteClass(ByteClass(ref r)) => {
				223	SparseDFA::ByteClass(ByteClass(r.to_owned()))
				224	}
				225	SparseDFA::__Nonexhaustive => unreachable!(),
				226	}
				227	}
				228
				229	/// Returns the memory usage, in bytes, of this DFA.
				230	///
				231	/// The memory usage is computed based on the number of bytes used to
				232	/// represent this DFA's transition table. This typically corresponds to
				233	/// heap memory usage.
				234	///
				235	/// This does not include the stack size used up by this DFA. To
				236	/// compute that, used `std::mem::size_of::<SparseDFA>()`.
				237	pub fn memory_usage(&self) -> usize {
				238	self.repr().memory_usage()
				239	}
				240
				241	fn repr(&self) -> &Repr<T, S> {
				242	match *self {
				243	SparseDFA::Standard(ref r) => &r.0,
				244	SparseDFA::ByteClass(ref r) => &r.0,
				245	SparseDFA::__Nonexhaustive => unreachable!(),
				246	}
				247	}
				248	}
				249
				250	/// Routines for converting a sparse DFA to other representations, such as
				251	/// smaller state identifiers or raw bytes suitable for persistent storage.
				252	#[cfg(feature = "std")]
				253	impl<T: AsRef<[u8]>, S: StateID> SparseDFA<T, S> {
				254	/// Create a new sparse DFA whose match semantics are equivalent to
				255	/// this DFA, but attempt to use `u8` for the representation of state
				256	/// identifiers. If `u8` is insufficient to represent all state identifiers
				257	/// in this DFA, then this returns an error.
				258	///
				259	/// This is a convenience routine for `to_sized::<u8>()`.
				260	pub fn to_u8(&self) -> Result<SparseDFA<Vec<u8>, u8>> {
				261	self.to_sized()
				262	}
				263
				264	/// Create a new sparse DFA whose match semantics are equivalent to
				265	/// this DFA, but attempt to use `u16` for the representation of state
				266	/// identifiers. If `u16` is insufficient to represent all state
				267	/// identifiers in this DFA, then this returns an error.
				268	///
				269	/// This is a convenience routine for `to_sized::<u16>()`.
				270	pub fn to_u16(&self) -> Result<SparseDFA<Vec<u8>, u16>> {
				271	self.to_sized()
				272	}
				273
				274	/// Create a new sparse DFA whose match semantics are equivalent to
				275	/// this DFA, but attempt to use `u32` for the representation of state
				276	/// identifiers. If `u32` is insufficient to represent all state
				277	/// identifiers in this DFA, then this returns an error.
				278	///
				279	/// This is a convenience routine for `to_sized::<u32>()`.
				280	#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
				281	pub fn to_u32(&self) -> Result<SparseDFA<Vec<u8>, u32>> {
				282	self.to_sized()
				283	}
				284
				285	/// Create a new sparse DFA whose match semantics are equivalent to
				286	/// this DFA, but attempt to use `u64` for the representation of state
				287	/// identifiers. If `u64` is insufficient to represent all state
				288	/// identifiers in this DFA, then this returns an error.
				289	///
				290	/// This is a convenience routine for `to_sized::<u64>()`.
				291	#[cfg(target_pointer_width = "64")]
				292	pub fn to_u64(&self) -> Result<SparseDFA<Vec<u8>, u64>> {
				293	self.to_sized()
				294	}
				295
				296	/// Create a new sparse DFA whose match semantics are equivalent to
				297	/// this DFA, but attempt to use `A` for the representation of state
				298	/// identifiers. If `A` is insufficient to represent all state identifiers
				299	/// in this DFA, then this returns an error.
				300	///
				301	/// An alternative way to construct such a DFA is to use
				302	/// [`DenseDFA::to_sparse_sized`](enum.DenseDFA.html#method.to_sparse_sized).
				303	/// In general, picking the appropriate size upon initial construction of
				304	/// a sparse DFA is preferred, since it will do the conversion in one
				305	/// step instead of two.
				306	pub fn to_sized<A: StateID>(&self) -> Result<SparseDFA<Vec<u8>, A>> {
				307	self.repr().to_sized().map(\|r\| r.into_sparse_dfa())
				308	}
				309
				310	/// Serialize a sparse DFA to raw bytes in little endian format.
				311	///
				312	/// If the state identifier representation of this DFA has a size different
				313	/// than 1, 2, 4 or 8 bytes, then this returns an error. All
				314	/// implementations of `StateID` provided by this crate satisfy this
				315	/// requirement.
				316	pub fn to_bytes_little_endian(&self) -> Result<Vec<u8>> {
				317	self.repr().to_bytes::<LittleEndian>()
				318	}
				319
				320	/// Serialize a sparse DFA to raw bytes in big endian format.
				321	///
				322	/// If the state identifier representation of this DFA has a size different
				323	/// than 1, 2, 4 or 8 bytes, then this returns an error. All
				324	/// implementations of `StateID` provided by this crate satisfy this
				325	/// requirement.
				326	pub fn to_bytes_big_endian(&self) -> Result<Vec<u8>> {
				327	self.repr().to_bytes::<BigEndian>()
				328	}
				329
				330	/// Serialize a sparse DFA to raw bytes in native endian format.
				331	/// Generally, it is better to pick an explicit endianness using either
				332	/// `to_bytes_little_endian` or `to_bytes_big_endian`. This routine is
				333	/// useful in tests where the DFA is serialized and deserialized on the
				334	/// same platform.
				335	///
				336	/// If the state identifier representation of this DFA has a size different
				337	/// than 1, 2, 4 or 8 bytes, then this returns an error. All
				338	/// implementations of `StateID` provided by this crate satisfy this
				339	/// requirement.
				340	pub fn to_bytes_native_endian(&self) -> Result<Vec<u8>> {
				341	self.repr().to_bytes::<NativeEndian>()
				342	}
				343	}
				344
				345	impl<'a, S: StateID> SparseDFA<&'a [u8], S> {
				346	/// Deserialize a sparse DFA with a specific state identifier
				347	/// representation.
				348	///
				349	/// Deserializing a DFA using this routine will never allocate heap memory.
				350	/// This is also guaranteed to be a constant time operation that does not
				351	/// vary with the size of the DFA.
				352	///
				353	/// The bytes given should be generated by the serialization of a DFA with
				354	/// either the
				355	/// [`to_bytes_little_endian`](enum.DenseDFA.html#method.to_bytes_little_endian)
				356	/// method or the
				357	/// [`to_bytes_big_endian`](enum.DenseDFA.html#method.to_bytes_big_endian)
				358	/// endian, depending on the endianness of the machine you are
				359	/// deserializing this DFA from.
				360	///
				361	/// If the state identifier representation is `usize`, then deserialization
				362	/// is dependent on the pointer size. For this reason, it is best to
				363	/// serialize DFAs using a fixed size representation for your state
				364	/// identifiers, such as `u8`, `u16`, `u32` or `u64`.
				365	///
				366	/// # Panics
				367	///
				368	/// The bytes given should be trusted. In particular, if the bytes
				369	/// are not a valid serialization of a DFA, or if the endianness of the
				370	/// serialized bytes is different than the endianness of the machine that
				371	/// is deserializing the DFA, then this routine will panic. Moreover, it
				372	/// is possible for this deserialization routine to succeed even if the
				373	/// given bytes do not represent a valid serialized sparse DFA.
				374	///
				375	/// # Safety
				376	///
				377	/// This routine is unsafe because it permits callers to provide an
				378	/// arbitrary transition table with possibly incorrect transitions. While
				379	/// the various serialization routines will never return an incorrect
				380	/// transition table, there is no guarantee that the bytes provided here
				381	/// are correct. While deserialization does many checks (as documented
				382	/// above in the panic conditions), this routine does not check that the
				383	/// transition table is correct. Given an incorrect transition table, it is
				384	/// possible for the search routines to access out-of-bounds memory because
				385	/// of explicit bounds check elision.
				386	///
				387	/// # Example
				388	///
				389	/// This example shows how to serialize a DFA to raw bytes, deserialize it
				390	/// and then use it for searching. Note that we first convert the DFA to
				391	/// using `u16` for its state identifier representation before serializing
				392	/// it. While this isn't strictly necessary, it's good practice in order to
				393	/// decrease the size of the DFA and to avoid platform specific pitfalls
				394	/// such as differing pointer sizes.
				395	///
				396	/// ```
				397	/// use regex_automata::{DFA, DenseDFA, SparseDFA};
				398	///
				399	/// # fn example() -> Result<(), regex_automata::Error> {
				400	/// let sparse = SparseDFA::new("foo[0-9]+")?;
				401	/// let bytes = sparse.to_u16()?.to_bytes_native_endian()?;
				402	///
				403	/// let dfa: SparseDFA<&[u8], u16> = unsafe {
				404	/// SparseDFA::from_bytes(&bytes)
				405	/// };
				406	///
				407	/// assert_eq!(Some(8), dfa.find(b"foo12345"));
				408	/// # Ok(()) }; example().unwrap()
				409	/// ```
				410	pub unsafe fn from_bytes(buf: &'a [u8]) -> SparseDFA<&'a [u8], S> {
				411	Repr::from_bytes(buf).into_sparse_dfa()
				412	}
				413	}
				414
				415	impl<T: AsRef<[u8]>, S: StateID> DFA for SparseDFA<T, S> {
				416	type ID = S;
				417
				418	#[inline]
				419	fn start_state(&self) -> S {
				420	self.repr().start_state()
				421	}
				422
				423	#[inline]
				424	fn is_match_state(&self, id: S) -> bool {
				425	self.repr().is_match_state(id)
				426	}
				427
				428	#[inline]
				429	fn is_dead_state(&self, id: S) -> bool {
				430	self.repr().is_dead_state(id)
				431	}
				432
				433	#[inline]
				434	fn is_match_or_dead_state(&self, id: S) -> bool {
				435	self.repr().is_match_or_dead_state(id)
				436	}
				437
				438	#[inline]
				439	fn is_anchored(&self) -> bool {
				440	self.repr().is_anchored()
				441	}
				442
				443	#[inline]
				444	fn next_state(&self, current: S, input: u8) -> S {
				445	match *self {
				446	SparseDFA::Standard(ref r) => r.next_state(current, input),
				447	SparseDFA::ByteClass(ref r) => r.next_state(current, input),
				448	SparseDFA::__Nonexhaustive => unreachable!(),
				449	}
				450	}
				451
				452	#[inline]
				453	unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
				454	self.next_state(current, input)
				455	}
				456
				457	// We specialize the following methods because it lets us lift the
				458	// case analysis between the different types of sparse DFAs. Instead of
				459	// doing the case analysis for every transition, we do it once before
				460	// searching. For sparse DFAs, this doesn't seem to benefit performance as
				461	// much as it does for the dense DFAs, but it's easy to do so we might as
				462	// well do it.
				463
				464	#[inline]
				465	fn is_match_at(&self, bytes: &[u8], start: usize) -> bool {
				466	match *self {
				467	SparseDFA::Standard(ref r) => r.is_match_at(bytes, start),
				468	SparseDFA::ByteClass(ref r) => r.is_match_at(bytes, start),
				469	SparseDFA::__Nonexhaustive => unreachable!(),
				470	}
				471	}
				472
				473	#[inline]
				474	fn shortest_match_at(&self, bytes: &[u8], start: usize) -> Option<usize> {
				475	match *self {
				476	SparseDFA::Standard(ref r) => r.shortest_match_at(bytes, start),
				477	SparseDFA::ByteClass(ref r) => r.shortest_match_at(bytes, start),
				478	SparseDFA::__Nonexhaustive => unreachable!(),
				479	}
				480	}
				481
				482	#[inline]
				483	fn find_at(&self, bytes: &[u8], start: usize) -> Option<usize> {
				484	match *self {
				485	SparseDFA::Standard(ref r) => r.find_at(bytes, start),
				486	SparseDFA::ByteClass(ref r) => r.find_at(bytes, start),
				487	SparseDFA::__Nonexhaustive => unreachable!(),
				488	}
				489	}
				490
				491	#[inline]
				492	fn rfind_at(&self, bytes: &[u8], start: usize) -> Option<usize> {
				493	match *self {
				494	SparseDFA::Standard(ref r) => r.rfind_at(bytes, start),
				495	SparseDFA::ByteClass(ref r) => r.rfind_at(bytes, start),
				496	SparseDFA::__Nonexhaustive => unreachable!(),
				497	}
				498	}
				499	}
				500
				501	/// A standard sparse DFA that does not use premultiplication or byte classes.
				502	///
				503	/// Generally, it isn't necessary to use this type directly, since a
				504	/// `SparseDFA` can be used for searching directly. One possible reason why
				505	/// one might want to use this type directly is if you are implementing your
				506	/// own search routines by walking a DFA's transitions directly. In that case,
				507	/// you'll want to use this type (or any of the other DFA variant types)
				508	/// directly, since they implement `next_state` more efficiently.
				509	#[derive(Clone, Debug)]
				510	pub struct Standard<T: AsRef<[u8]>, S: StateID = usize>(Repr<T, S>);
				511
				512	impl<T: AsRef<[u8]>, S: StateID> DFA for Standard<T, S> {
				513	type ID = S;
				514
				515	#[inline]
				516	fn start_state(&self) -> S {
				517	self.0.start_state()
				518	}
				519
				520	#[inline]
				521	fn is_match_state(&self, id: S) -> bool {
				522	self.0.is_match_state(id)
				523	}
				524
				525	#[inline]
				526	fn is_dead_state(&self, id: S) -> bool {
				527	self.0.is_dead_state(id)
				528	}
				529
				530	#[inline]
				531	fn is_match_or_dead_state(&self, id: S) -> bool {
				532	self.0.is_match_or_dead_state(id)
				533	}
				534
				535	#[inline]
				536	fn is_anchored(&self) -> bool {
				537	self.0.is_anchored()
				538	}
				539
				540	#[inline]
				541	fn next_state(&self, current: S, input: u8) -> S {
				542	self.0.state(current).next(input)
				543	}
				544
				545	#[inline]
				546	unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
				547	self.next_state(current, input)
				548	}
				549	}
				550
				551	/// A sparse DFA that shrinks its alphabet.
				552	///
				553	/// Alphabet shrinking is achieved by using a set of equivalence classes
				554	/// instead of using all possible byte values. Any two bytes belong to the same
				555	/// equivalence class if and only if they can be used interchangeably anywhere
				556	/// in the DFA while never discriminating between a match and a non-match.
				557	///
				558	/// Unlike dense DFAs, sparse DFAs do not tend to benefit nearly as much from
				559	/// using byte classes. In some cases, using byte classes can even marginally
				560	/// increase the size of a sparse DFA's transition table. The reason for this
				561	/// is that a sparse DFA already compacts each state's transitions separate
				562	/// from whether byte classes are used.
				563	///
				564	/// Generally, it isn't necessary to use this type directly, since a
				565	/// `SparseDFA` can be used for searching directly. One possible reason why
				566	/// one might want to use this type directly is if you are implementing your
				567	/// own search routines by walking a DFA's transitions directly. In that case,
				568	/// you'll want to use this type (or any of the other DFA variant types)
				569	/// directly, since they implement `next_state` more efficiently.
				570	#[derive(Clone, Debug)]
				571	pub struct ByteClass<T: AsRef<[u8]>, S: StateID = usize>(Repr<T, S>);
				572
				573	impl<T: AsRef<[u8]>, S: StateID> DFA for ByteClass<T, S> {
				574	type ID = S;
				575
				576	#[inline]
				577	fn start_state(&self) -> S {
				578	self.0.start_state()
				579	}
				580
				581	#[inline]
				582	fn is_match_state(&self, id: S) -> bool {
				583	self.0.is_match_state(id)
				584	}
				585
				586	#[inline]
				587	fn is_dead_state(&self, id: S) -> bool {
				588	self.0.is_dead_state(id)
				589	}
				590
				591	#[inline]
				592	fn is_match_or_dead_state(&self, id: S) -> bool {
				593	self.0.is_match_or_dead_state(id)
				594	}
				595
				596	#[inline]
				597	fn is_anchored(&self) -> bool {
				598	self.0.is_anchored()
				599	}
				600
				601	#[inline]
				602	fn next_state(&self, current: S, input: u8) -> S {
				603	let input = self.0.byte_classes.get(input);
				604	self.0.state(current).next(input)
				605	}
				606
				607	#[inline]
				608	unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
				609	self.next_state(current, input)
				610	}
				611	}
				612
				613	/// The underlying representation of a sparse DFA. This is shared by all of
				614	/// the different variants of a sparse DFA.
				615	#[derive(Clone)]
				616	#[cfg_attr(not(feature = "std"), derive(Debug))]
				617	struct Repr<T: AsRef<[u8]>, S: StateID = usize> {
				618	anchored: bool,
				619	start: S,
				620	state_count: usize,
				621	max_match: S,
				622	byte_classes: ByteClasses,
				623	trans: T,
				624	}
				625
				626	impl<T: AsRef<[u8]>, S: StateID> Repr<T, S> {
				627	fn into_sparse_dfa(self) -> SparseDFA<T, S> {
				628	if self.byte_classes.is_singleton() {
				629	SparseDFA::Standard(Standard(self))
				630	} else {
				631	SparseDFA::ByteClass(ByteClass(self))
				632	}
				633	}
				634
				635	fn as_ref<'a>(&'a self) -> Repr<&'a [u8], S> {
				636	Repr {
				637	anchored: self.anchored,
				638	start: self.start,
				639	state_count: self.state_count,
				640	max_match: self.max_match,
				641	byte_classes: self.byte_classes.clone(),
				642	trans: self.trans(),
				643	}
				644	}
				645
				646	#[cfg(feature = "std")]
				647	fn to_owned(&self) -> Repr<Vec<u8>, S> {
				648	Repr {
				649	anchored: self.anchored,
				650	start: self.start,
				651	state_count: self.state_count,
				652	max_match: self.max_match,
				653	byte_classes: self.byte_classes.clone(),
				654	trans: self.trans().to_vec(),
				655	}
				656	}
				657
				658	/// Return a convenient representation of the given state.
				659	///
				660	/// This is marked as inline because it doesn't seem to get inlined
				661	/// otherwise, which leads to a fairly significant performance loss (~25%).
				662	#[inline]
				663	fn state<'a>(&'a self, id: S) -> State<'a, S> {
				664	let mut pos = id.to_usize();
				665	let ntrans = NativeEndian::read_u16(&self.trans()[pos..]) as usize;
				666	pos += 2;
				667	let input_ranges = &self.trans()[pos..pos + (ntrans * 2)];
				668	pos += 2 * ntrans;
				669	let next = &self.trans()[pos..pos + (ntrans * size_of::<S>())];
				670	State { _state_id_repr: PhantomData, ntrans, input_ranges, next }
				671	}
				672
				673	/// Return an iterator over all of the states in this DFA.
				674	///
				675	/// The iterator returned yields tuples, where the first element is the
				676	/// state ID and the second element is the state itself.
				677	#[cfg(feature = "std")]
				678	fn states<'a>(&'a self) -> StateIter<'a, T, S> {
				679	StateIter { dfa: self, id: dead_id() }
				680	}
				681
				682	fn memory_usage(&self) -> usize {
				683	self.trans().len()
				684	}
				685
				686	fn start_state(&self) -> S {
				687	self.start
				688	}
				689
				690	fn is_match_state(&self, id: S) -> bool {
				691	self.is_match_or_dead_state(id) && !self.is_dead_state(id)
				692	}
				693
				694	fn is_dead_state(&self, id: S) -> bool {
				695	id == dead_id()
				696	}
				697
				698	fn is_match_or_dead_state(&self, id: S) -> bool {
				699	id <= self.max_match
				700	}
				701
				702	fn is_anchored(&self) -> bool {
				703	self.anchored
				704	}
				705
				706	fn trans(&self) -> &[u8] {
				707	self.trans.as_ref()
				708	}
				709
				710	/// Create a new sparse DFA whose match semantics are equivalent to this
				711	/// DFA, but attempt to use `A` for the representation of state
				712	/// identifiers. If `A` is insufficient to represent all state identifiers
				713	/// in this DFA, then this returns an error.
				714	#[cfg(feature = "std")]
				715	fn to_sized<A: StateID>(&self) -> Result<Repr<Vec<u8>, A>> {
				716	// To build the new DFA, we proceed much like the initial construction
				717	// of the sparse DFA. Namely, since the state ID size is changing,
				718	// we don't actually know all of our state IDs until we've allocated
				719	// all necessary space. So we do one pass that allocates all of the
				720	// storage we need, and then another pass to fill in the transitions.
				721
				722	let mut trans = Vec::with_capacity(size_of::<A>() * self.state_count);
				723	let mut map: HashMap<S, A> = HashMap::with_capacity(self.state_count);
				724	for (old_id, state) in self.states() {
				725	let pos = trans.len();
				726	map.insert(old_id, usize_to_state_id(pos)?);
				727
				728	let n = state.ntrans;
				729	let zeros = 2 + (n * 2) + (n * size_of::<A>());
				730	trans.extend(iter::repeat(0).take(zeros));
				731
				732	NativeEndian::write_u16(&mut trans[pos..], n as u16);
				733	let (s, e) = (pos + 2, pos + 2 + (n * 2));
				734	trans[s..e].copy_from_slice(state.input_ranges);
				735	}
				736
				737	let mut new = Repr {
				738	anchored: self.anchored,
				739	start: map[&self.start],
				740	state_count: self.state_count,
				741	max_match: map[&self.max_match],
				742	byte_classes: self.byte_classes.clone(),
				743	trans,
				744	};
				745	for (&old_id, &new_id) in map.iter() {
				746	let old_state = self.state(old_id);
				747	let mut new_state = new.state_mut(new_id);
				748	for i in 0..new_state.ntrans {
				749	let next = map[&old_state.next_at(i)];
				750	new_state.set_next_at(i, usize_to_state_id(next.to_usize())?);
				751	}
				752	}
				753	new.start = map[&self.start];
				754	new.max_match = map[&self.max_match];
				755	Ok(new)
				756	}
				757
				758	/// Serialize a sparse DFA to raw bytes using the provided endianness.
				759	///
				760	/// If the state identifier representation of this DFA has a size different
				761	/// than 1, 2, 4 or 8 bytes, then this returns an error. All
				762	/// implementations of `StateID` provided by this crate satisfy this
				763	/// requirement.
				764	///
				765	/// Unlike dense DFAs, the result is not necessarily aligned since a
				766	/// sparse DFA's transition table is always read as a sequence of bytes.
				767	#[cfg(feature = "std")]
				768	fn to_bytes<A: ByteOrder>(&self) -> Result<Vec<u8>> {
				769	let label = b"rust-regex-automata-sparse-dfa\x00";
				770	let size =
				771	// For human readable label.
				772	label.len()
				773	// endiannes check, must be equal to 0xFEFF for native endian
				774	+ 2
				775	// For version number.
				776	+ 2
				777	// Size of state ID representation, in bytes.
				778	// Must be 1, 2, 4 or 8.
				779	+ 2
				780	// For DFA misc options. (Currently unused.)
				781	+ 2
				782	// For start state.
				783	+ 8
				784	// For state count.
				785	+ 8
				786	// For max match state.
				787	+ 8
				788	// For byte class map.
				789	+ 256
				790	// For transition table.
				791	+ self.trans().len();
				792
				793	let mut i = 0;
				794	let mut buf = vec![0; size];
				795
				796	// write label
				797	for &b in label {
				798	buf[i] = b;
				799	i += 1;
				800	}
				801	// endianness check
				802	A::write_u16(&mut buf[i..], 0xFEFF);
				803	i += 2;
				804	// version number
				805	A::write_u16(&mut buf[i..], 1);
				806	i += 2;
				807	// size of state ID
				808	let state_size = size_of::<S>();
				809	if ![1, 2, 4, 8].contains(&state_size) {
				810	return Err(Error::serialize(&format!(
				811	"state size of {} not supported, must be 1, 2, 4 or 8",
				812	state_size
				813	)));
				814	}
				815	A::write_u16(&mut buf[i..], state_size as u16);
				816	i += 2;
				817	// DFA misc options
				818	let mut options = 0u16;
				819	if self.anchored {
				820	options \|= dense::MASK_ANCHORED;
				821	}
				822	A::write_u16(&mut buf[i..], options);
				823	i += 2;
				824	// start state
				825	A::write_u64(&mut buf[i..], self.start.to_usize() as u64);
				826	i += 8;
				827	// state count
				828	A::write_u64(&mut buf[i..], self.state_count as u64);
				829	i += 8;
				830	// max match state
				831	A::write_u64(&mut buf[i..], self.max_match.to_usize() as u64);
				832	i += 8;
				833	// byte class map
				834	for b in (0..256).map(\|b\| b as u8) {
				835	buf[i] = self.byte_classes.get(b);
				836	i += 1;
				837	}
				838	// transition table
				839	for (_, state) in self.states() {
				840	A::write_u16(&mut buf[i..], state.ntrans as u16);
				841	i += 2;
				842	buf[i..i + (state.ntrans * 2)].copy_from_slice(state.input_ranges);
				843	i += state.ntrans * 2;
				844	for j in 0..state.ntrans {
				845	write_state_id_bytes::<A, _>(&mut buf[i..], state.next_at(j));
				846	i += size_of::<S>();
				847	}
				848	}
				849
				850	assert_eq!(size, i, "expected to consume entire buffer");
				851
				852	Ok(buf)
				853	}
				854	}
				855
				856	impl<'a, S: StateID> Repr<&'a [u8], S> {
				857	/// The implementation for deserializing a sparse DFA from raw bytes.
				858	unsafe fn from_bytes(mut buf: &'a [u8]) -> Repr<&'a [u8], S> {
				859	// skip over label
				860	match buf.iter().position(\|&b\| b == b'\x00') {
				861	None => panic!("could not find label"),
				862	Some(i) => buf = &buf[i + 1..],
				863	}
				864
				865	// check that current endianness is same as endianness of DFA
				866	let endian_check = NativeEndian::read_u16(buf);
				867	buf = &buf[2..];
				868	if endian_check != 0xFEFF {
				869	panic!(
				870	"endianness mismatch, expected 0xFEFF but got 0x{:X}. \
				871	are you trying to load a SparseDFA serialized with a \
				872	different endianness?",
				873	endian_check,
				874	);
				875	}
				876
				877	// check that the version number is supported
				878	let version = NativeEndian::read_u16(buf);
				879	buf = &buf[2..];
				880	if version != 1 {
				881	panic!(
				882	"expected version 1, but found unsupported version {}",
				883	version,
				884	);
				885	}
				886
				887	// read size of state
				888	let state_size = NativeEndian::read_u16(buf) as usize;
				889	if state_size != size_of::<S>() {
				890	panic!(
				891	"state size of SparseDFA ({}) does not match \
				892	requested state size ({})",
				893	state_size,
				894	size_of::<S>(),
				895	);
				896	}
				897	buf = &buf[2..];
				898
				899	// read miscellaneous options
				900	let opts = NativeEndian::read_u16(buf);
				901	buf = &buf[2..];
				902
				903	// read start state
				904	let start = S::from_usize(NativeEndian::read_u64(buf) as usize);
				905	buf = &buf[8..];
				906
				907	// read state count
				908	let state_count = NativeEndian::read_u64(buf) as usize;
				909	buf = &buf[8..];
				910
				911	// read max match state
				912	let max_match = S::from_usize(NativeEndian::read_u64(buf) as usize);
				913	buf = &buf[8..];
				914
				915	// read byte classes
				916	let byte_classes = ByteClasses::from_slice(&buf[..256]);
				917	buf = &buf[256..];
				918
				919	Repr {
				920	anchored: opts & dense::MASK_ANCHORED > 0,
				921	start,
				922	state_count,
				923	max_match,
				924	byte_classes,
				925	trans: buf,
				926	}
				927	}
				928	}
				929
				930	#[cfg(feature = "std")]
				931	impl<S: StateID> Repr<Vec<u8>, S> {
				932	/// The implementation for constructing a sparse DFA from a dense DFA.
				933	fn from_dense_sized<T: AsRef<[S]>, A: StateID>(
				934	dfa: &dense::Repr<T, S>,
				935	) -> Result<Repr<Vec<u8>, A>> {
				936	// In order to build the transition table, we need to be able to write
				937	// state identifiers for each of the "next" transitions in each state.
				938	// Our state identifiers correspond to the byte offset in the
				939	// transition table at which the state is encoded. Therefore, we do not
				940	// actually know what the state identifiers are until we've allocated
				941	// exactly as much space as we need for each state. Thus, construction
				942	// of the transition table happens in two passes.
				943	//
				944	// In the first pass, we fill out the shell of each state, which
				945	// includes the transition count, the input byte ranges and zero-filled
				946	// space for the transitions. In this first pass, we also build up a
				947	// map from the state identifier index of the dense DFA to the state
				948	// identifier in this sparse DFA.
				949	//
				950	// In the second pass, we fill in the transitions based on the map
				951	// built in the first pass.
				952
				953	let mut trans = Vec::with_capacity(size_of::<A>() * dfa.state_count());
				954	let mut remap: Vec<A> = vec![dead_id(); dfa.state_count()];
				955	for (old_id, state) in dfa.states() {
				956	let pos = trans.len();
				957
				958	remap[dfa.state_id_to_index(old_id)] = usize_to_state_id(pos)?;
				959	// zero-filled space for the transition count
				960	trans.push(0);
				961	trans.push(0);
				962
				963	let mut trans_count = 0;
				964	for (b1, b2, _) in state.sparse_transitions() {
				965	trans_count += 1;
				966	trans.push(b1);
				967	trans.push(b2);
				968	}
				969	// fill in the transition count
				970	NativeEndian::write_u16(&mut trans[pos..], trans_count);
				971
				972	// zero-fill the actual transitions
				973	let zeros = trans_count as usize * size_of::<A>();
				974	trans.extend(iter::repeat(0).take(zeros));
				975	}
				976
				977	let mut new = Repr {
				978	anchored: dfa.is_anchored(),
				979	start: remap[dfa.state_id_to_index(dfa.start_state())],
				980	state_count: dfa.state_count(),
				981	max_match: remap[dfa.state_id_to_index(dfa.max_match_state())],
				982	byte_classes: dfa.byte_classes().clone(),
				983	trans,
				984	};
				985	for (old_id, old_state) in dfa.states() {
				986	let new_id = remap[dfa.state_id_to_index(old_id)];
				987	let mut new_state = new.state_mut(new_id);
				988	let sparse = old_state.sparse_transitions();
				989	for (i, (_, _, next)) in sparse.enumerate() {
				990	let next = remap[dfa.state_id_to_index(next)];
				991	new_state.set_next_at(i, next);
				992	}
				993	}
				994	Ok(new)
				995	}
				996
				997	/// Return a convenient mutable representation of the given state.
				998	fn state_mut<'a>(&'a mut self, id: S) -> StateMut<'a, S> {
				999	let mut pos = id.to_usize();
				1000	let ntrans = NativeEndian::read_u16(&self.trans[pos..]) as usize;
				1001	pos += 2;
				1002
				1003	let size = (ntrans * 2) + (ntrans * size_of::<S>());
				1004	let ranges_and_next = &mut self.trans[pos..pos + size];
				1005	let (input_ranges, next) = ranges_and_next.split_at_mut(ntrans * 2);
				1006	StateMut { _state_id_repr: PhantomData, ntrans, input_ranges, next }
				1007	}
				1008	}
				1009
				1010	#[cfg(feature = "std")]
				1011	impl<T: AsRef<[u8]>, S: StateID> fmt::Debug for Repr<T, S> {
				1012	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1013	fn state_status<T: AsRef<[u8]>, S: StateID>(
				1014	dfa: &Repr<T, S>,
				1015	id: S,
				1016	) -> &'static str {
				1017	if id == dead_id() {
				1018	if dfa.is_match_state(id) {
				1019	"D*"
				1020	} else {
				1021	"D "
				1022	}
				1023	} else if id == dfa.start_state() {
				1024	if dfa.is_match_state(id) {
				1025	">*"
				1026	} else {
				1027	"> "
				1028	}
				1029	} else {
				1030	if dfa.is_match_state(id) {
				1031	" *"
				1032	} else {
				1033	" "
				1034	}
				1035	}
				1036	}
				1037
				1038	writeln!(f, "SparseDFA(")?;
				1039	for (id, state) in self.states() {
				1040	let status = state_status(self, id);
				1041	writeln!(f, "{}{:06}: {:?}", status, id.to_usize(), state)?;
				1042	}
				1043	writeln!(f, ")")?;
				1044	Ok(())
				1045	}
				1046	}
				1047
				1048	/// An iterator over all states in a sparse DFA.
				1049	///
				1050	/// This iterator yields tuples, where the first element is the state ID and
				1051	/// the second element is the state itself.
				1052	#[cfg(feature = "std")]
				1053	#[derive(Debug)]
				1054	struct StateIter<'a, T: AsRef<[u8]> + 'a, S: StateID + 'a = usize> {
				1055	dfa: &'a Repr<T, S>,
				1056	id: S,
				1057	}
				1058
				1059	#[cfg(feature = "std")]
				1060	impl<'a, T: AsRef<[u8]>, S: StateID> Iterator for StateIter<'a, T, S> {
				1061	type Item = (S, State<'a, S>);
				1062
				1063	fn next(&mut self) -> Option<(S, State<'a, S>)> {
				1064	if self.id.to_usize() >= self.dfa.trans().len() {
				1065	return None;
				1066	}
				1067	let id = self.id;
				1068	let state = self.dfa.state(id);
				1069	self.id = S::from_usize(self.id.to_usize() + state.bytes());
				1070	Some((id, state))
				1071	}
				1072	}
				1073
				1074	/// A representation of a sparse DFA state that can be cheaply materialized
				1075	/// from a state identifier.
				1076	#[derive(Clone)]
				1077	struct State<'a, S: StateID = usize> {
				1078	/// The state identifier representation used by the DFA from which this
				1079	/// state was extracted. Since our transition table is compacted in a
				1080	/// &[u8], we don't actually use the state ID type parameter explicitly
				1081	/// anywhere, so we fake it. This prevents callers from using an incorrect
				1082	/// state ID representation to read from this state.
				1083	_state_id_repr: PhantomData<S>,
				1084	/// The number of transitions in this state.
				1085	ntrans: usize,
				1086	/// Pairs of input ranges, where there is one pair for each transition.
				1087	/// Each pair specifies an inclusive start and end byte range for the
				1088	/// corresponding transition.
				1089	input_ranges: &'a [u8],
				1090	/// Transitions to the next state. This slice contains native endian
				1091	/// encoded state identifiers, with `S` as the representation. Thus, there
				1092	/// are `ntrans * size_of::<S>()` bytes in this slice.
				1093	next: &'a [u8],
				1094	}
				1095
				1096	impl<'a, S: StateID> State<'a, S> {
				1097	/// Searches for the next transition given an input byte. If no such
				1098	/// transition could be found, then a dead state is returned.
				1099	fn next(&self, input: u8) -> S {
				1100	// This straight linear search was observed to be much better than
				1101	// binary search on ASCII haystacks, likely because a binary search
				1102	// visits the ASCII case last but a linear search sees it first. A
				1103	// binary search does do a little better on non-ASCII haystacks, but
				1104	// not by much. There might be a better trade off lurking here.
				1105	for i in 0..self.ntrans {
				1106	let (start, end) = self.range(i);
				1107	if start <= input && input <= end {
				1108	return self.next_at(i);
				1109	}
				1110	// We could bail early with an extra branch: if input < b1, then
				1111	// we know we'll never find a matching transition. Interestingly,
				1112	// this extra branch seems to not help performance, or will even
				1113	// hurt it. It's likely very dependent on the DFA itself and what
				1114	// is being searched.
				1115	}
				1116	dead_id()
				1117	}
				1118
				1119	/// Returns the inclusive input byte range for the ith transition in this
				1120	/// state.
				1121	fn range(&self, i: usize) -> (u8, u8) {
				1122	(self.input_ranges[i * 2], self.input_ranges[i * 2 + 1])
				1123	}
				1124
				1125	/// Returns the next state for the ith transition in this state.
				1126	fn next_at(&self, i: usize) -> S {
				1127	S::read_bytes(&self.next[i * size_of::<S>()..])
				1128	}
				1129
				1130	/// Return the total number of bytes that this state consumes in its
				1131	/// encoded form.
				1132	#[cfg(feature = "std")]
				1133	fn bytes(&self) -> usize {
				1134	2 + (self.ntrans * 2) + (self.ntrans * size_of::<S>())
				1135	}
				1136	}
				1137
				1138	#[cfg(feature = "std")]
				1139	impl<'a, S: StateID> fmt::Debug for State<'a, S> {
				1140	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1141	let mut transitions = vec![];
				1142	for i in 0..self.ntrans {
				1143	let next = self.next_at(i);
				1144	if next == dead_id() {
				1145	continue;
				1146	}
				1147
				1148	let (start, end) = self.range(i);
				1149	if start == end {
				1150	transitions.push(format!(
				1151	"{} => {}",
				1152	escape(start),
				1153	next.to_usize()
				1154	));
				1155	} else {
				1156	transitions.push(format!(
				1157	"{}-{} => {}",
				1158	escape(start),
				1159	escape(end),
				1160	next.to_usize(),
				1161	));
				1162	}
				1163	}
				1164	write!(f, "{}", transitions.join(", "))
				1165	}
				1166	}
				1167
				1168	/// A representation of a mutable sparse DFA state that can be cheaply
				1169	/// materialized from a state identifier.
				1170	#[cfg(feature = "std")]
				1171	struct StateMut<'a, S: StateID = usize> {
				1172	/// The state identifier representation used by the DFA from which this
				1173	/// state was extracted. Since our transition table is compacted in a
				1174	/// &[u8], we don't actually use the state ID type parameter explicitly
				1175	/// anywhere, so we fake it. This prevents callers from using an incorrect
				1176	/// state ID representation to read from this state.
				1177	_state_id_repr: PhantomData<S>,
				1178	/// The number of transitions in this state.
				1179	ntrans: usize,
				1180	/// Pairs of input ranges, where there is one pair for each transition.
				1181	/// Each pair specifies an inclusive start and end byte range for the
				1182	/// corresponding transition.
				1183	input_ranges: &'a mut [u8],
				1184	/// Transitions to the next state. This slice contains native endian
				1185	/// encoded state identifiers, with `S` as the representation. Thus, there
				1186	/// are `ntrans * size_of::<S>()` bytes in this slice.
				1187	next: &'a mut [u8],
				1188	}
				1189
				1190	#[cfg(feature = "std")]
				1191	impl<'a, S: StateID> StateMut<'a, S> {
				1192	/// Sets the ith transition to the given state.
				1193	fn set_next_at(&mut self, i: usize, next: S) {
				1194	next.write_bytes(&mut self.next[i * size_of::<S>()..]);
				1195	}
				1196	}
				1197
				1198	#[cfg(feature = "std")]
				1199	impl<'a, S: StateID> fmt::Debug for StateMut<'a, S> {
				1200	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1201	let state = State {
				1202	_state_id_repr: self._state_id_repr,
				1203	ntrans: self.ntrans,
				1204	input_ranges: self.input_ranges,
				1205	next: self.next,
				1206	};
				1207	fmt::Debug::fmt(&state, f)
				1208	}
				1209	}
				1210
				1211	/// Return the given byte as its escaped string form.
				1212	#[cfg(feature = "std")]
				1213	fn escape(b: u8) -> String {
				1214	use std::ascii;
				1215
				1216	String::from_utf8(ascii::escape_default(b).collect::<Vec<_>>()).unwrap()
				1217	}
				1218
				1219	/// A binary search routine specialized specifically to a sparse DFA state's
				1220	/// transitions. Specifically, the transitions are defined as a set of pairs
				1221	/// of input bytes that delineate an inclusive range of bytes. If the input
				1222	/// byte is in the range, then the corresponding transition is a match.
				1223	///
				1224	/// This binary search accepts a slice of these pairs and returns the position
				1225	/// of the matching pair (the ith transition), or None if no matching pair
				1226	/// could be found.
				1227	///
				1228	/// Note that this routine is not currently used since it was observed to
				1229	/// either decrease performance when searching ASCII, or did not provide enough
				1230	/// of a boost on non-ASCII haystacks to be worth it. However, we leave it here
				1231	/// for posterity in case we can find a way to use it.
				1232	///
				1233	/// In theory, we could use the standard library's search routine if we could
				1234	/// cast a `&[u8]` to a `&[(u8, u8)]`, but I don't believe this is currently
				1235	/// guaranteed to be safe and is thus UB (since I don't think the in-memory
				1236	/// representation of `(u8, u8)` has been nailed down).
				1237	#[inline(always)]
				1238	#[allow(dead_code)]
				1239	fn binary_search_ranges(ranges: &[u8], needle: u8) -> Option<usize> {
				1240	debug_assert!(ranges.len() % 2 == 0, "ranges must have even length");
				1241	debug_assert!(ranges.len() <= 512, "ranges should be short");
				1242
				1243	let (mut left, mut right) = (0, ranges.len() / 2);
				1244	while left < right {
				1245	let mid = (left + right) / 2;
				1246	let (b1, b2) = (ranges[mid * 2], ranges[mid * 2 + 1]);
				1247	if needle < b1 {
				1248	right = mid;
				1249	} else if needle > b2 {
				1250	left = mid + 1;
				1251	} else {
				1252	return Some(mid);
				1253	}
				1254	}
				1255	None
				1256	}