Chih-Hung Hsieh | e42c505 | 2020-04-16 10:44:21 -0700 | [diff] [blame] | 1 | // The Computer Language Benchmarks Game |
Haibo Huang | 47619dd | 2021-01-08 17:05:43 -0800 | [diff] [blame^] | 2 | // https://benchmarksgame-team.pages.debian.net/benchmarksgame/ |
Chih-Hung Hsieh | e42c505 | 2020-04-16 10:44:21 -0700 | [diff] [blame] | 3 | // |
| 4 | // contributed by the Rust Project Developers |
| 5 | // contributed by TeXitoi |
| 6 | // contributed by BurntSushi |
| 7 | |
| 8 | extern crate regex; |
| 9 | |
| 10 | use std::io::{self, Read}; |
| 11 | |
/// Compiles the given pattern expression into a `regex::Regex`.
///
/// Expands to `Regex::new(..).unwrap()`, so an invalid pattern panics at the
/// point where the macro is used.
macro_rules! regex {
    ($pattern:expr) => {{
        ::regex::Regex::new($pattern).unwrap()
    }};
}
| 17 | |
| 18 | fn main() { |
| 19 | let mut seq = String::with_capacity(50 * (1 << 20)); |
| 20 | io::stdin().read_to_string(&mut seq).unwrap(); |
| 21 | let ilen = seq.len(); |
| 22 | |
| 23 | seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); |
| 24 | let clen = seq.len(); |
| 25 | |
| 26 | let variants = vec![ |
| 27 | regex!("agggtaaa|tttaccct"), |
| 28 | regex!("[cgt]gggtaaa|tttaccc[acg]"), |
| 29 | regex!("a[act]ggtaaa|tttacc[agt]t"), |
| 30 | regex!("ag[act]gtaaa|tttac[agt]ct"), |
| 31 | regex!("agg[act]taaa|ttta[agt]cct"), |
| 32 | regex!("aggg[acg]aaa|ttt[cgt]ccct"), |
| 33 | regex!("agggt[cgt]aa|tt[acg]accct"), |
| 34 | regex!("agggta[cgt]a|t[acg]taccct"), |
| 35 | regex!("agggtaa[cgt]|[acg]ttaccct"), |
| 36 | ]; |
| 37 | for re in variants { |
| 38 | println!("{} {}", re.to_string(), re.find_iter(&seq).count()); |
| 39 | } |
| 40 | |
| 41 | let substs = vec![ |
| 42 | (b'B', "(c|g|t)"), |
| 43 | (b'D', "(a|g|t)"), |
| 44 | (b'H', "(a|c|t)"), |
| 45 | (b'K', "(g|t)"), |
| 46 | (b'M', "(a|c)"), |
| 47 | (b'N', "(a|c|g|t)"), |
| 48 | (b'R', "(a|g)"), |
| 49 | (b'S', "(c|g)"), |
| 50 | (b'V', "(a|c|g)"), |
| 51 | (b'W', "(a|t)"), |
| 52 | (b'Y', "(c|t)"), |
| 53 | ]; // combined into one regex in `replace_all` |
| 54 | let seq = replace_all(&seq, substs); |
| 55 | |
| 56 | println!("\n{}\n{}\n{}", ilen, clen, seq.len()); |
| 57 | } |
| 58 | |
| 59 | fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { |
| 60 | let mut replacements = vec![""; 256]; |
| 61 | let mut alternates = vec![]; |
| 62 | for (re, replacement) in substs { |
| 63 | replacements[re as usize] = replacement; |
| 64 | alternates.push((re as char).to_string()); |
| 65 | } |
| 66 | |
| 67 | let re = regex!(&alternates.join("|")); |
| 68 | let mut new = String::with_capacity(text.len()); |
| 69 | let mut last_match = 0; |
| 70 | for m in re.find_iter(text) { |
| 71 | new.push_str(&text[last_match..m.start()]); |
| 72 | new.push_str(replacements[text.as_bytes()[m.start()] as usize]); |
| 73 | last_match = m.end(); |
| 74 | } |
| 75 | new.push_str(&text[last_match..]); |
| 76 | new |
| 77 | } |