| 1 | //! Common functions for [Longest common subsequences](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) |
| 2 | //! on slice. |
| 3 | |
| 4 | cfg_prettytable! { |
| 5 | use crate::format_table; |
| 6 | use prettytable::{Cell, Row}; |
| 7 | } |
| 8 | use std::cmp::max; |
| 9 | |
| 10 | #[derive (Debug)] |
| 11 | pub struct Table<'a, T: 'a> { |
| 12 | x: &'a [T], |
| 13 | y: &'a [T], |
| 14 | table: Vec<Vec<usize>>, |
| 15 | } |
| 16 | |
| 17 | /// Implements Longest Common Subsequences Table |
| 18 | /// Memory requirement: O(N^2) |
| 19 | /// |
| 20 | /// Based on [Wikipedia article](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) |
| 21 | impl<'a, T> Table<'a, T> |
| 22 | where |
| 23 | T: PartialEq, |
| 24 | { |
| 25 | /// Creates new table for search common subsequences in x and y |
| 26 | pub fn new(x: &'a [T], y: &'a [T]) -> Table<'a, T> { |
| 27 | let x_len = x.len() + 1; |
| 28 | let y_len = y.len() + 1; |
| 29 | let mut table = vec![vec![0; y_len]; x_len]; |
| 30 | |
| 31 | for i in 1..x_len { |
| 32 | for j in 1..y_len { |
| 33 | table[i][j] = if x[i - 1] == y[j - 1] { |
| 34 | table[i - 1][j - 1] + 1 |
| 35 | } else { |
| 36 | max(table[i][j - 1], table[i - 1][j]) |
| 37 | }; |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | Table { x, y, table } |
| 42 | } |
| 43 | |
| 44 | fn seq_iter(&self) -> TableIter<T> { |
| 45 | TableIter { |
| 46 | x: self.x.len(), |
| 47 | y: self.y.len(), |
| 48 | table: self, |
| 49 | } |
| 50 | } |
| 51 | fn get_match(&self, x: usize, y: usize, len: usize) -> Match<T> { |
| 52 | Match { |
| 53 | x, |
| 54 | y, |
| 55 | len, |
| 56 | table: self, |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | /// Returns matches between X and Y |
| 61 | pub fn matches(&self) -> Vec<Match<T>> { |
| 62 | let mut matches: Vec<Match<T>> = Vec::new(); |
| 63 | for (x, y) in self.seq_iter() { |
| 64 | if let Some(last) = matches.last_mut() { |
| 65 | if last.x == x + 1 && last.y == y + 1 { |
| 66 | last.x = x; |
| 67 | last.y = y; |
| 68 | last.len += 1; |
| 69 | continue; |
| 70 | } |
| 71 | } |
| 72 | matches.push(self.get_match(x, y, 1)); |
| 73 | } |
| 74 | matches.reverse(); |
| 75 | matches |
| 76 | } |
| 77 | |
| 78 | /// Returns matches between X and Y with zero-len match at the end |
| 79 | pub fn matches_zero(&self) -> Vec<Match<T>> { |
| 80 | let mut matches = self.matches(); |
| 81 | matches.push(self.get_match(self.x.len(), self.y.len(), 0)); |
| 82 | matches |
| 83 | } |
| 84 | |
| 85 | /// Find longest sequence |
| 86 | pub fn longest_seq(&self) -> Vec<&T> { |
| 87 | self.matches(); |
| 88 | let mut common: Vec<_> = self.seq_iter().map(|(x, _y)| &self.x[x]).collect(); |
| 89 | common.reverse(); |
| 90 | common |
| 91 | } |
| 92 | } |
| 93 | |
| 94 | #[cfg (feature = "prettytable-rs" )] |
| 95 | /// Prints pretty-table for LCS |
| 96 | impl<'a, T> std::fmt::Display for Table<'a, T> |
| 97 | where |
| 98 | T: std::fmt::Display, |
| 99 | { |
| 100 | fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { |
| 101 | let mut table = format_table::new(); |
| 102 | let mut header = vec!["" .to_string(), "Ø" .to_string()]; |
| 103 | for i in self.x { |
| 104 | header.push(format!("{}" , i)); |
| 105 | } |
| 106 | |
| 107 | table.set_titles(Row::new( |
| 108 | header.into_iter().map(|i| Cell::new(&i)).collect(), |
| 109 | )); |
| 110 | for j in 0..=self.y.len() { |
| 111 | let mut row = vec![if j == 0 { |
| 112 | "Ø" .to_string() |
| 113 | } else { |
| 114 | format!("{}" , self.y[j - 1]) |
| 115 | }]; |
| 116 | for i in 0..=self.x.len() { |
| 117 | row.push(format!("{}" , self.table[i][j])); |
| 118 | } |
| 119 | table.add_row(row.into_iter().map(|i| Cell::new(&i)).collect()); |
| 120 | } |
| 121 | write!(formatter, " \n{}" , table) |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | struct TableIter<'a, T: 'a> { |
| 126 | x: usize, |
| 127 | y: usize, |
| 128 | table: &'a Table<'a, T>, |
| 129 | } |
| 130 | |
| 131 | impl<'a, T> Iterator for TableIter<'a, T> { |
| 132 | type Item = (usize, usize); |
| 133 | fn next(&mut self) -> Option<Self::Item> { |
| 134 | let table: &Vec> = &self.table.table; |
| 135 | |
| 136 | while self.x != 0 && self.y != 0 { |
| 137 | let cur: usize = table[self.x][self.y]; |
| 138 | |
| 139 | if cur == table[self.x - 1][self.y] { |
| 140 | self.x -= 1; |
| 141 | continue; |
| 142 | } |
| 143 | self.y -= 1; |
| 144 | if cur == table[self.x][self.y] { |
| 145 | continue; |
| 146 | } |
| 147 | self.x -= 1; |
| 148 | return Some((self.x, self.y)); |
| 149 | } |
| 150 | None |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | pub struct Match<'a, T: 'a> { |
| 155 | pub x: usize, |
| 156 | pub y: usize, |
| 157 | pub len: usize, |
| 158 | table: &'a Table<'a, T>, |
| 159 | } |
| 160 | |
| 161 | impl<'a, T> Match<'a, T> { |
| 162 | /// Returns matched sequence |
| 163 | pub fn seq(&self) -> &[T] { |
| 164 | &self.table.x[self.x..(self.x + self.len)] |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | #[test ] |
| 169 | fn test_table() { |
| 170 | let x = vec!["A" , "G" , "C" , "A" , "T" ]; |
| 171 | let y = vec!["G" , "A" , "C" ]; |
| 172 | |
| 173 | let table = Table::new(&x, &y); |
| 174 | assert_eq!( |
| 175 | format!("{}" , table), |
| 176 | r#" |
| 177 | ┌───┬───┬───┬───┬───┬───┬───┐ |
| 178 | │ │ Ø │ A │ G │ C │ A │ T │ |
| 179 | ├───┼───┼───┼───┼───┼───┼───┤ |
| 180 | │ Ø │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ |
| 181 | ├───┼───┼───┼───┼───┼───┼───┤ |
| 182 | │ G │ 0 │ 0 │ 1 │ 1 │ 1 │ 1 │ |
| 183 | ├───┼───┼───┼───┼───┼───┼───┤ |
| 184 | │ A │ 0 │ 1 │ 1 │ 1 │ 2 │ 2 │ |
| 185 | ├───┼───┼───┼───┼───┼───┼───┤ |
| 186 | │ C │ 0 │ 1 │ 1 │ 2 │ 2 │ 2 │ |
| 187 | └───┴───┴───┴───┴───┴───┴───┘ |
| 188 | "# |
| 189 | ); |
| 190 | assert_eq!(table.longest_seq(), vec![&"A" , &"C" ]); |
| 191 | } |
| 192 | |
| 193 | #[test ] |
| 194 | |
| 195 | fn test_table_match() { |
| 196 | let test_v = vec![ |
| 197 | ( |
| 198 | "The quick brown fox jumps over the lazy dog" , |
| 199 | "The quick brown dog leaps over the lazy cat" , |
| 200 | "The quick brown o ps over the lazy " , |
| 201 | vec!["The quick brown " , "o" , " " , "ps over the lazy " ], |
| 202 | ), |
| 203 | ("ab:c" , "ba:b:c" , "ab:c" , vec!["a" , "b:c" ]), |
| 204 | ( |
| 205 | "The red brown fox jumped over the rolling log" , |
| 206 | "The brown spotted fox leaped over the rolling log" , |
| 207 | "The brown fox ped over the rolling log" , |
| 208 | vec!["The " , "brown " , "fox " , "ped over the rolling log" ], |
| 209 | ), |
| 210 | ]; |
| 211 | for (x_str, y_str, exp_str, match_exp) in test_v { |
| 212 | let x: Vec<_> = x_str.split("" ).collect(); |
| 213 | let y: Vec<_> = y_str.split("" ).collect(); |
| 214 | |
| 215 | // Trim empty elements |
| 216 | let table = Table::new(&x[1..(x.len() - 1)], &y[1..(y.len() - 1)]); |
| 217 | let seq = table |
| 218 | .longest_seq() |
| 219 | .iter() |
| 220 | .map(|i| i.to_string()) |
| 221 | .collect::<Vec<String>>() |
| 222 | .join("" ); |
| 223 | assert_eq!(seq, exp_str); |
| 224 | let matches: Vec<_> = table.matches().iter().map(|m| m.seq().join("" )).collect(); |
| 225 | assert_eq!(matches, match_exp); |
| 226 | } |
| 227 | } |
| 228 | |