1 | //! Common functions for [Longest common subsequences](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) |
2 | //! on slice. |
3 | |
4 | cfg_prettytable! { |
5 | use crate::format_table; |
6 | use prettytable::{Cell, Row}; |
7 | } |
8 | use std::cmp::max; |
9 | |
10 | #[derive (Debug)] |
11 | pub struct Table<'a, T: 'a> { |
12 | x: &'a [T], |
13 | y: &'a [T], |
14 | table: Vec<Vec<usize>>, |
15 | } |
16 | |
17 | /// Implements Longest Common Subsequences Table |
18 | /// Memory requirement: O(N^2) |
19 | /// |
20 | /// Based on [Wikipedia article](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) |
21 | impl<'a, T> Table<'a, T> |
22 | where |
23 | T: PartialEq, |
24 | { |
25 | /// Creates new table for search common subsequences in x and y |
26 | pub fn new(x: &'a [T], y: &'a [T]) -> Table<'a, T> { |
27 | let x_len = x.len() + 1; |
28 | let y_len = y.len() + 1; |
29 | let mut table = vec![vec![0; y_len]; x_len]; |
30 | |
31 | for i in 1..x_len { |
32 | for j in 1..y_len { |
33 | table[i][j] = if x[i - 1] == y[j - 1] { |
34 | table[i - 1][j - 1] + 1 |
35 | } else { |
36 | max(table[i][j - 1], table[i - 1][j]) |
37 | }; |
38 | } |
39 | } |
40 | |
41 | Table { x, y, table } |
42 | } |
43 | |
44 | fn seq_iter(&self) -> TableIter<T> { |
45 | TableIter { |
46 | x: self.x.len(), |
47 | y: self.y.len(), |
48 | table: self, |
49 | } |
50 | } |
51 | fn get_match(&self, x: usize, y: usize, len: usize) -> Match<T> { |
52 | Match { |
53 | x, |
54 | y, |
55 | len, |
56 | table: self, |
57 | } |
58 | } |
59 | |
60 | /// Returns matches between X and Y |
61 | pub fn matches(&self) -> Vec<Match<T>> { |
62 | let mut matches: Vec<Match<T>> = Vec::new(); |
63 | for (x, y) in self.seq_iter() { |
64 | if let Some(last) = matches.last_mut() { |
65 | if last.x == x + 1 && last.y == y + 1 { |
66 | last.x = x; |
67 | last.y = y; |
68 | last.len += 1; |
69 | continue; |
70 | } |
71 | } |
72 | matches.push(self.get_match(x, y, 1)); |
73 | } |
74 | matches.reverse(); |
75 | matches |
76 | } |
77 | |
78 | /// Returns matches between X and Y with zero-len match at the end |
79 | pub fn matches_zero(&self) -> Vec<Match<T>> { |
80 | let mut matches = self.matches(); |
81 | matches.push(self.get_match(self.x.len(), self.y.len(), 0)); |
82 | matches |
83 | } |
84 | |
85 | /// Find longest sequence |
86 | pub fn longest_seq(&self) -> Vec<&T> { |
87 | self.matches(); |
88 | let mut common: Vec<_> = self.seq_iter().map(|(x, _y)| &self.x[x]).collect(); |
89 | common.reverse(); |
90 | common |
91 | } |
92 | } |
93 | |
94 | #[cfg (feature = "prettytable-rs" )] |
95 | /// Prints pretty-table for LCS |
96 | impl<'a, T> std::fmt::Display for Table<'a, T> |
97 | where |
98 | T: std::fmt::Display, |
99 | { |
100 | fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { |
101 | let mut table = format_table::new(); |
102 | let mut header = vec!["" .to_string(), "Ø" .to_string()]; |
103 | for i in self.x { |
104 | header.push(format!(" {}" , i)); |
105 | } |
106 | |
107 | table.set_titles(Row::new( |
108 | header.into_iter().map(|i| Cell::new(&i)).collect(), |
109 | )); |
110 | for j in 0..=self.y.len() { |
111 | let mut row = vec![if j == 0 { |
112 | "Ø" .to_string() |
113 | } else { |
114 | format!(" {}" , self.y[j - 1]) |
115 | }]; |
116 | for i in 0..=self.x.len() { |
117 | row.push(format!(" {}" , self.table[i][j])); |
118 | } |
119 | table.add_row(row.into_iter().map(|i| Cell::new(&i)).collect()); |
120 | } |
121 | write!(formatter, " \n{}" , table) |
122 | } |
123 | } |
124 | |
125 | struct TableIter<'a, T: 'a> { |
126 | x: usize, |
127 | y: usize, |
128 | table: &'a Table<'a, T>, |
129 | } |
130 | |
131 | impl<'a, T> Iterator for TableIter<'a, T> { |
132 | type Item = (usize, usize); |
133 | fn next(&mut self) -> Option<Self::Item> { |
134 | let table: &Vec> = &self.table.table; |
135 | |
136 | while self.x != 0 && self.y != 0 { |
137 | let cur: usize = table[self.x][self.y]; |
138 | |
139 | if cur == table[self.x - 1][self.y] { |
140 | self.x -= 1; |
141 | continue; |
142 | } |
143 | self.y -= 1; |
144 | if cur == table[self.x][self.y] { |
145 | continue; |
146 | } |
147 | self.x -= 1; |
148 | return Some((self.x, self.y)); |
149 | } |
150 | None |
151 | } |
152 | } |
153 | |
154 | pub struct Match<'a, T: 'a> { |
155 | pub x: usize, |
156 | pub y: usize, |
157 | pub len: usize, |
158 | table: &'a Table<'a, T>, |
159 | } |
160 | |
161 | impl<'a, T> Match<'a, T> { |
162 | /// Returns matched sequence |
163 | pub fn seq(&self) -> &[T] { |
164 | &self.table.x[self.x..(self.x + self.len)] |
165 | } |
166 | } |
167 | |
168 | #[test ] |
169 | fn test_table() { |
170 | let x: Vec<&str> = vec!["A" , "G" , "C" , "A" , "T" ]; |
171 | let y: Vec<&str> = vec!["G" , "A" , "C" ]; |
172 | |
173 | let table: Table<'_, &str> = Table::new(&x, &y); |
174 | assert_eq!( |
175 | format!(" {}" , table), |
176 | r#" |
177 | ┌───┬───┬───┬───┬───┬───┬───┐ |
178 | │ │ Ø │ A │ G │ C │ A │ T │ |
179 | ├───┼───┼───┼───┼───┼───┼───┤ |
180 | │ Ø │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ |
181 | ├───┼───┼───┼───┼───┼───┼───┤ |
182 | │ G │ 0 │ 0 │ 1 │ 1 │ 1 │ 1 │ |
183 | ├───┼───┼───┼───┼───┼───┼───┤ |
184 | │ A │ 0 │ 1 │ 1 │ 1 │ 2 │ 2 │ |
185 | ├───┼───┼───┼───┼───┼───┼───┤ |
186 | │ C │ 0 │ 1 │ 1 │ 2 │ 2 │ 2 │ |
187 | └───┴───┴───┴───┴───┴───┴───┘ |
188 | "# |
189 | ); |
190 | assert_eq!(table.longest_seq(), vec![&"A" , &"C" ]); |
191 | } |
192 | |
193 | #[test ] |
194 | |
195 | fn test_table_match() { |
196 | let test_v = vec![ |
197 | ( |
198 | "The quick brown fox jumps over the lazy dog" , |
199 | "The quick brown dog leaps over the lazy cat" , |
200 | "The quick brown o ps over the lazy " , |
201 | vec!["The quick brown " , "o" , " " , "ps over the lazy " ], |
202 | ), |
203 | ("ab:c" , "ba:b:c" , "ab:c" , vec!["a" , "b:c" ]), |
204 | ( |
205 | "The red brown fox jumped over the rolling log" , |
206 | "The brown spotted fox leaped over the rolling log" , |
207 | "The brown fox ped over the rolling log" , |
208 | vec!["The " , "brown " , "fox " , "ped over the rolling log" ], |
209 | ), |
210 | ]; |
211 | for (x_str, y_str, exp_str, match_exp) in test_v { |
212 | let x: Vec<_> = x_str.split("" ).collect(); |
213 | let y: Vec<_> = y_str.split("" ).collect(); |
214 | |
215 | // Trim empty elements |
216 | let table = Table::new(&x[1..(x.len() - 1)], &y[1..(y.len() - 1)]); |
217 | let seq = table |
218 | .longest_seq() |
219 | .iter() |
220 | .map(|i| i.to_string()) |
221 | .collect::<Vec<String>>() |
222 | .join("" ); |
223 | assert_eq!(seq, exp_str); |
224 | let matches: Vec<_> = table.matches().iter().map(|m| m.seq().join("" )).collect(); |
225 | assert_eq!(matches, match_exp); |
226 | } |
227 | } |
228 | |