1 | use crate::diff::*; |
2 | use crate::levenshtein::distance; |
3 | use std::collections::{BTreeMap, HashSet}; |
4 | |
5 | pub type Mapping<'a> = BTreeMap<&'a str, &'a str>; |
6 | |
7 | // TODO: Is it better to return a list of matches or a hashmap |
8 | // It might be better to do former because we may need to distinguise |
9 | // b/w full and partial match |
10 | #[derive (Debug, PartialEq)] |
11 | pub struct Matching<'a> { |
12 | pub from: &'a str, |
13 | pub to: &'a str, |
14 | } |
15 | |
16 | impl<'a> Matching<'a> { |
17 | pub fn new(from: &'a str, to: &'a str) -> Matching<'a> { |
18 | Matching { from, to } |
19 | } |
20 | } |
21 | |
22 | #[derive (Debug, PartialEq)] |
23 | pub enum Match<'a> { |
24 | Full(Matching<'a>), |
25 | Partial(Matching<'a>), |
26 | } |
27 | // |
28 | // impl<'a> Match<'a> { |
29 | // fn is_full(&self) -> bool { |
30 | // match self { |
31 | // Match::Full(_) => true, |
32 | // _ => false |
33 | // } |
34 | // } |
35 | // } |
36 | |
37 | /// Matches both graphs and returns the mapping of nodes from g1 to g2 |
38 | pub fn match_graphs<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Vec<Match<'a>> { |
39 | let mut mapping: BTreeMap<&str, &str> = get_initial_mapping(d1, d2); |
40 | |
41 | // TODO: This mapping may have duplicate mappings, remove them |
42 | |
43 | let mut matches: Vec<Match> = mapping |
44 | .iter() |
45 | .map(|(from, to)| Match::Full(Matching { from, to })) |
46 | .collect(); |
47 | |
48 | // we use rev adjacency list because we are going to compare the parents |
49 | let rev_adj_list1 = d1.graph.rev_adj_list(); |
50 | let rev_adj_list2 = d2.graph.rev_adj_list(); |
51 | |
52 | let mut matched_labels_in_2: HashSet<&str> = mapping.iter().map(|(_, v)| *v).collect(); |
53 | |
54 | let mut prev_mapping = mapping.clone(); |
55 | loop { |
56 | let mut new_mapping: Mapping = BTreeMap::new(); |
57 | for (k, v) in prev_mapping.iter() { |
58 | let parents_in_1 = rev_adj_list1.get(k).unwrap(); |
59 | let parents_in_2 = rev_adj_list2.get(v).unwrap(); |
60 | |
61 | for n in parents_in_1.iter() { |
62 | // don't bother selecting if the node in 1 is already matched |
63 | // as we use a stricter selection for the initial matching |
64 | if mapping.contains_key(n) { |
65 | continue; |
66 | } |
67 | if let Some(lab) = select(d1, d2, n, &parents_in_2, false) { |
68 | // if the matched label is already matched to some node in 1, skip |
69 | if !matched_labels_in_2.contains(lab) { |
70 | matched_labels_in_2.insert(lab); |
71 | new_mapping.insert(n, lab); |
72 | } |
73 | } |
74 | } |
75 | } |
76 | // println!("{:?}", new_mapping); |
77 | // merge |
78 | let new_matches = get_new_matches(&mapping, &new_mapping); |
79 | if new_matches.len() == 0 { |
80 | break; |
81 | } |
82 | for mch in new_matches { |
83 | mapping.insert(mch.from, mch.to); |
84 | matches.push(Match::Partial(mch)); |
85 | } |
86 | prev_mapping = new_mapping; |
87 | } |
88 | |
89 | matches |
90 | } |
91 | |
92 | fn get_initial_mapping<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Mapping<'a> { |
93 | let mut mapping: BTreeMap<&str, &str> = BTreeMap::new(); |
94 | let mut reverse_mapping: BTreeMap<&str, &str> = BTreeMap::new(); |
95 | let g2_labels: Vec<&str> = d2.graph.nodes.iter().map(|n| n.label.as_str()).collect(); |
96 | |
97 | // TODO: this can be functional |
98 | for node in d1.graph.nodes.iter() { |
99 | if let Some(matched_lab) = select(d1, d2, &node.label, &g2_labels, true) { |
100 | if let Some(lab_in_1) = reverse_mapping.get(matched_lab) { |
101 | // matched_lab was already matched |
102 | // select the one with the lowest cost |
103 | // this is done so that no duplicate matching will occur |
104 | let dist_already = dist_bw_nodes(d1, d2, lab_in_1, matched_lab); |
105 | let dist_new = dist_bw_nodes(d1, d2, &node.label, matched_lab); |
106 | if dist_new > dist_already { |
107 | continue; |
108 | } |
109 | mapping.remove(lab_in_1); |
110 | } |
111 | mapping.insert(&node.label, matched_lab); |
112 | reverse_mapping.insert(matched_lab, &node.label); |
113 | } |
114 | } |
115 | |
116 | mapping |
117 | } |
118 | |
119 | fn dist_bw_nodes(d1: &DiffGraph<'_>, d2: &DiffGraph<'_>, n1: &str, n2: &str) -> Option<usize> { |
120 | let node1: &Node = d1.graph.get_node_by_label(n1).unwrap(); |
121 | let node2: &Node = d2.graph.get_node_by_label(n2).unwrap(); |
122 | |
123 | let tup1: (i64, i64, i64) = ( |
124 | d1.dist_start[n1] as i64, |
125 | d1.dist_end[n1] as i64, |
126 | node1.stmts.len() as i64, |
127 | ); |
128 | let tup2: (i64, i64, i64) = ( |
129 | d2.dist_start[n2] as i64, |
130 | d2.dist_end[n2] as i64, |
131 | node2.stmts.len() as i64, |
132 | ); |
133 | |
134 | let s1: String = node1.stmts.join(sep:"" ); |
135 | let s2: String = node2.stmts.join(sep:"" ); |
136 | let dist: usize = distance(&s1, &s2); |
137 | |
138 | Some( |
139 | ((tup1.0 - tup2.0).pow(exp:2) + (tup1.1 - tup2.1).pow(exp:2) + (tup1.2 - tup2.2).pow(exp:2)) as usize |
140 | + dist, |
141 | ) |
142 | } |
143 | |
144 | /// Selects the most suitable match for n from L |
145 | fn select<'a>( |
146 | d1: &'a DiffGraph<'_>, |
147 | d2: &'a DiffGraph<'_>, |
148 | n: &'a str, |
149 | list_of_labs: &[&'a str], |
150 | use_text_dist_filter: bool, |
151 | ) -> Option<&'a str> { |
152 | let node: &Node = d1.graph.get_node_by_label(n).unwrap(); |
153 | let node_stmt_len: usize = node.stmts.len(); |
154 | let node_stmts: String = node.stmts.join(sep:"" ); |
155 | list_of_labsOption<&&str> |
156 | .iter() |
157 | .filter(|lab: &&&str| { |
158 | let other_node: &Node = d2.graph.get_node_by_label(lab).unwrap(); |
159 | // filter out nodes that may differ by more than 2 lines |
160 | (other_node.stmts.len() as i64 - node.stmts.len() as i64).abs() <= 2 |
161 | }) |
162 | .filter(|lab: &&&str| { |
163 | if !use_text_dist_filter { |
164 | return true; |
165 | } |
166 | let other_node_stmts: String = d2.graph.get_node_by_label(lab).unwrap().stmts.join(sep:"" ); |
167 | // allow bigger basic blocks have more edits |
168 | // 2 here is arbitary |
169 | distance(&node_stmts, &other_node_stmts) < 2 * node_stmt_len |
170 | }) |
171 | .min_by_key(|x: &&&str| dist_bw_nodes(d1, d2, n1:n, n2:x)) |
172 | .map(|x: &&str| *x) |
173 | } |
174 | |
175 | fn get_new_matches<'a>(mapping: &Mapping<'a>, new_mapping: &Mapping<'a>) -> Vec<Matching<'a>> { |
176 | let mut changed: Vec> = Vec::new(); |
177 | for (k: &&str, v: &&str) in new_mapping.iter() { |
178 | if !mapping.contains_key(k) { |
179 | changed.push(Matching { from: k, to: v }) |
180 | } |
181 | } |
182 | |
183 | changed |
184 | } |
185 | |