| 1 | use crate::diff::*; |
| 2 | use crate::levenshtein::distance; |
| 3 | use std::collections::{BTreeMap, HashSet}; |
| 4 | |
| 5 | pub type Mapping<'a> = BTreeMap<&'a str, &'a str>; |
| 6 | |
| 7 | // TODO: Is it better to return a list of matches or a hashmap |
| 8 | // It might be better to do former because we may need to distinguise |
| 9 | // b/w full and partial match |
| 10 | #[derive (Debug, PartialEq)] |
| 11 | pub struct Matching<'a> { |
| 12 | pub from: &'a str, |
| 13 | pub to: &'a str, |
| 14 | } |
| 15 | |
| 16 | impl<'a> Matching<'a> { |
| 17 | pub fn new(from: &'a str, to: &'a str) -> Matching<'a> { |
| 18 | Matching { from, to } |
| 19 | } |
| 20 | } |
| 21 | |
| 22 | #[derive (Debug, PartialEq)] |
| 23 | pub enum Match<'a> { |
| 24 | Full(Matching<'a>), |
| 25 | Partial(Matching<'a>), |
| 26 | } |
| 27 | // |
| 28 | // impl<'a> Match<'a> { |
| 29 | // fn is_full(&self) -> bool { |
| 30 | // match self { |
| 31 | // Match::Full(_) => true, |
| 32 | // _ => false |
| 33 | // } |
| 34 | // } |
| 35 | // } |
| 36 | |
| 37 | /// Matches both graphs and returns the mapping of nodes from g1 to g2 |
| 38 | pub fn match_graphs<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Vec<Match<'a>> { |
| 39 | let mut mapping: BTreeMap<&str, &str> = get_initial_mapping(d1, d2); |
| 40 | |
| 41 | // TODO: This mapping may have duplicate mappings, remove them |
| 42 | |
| 43 | let mut matches: Vec<Match> = mapping |
| 44 | .iter() |
| 45 | .map(|(from, to)| Match::Full(Matching { from, to })) |
| 46 | .collect(); |
| 47 | |
| 48 | // we use rev adjacency list because we are going to compare the parents |
| 49 | let rev_adj_list1 = d1.graph.rev_adj_list(); |
| 50 | let rev_adj_list2 = d2.graph.rev_adj_list(); |
| 51 | |
| 52 | let mut matched_labels_in_2: HashSet<&str> = mapping.iter().map(|(_, v)| *v).collect(); |
| 53 | |
| 54 | let mut prev_mapping = mapping.clone(); |
| 55 | loop { |
| 56 | let mut new_mapping: Mapping = BTreeMap::new(); |
| 57 | for (k, v) in prev_mapping.iter() { |
| 58 | let parents_in_1 = rev_adj_list1.get(k).unwrap(); |
| 59 | let parents_in_2 = rev_adj_list2.get(v).unwrap(); |
| 60 | |
| 61 | for n in parents_in_1.iter() { |
| 62 | // don't bother selecting if the node in 1 is already matched |
| 63 | // as we use a stricter selection for the initial matching |
| 64 | if mapping.contains_key(n) { |
| 65 | continue; |
| 66 | } |
| 67 | if let Some(lab) = select(d1, d2, n, &parents_in_2, false) { |
| 68 | // if the matched label is already matched to some node in 1, skip |
| 69 | if !matched_labels_in_2.contains(lab) { |
| 70 | matched_labels_in_2.insert(lab); |
| 71 | new_mapping.insert(n, lab); |
| 72 | } |
| 73 | } |
| 74 | } |
| 75 | } |
| 76 | // println!("{:?}", new_mapping); |
| 77 | // merge |
| 78 | let new_matches = get_new_matches(&mapping, &new_mapping); |
| 79 | if new_matches.len() == 0 { |
| 80 | break; |
| 81 | } |
| 82 | for mch in new_matches { |
| 83 | mapping.insert(mch.from, mch.to); |
| 84 | matches.push(Match::Partial(mch)); |
| 85 | } |
| 86 | prev_mapping = new_mapping; |
| 87 | } |
| 88 | |
| 89 | matches |
| 90 | } |
| 91 | |
| 92 | fn get_initial_mapping<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Mapping<'a> { |
| 93 | let mut mapping: BTreeMap<&str, &str> = BTreeMap::new(); |
| 94 | let mut reverse_mapping: BTreeMap<&str, &str> = BTreeMap::new(); |
| 95 | let g2_labels: Vec<&str> = d2.graph.nodes.iter().map(|n| n.label.as_str()).collect(); |
| 96 | |
| 97 | // TODO: this can be functional |
| 98 | for node in d1.graph.nodes.iter() { |
| 99 | if let Some(matched_lab) = select(d1, d2, &node.label, &g2_labels, true) { |
| 100 | if let Some(lab_in_1) = reverse_mapping.get(matched_lab) { |
| 101 | // matched_lab was already matched |
| 102 | // select the one with the lowest cost |
| 103 | // this is done so that no duplicate matching will occur |
| 104 | let dist_already = dist_bw_nodes(d1, d2, lab_in_1, matched_lab); |
| 105 | let dist_new = dist_bw_nodes(d1, d2, &node.label, matched_lab); |
| 106 | if dist_new > dist_already { |
| 107 | continue; |
| 108 | } |
| 109 | mapping.remove(lab_in_1); |
| 110 | } |
| 111 | mapping.insert(&node.label, matched_lab); |
| 112 | reverse_mapping.insert(matched_lab, &node.label); |
| 113 | } |
| 114 | } |
| 115 | |
| 116 | mapping |
| 117 | } |
| 118 | |
| 119 | fn dist_bw_nodes(d1: &DiffGraph<'_>, d2: &DiffGraph<'_>, n1: &str, n2: &str) -> Option<usize> { |
| 120 | let node1: &Node = d1.graph.get_node_by_label(n1).unwrap(); |
| 121 | let node2: &Node = d2.graph.get_node_by_label(n2).unwrap(); |
| 122 | |
| 123 | let tup1: (i64, i64, i64) = ( |
| 124 | d1.dist_start[n1] as i64, |
| 125 | d1.dist_end[n1] as i64, |
| 126 | node1.stmts.len() as i64, |
| 127 | ); |
| 128 | let tup2: (i64, i64, i64) = ( |
| 129 | d2.dist_start[n2] as i64, |
| 130 | d2.dist_end[n2] as i64, |
| 131 | node2.stmts.len() as i64, |
| 132 | ); |
| 133 | |
| 134 | let s1: String = node1.stmts.join(sep:"" ); |
| 135 | let s2: String = node2.stmts.join(sep:"" ); |
| 136 | let dist: usize = distance(&s1, &s2); |
| 137 | |
| 138 | Some( |
| 139 | ((tup1.0 - tup2.0).pow(exp:2) + (tup1.1 - tup2.1).pow(exp:2) + (tup1.2 - tup2.2).pow(exp:2)) as usize |
| 140 | + dist, |
| 141 | ) |
| 142 | } |
| 143 | |
| 144 | /// Selects the most suitable match for n from L |
| 145 | fn select<'a>( |
| 146 | d1: &'a DiffGraph<'_>, |
| 147 | d2: &'a DiffGraph<'_>, |
| 148 | n: &'a str, |
| 149 | list_of_labs: &[&'a str], |
| 150 | use_text_dist_filter: bool, |
| 151 | ) -> Option<&'a str> { |
| 152 | let node: &Node = d1.graph.get_node_by_label(n).unwrap(); |
| 153 | let node_stmt_len: usize = node.stmts.len(); |
| 154 | let node_stmts: String = node.stmts.join(sep:"" ); |
| 155 | list_of_labsOption<&&str> |
| 156 | .iter() |
| 157 | .filter(|lab: &&&str| { |
| 158 | let other_node: &Node = d2.graph.get_node_by_label(lab).unwrap(); |
| 159 | // filter out nodes that may differ by more than 2 lines |
| 160 | (other_node.stmts.len() as i64 - node.stmts.len() as i64).abs() <= 2 |
| 161 | }) |
| 162 | .filter(|lab: &&&str| { |
| 163 | if !use_text_dist_filter { |
| 164 | return true; |
| 165 | } |
| 166 | let other_node_stmts: String = d2.graph.get_node_by_label(lab).unwrap().stmts.join(sep:"" ); |
| 167 | // allow bigger basic blocks have more edits |
| 168 | // 2 here is arbitary |
| 169 | distance(&node_stmts, &other_node_stmts) < 2 * node_stmt_len |
| 170 | }) |
| 171 | .min_by_key(|x: &&&str| dist_bw_nodes(d1, d2, n1:n, n2:x)) |
| 172 | .map(|x: &&str| *x) |
| 173 | } |
| 174 | |
| 175 | fn get_new_matches<'a>(mapping: &Mapping<'a>, new_mapping: &Mapping<'a>) -> Vec<Matching<'a>> { |
| 176 | let mut changed: Vec> = Vec::new(); |
| 177 | for (k: &&str, v: &&str) in new_mapping.iter() { |
| 178 | if !mapping.contains_key(k) { |
| 179 | changed.push(Matching { from: k, to: v }) |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | changed |
| 184 | } |
| 185 | |