| 1 | use crate::diff::*; | 
| 2 | use crate::levenshtein::distance; | 
|---|
| 3 | use std::collections::{BTreeMap, HashSet}; | 
|---|
| 4 |  | 
|---|
| 5 | pub type Mapping<'a> = BTreeMap<&'a str, &'a str>; | 
|---|
| 6 |  | 
|---|
| 7 | // TODO: Is it better to return a list of matches or a hashmap | 
|---|
| 8 | // It might be better to do former because we may need to distinguise | 
|---|
| 9 | // b/w full and partial match | 
|---|
| 10 | #[ derive(Debug, PartialEq)] | 
|---|
| 11 | pub struct Matching<'a> { | 
|---|
| 12 | pub from: &'a str, | 
|---|
| 13 | pub to: &'a str, | 
|---|
| 14 | } | 
|---|
| 15 |  | 
|---|
| 16 | impl<'a> Matching<'a> { | 
|---|
| 17 | pub fn new(from: &'a str, to: &'a str) -> Matching<'a> { | 
|---|
| 18 | Matching { from, to } | 
|---|
| 19 | } | 
|---|
| 20 | } | 
|---|
| 21 |  | 
|---|
| 22 | #[ derive(Debug, PartialEq)] | 
|---|
| 23 | pub enum Match<'a> { | 
|---|
| 24 | Full(Matching<'a>), | 
|---|
| 25 | Partial(Matching<'a>), | 
|---|
| 26 | } | 
|---|
| 27 | // | 
|---|
| 28 | // impl<'a> Match<'a> { | 
|---|
| 29 | //     fn is_full(&self) -> bool { | 
|---|
| 30 | //         match self { | 
|---|
| 31 | //             Match::Full(_) => true, | 
|---|
| 32 | //             _ => false | 
|---|
| 33 | //         } | 
|---|
| 34 | //     } | 
|---|
| 35 | // } | 
|---|
| 36 |  | 
|---|
| 37 | /// Matches both graphs and returns the mapping of nodes from g1 to g2 | 
|---|
| 38 | pub fn match_graphs<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Vec<Match<'a>> { | 
|---|
| 39 | let mut mapping: BTreeMap<&str, &str> = get_initial_mapping(d1, d2); | 
|---|
| 40 |  | 
|---|
| 41 | // TODO: This mapping may have duplicate mappings, remove them | 
|---|
| 42 |  | 
|---|
| 43 | let mut matches: Vec<Match> = mapping | 
|---|
| 44 | .iter() | 
|---|
| 45 | .map(|(from, to)| Match::Full(Matching { from, to })) | 
|---|
| 46 | .collect(); | 
|---|
| 47 |  | 
|---|
| 48 | // we use rev adjacency list because we are going to compare the parents | 
|---|
| 49 | let rev_adj_list1 = d1.graph.rev_adj_list(); | 
|---|
| 50 | let rev_adj_list2 = d2.graph.rev_adj_list(); | 
|---|
| 51 |  | 
|---|
| 52 | let mut matched_labels_in_2: HashSet<&str> = mapping.iter().map(|(_, v)| *v).collect(); | 
|---|
| 53 |  | 
|---|
| 54 | let mut prev_mapping = mapping.clone(); | 
|---|
| 55 | loop { | 
|---|
| 56 | let mut new_mapping: Mapping = BTreeMap::new(); | 
|---|
| 57 | for (k, v) in prev_mapping.iter() { | 
|---|
| 58 | let parents_in_1 = rev_adj_list1.get(k).unwrap(); | 
|---|
| 59 | let parents_in_2 = rev_adj_list2.get(v).unwrap(); | 
|---|
| 60 |  | 
|---|
| 61 | for n in parents_in_1.iter() { | 
|---|
| 62 | // don't bother selecting if the node in 1 is already matched | 
|---|
| 63 | // as we use a stricter selection for the initial matching | 
|---|
| 64 | if mapping.contains_key(n) { | 
|---|
| 65 | continue; | 
|---|
| 66 | } | 
|---|
| 67 | if let Some(lab) = select(d1, d2, n, &parents_in_2, false) { | 
|---|
| 68 | // if the matched label is already matched to some node in 1, skip | 
|---|
| 69 | if !matched_labels_in_2.contains(lab) { | 
|---|
| 70 | matched_labels_in_2.insert(lab); | 
|---|
| 71 | new_mapping.insert(n, lab); | 
|---|
| 72 | } | 
|---|
| 73 | } | 
|---|
| 74 | } | 
|---|
| 75 | } | 
|---|
| 76 | // println!("{:?}", new_mapping); | 
|---|
| 77 | // merge | 
|---|
| 78 | let new_matches = get_new_matches(&mapping, &new_mapping); | 
|---|
| 79 | if new_matches.len() == 0 { | 
|---|
| 80 | break; | 
|---|
| 81 | } | 
|---|
| 82 | for mch in new_matches { | 
|---|
| 83 | mapping.insert(mch.from, mch.to); | 
|---|
| 84 | matches.push(Match::Partial(mch)); | 
|---|
| 85 | } | 
|---|
| 86 | prev_mapping = new_mapping; | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | matches | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | fn get_initial_mapping<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Mapping<'a> { | 
|---|
| 93 | let mut mapping: BTreeMap<&str, &str> = BTreeMap::new(); | 
|---|
| 94 | let mut reverse_mapping: BTreeMap<&str, &str> = BTreeMap::new(); | 
|---|
| 95 | let g2_labels: Vec<&str> = d2.graph.nodes.iter().map(|n| n.label.as_str()).collect(); | 
|---|
| 96 |  | 
|---|
| 97 | // TODO: this can be functional | 
|---|
| 98 | for node in d1.graph.nodes.iter() { | 
|---|
| 99 | if let Some(matched_lab) = select(d1, d2, &node.label, &g2_labels, true) { | 
|---|
| 100 | if let Some(lab_in_1) = reverse_mapping.get(matched_lab) { | 
|---|
| 101 | // matched_lab was already matched | 
|---|
| 102 | // select the one with the lowest cost | 
|---|
| 103 | // this is done so that no duplicate matching will occur | 
|---|
| 104 | let dist_already = dist_bw_nodes(d1, d2, lab_in_1, matched_lab); | 
|---|
| 105 | let dist_new = dist_bw_nodes(d1, d2, &node.label, matched_lab); | 
|---|
| 106 | if dist_new > dist_already { | 
|---|
| 107 | continue; | 
|---|
| 108 | } | 
|---|
| 109 | mapping.remove(lab_in_1); | 
|---|
| 110 | } | 
|---|
| 111 | mapping.insert(&node.label, matched_lab); | 
|---|
| 112 | reverse_mapping.insert(matched_lab, &node.label); | 
|---|
| 113 | } | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 | mapping | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 | fn dist_bw_nodes(d1: &DiffGraph<'_>, d2: &DiffGraph<'_>, n1: &str, n2: &str) -> Option<usize> { | 
|---|
| 120 | let node1: &Node = d1.graph.get_node_by_label(n1).unwrap(); | 
|---|
| 121 | let node2: &Node = d2.graph.get_node_by_label(n2).unwrap(); | 
|---|
| 122 |  | 
|---|
| 123 | let tup1: (i64, i64, i64) = ( | 
|---|
| 124 | d1.dist_start[n1] as i64, | 
|---|
| 125 | d1.dist_end[n1] as i64, | 
|---|
| 126 | node1.stmts.len() as i64, | 
|---|
| 127 | ); | 
|---|
| 128 | let tup2: (i64, i64, i64) = ( | 
|---|
| 129 | d2.dist_start[n2] as i64, | 
|---|
| 130 | d2.dist_end[n2] as i64, | 
|---|
| 131 | node2.stmts.len() as i64, | 
|---|
| 132 | ); | 
|---|
| 133 |  | 
|---|
| 134 | let s1: String = node1.stmts.join(sep: ""); | 
|---|
| 135 | let s2: String = node2.stmts.join(sep: ""); | 
|---|
| 136 | let dist: usize = distance(&s1, &s2); | 
|---|
| 137 |  | 
|---|
| 138 | Some( | 
|---|
| 139 | ((tup1.0 - tup2.0).pow(exp:2) + (tup1.1 - tup2.1).pow(exp:2) + (tup1.2 - tup2.2).pow(exp:2)) as usize | 
|---|
| 140 | + dist, | 
|---|
| 141 | ) | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | /// Selects the most suitable match for n from L | 
|---|
| 145 | fn select<'a>( | 
|---|
| 146 | d1: &'a DiffGraph<'_>, | 
|---|
| 147 | d2: &'a DiffGraph<'_>, | 
|---|
| 148 | n: &'a str, | 
|---|
| 149 | list_of_labs: &[&'a str], | 
|---|
| 150 | use_text_dist_filter: bool, | 
|---|
| 151 | ) -> Option<&'a str> { | 
|---|
| 152 | let node: &Node = d1.graph.get_node_by_label(n).unwrap(); | 
|---|
| 153 | let node_stmt_len: usize = node.stmts.len(); | 
|---|
| 154 | let node_stmts: String = node.stmts.join(sep: ""); | 
|---|
| 155 | list_of_labsOption<&&str> | 
|---|
| 156 | .iter() | 
|---|
| 157 | .filter(|lab: &&&str| { | 
|---|
| 158 | let other_node: &Node = d2.graph.get_node_by_label(lab).unwrap(); | 
|---|
| 159 | // filter out nodes that may differ by more than 2 lines | 
|---|
| 160 | (other_node.stmts.len() as i64 - node.stmts.len() as i64).abs() <= 2 | 
|---|
| 161 | }) | 
|---|
| 162 | .filter(|lab: &&&str| { | 
|---|
| 163 | if !use_text_dist_filter { | 
|---|
| 164 | return true; | 
|---|
| 165 | } | 
|---|
| 166 | let other_node_stmts: String = d2.graph.get_node_by_label(lab).unwrap().stmts.join(sep: ""); | 
|---|
| 167 | // allow bigger basic blocks have more edits | 
|---|
| 168 | // 2 here is arbitary | 
|---|
| 169 | distance(&node_stmts, &other_node_stmts) < 2 * node_stmt_len | 
|---|
| 170 | }) | 
|---|
| 171 | .min_by_key(|x: &&&str| dist_bw_nodes(d1, d2, n1:n, n2:x)) | 
|---|
| 172 | .map(|x: &&str| *x) | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | fn get_new_matches<'a>(mapping: &Mapping<'a>, new_mapping: &Mapping<'a>) -> Vec<Matching<'a>> { | 
|---|
| 176 | let mut changed: Vec> = Vec::new(); | 
|---|
| 177 | for (k: &&str, v: &&str) in new_mapping.iter() { | 
|---|
| 178 | if !mapping.contains_key(k) { | 
|---|
| 179 | changed.push(Matching { from: k, to: v }) | 
|---|
| 180 | } | 
|---|
| 181 | } | 
|---|
| 182 |  | 
|---|
| 183 | changed | 
|---|
| 184 | } | 
|---|
| 185 |  | 
|---|