1use crate::diff::*;
2use crate::levenshtein::distance;
3use std::collections::{BTreeMap, HashSet};
4
5pub type Mapping<'a> = BTreeMap<&'a str, &'a str>;
6
7// TODO: Is it better to return a list of matches or a hashmap
8// It might be better to do former because we may need to distinguise
9// b/w full and partial match
10#[derive(Debug, PartialEq)]
11pub struct Matching<'a> {
12 pub from: &'a str,
13 pub to: &'a str,
14}
15
16impl<'a> Matching<'a> {
17 pub fn new(from: &'a str, to: &'a str) -> Matching<'a> {
18 Matching { from, to }
19 }
20}
21
22#[derive(Debug, PartialEq)]
23pub enum Match<'a> {
24 Full(Matching<'a>),
25 Partial(Matching<'a>),
26}
27//
28// impl<'a> Match<'a> {
29// fn is_full(&self) -> bool {
30// match self {
31// Match::Full(_) => true,
32// _ => false
33// }
34// }
35// }
36
37/// Matches both graphs and returns the mapping of nodes from g1 to g2
38pub fn match_graphs<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Vec<Match<'a>> {
39 let mut mapping: BTreeMap<&str, &str> = get_initial_mapping(d1, d2);
40
41 // TODO: This mapping may have duplicate mappings, remove them
42
43 let mut matches: Vec<Match> = mapping
44 .iter()
45 .map(|(from, to)| Match::Full(Matching { from, to }))
46 .collect();
47
48 // we use rev adjacency list because we are going to compare the parents
49 let rev_adj_list1 = d1.graph.rev_adj_list();
50 let rev_adj_list2 = d2.graph.rev_adj_list();
51
52 let mut matched_labels_in_2: HashSet<&str> = mapping.iter().map(|(_, v)| *v).collect();
53
54 let mut prev_mapping = mapping.clone();
55 loop {
56 let mut new_mapping: Mapping = BTreeMap::new();
57 for (k, v) in prev_mapping.iter() {
58 let parents_in_1 = rev_adj_list1.get(k).unwrap();
59 let parents_in_2 = rev_adj_list2.get(v).unwrap();
60
61 for n in parents_in_1.iter() {
62 // don't bother selecting if the node in 1 is already matched
63 // as we use a stricter selection for the initial matching
64 if mapping.contains_key(n) {
65 continue;
66 }
67 if let Some(lab) = select(d1, d2, n, &parents_in_2, false) {
68 // if the matched label is already matched to some node in 1, skip
69 if !matched_labels_in_2.contains(lab) {
70 matched_labels_in_2.insert(lab);
71 new_mapping.insert(n, lab);
72 }
73 }
74 }
75 }
76 // println!("{:?}", new_mapping);
77 // merge
78 let new_matches = get_new_matches(&mapping, &new_mapping);
79 if new_matches.len() == 0 {
80 break;
81 }
82 for mch in new_matches {
83 mapping.insert(mch.from, mch.to);
84 matches.push(Match::Partial(mch));
85 }
86 prev_mapping = new_mapping;
87 }
88
89 matches
90}
91
92fn get_initial_mapping<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Mapping<'a> {
93 let mut mapping: BTreeMap<&str, &str> = BTreeMap::new();
94 let mut reverse_mapping: BTreeMap<&str, &str> = BTreeMap::new();
95 let g2_labels: Vec<&str> = d2.graph.nodes.iter().map(|n| n.label.as_str()).collect();
96
97 // TODO: this can be functional
98 for node in d1.graph.nodes.iter() {
99 if let Some(matched_lab) = select(d1, d2, &node.label, &g2_labels, true) {
100 if let Some(lab_in_1) = reverse_mapping.get(matched_lab) {
101 // matched_lab was already matched
102 // select the one with the lowest cost
103 // this is done so that no duplicate matching will occur
104 let dist_already = dist_bw_nodes(d1, d2, lab_in_1, matched_lab);
105 let dist_new = dist_bw_nodes(d1, d2, &node.label, matched_lab);
106 if dist_new > dist_already {
107 continue;
108 }
109 mapping.remove(lab_in_1);
110 }
111 mapping.insert(&node.label, matched_lab);
112 reverse_mapping.insert(matched_lab, &node.label);
113 }
114 }
115
116 mapping
117}
118
119fn dist_bw_nodes(d1: &DiffGraph<'_>, d2: &DiffGraph<'_>, n1: &str, n2: &str) -> Option<usize> {
120 let node1: &Node = d1.graph.get_node_by_label(n1).unwrap();
121 let node2: &Node = d2.graph.get_node_by_label(n2).unwrap();
122
123 let tup1: (i64, i64, i64) = (
124 d1.dist_start[n1] as i64,
125 d1.dist_end[n1] as i64,
126 node1.stmts.len() as i64,
127 );
128 let tup2: (i64, i64, i64) = (
129 d2.dist_start[n2] as i64,
130 d2.dist_end[n2] as i64,
131 node2.stmts.len() as i64,
132 );
133
134 let s1: String = node1.stmts.join(sep:"");
135 let s2: String = node2.stmts.join(sep:"");
136 let dist: usize = distance(&s1, &s2);
137
138 Some(
139 ((tup1.0 - tup2.0).pow(exp:2) + (tup1.1 - tup2.1).pow(exp:2) + (tup1.2 - tup2.2).pow(exp:2)) as usize
140 + dist,
141 )
142}
143
144/// Selects the most suitable match for n from L
145fn select<'a>(
146 d1: &'a DiffGraph<'_>,
147 d2: &'a DiffGraph<'_>,
148 n: &'a str,
149 list_of_labs: &[&'a str],
150 use_text_dist_filter: bool,
151) -> Option<&'a str> {
152 let node: &Node = d1.graph.get_node_by_label(n).unwrap();
153 let node_stmt_len: usize = node.stmts.len();
154 let node_stmts: String = node.stmts.join(sep:"");
155 list_of_labsOption<&&str>
156 .iter()
157 .filter(|lab: &&&str| {
158 let other_node: &Node = d2.graph.get_node_by_label(lab).unwrap();
159 // filter out nodes that may differ by more than 2 lines
160 (other_node.stmts.len() as i64 - node.stmts.len() as i64).abs() <= 2
161 })
162 .filter(|lab: &&&str| {
163 if !use_text_dist_filter {
164 return true;
165 }
166 let other_node_stmts: String = d2.graph.get_node_by_label(lab).unwrap().stmts.join(sep:"");
167 // allow bigger basic blocks have more edits
168 // 2 here is arbitary
169 distance(&node_stmts, &other_node_stmts) < 2 * node_stmt_len
170 })
171 .min_by_key(|x: &&&str| dist_bw_nodes(d1, d2, n1:n, n2:x))
172 .map(|x: &&str| *x)
173}
174
175fn get_new_matches<'a>(mapping: &Mapping<'a>, new_mapping: &Mapping<'a>) -> Vec<Matching<'a>> {
176 let mut changed: Vec> = Vec::new();
177 for (k: &&str, v: &&str) in new_mapping.iter() {
178 if !mapping.contains_key(k) {
179 changed.push(Matching { from: k, to: v })
180 }
181 }
182
183 changed
184}
185