1/*
2 * Copyright © 2003 Davide Libenzi
3 * 2018 Benjamin Otte
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * Authors: Davide Libenzi <davidel@xmailserver.org>
19 * Benjamin Otte <otte@gnome.org>
20 */
21
22#include "config.h"
23
24#include "gskdiffprivate.h"
25
26
27#define XDL_MAX_COST_MIN 256
28#define XDL_HEUR_MIN_COST 256
29#define XDL_LINE_MAX G_MAXSSIZE
30#define XDL_SNAKE_CNT 20
31#define XDL_K_HEUR 4
32#define MAXCOST 20
33
34struct _GskDiffSettings {
35 GCompareDataFunc compare_func;
36 GskKeepFunc keep_func;
37 GskDeleteFunc delete_func;
38 GskInsertFunc insert_func;
39
40 guint allow_abort : 1;
41};
42
43typedef struct _SplitResult {
44 long i1, i2;
45 int min_lo, min_hi;
46} SplitResult;
47
48GskDiffSettings *
49gsk_diff_settings_new (GCompareDataFunc compare_func,
50 GskKeepFunc keep_func,
51 GskDeleteFunc delete_func,
52 GskInsertFunc insert_func)
53{
54 GskDiffSettings *settings;
55
56 settings = g_slice_new0 (GskDiffSettings);
57
58 settings->compare_func = compare_func;
59 settings->keep_func = keep_func;
60 settings->delete_func = delete_func;
61 settings->insert_func = insert_func;
62
63 return settings;
64}
65
66void
67gsk_diff_settings_set_allow_abort (GskDiffSettings *settings,
68 gboolean allow_abort)
69{
70 settings->allow_abort = allow_abort;
71}
72
73void
74gsk_diff_settings_free (GskDiffSettings *settings)
75{
76 g_slice_free (GskDiffSettings, settings);
77}
78
79/*
80 * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
81 * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
82 * the forward diagonal starting from (off1, off2) and the backward diagonal
83 * starting from (lim1, lim2). If the K values on the same diagonal crosses
84 * returns the furthest point of reach. We might end up having to expensive
85 * cases using this algorithm is full, so a little bit of heuristic is needed
86 * to cut the search and to return a suboptimal point.
87 */
88static GskDiffResult
89split (gconstpointer *elem1,
90 gssize off1,
91 gssize lim1,
92 gconstpointer *elem2,
93 gssize off2,
94 gssize lim2,
95 gssize *kvdf,
96 gssize *kvdb,
97 gboolean need_min,
98 const GskDiffSettings *settings,
99 gpointer data,
100 SplitResult *spl)
101{
102 gssize dmin = off1 - lim2, dmax = lim1 - off2;
103 gssize fmid = off1 - off2, bmid = lim1 - lim2;
104 gboolean odd = (fmid - bmid) & 1;
105 gssize fmin = fmid, fmax = fmid;
106 gssize bmin = bmid, bmax = bmid;
107 gssize ec, d, i1, i2, prev1, best, dd, v, k;
108
109 /*
110 * Set initial diagonal values for both forward and backward path.
111 */
112 kvdf[fmid] = off1;
113 kvdb[bmid] = lim1;
114
115 for (ec = 1;; ec++)
116 {
117 gboolean got_snake = FALSE;
118
119 /*
120 * We need to extent the diagonal "domain" by one. If the next
121 * values exits the box boundaries we need to change it in the
122 * opposite direction because (max - min) must be a power of two.
123 * Also we initialize the external K value to -1 so that we can
124 * avoid extra conditions check inside the core loop.
125 */
126 if (fmin > dmin)
127 kvdf[--fmin - 1] = -1;
128 else
129 ++fmin;
130 if (fmax < dmax)
131 kvdf[++fmax + 1] = -1;
132 else
133 --fmax;
134
135 for (d = fmax; d >= fmin; d -= 2)
136 {
137 if (kvdf[d - 1] >= kvdf[d + 1])
138 i1 = kvdf[d - 1] + 1;
139 else
140 i1 = kvdf[d + 1];
141 prev1 = i1;
142 i2 = i1 - d;
143 for (; i1 < lim1 && i2 < lim2; i1++, i2++)
144 {
145 if (settings->compare_func (elem1[i1], elem2[i2], data) != 0)
146 break;
147 }
148 if (i1 - prev1 > XDL_SNAKE_CNT)
149 got_snake = TRUE;
150 kvdf[d] = i1;
151 if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1)
152 {
153 spl->i1 = i1;
154 spl->i2 = i2;
155 spl->min_lo = spl->min_hi = 1;
156 return GSK_DIFF_OK;
157 }
158 }
159
160 /*
161 * We need to extent the diagonal "domain" by one. If the next
162 * values exits the box boundaries we need to change it in the
163 * opposite direction because (max - min) must be a power of two.
164 * Also we initialize the external K value to -1 so that we can
165 * avoid extra conditions check inside the core loop.
166 */
167 if (bmin > dmin)
168 kvdb[--bmin - 1] = XDL_LINE_MAX;
169 else
170 ++bmin;
171 if (bmax < dmax)
172 kvdb[++bmax + 1] = XDL_LINE_MAX;
173 else
174 --bmax;
175
176 for (d = bmax; d >= bmin; d -= 2)
177 {
178 if (kvdb[d - 1] < kvdb[d + 1])
179 i1 = kvdb[d - 1];
180 else
181 i1 = kvdb[d + 1] - 1;
182 prev1 = i1;
183 i2 = i1 - d;
184 for (; i1 > off1 && i2 > off2; i1--, i2--)
185 {
186 if (settings->compare_func (elem1[i1 - 1], elem2[i2 - 1], data) != 0)
187 break;
188 }
189 if (prev1 - i1 > XDL_SNAKE_CNT)
190 got_snake = TRUE;
191 kvdb[d] = i1;
192 if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d])
193 {
194 spl->i1 = i1;
195 spl->i2 = i2;
196 spl->min_lo = spl->min_hi = 1;
197 return GSK_DIFF_OK;
198 }
199 }
200
201 if (need_min)
202 continue;
203
204 /*
205 * If the edit cost is above the heuristic trigger and if
206 * we got a good snake, we sample current diagonals to see
207 * if some of them have reached an "interesting" path. Our
208 * measure is a function of the distance from the diagonal
209 * corner (i1 + i2) penalized with the distance from the
210 * mid diagonal itself. If this value is above the current
211 * edit cost times a magic factor (XDL_K_HEUR) we consider
212 * it interesting.
213 */
214 if (got_snake && ec > XDL_HEUR_MIN_COST)
215 {
216 for (best = 0, d = fmax; d >= fmin; d -= 2)
217 {
218 dd = d > fmid ? d - fmid: fmid - d;
219 i1 = kvdf[d];
220 i2 = i1 - d;
221 v = (i1 - off1) + (i2 - off2) - dd;
222
223 if (v > XDL_K_HEUR * ec && v > best &&
224 off1 + XDL_SNAKE_CNT <= i1 && i1 < lim1 &&
225 off2 + XDL_SNAKE_CNT <= i2 && i2 < lim2)
226 {
227 for (k = 1; ; k++)
228 {
229 if (settings->compare_func (elem1[i1 - k], elem2[i2 - k], data) != 0)
230 break;
231 if (k == XDL_SNAKE_CNT)
232 {
233 best = v;
234 spl->i1 = i1;
235 spl->i2 = i2;
236 break;
237 }
238 }
239 }
240 }
241 if (best > 0)
242 {
243 spl->min_lo = 1;
244 spl->min_hi = 0;
245 return GSK_DIFF_OK;
246 }
247
248 for (best = 0, d = bmax; d >= bmin; d -= 2)
249 {
250 dd = d > bmid ? d - bmid: bmid - d;
251 i1 = kvdb[d];
252 i2 = i1 - d;
253 v = (lim1 - i1) + (lim2 - i2) - dd;
254
255 if (v > XDL_K_HEUR * ec && v > best &&
256 off1 < i1 && i1 <= lim1 - XDL_SNAKE_CNT &&
257 off2 < i2 && i2 <= lim2 - XDL_SNAKE_CNT)
258 {
259 for (k = 0; ; k++)
260 {
261 if (settings->compare_func (elem1[i1 + k], elem2[i2 + k], data) != 0)
262 break;
263
264 if (k == XDL_SNAKE_CNT - 1)
265 {
266 best = v;
267 spl->i1 = i1;
268 spl->i2 = i2;
269 break;
270 }
271 }
272 }
273 }
274 if (best > 0)
275 {
276 spl->min_lo = 0;
277 spl->min_hi = 1;
278 return GSK_DIFF_OK;
279 }
280 }
281
282 /*
283 * Enough is enough. We spent too much time here and now we collect
284 * the furthest reaching path using the (i1 + i2) measure.
285 */
286 if (ec >= MAXCOST)
287 {
288 gssize fbest, fbest1, bbest, bbest1;
289
290 if (settings->allow_abort)
291 return GSK_DIFF_ABORTED;
292
293 fbest = fbest1 = -1;
294 for (d = fmax; d >= fmin; d -= 2)
295 {
296 i1 = MIN (kvdf[d], lim1);
297 i2 = i1 - d;
298 if (lim2 < i2)
299 i1 = lim2 + d, i2 = lim2;
300 if (fbest < i1 + i2)
301 {
302 fbest = i1 + i2;
303 fbest1 = i1;
304 }
305 }
306
307 bbest = bbest1 = XDL_LINE_MAX;
308 for (d = bmax; d >= bmin; d -= 2)
309 {
310 i1 = MAX (off1, kvdb[d]);
311 i2 = i1 - d;
312 if (i2 < off2)
313 i1 = off2 + d, i2 = off2;
314 if (i1 + i2 < bbest)
315 {
316 bbest = i1 + i2;
317 bbest1 = i1;
318 }
319 }
320
321 if ((lim1 + lim2) - bbest < fbest - (off1 + off2))
322 {
323 spl->i1 = fbest1;
324 spl->i2 = fbest - fbest1;
325 spl->min_lo = 1;
326 spl->min_hi = 0;
327 }
328 else
329 {
330 spl->i1 = bbest1;
331 spl->i2 = bbest - bbest1;
332 spl->min_lo = 0;
333 spl->min_hi = 1;
334 }
335
336 return GSK_DIFF_OK;
337 }
338 }
339}
340
341/*
342 * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
343 * the box splitting function. Note that the real job (marking changed lines)
344 * is done in the two boundary reaching checks.
345 */
346static GskDiffResult
347compare (gconstpointer *elem1,
348 gssize off1,
349 gssize lim1,
350 gconstpointer *elem2,
351 gssize off2,
352 gssize lim2,
353 gssize *kvdf,
354 gssize *kvdb,
355 gboolean need_min,
356 const GskDiffSettings *settings,
357 gpointer data)
358{
359 GskDiffResult res;
360
361 /*
362 * Shrink the box by walking through each diagonal snake (SW and NE).
363 */
364 for (; off1 < lim1 && off2 < lim2; off1++, off2++)
365 {
366 if (settings->compare_func (elem1[off1], elem2[off2], data) != 0)
367 break;
368
369 res = settings->keep_func (elem1[off1], elem2[off2], data);
370 if (res != GSK_DIFF_OK)
371 return res;
372 }
373
374 for (; off1 < lim1 && off2 < lim2; lim1--, lim2--)
375 {
376 if (settings->compare_func (elem1[lim1 - 1], elem2[lim2 - 1], data) != 0)
377 break;
378
379 res = settings->keep_func (elem1[lim1 - 1], elem2[lim2 - 1], data);
380 if (res != GSK_DIFF_OK)
381 return res;
382 }
383
384 /*
385 * If one dimension is empty, then all records on the other one must
386 * be obviously changed.
387 */
388 if (off1 == lim1)
389 {
390 for (; off2 < lim2; off2++)
391 {
392 res = settings->insert_func (elem2[off2], off2, data);
393 if (res != GSK_DIFF_OK)
394 return res;
395 }
396 }
397 else if (off2 == lim2)
398 {
399 for (; off1 < lim1; off1++)
400 {
401 res = settings->delete_func (elem1[off1], off1, data);
402 if (res != GSK_DIFF_OK)
403 return res;
404 }
405 }
406 else
407 {
408 SplitResult spl = { 0, };
409
410 /*
411 * Divide ...
412 */
413 res = split (elem1, off1, lim1,
414 elem2, off2, lim2,
415 kvdf, kvdb, need_min,
416 settings, data,
417 spl: &spl);
418 if (res != GSK_DIFF_OK)
419 return res;
420
421 /*
422 * ... et Impera.
423 */
424 res = compare (elem1, off1, lim1: spl.i1,
425 elem2, off2, lim2: spl.i2,
426 kvdf, kvdb, need_min: spl.min_lo,
427 settings, data);
428 if (res != GSK_DIFF_OK)
429 return res;
430 res = compare (elem1, off1: spl.i1, lim1,
431 elem2, off2: spl.i2, lim2,
432 kvdf, kvdb, need_min: spl.min_hi,
433 settings, data);
434 if (res != GSK_DIFF_OK)
435 return res;
436 }
437
438 return GSK_DIFF_OK;
439}
440
441#if 0
442 ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
443 if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {
444
445 xdl_free_env(xe);
446 return -1;
447 }
448 kvdf = kvd;
449 kvdb = kvdf + ndiags;
450 kvdf += xe->xdf2.nreff + 1;
451 kvdb += xe->xdf2.nreff + 1;
452
453 xenv.mxcost = xdl_bogosqrt(ndiags);
454 if (xenv.mxcost < XDL_MAX_COST_MIN)
455 xenv.mxcost = XDL_MAX_COST_MIN;
456 xenv.snake_cnt = XDL_SNAKE_CNT;
457 xenv.heur_min = XDL_HEUR_MIN_COST;
458
459 dd1.nrec = xe->xdf1.nreff;
460 dd1.ha = xe->xdf1.ha;
461 dd1.rchg = xe->xdf1.rchg;
462 dd1.rindex = xe->xdf1.rindex;
463 dd2.nrec = xe->xdf2.nreff;
464 dd2.ha = xe->xdf2.ha;
465 dd2.rchg = xe->xdf2.rchg;
466 dd2.rindex = xe->xdf2.rindex;
467#endif
468
469GskDiffResult
470gsk_diff (gconstpointer *elem1,
471 gsize n1,
472 gconstpointer *elem2,
473 gsize n2,
474 const GskDiffSettings *settings,
475 gpointer data)
476{
477 gsize ndiags;
478 gssize *kvd, *kvdf, *kvdb;
479 GskDiffResult res;
480
481 ndiags = n1 + n2 + 3;
482
483 kvd = g_new (gssize, 2 * ndiags + 2);
484 kvdf = kvd;
485 kvdb = kvd + ndiags;
486 kvdf += n2 + 1;
487 kvdb += n2 + 1;
488
489 res = compare (elem1, off1: 0, lim1: n1,
490 elem2, off2: 0, lim2: n2,
491 kvdf, kvdb, FALSE,
492 settings, data);
493
494 g_free (mem: kvd);
495
496 return res;
497}
498
499

source code of gtk/gsk/gskdiff.c