gskdiff.c source code [gtk/gsk/gskdiff.c]

1	/*
2	* Copyright © 2003 Davide Libenzi
3	* 2018 Benjamin Otte
4	*
5	* This library is free software; you can redistribute it and/or
6	* modify it under the terms of the GNU Lesser General Public
7	* License as published by the Free Software Foundation; either
8	* version 2.1 of the License, or (at your option) any later version.
9	*
10	* This library is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	* Lesser General Public License for more details.
14	*
15	* You should have received a copy of the GNU Lesser General Public
16	* License along with this library. If not, see <http://www.gnu.org/licenses/>.
17	*
18	* Authors: Davide Libenzi <davidel@xmailserver.org>
19	* Benjamin Otte <otte@gnome.org>
20	*/
21
22	#include "config.h"
23
24	#include "gskdiffprivate.h"
25
26
27	#define XDL_MAX_COST_MIN 256
28	#define XDL_HEUR_MIN_COST 256
29	#define XDL_LINE_MAX G_MAXSSIZE
30	#define XDL_SNAKE_CNT 20
31	#define XDL_K_HEUR 4
32	#define MAXCOST 20
33
34	struct _GskDiffSettings {
35	GCompareDataFunc compare_func;
36	GskKeepFunc keep_func;
37	GskDeleteFunc delete_func;
38	GskInsertFunc insert_func;
39
40	guint allow_abort : `1`;
41	};
42
43	typedef struct _SplitResult {
44	long i1, i2;
45	int min_lo, min_hi;
46	} SplitResult;
47
48	GskDiffSettings *
49	gsk_diff_settings_new (GCompareDataFunc compare_func,
50	GskKeepFunc keep_func,
51	GskDeleteFunc delete_func,
52	GskInsertFunc insert_func)
53	{
54	GskDiffSettings *settings;
55
56	settings = g_slice_new0 (GskDiffSettings);
57
58	settings->compare_func = compare_func;
59	settings->keep_func = keep_func;
60	settings->delete_func = delete_func;
61	settings->insert_func = insert_func;
62
63	return settings;
64	}
65
66	void
67	gsk_diff_settings_set_allow_abort (GskDiffSettings *settings,
68	gboolean allow_abort)
69	{
70	settings->allow_abort = allow_abort;
71	}
72
73	void
74	gsk_diff_settings_free (GskDiffSettings *settings)
75	{
76	g_slice_free (GskDiffSettings, settings);
77	}
78
79	/*
80	* See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
81	* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
82	* the forward diagonal starting from (off1, off2) and the backward diagonal
83	* starting from (lim1, lim2). If the K values on the same diagonal crosses
84	* returns the furthest point of reach. We might end up having to expensive
85	* cases using this algorithm is full, so a little bit of heuristic is needed
86	* to cut the search and to return a suboptimal point.
87	*/
88	static GskDiffResult
89	split (gconstpointer *elem1,
90	gssize off1,
91	gssize lim1,
92	gconstpointer *elem2,
93	gssize off2,
94	gssize lim2,
95	gssize *kvdf,
96	gssize *kvdb,
97	gboolean need_min,
98	const GskDiffSettings *settings,
99	gpointer data,
100	SplitResult *spl)
101	{
102	gssize dmin = off1 - lim2, dmax = lim1 - off2;
103	gssize fmid = off1 - off2, bmid = lim1 - lim2;
104	gboolean odd = (fmid - bmid) & `1`;
105	gssize fmin = fmid, fmax = fmid;
106	gssize bmin = bmid, bmax = bmid;
107	gssize ec, d, i1, i2, prev1, best, dd, v, k;
108
109	/*
110	* Set initial diagonal values for both forward and backward path.
111	*/
112	kvdf[fmid] = off1;
113	kvdb[bmid] = lim1;
114
115	for (ec = `1`;; ec++)
116	{
117	gboolean got_snake = FALSE;
118
119	/*
120	* We need to extent the diagonal "domain" by one. If the next
121	* values exits the box boundaries we need to change it in the
122	* opposite direction because (max - min) must be a power of two.
123	* Also we initialize the external K value to -1 so that we can
124	* avoid extra conditions check inside the core loop.
125	*/
126	if (fmin > dmin)
127	kvdf[--fmin - `1`] = -`1`;
128	else
129	++fmin;
130	if (fmax < dmax)
131	kvdf[++fmax + `1`] = -`1`;
132	else
133	--fmax;
134
135	for (d = fmax; d >= fmin; d -= `2`)
136	{
137	if (kvdf[d - `1`] >= kvdf[d + `1`])
138	i1 = kvdf[d - `1`] + `1`;
139	else
140	i1 = kvdf[d + `1`];
141	prev1 = i1;
142	i2 = i1 - d;
143	for (; i1 < lim1 && i2 < lim2; i1++, i2++)
144	{
145	if (settings->compare_func (elem1[i1], elem2[i2], data) != `0`)
146	break;
147	}
148	if (i1 - prev1 > XDL_SNAKE_CNT)
149	got_snake = TRUE;
150	kvdf[d] = i1;
151	if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1)
152	{
153	spl->i1 = i1;
154	spl->i2 = i2;
155	spl->min_lo = spl->min_hi = `1`;
156	return GSK_DIFF_OK;
157	}
158	}
159
160	/*
161	* We need to extent the diagonal "domain" by one. If the next
162	* values exits the box boundaries we need to change it in the
163	* opposite direction because (max - min) must be a power of two.
164	* Also we initialize the external K value to -1 so that we can
165	* avoid extra conditions check inside the core loop.
166	*/
167	if (bmin > dmin)
168	kvdb[--bmin - `1`] = XDL_LINE_MAX;
169	else
170	++bmin;
171	if (bmax < dmax)
172	kvdb[++bmax + `1`] = XDL_LINE_MAX;
173	else
174	--bmax;
175
176	for (d = bmax; d >= bmin; d -= `2`)
177	{
178	if (kvdb[d - `1`] < kvdb[d + `1`])
179	i1 = kvdb[d - `1`];
180	else
181	i1 = kvdb[d + `1`] - `1`;
182	prev1 = i1;
183	i2 = i1 - d;
184	for (; i1 > off1 && i2 > off2; i1--, i2--)
185	{
186	if (settings->compare_func (elem1[i1 - `1`], elem2[i2 - `1`], data) != `0`)
187	break;
188	}
189	if (prev1 - i1 > XDL_SNAKE_CNT)
190	got_snake = TRUE;
191	kvdb[d] = i1;
192	if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d])
193	{
194	spl->i1 = i1;
195	spl->i2 = i2;
196	spl->min_lo = spl->min_hi = `1`;
197	return GSK_DIFF_OK;
198	}
199	}
200
201	if (need_min)
202	continue;
203
204	/*
205	* If the edit cost is above the heuristic trigger and if
206	* we got a good snake, we sample current diagonals to see
207	* if some of them have reached an "interesting" path. Our
208	* measure is a function of the distance from the diagonal
209	* corner (i1 + i2) penalized with the distance from the
210	* mid diagonal itself. If this value is above the current
211	* edit cost times a magic factor (XDL_K_HEUR) we consider
212	* it interesting.
213	*/
214	if (got_snake && ec > XDL_HEUR_MIN_COST)
215	{
216	for (best = `0`, d = fmax; d >= fmin; d -= `2`)
217	{
218	dd = d > fmid ? d - fmid: fmid - d;
219	i1 = kvdf[d];
220	i2 = i1 - d;
221	v = (i1 - off1) + (i2 - off2) - dd;
222
223	if (v > XDL_K_HEUR * ec && v > best &&
224	off1 + XDL_SNAKE_CNT <= i1 && i1 < lim1 &&
225	off2 + XDL_SNAKE_CNT <= i2 && i2 < lim2)
226	{
227	for (k = `1`; ; k++)
228	{
229	if (settings->compare_func (elem1[i1 - k], elem2[i2 - k], data) != `0`)
230	break;
231	if (k == XDL_SNAKE_CNT)
232	{
233	best = v;
234	spl->i1 = i1;
235	spl->i2 = i2;
236	break;
237	}
238	}
239	}
240	}
241	if (best > `0`)
242	{
243	spl->min_lo = `1`;
244	spl->min_hi = `0`;
245	return GSK_DIFF_OK;
246	}
247
248	for (best = `0`, d = bmax; d >= bmin; d -= `2`)
249	{
250	dd = d > bmid ? d - bmid: bmid - d;
251	i1 = kvdb[d];
252	i2 = i1 - d;
253	v = (lim1 - i1) + (lim2 - i2) - dd;
254
255	if (v > XDL_K_HEUR * ec && v > best &&
256	off1 < i1 && i1 <= lim1 - XDL_SNAKE_CNT &&
257	off2 < i2 && i2 <= lim2 - XDL_SNAKE_CNT)
258	{
259	for (k = `0`; ; k++)
260	{
261	if (settings->compare_func (elem1[i1 + k], elem2[i2 + k], data) != `0`)
262	break;
263
264	if (k == XDL_SNAKE_CNT - `1`)
265	{
266	best = v;
267	spl->i1 = i1;
268	spl->i2 = i2;
269	break;
270	}
271	}
272	}
273	}
274	if (best > `0`)
275	{
276	spl->min_lo = `0`;
277	spl->min_hi = `1`;
278	return GSK_DIFF_OK;
279	}
280	}
281
282	/*
283	* Enough is enough. We spent too much time here and now we collect
284	* the furthest reaching path using the (i1 + i2) measure.
285	*/
286	if (ec >= MAXCOST)
287	{
288	gssize fbest, fbest1, bbest, bbest1;
289
290	if (settings->allow_abort)
291	return GSK_DIFF_ABORTED;
292
293	fbest = fbest1 = -`1`;
294	for (d = fmax; d >= fmin; d -= `2`)
295	{
296	i1 = MIN (kvdf[d], lim1);
297	i2 = i1 - d;
298	if (lim2 < i2)
299	i1 = lim2 + d, i2 = lim2;
300	if (fbest < i1 + i2)
301	{
302	fbest = i1 + i2;
303	fbest1 = i1;
304	}
305	}
306
307	bbest = bbest1 = XDL_LINE_MAX;
308	for (d = bmax; d >= bmin; d -= `2`)
309	{
310	i1 = MAX (off1, kvdb[d]);
311	i2 = i1 - d;
312	if (i2 < off2)
313	i1 = off2 + d, i2 = off2;
314	if (i1 + i2 < bbest)
315	{
316	bbest = i1 + i2;
317	bbest1 = i1;
318	}
319	}
320
321	if ((lim1 + lim2) - bbest < fbest - (off1 + off2))
322	{
323	spl->i1 = fbest1;
324	spl->i2 = fbest - fbest1;
325	spl->min_lo = `1`;
326	spl->min_hi = `0`;
327	}
328	else
329	{
330	spl->i1 = bbest1;
331	spl->i2 = bbest - bbest1;
332	spl->min_lo = `0`;
333	spl->min_hi = `1`;
334	}
335
336	return GSK_DIFF_OK;
337	}
338	}
339	}
340
341	/*
342	* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
343	* the box splitting function. Note that the real job (marking changed lines)
344	* is done in the two boundary reaching checks.
345	*/
346	static GskDiffResult
347	compare (gconstpointer *elem1,
348	gssize off1,
349	gssize lim1,
350	gconstpointer *elem2,
351	gssize off2,
352	gssize lim2,
353	gssize *kvdf,
354	gssize *kvdb,
355	gboolean need_min,
356	const GskDiffSettings *settings,
357	gpointer data)
358	{
359	GskDiffResult res;
360
361	/*
362	* Shrink the box by walking through each diagonal snake (SW and NE).
363	*/
364	for (; off1 < lim1 && off2 < lim2; off1++, off2++)
365	{
366	if (settings->compare_func (elem1[off1], elem2[off2], data) != `0`)
367	break;
368
369	res = settings->keep_func (elem1[off1], elem2[off2], data);
370	if (res != GSK_DIFF_OK)
371	return res;
372	}
373
374	for (; off1 < lim1 && off2 < lim2; lim1--, lim2--)
375	{
376	if (settings->compare_func (elem1[lim1 - `1`], elem2[lim2 - `1`], data) != `0`)
377	break;
378
379	res = settings->keep_func (elem1[lim1 - `1`], elem2[lim2 - `1`], data);
380	if (res != GSK_DIFF_OK)
381	return res;
382	}
383
384	/*
385	* If one dimension is empty, then all records on the other one must
386	* be obviously changed.
387	*/
388	if (off1 == lim1)
389	{
390	for (; off2 < lim2; off2++)
391	{
392	res = settings->insert_func (elem2[off2], off2, data);
393	if (res != GSK_DIFF_OK)
394	return res;
395	}
396	}
397	else if (off2 == lim2)
398	{
399	for (; off1 < lim1; off1++)
400	{
401	res = settings->delete_func (elem1[off1], off1, data);
402	if (res != GSK_DIFF_OK)
403	return res;
404	}
405	}
406	else
407	{
408	SplitResult spl = { `0`, };
409
410	/*
411	* Divide ...
412	*/
413	res = split (elem1, off1, lim1,
414	elem2, off2, lim2,
415	kvdf, kvdb, need_min,
416	settings, data,
417	spl: &spl);
418	if (res != GSK_DIFF_OK)
419	return res;
420
421	/*
422	* ... et Impera.
423	*/
424	res = compare (elem1, off1, lim1: spl.i1,
425	elem2, off2, lim2: spl.i2,
426	kvdf, kvdb, need_min: spl.min_lo,
427	settings, data);
428	if (res != GSK_DIFF_OK)
429	return res;
430	res = compare (elem1, off1: spl.i1, lim1,
431	elem2, off2: spl.i2, lim2,
432	kvdf, kvdb, need_min: spl.min_hi,
433	settings, data);
434	if (res != GSK_DIFF_OK)
435	return res;
436	}
437
438	return GSK_DIFF_OK;
439	}
440
441	#if 0
442	ndiags = xe->xdf1.nreff + xe->xdf2.nreff + `3`;
443	if (!(kvd = (long ) xdl_malloc((`2` ndiags + `2`) * sizeof(long)))) {
444
445	xdl_free_env(xe);
446	return -`1`;
447	}
448	kvdf = kvd;
449	kvdb = kvdf + ndiags;
450	kvdf += xe->xdf2.nreff + `1`;
451	kvdb += xe->xdf2.nreff + `1`;
452
453	xenv.mxcost = xdl_bogosqrt(ndiags);
454	if (xenv.mxcost < XDL_MAX_COST_MIN)
455	xenv.mxcost = XDL_MAX_COST_MIN;
456	xenv.snake_cnt = XDL_SNAKE_CNT;
457	xenv.heur_min = XDL_HEUR_MIN_COST;
458
459	dd1.nrec = xe->xdf1.nreff;
460	dd1.ha = xe->xdf1.ha;
461	dd1.rchg = xe->xdf1.rchg;
462	dd1.rindex = xe->xdf1.rindex;
463	dd2.nrec = xe->xdf2.nreff;
464	dd2.ha = xe->xdf2.ha;
465	dd2.rchg = xe->xdf2.rchg;
466	dd2.rindex = xe->xdf2.rindex;
467	#endif
468
469	GskDiffResult
470	gsk_diff (gconstpointer *elem1,
471	gsize n1,
472	gconstpointer *elem2,
473	gsize n2,
474	const GskDiffSettings *settings,
475	gpointer data)
476	{
477	gsize ndiags;
478	gssize kvd, kvdf, *kvdb;
479	GskDiffResult res;
480
481	ndiags = n1 + n2 + `3`;
482
483	kvd = g_new (gssize, `2` * ndiags + `2`);
484	kvdf = kvd;
485	kvdb = kvd + ndiags;
486	kvdf += n2 + `1`;
487	kvdb += n2 + `1`;
488
489	res = compare (elem1, off1: `0`, lim1: n1,
490	elem2, off2: `0`, lim2: n2,
491	kvdf, kvdb, FALSE,
492	settings, data);
493
494	g_free (mem: kvd);
495
496	return res;
497	}
498
499

Provided by KDAB

Definitions

source code of gtk/gsk/gskdiff.c