1 | #include <string.h> |
2 | #include <float.h> |
3 | #include <math.h> |
4 | #include <glib.h> |
5 | #include <assert.h> |
6 | |
7 | /* See Golub and Reinsch, |
8 | * "Handbook for Automatic Computation vol II - Linear Algebra", |
9 | * Springer, 1971 |
10 | */ |
11 | |
12 | |
13 | #define MAX_ITERATION_COUNT 30 |
14 | |
15 | /* Perform Householder reduction to bidiagonal form |
16 | * |
17 | * Input: Matrix A of size nrows x ncols |
18 | * |
19 | * Output: Matrices and vectors such that |
20 | * A = U*Bidiag(diagonal, superdiagonal)*Vt |
21 | * |
22 | * All matrices are allocated by the caller |
23 | * |
24 | * Sizes: |
25 | * A, U: nrows x ncols |
26 | * diagonal, superdiagonal: ncols |
27 | * V: ncols x ncols |
28 | */ |
29 | static void |
30 | householder_reduction (double *A, |
31 | int nrows, |
32 | int ncols, |
33 | double *U, |
34 | double *V, |
35 | double *diagonal, |
36 | double *superdiagonal) |
37 | { |
38 | int i, j, k, ip1; |
39 | double s, s2, si, scale; |
40 | double *pu, *pui, *pv, *pvi; |
41 | double half_norm_squared; |
42 | |
43 | assert (nrows >= 2); |
44 | assert (ncols >= 2); |
45 | |
46 | memcpy (dest: U, src: A, n: sizeof (double) * nrows * ncols); |
47 | |
48 | diagonal[0] = 0.0; |
49 | s = 0.0; |
50 | scale = 0.0; |
51 | for (i = 0, pui = U, ip1 = 1; |
52 | i < ncols; |
53 | pui += ncols, i++, ip1++) |
54 | { |
55 | superdiagonal[i] = scale * s; |
56 | |
57 | for (j = i, pu = pui, scale = 0.0; |
58 | j < nrows; |
59 | j++, pu += ncols) |
60 | scale += fabs( x: *(pu + i) ); |
61 | |
62 | if (scale > 0.0) |
63 | { |
64 | for (j = i, pu = pui, s2 = 0.0; j < nrows; j++, pu += ncols) |
65 | { |
66 | *(pu + i) /= scale; |
67 | s2 += *(pu + i) * *(pu + i); |
68 | } |
69 | s = *(pui + i) < 0.0 ? sqrt (x: s2) : -sqrt (x: s2); |
70 | half_norm_squared = *(pui + i) * s - s2; |
71 | *(pui + i) -= s; |
72 | |
73 | for (j = ip1; j < ncols; j++) |
74 | { |
75 | for (k = i, si = 0.0, pu = pui; k < nrows; k++, pu += ncols) |
76 | si += *(pu + i) * *(pu + j); |
77 | si /= half_norm_squared; |
78 | for (k = i, pu = pui; k < nrows; k++, pu += ncols) |
79 | *(pu + j) += si * *(pu + i); |
80 | } |
81 | } |
82 | for (j = i, pu = pui; j < nrows; j++, pu += ncols) |
83 | *(pu + i) *= scale; |
84 | diagonal[i] = s * scale; |
85 | s = 0.0; |
86 | scale = 0.0; |
87 | if (i >= nrows || i == ncols - 1) |
88 | continue; |
89 | for (j = ip1; j < ncols; j++) |
90 | scale += fabs (x: *(pui + j)); |
91 | if (scale > 0.0) |
92 | { |
93 | for (j = ip1, s2 = 0.0; j < ncols; j++) |
94 | { |
95 | *(pui + j) /= scale; |
96 | s2 += *(pui + j) * *(pui + j); |
97 | } |
98 | s = *(pui + ip1) < 0.0 ? sqrt (x: s2) : -sqrt (x: s2); |
99 | half_norm_squared = *(pui + ip1) * s - s2; |
100 | *(pui + ip1) -= s; |
101 | for (k = ip1; k < ncols; k++) |
102 | superdiagonal[k] = *(pui + k) / half_norm_squared; |
103 | if (i < (nrows - 1)) |
104 | { |
105 | for (j = ip1, pu = pui + ncols; j < nrows; j++, pu += ncols) |
106 | { |
107 | for (k = ip1, si = 0.0; k < ncols; k++) |
108 | si += *(pui + k) * *(pu + k); |
109 | for (k = ip1; k < ncols; k++) |
110 | *(pu + k) += si * superdiagonal[k]; |
111 | } |
112 | } |
113 | for (k = ip1; k < ncols; k++) |
114 | *(pui + k) *= scale; |
115 | } |
116 | } |
117 | |
118 | pui = U + ncols * (ncols - 2); |
119 | pvi = V + ncols * (ncols - 1); |
120 | *(pvi + ncols - 1) = 1.0; |
121 | s = superdiagonal[ncols - 1]; |
122 | pvi -= ncols; |
123 | for (i = ncols - 2, ip1 = ncols - 1; |
124 | i >= 0; |
125 | i--, pui -= ncols, pvi -= ncols, ip1--) |
126 | { |
127 | if (s != 0.0) |
128 | { |
129 | pv = pvi + ncols; |
130 | for (j = ip1; j < ncols; j++, pv += ncols) |
131 | *(pv + i) = ( *(pui + j) / *(pui + ip1) ) / s; |
132 | for (j = ip1; j < ncols; j++) |
133 | { |
134 | si = 0.0; |
135 | for (k = ip1, pv = pvi + ncols; k < ncols; k++, pv += ncols) |
136 | si += *(pui + k) * *(pv + j); |
137 | for (k = ip1, pv = pvi + ncols; k < ncols; k++, pv += ncols) |
138 | *(pv + j) += si * *(pv + i); |
139 | } |
140 | } |
141 | pv = pvi + ncols; |
142 | for (j = ip1; j < ncols; j++, pv += ncols) |
143 | { |
144 | *(pvi + j) = 0.0; |
145 | *(pv + i) = 0.0; |
146 | } |
147 | *(pvi + i) = 1.0; |
148 | s = superdiagonal[i]; |
149 | } |
150 | |
151 | pui = U + ncols * (ncols - 1); |
152 | for (i = ncols - 1, ip1 = ncols; |
153 | i >= 0; |
154 | ip1 = i, i--, pui -= ncols) |
155 | { |
156 | s = diagonal[i]; |
157 | for (j = ip1; j < ncols; j++) |
158 | *(pui + j) = 0.0; |
159 | if (s != 0.0) |
160 | { |
161 | for (j = ip1; j < ncols; j++) |
162 | { |
163 | si = 0.0; |
164 | pu = pui + ncols; |
165 | for (k = ip1; k < nrows; k++, pu += ncols) |
166 | si += *(pu + i) * *(pu + j); |
167 | si = (si / *(pui + i)) / s; |
168 | for (k = i, pu = pui; k < nrows; k++, pu += ncols) |
169 | *(pu + j) += si * *(pu + i); |
170 | } |
171 | for (j = i, pu = pui; j < nrows; j++, pu += ncols) |
172 | *(pu + i) /= s; |
173 | } |
174 | else |
175 | for (j = i, pu = pui; j < nrows; j++, pu += ncols) |
176 | *(pu + i) = 0.0; |
177 | *(pui + i) += 1.0; |
178 | } |
179 | } |
180 | |
181 | /* Perform Givens reduction |
182 | * |
183 | * Input: Matrices such that |
184 | * A = U*Bidiag(diagonal,superdiagonal)*Vt |
185 | * |
186 | * Output: The same, with superdiagonal = 0 |
187 | * |
188 | * All matrices are allocated by the caller |
189 | * |
190 | * Sizes: |
191 | * U: nrows x ncols |
192 | * diagonal, superdiagonal: ncols |
193 | * V: ncols x ncols |
194 | */ |
195 | static int |
196 | givens_reduction (int nrows, |
197 | int ncols, |
198 | double *U, |
199 | double *V, |
200 | double *diagonal, |
201 | double *superdiagonal) |
202 | { |
203 | double epsilon; |
204 | double c, s; |
205 | double f,g,h; |
206 | double x,y,z; |
207 | double *pu, *pv; |
208 | int i,j,k,m; |
209 | int rotation_test; |
210 | int iteration_count; |
211 | |
212 | assert (nrows >= 2); |
213 | assert (ncols >= 2); |
214 | |
215 | for (i = 0, x = 0.0; i < ncols; i++) |
216 | { |
217 | y = fabs (x: diagonal[i]) + fabs (x: superdiagonal[i]); |
218 | if (x < y) |
219 | x = y; |
220 | } |
221 | epsilon = x * DBL_EPSILON; |
222 | for (k = ncols - 1; k >= 0; k--) |
223 | { |
224 | iteration_count = 0; |
225 | while (1) |
226 | { |
227 | rotation_test = 1; |
228 | for (m = k; m >= 0; m--) |
229 | { |
230 | if (fabs (x: superdiagonal[m]) <= epsilon) |
231 | { |
232 | rotation_test = 0; |
233 | break; |
234 | } |
235 | if (fabs (x: diagonal[m-1]) <= epsilon) |
236 | break; |
237 | } |
238 | if (rotation_test) |
239 | { |
240 | c = 0.0; |
241 | s = 1.0; |
242 | for (i = m; i <= k; i++) |
243 | { |
244 | f = s * superdiagonal[i]; |
245 | superdiagonal[i] *= c; |
246 | if (fabs (x: f) <= epsilon) |
247 | break; |
248 | g = diagonal[i]; |
249 | h = sqrt (x: f*f + g*g); |
250 | diagonal[i] = h; |
251 | c = g / h; |
252 | s = -f / h; |
253 | for (j = 0, pu = U; j < nrows; j++, pu += ncols) |
254 | { |
255 | y = *(pu + m - 1); |
256 | z = *(pu + i); |
257 | *(pu + m - 1 ) = y * c + z * s; |
258 | *(pu + i) = -y * s + z * c; |
259 | } |
260 | } |
261 | } |
262 | z = diagonal[k]; |
263 | if (m == k) |
264 | { |
265 | if (z < 0.0) |
266 | { |
267 | diagonal[k] = -z; |
268 | for (j = 0, pv = V; j < ncols; j++, pv += ncols) |
269 | *(pv + k) = - *(pv + k); |
270 | } |
271 | break; |
272 | } |
273 | else |
274 | { |
275 | if (iteration_count >= MAX_ITERATION_COUNT) |
276 | return -1; |
277 | iteration_count++; |
278 | x = diagonal[m]; |
279 | y = diagonal[k-1]; |
280 | g = superdiagonal[k-1]; |
281 | h = superdiagonal[k]; |
282 | f = ((y - z) * ( y + z ) + (g - h) * (g + h))/(2.0 * h * y); |
283 | g = sqrt (x: f * f + 1.0); |
284 | if (f < 0.0) |
285 | g = -g; |
286 | f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x; |
287 | c = 1.0; |
288 | s = 1.0; |
289 | for (i = m + 1; i <= k; i++) |
290 | { |
291 | g = superdiagonal[i]; |
292 | y = diagonal[i]; |
293 | h = s * g; |
294 | g *= c; |
295 | z = sqrt (x: f * f + h * h); |
296 | superdiagonal[i-1] = z; |
297 | c = f / z; |
298 | s = h / z; |
299 | f = x * c + g * s; |
300 | g = -x * s + g * c; |
301 | h = y * s; |
302 | y *= c; |
303 | for (j = 0, pv = V; j < ncols; j++, pv += ncols) |
304 | { |
305 | x = *(pv + i - 1); |
306 | z = *(pv + i); |
307 | *(pv + i - 1) = x * c + z * s; |
308 | *(pv + i) = -x * s + z * c; |
309 | } |
310 | z = sqrt (x: f * f + h * h); |
311 | diagonal[i - 1] = z; |
312 | if (z != 0.0) |
313 | { |
314 | c = f / z; |
315 | s = h / z; |
316 | } |
317 | f = c * g + s * y; |
318 | x = -s * g + c * y; |
319 | for (j = 0, pu = U; j < nrows; j++, pu += ncols) |
320 | { |
321 | y = *(pu + i - 1); |
322 | z = *(pu + i); |
323 | *(pu + i - 1) = c * y + s * z; |
324 | *(pu + i) = -s * y + c * z; |
325 | } |
326 | } |
327 | superdiagonal[m] = 0.0; |
328 | superdiagonal[k] = f; |
329 | diagonal[k] = x; |
330 | } |
331 | } |
332 | } |
333 | return 0; |
334 | } |
335 | |
336 | /* Given a singular value decomposition |
337 | * of an nrows x ncols matrix A = U*Diag(S)*Vt, |
338 | * sort the values of S by decreasing value, |
339 | * permuting V to match. |
340 | */ |
341 | static void |
342 | sort_singular_values (int nrows, |
343 | int ncols, |
344 | double *S, |
345 | double *U, |
346 | double *V) |
347 | { |
348 | int i, j, max_index; |
349 | double temp; |
350 | double *p1, *p2; |
351 | |
352 | assert (nrows >= 2); |
353 | assert (ncols >= 2); |
354 | |
355 | for (i = 0; i < ncols - 1; i++) |
356 | { |
357 | max_index = i; |
358 | for (j = i + 1; j < ncols; j++) |
359 | if (S[j] > S[max_index]) |
360 | max_index = j; |
361 | if (max_index == i) |
362 | continue; |
363 | temp = S[i]; |
364 | S[i] = S[max_index]; |
365 | S[max_index] = temp; |
366 | p1 = U + max_index; |
367 | p2 = U + i; |
368 | for (j = 0; j < nrows; j++, p1 += ncols, p2 += ncols) |
369 | { |
370 | temp = *p1; |
371 | *p1 = *p2; |
372 | *p2 = temp; |
373 | } |
374 | p1 = V + max_index; |
375 | p2 = V + i; |
376 | for (j = 0; j < ncols; j++, p1 += ncols, p2 += ncols) |
377 | { |
378 | temp = *p1; |
379 | *p1 = *p2; |
380 | *p2 = temp; |
381 | } |
382 | } |
383 | } |
384 | |
385 | /* Compute a singular value decomposition of A, |
386 | * A = U*Diag(S)*Vt |
387 | * |
388 | * All matrices are allocated by the caller |
389 | * |
390 | * Sizes: |
391 | * A, U: nrows x ncols |
392 | * S: ncols |
393 | * V: ncols x ncols |
394 | */ |
395 | int |
396 | singular_value_decomposition (double *A, |
397 | int nrows, |
398 | int ncols, |
399 | double *U, |
400 | double *S, |
401 | double *V) |
402 | { |
403 | double *superdiagonal; |
404 | |
405 | superdiagonal = g_alloca (sizeof (double) * ncols); |
406 | |
407 | if (nrows < ncols) |
408 | return -1; |
409 | |
410 | householder_reduction (A, nrows, ncols, U, V, diagonal: S, superdiagonal); |
411 | |
412 | if (givens_reduction (nrows, ncols, U, V, diagonal: S, superdiagonal) < 0) |
413 | return -1; |
414 | |
415 | sort_singular_values (nrows, ncols, S, U, V); |
416 | |
417 | return 0; |
418 | } |
419 | |
420 | /* |
421 | * Given a singular value decomposition of A = U*Diag(S)*Vt, |
422 | * compute the best approximation x to A*x = B. |
423 | * |
424 | * All matrices are allocated by the caller |
425 | * |
426 | * Sizes: |
427 | * U: nrows x ncols |
428 | * S: ncols |
429 | * V: ncols x ncols |
430 | * B, x: ncols |
431 | */ |
432 | void |
433 | singular_value_decomposition_solve (double *U, |
434 | double *S, |
435 | double *V, |
436 | int nrows, |
437 | int ncols, |
438 | double *B, |
439 | double *x) |
440 | { |
441 | int i, j, k; |
442 | double *pu, *pv; |
443 | double d; |
444 | double tolerance; |
445 | |
446 | assert (nrows >= 2); |
447 | assert (ncols >= 2); |
448 | |
449 | tolerance = DBL_EPSILON * S[0] * (double) ncols; |
450 | |
451 | for (i = 0, pv = V; i < ncols; i++, pv += ncols) |
452 | { |
453 | x[i] = 0.0; |
454 | for (j = 0; j < ncols; j++) |
455 | { |
456 | if (S[j] > tolerance) |
457 | { |
458 | for (k = 0, d = 0.0, pu = U; k < nrows; k++, pu += ncols) |
459 | d += *(pu + j) * B[k]; |
460 | x[i] += d * *(pv + j) / S[j]; |
461 | } |
462 | } |
463 | } |
464 | } |
465 | |