bch.c source code [linux/lib/bch.c]

1	/*
2	* Generic binary BCH encoding/decoding library
3	*
4	* This program is free software; you can redistribute it and/or modify it
5	* under the terms of the GNU General Public License version 2 as published by
6	* the Free Software Foundation.
7	*
8	* This program is distributed in the hope that it will be useful, but WITHOUT
9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11	* more details.
12	*
13	* You should have received a copy of the GNU General Public License along with
14	* this program; if not, write to the Free Software Foundation, Inc., 51
15	* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16	*
17	* Copyright © 2011 Parrot S.A.
18	*
19	* Author: Ivan Djelic <ivan.djelic@parrot.com>
20	*
21	* Description:
22	*
23	* This library provides runtime configurable encoding/decoding of binary
24	* Bose-Chaudhuri-Hocquenghem (BCH) codes.
25	*
26	* Call bch_init to get a pointer to a newly allocated bch_control structure for
27	* the given m (Galois field order), t (error correction capability) and
28	* (optional) primitive polynomial parameters.
29	*
30	* Call bch_encode to compute and store ecc parity bytes to a given buffer.
31	* Call bch_decode to detect and locate errors in received data.
32	*
33	* On systems supporting hw BCH features, intermediate results may be provided
34	* to bch_decode in order to skip certain steps. See bch_decode() documentation
35	* for details.
36	*
37	* Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38	* parameters m and t; thus allowing extra compiler optimizations and providing
39	* better (up to 2x) encoding performance. Using this option makes sense when
40	* (m,t) are fixed and known in advance, e.g. when using BCH error correction
41	* on a particular NAND flash device.
42	*
43	* Algorithmic details:
44	*
45	* Encoding is performed by processing 32 input bits in parallel, using 4
46	* remainder lookup tables.
47	*
48	* The final stage of decoding involves the following internal steps:
49	* a. Syndrome computation
50	* b. Error locator polynomial computation using Berlekamp-Massey algorithm
51	* c. Error locator root finding (by far the most expensive step)
52	*
53	* In this implementation, step c is not performed using the usual Chien search.
54	* Instead, an alternative approach described in [1] is used. It consists in
55	* factoring the error locator polynomial using the Berlekamp Trace algorithm
56	* (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57	* solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58	* much better performance than Chien search for usual (m,t) values (typically
59	* m >= 13, t < 32, see [1]).
60	*
61	* [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62	* of characteristic 2, in: Western European Workshop on Research in Cryptology
63	* - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64	* [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65	* finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66	*/
67
68	#include <linux/kernel.h>
69	#include <linux/errno.h>
70	#include <linux/init.h>
71	#include <linux/module.h>
72	#include <linux/slab.h>
73	#include <linux/bitops.h>
74	#include <linux/bitrev.h>
75	#include <asm/byteorder.h>
76	#include <linux/bch.h>
77
/*
 * Code parameter accessors.
 *
 * With CONFIG_BCH_CONST_PARAMS, m and t are compile-time constants and the
 * control structure argument is ignored; otherwise they are read from the
 * bch_control structure passed as @_p.
 */
#if defined(CONFIG_BCH_CONST_PARAMS)
#define GF_M(_p)               (CONFIG_BCH_CONST_M)
#define GF_T(_p)               (CONFIG_BCH_CONST_T)
#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
#define BCH_MAX_T              (CONFIG_BCH_CONST_T)
#else
#define GF_M(_p)               ((_p)->m)
#define GF_T(_p)               ((_p)->t)
#define GF_N(_p)               ((_p)->n)
#define BCH_MAX_M              15 /* 2KB */
#define BCH_MAX_T              64 /* 64 bit correction */
#endif

/* number of 32-bit words / bytes needed to hold m*t ecc bits */
#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)

/* upper bound of BCH_ECC_WORDS(), used to size on-stack buffers */
#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)

/* debug traces are compiled out unless dbg() is defined by the includer */
#ifndef dbg
#define dbg(_fmt, args...)     do {} while (0)
#endif
100
/*
 * represent a polynomial over GF(2^m)
 *
 * c[i] is the coefficient of X^i; the array is a flexible array member, so
 * storage for deg+1 coefficients must be provided by the allocator (see
 * GF_POLY_SZ).
 */
struct gf_poly {
	unsigned int deg;    /* polynomial degree */
	unsigned int c[];    /* polynomial terms */
};

/* given its degree, compute a polynomial size in bytes */
#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))

/* polynomial of degree 1: header immediately followed by two coefficients */
struct gf_poly_deg1 {
	struct gf_poly poly;
	unsigned int   c[2];
};
117
118	static u8 swap_bits(struct bch_control *bch, u8 in)
119	{
120	if (!bch->swap_bits)
121	return in;
122
123	return bitrev8(in);
124	}
125
126	/*
127	* same as bch_encode(), but process input data one byte at a time
128	*/
129	static void bch_encode_unaligned(struct bch_control *bch,
130	const unsigned char data, unsigned* int len,
131	uint32_t *ecc)
132	{
133	int i;
134	const uint32_t *p;
135	const int l = BCH_ECC_WORDS(bch)-`1`;
136
137	while (len--) {
138	u8 tmp = swap_bits(bch, in: *data++);
139
140	p = bch->mod8_tab + (l+`1`)*(((ecc[`0`] >> `24`)^(tmp)) & `0xff`);
141
142	for (i = `0`; i < l; i++)
143	ecc[i] = ((ecc[i] << `8`)\|(ecc[i+`1`] >> `24`))^(*p++);
144
145	ecc[l] = (ecc[l] << `8`)^(*p);
146	}
147	}
148
149	/*
150	* convert ecc bytes to aligned, zero-padded 32-bit ecc words
151	*/
152	static void load_ecc8(struct bch_control bch, uint32_t dst,
153	const uint8_t *src)
154	{
155	uint8_t pad[`4`] = {`0`, `0`, `0`, `0`};
156	unsigned int i, nwords = BCH_ECC_WORDS(bch)-`1`;
157
158	for (i = `0`; i < nwords; i++, src += `4`)
159	dst[i] = ((u32)swap_bits(bch, in: src[`0`]) << `24`) \|
160	((u32)swap_bits(bch, in: src[`1`]) << `16`) \|
161	((u32)swap_bits(bch, in: src[`2`]) << `8`) \|
162	swap_bits(bch, in: src[`3`]);
163
164	memcpy(pad, src, BCH_ECC_BYTES(bch)-`4`*nwords);
165	dst[nwords] = ((u32)swap_bits(bch, in: pad[`0`]) << `24`) \|
166	((u32)swap_bits(bch, in: pad[`1`]) << `16`) \|
167	((u32)swap_bits(bch, in: pad[`2`]) << `8`) \|
168	swap_bits(bch, in: pad[`3`]);
169	}
170
/*
 * convert 32-bit ecc words to ecc bytes
 *
 * Inverse of load_ecc8(): @src holds BCH_ECC_WORDS(bch) words, @dst receives
 * exactly BCH_ECC_BYTES(bch) bytes, most-significant byte of each word first.
 */
static void store_ecc8(struct bch_control *bch, uint8_t *dst,
		       const uint32_t *src)
{
	uint8_t pad[4];
	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;

	for (i = 0; i < nwords; i++) {
		*dst++ = swap_bits(bch, src[i] >> 24);
		*dst++ = swap_bits(bch, src[i] >> 16);
		*dst++ = swap_bits(bch, src[i] >> 8);
		*dst++ = swap_bits(bch, src[i]);
	}
	/* last word may be partial: only copy the bytes that belong to ecc */
	pad[0] = swap_bits(bch, src[nwords] >> 24);
	pad[1] = swap_bits(bch, src[nwords] >> 16);
	pad[2] = swap_bits(bch, src[nwords] >> 8);
	pad[3] = swap_bits(bch, src[nwords]);
	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
}
192
193	/**
194	* bch_encode - calculate BCH ecc parity of data
195	* @bch: BCH control structure
196	* @data: data to encode
197	* @len: data length in bytes
198	* @ecc: ecc parity data, must be initialized by caller
199	*
200	* The @ecc parity array is used both as input and output parameter, in order to
201	* allow incremental computations. It should be of the size indicated by member
202	* @ecc_bytes of @bch, and should be initialized to 0 before the first call.
203	*
204	* The exact number of computed ecc parity bits is given by member @ecc_bits of
205	* @bch; it may be less than m*t for large values of t.
206	*/
207	void bch_encode(struct bch_control bch, const* uint8_t *data,
208	unsigned int len, uint8_t *ecc)
209	{
210	const unsigned int l = BCH_ECC_WORDS(bch)-`1`;
211	unsigned int i, mlen;
212	unsigned long m;
213	uint32_t w, r[BCH_ECC_MAX_WORDS];
214	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
215	const uint32_t * const tab0 = bch->mod8_tab;
216	const uint32_t * const tab1 = tab0 + `256`*(l+`1`);
217	const uint32_t * const tab2 = tab1 + `256`*(l+`1`);
218	const uint32_t * const tab3 = tab2 + `256`*(l+`1`);
219	const uint32_t pdata, p0, p1, p2, *p3;
220
221	if (WARN_ON(r_bytes > sizeof(r)))
222	return;
223
224	if (ecc) {
225	/ load ecc parity bytes into internal 32-bit buffer /
226	load_ecc8(bch, dst: bch->ecc_buf, src: ecc);
227	} else {
228	memset(bch->ecc_buf, `0`, r_bytes);
229	}
230
231	/ process first unaligned data bytes /
232	m = ((unsigned long)data) & `3`;
233	if (m) {
234	mlen = (len < (`4`-m)) ? len : `4`-m;
235	bch_encode_unaligned(bch, data, len: mlen, ecc: bch->ecc_buf);
236	data += mlen;
237	len -= mlen;
238	}
239
240	/ process 32-bit aligned data words /
241	pdata = (uint32_t *)data;
242	mlen = len/`4`;
243	data += `4`*mlen;
244	len -= `4`*mlen;
245	memcpy(r, bch->ecc_buf, r_bytes);
246
247	/*
248	* split each 32-bit word into 4 polynomials of weight 8 as follows:
249	*
250	* 31 ...24 23 ...16 15 ... 8 7 ... 0
251	* xxxxxxxx yyyyyyyy zzzzzzzz tttttttt
252	* tttttttt mod g = r0 (precomputed)
253	* zzzzzzzz 00000000 mod g = r1 (precomputed)
254	* yyyyyyyy 00000000 00000000 mod g = r2 (precomputed)
255	* xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed)
256	* xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3
257	*/
258	while (mlen--) {
259	/ input data is read in big-endian format /
260	w = cpu_to_be32(*pdata++);
261	if (bch->swap_bits)
262	w = (u32)swap_bits(bch, in: w) \|
263	((u32)swap_bits(bch, in: w >> `8`) << `8`) \|
264	((u32)swap_bits(bch, in: w >> `16`) << `16`) \|
265	((u32)swap_bits(bch, in: w >> `24`) << `24`);
266	w ^= r[`0`];
267	p0 = tab0 + (l+`1`)*((w >> `0`) & `0xff`);
268	p1 = tab1 + (l+`1`)*((w >> `8`) & `0xff`);
269	p2 = tab2 + (l+`1`)*((w >> `16`) & `0xff`);
270	p3 = tab3 + (l+`1`)*((w >> `24`) & `0xff`);
271
272	for (i = `0`; i < l; i++)
273	r[i] = r[i+`1`]^p0[i]^p1[i]^p2[i]^p3[i];
274
275	r[l] = p0[l]^p1[l]^p2[l]^p3[l];
276	}
277	memcpy(bch->ecc_buf, r, r_bytes);
278
279	/ process last unaligned bytes /
280	if (len)
281	bch_encode_unaligned(bch, data, len, ecc: bch->ecc_buf);
282
283	/ store ecc parity bytes into original parity buffer /
284	if (ecc)
285	store_ecc8(bch, dst: ecc, src: bch->ecc_buf);
286	}
287	EXPORT_SYMBOL_GPL(bch_encode);
288
/*
 * reduce v modulo n = GF_N(bch), without using a division
 *
 * Each pass subtracts n, then folds the bits above position m back onto the
 * low m bits (n is used as the low-bit mask).
 */
static inline int modulo(struct bch_control *bch, unsigned int v)
{
	const unsigned int n = GF_N(bch);
	while (v >= n) {
		v -= n;
		v = (v & n) + (v >> GF_M(bch));
	}
	return v;
}
298
/*
 * shorter and faster modulo function, only works when v < 2N.
 */
static inline int mod_s(struct bch_control *bch, unsigned int v)
{
	const unsigned int n = GF_N(bch);
	return (v < n) ? v : v-n;
}
307
/*
 * degree of a binary polynomial stored as a bit mask (bit i = X^i term);
 * returns fls(poly)-1
 */
static inline int deg(unsigned int poly)
{
	/* polynomial degree is the most-significant bit index */
	return fls(poly)-1;
}
313
/*
 * return 1 if x has an odd number of bits set, 0 otherwise
 */
static inline int parity(unsigned int x)
{
	/*
	 * public domain code snippet, lifted from
	 * http://www-graphics.stanford.edu/~seander/bithacks.html
	 */
	x ^= x >> 1;
	x ^= x >> 2;
	x = (x & 0x11111111U) * 0x11111111U;
	return (x >> 28) & 1;
}
325
326	/ Galois field basic operations: multiply, divide, inverse, etc. /
327
328	static inline unsigned int gf_mul(struct bch_control bch, unsigned* int a,
329	unsigned int b)
330	{
331	return (a && b) ? bch->a_pow_tab[mod_s(bch, v: bch->a_log_tab[a]+
332	bch->a_log_tab[b])] : `0`;
333	}
334
335	static inline unsigned int gf_sqr(struct bch_control bch, unsigned* int a)
336	{
337	return a ? bch->a_pow_tab[mod_s(bch, v: `2`*bch->a_log_tab[a])] : `0`;
338	}
339
340	static inline unsigned int gf_div(struct bch_control bch, unsigned* int a,
341	unsigned int b)
342	{
343	return a ? bch->a_pow_tab[mod_s(bch, v: bch->a_log_tab[a]+
344	GF_N(bch)-bch->a_log_tab[b])] : `0`;
345	}
346
347	static inline unsigned int gf_inv(struct bch_control bch, unsigned* int a)
348	{
349	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
350	}
351
352	static inline unsigned int a_pow(struct bch_control bch, int* i)
353	{
354	return bch->a_pow_tab[modulo(bch, v: i)];
355	}
356
357	static inline int a_log(struct bch_control bch, unsigned* int x)
358	{
359	return bch->a_log_tab[x];
360	}
361
362	static inline int a_ilog(struct bch_control bch, unsigned* int x)
363	{
364	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
365	}
366
367	/*
368	* compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
369	*/
370	static void compute_syndromes(struct bch_control bch, uint32_t ecc,
371	unsigned int *syn)
372	{
373	int i, j, s;
374	unsigned int m;
375	uint32_t poly;
376	const int t = GF_T(bch);
377
378	s = bch->ecc_bits;
379
380	/ make sure extra bits in last ecc word are cleared /
381	m = ((unsigned int)s) & `31`;
382	if (m)
383	ecc[s/`32`] &= ~((`1u` << (`32`-m))-`1`);
384	memset(syn, `0`, `2`tsizeof(*syn));
385
386	/ compute v(a^j) for j=1 .. 2t-1 /
387	do {
388	poly = *ecc++;
389	s -= `32`;
390	while (poly) {
391	i = deg(poly);
392	for (j = `0`; j < `2`*t; j += `2`)
393	syn[j] ^= a_pow(bch, i: (j+`1`)*(i+s));
394
395	poly ^= (`1` << i);
396	}
397	} while (s > `0`);
398
399	/ v(a^(2j)) = v(a^j)^2 /
400	for (j = `0`; j < t; j++)
401	syn[`2`*j+`1`] = gf_sqr(bch, a: syn[j]);
402	}
403
404	static void gf_poly_copy(struct gf_poly dst, struct* gf_poly *src)
405	{
406	memcpy(dst, src, GF_POLY_SZ(src->deg));
407	}
408
409	static int compute_error_locator_polynomial(struct bch_control *bch,
410	const unsigned int *syn)
411	{
412	const unsigned int t = GF_T(bch);
413	const unsigned int n = GF_N(bch);
414	unsigned int i, j, tmp, l, pd = `1`, d = syn[`0`];
415	struct gf_poly *elp = bch->elp;
416	struct gf_poly *pelp = bch->poly_2t[`0`];
417	struct gf_poly *elp_copy = bch->poly_2t[`1`];
418	int k, pp = -`1`;
419
420	memset(pelp, `0`, GF_POLY_SZ(`2`*t));
421	memset(elp, `0`, GF_POLY_SZ(`2`*t));
422
423	pelp->deg = `0`;
424	pelp->c[`0`] = `1`;
425	elp->deg = `0`;
426	elp->c[`0`] = `1`;
427
428	/ use simplified binary Berlekamp-Massey algorithm /
429	for (i = `0`; (i < t) && (elp->deg <= t); i++) {
430	if (d) {
431	k = `2`*i-pp;
432	gf_poly_copy(dst: elp_copy, src: elp);
433	/ e[i+1](X) = e[i](X)+didp^-1X^2(i-p)e[p](X) /*
434	tmp = a_log(bch, x: d)+n-a_log(bch, x: pd);
435	for (j = `0`; j <= pelp->deg; j++) {
436	if (pelp->c[j]) {
437	l = a_log(bch, x: pelp->c[j]);
438	elp->c[j+k] ^= a_pow(bch, i: tmp+l);
439	}
440	}
441	/ compute l[i+1] = max(l[i]->c[l[p]+2(i-p]) /*
442	tmp = pelp->deg+k;
443	if (tmp > elp->deg) {
444	elp->deg = tmp;
445	gf_poly_copy(dst: pelp, src: elp_copy);
446	pd = d;
447	pp = `2`*i;
448	}
449	}
450	/ di+1 = S(2i+3)+elp[i+1].1S(2i+2)+...+elp[i+1].lS(2i+3-l) /*
451	if (i < t-`1`) {
452	d = syn[`2`*i+`2`];
453	for (j = `1`; j <= elp->deg; j++)
454	d ^= gf_mul(bch, a: elp->c[j], b: syn[`2`*i+`2`-j]);
455	}
456	}
457	dbg("elp=%s\n", gf_poly_str(elp));
458	return (elp->deg > t) ? -`1` : (int)elp->deg;
459	}
460
461	/*
462	* solve a m x m linear system in GF(2) with an expected number of solutions,
463	* and return the number of found solutions
464	*/
465	static int solve_linear_system(struct bch_control bch, unsigned* int *rows,
466	unsigned int sol, int* nsol)
467	{
468	const int m = GF_M(bch);
469	unsigned int tmp, mask;
470	int rem, c, r, p, k, param[BCH_MAX_M];
471
472	k = `0`;
473	mask = `1` << m;
474
475	/ Gaussian elimination /
476	for (c = `0`; c < m; c++) {
477	rem = `0`;
478	p = c-k;
479	/ find suitable row for elimination /
480	for (r = p; r < m; r++) {
481	if (rows[r] & mask) {
482	if (r != p)
483	swap(rows[r], rows[p]);
484	rem = r+`1`;
485	break;
486	}
487	}
488	if (rem) {
489	/ perform elimination on remaining rows /
490	tmp = rows[p];
491	for (r = rem; r < m; r++) {
492	if (rows[r] & mask)
493	rows[r] ^= tmp;
494	}
495	} else {
496	/ elimination not needed, store defective row index /
497	param[k++] = c;
498	}
499	mask >>= `1`;
500	}
501	/ rewrite system, inserting fake parameter rows /
502	if (k > `0`) {
503	p = k;
504	for (r = m-`1`; r >= `0`; r--) {
505	if ((r > m-`1`-k) && rows[r])
506	/ system has no solution /
507	return `0`;
508
509	rows[r] = (p && (r == param[p-`1`])) ?
510	p--, `1u` << (m-r) : rows[r-p];
511	}
512	}
513
514	if (nsol != (`1` << k))
515	/ unexpected number of solutions /
516	return `0`;
517
518	for (p = `0`; p < nsol; p++) {
519	/ set parameters for p-th solution /
520	for (c = `0`; c < k; c++)
521	rows[param[c]] = (rows[param[c]] & ~`1`)\|((p >> c) & `1`);
522
523	/ compute unique solution /
524	tmp = `0`;
525	for (r = m-`1`; r >= `0`; r--) {
526	mask = rows[r] & (tmp\|`1`);
527	tmp \|= parity(x: mask) << (m-r);
528	}
529	sol[p] = tmp >> `1`;
530	}
531	return nsol;
532	}
533
534	/*
535	* this function builds and solves a linear system for finding roots of a degree
536	* 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
537	*/
538	static int find_affine4_roots(struct bch_control bch, unsigned* int a,
539	unsigned int b, unsigned int c,
540	unsigned int *roots)
541	{
542	int i, j, k;
543	const int m = GF_M(bch);
544	unsigned int mask = `0xff`, t, rows[`16`] = {`0`,};
545
546	j = a_log(bch, x: b);
547	k = a_log(bch, x: a);
548	rows[`0`] = c;
549
550	/ build linear system to solve X^4+aX^2+bX+c = 0 /
551	for (i = `0`; i < m; i++) {
552	rows[i+`1`] = bch->a_pow_tab[`4`*i]^
553	(a ? bch->a_pow_tab[mod_s(bch, v: k)] : `0`)^
554	(b ? bch->a_pow_tab[mod_s(bch, v: j)] : `0`);
555	j++;
556	k += `2`;
557	}
558	/*
559	* transpose 16x16 matrix before passing it to linear solver
560	* warning: this code assumes m < 16
561	*/
562	for (j = `8`; j != `0`; j >>= `1`, mask ^= (mask << j)) {
563	for (k = `0`; k < `16`; k = (k+j+`1`) & ~j) {
564	t = ((rows[k] >> j)^rows[k+j]) & mask;
565	rows[k] ^= (t << j);
566	rows[k+j] ^= t;
567	}
568	}
569	return solve_linear_system(bch, rows, sol: roots, nsol: `4`);
570	}
571
572	/*
573	* compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
574	*/
575	static int find_poly_deg1_roots(struct bch_control bch, struct* gf_poly *poly,
576	unsigned int *roots)
577	{
578	int n = `0`;
579
580	if (poly->c[`0`])
581	/ poly[X] = bX+c with c!=0, root=c/b /
582	roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[`0`]]+
583	bch->a_log_tab[poly->c[`1`]]);
584	return n;
585	}
586
587	/*
588	* compute roots of a degree 2 polynomial over GF(2^m)
589	*/
590	static int find_poly_deg2_roots(struct bch_control bch, struct* gf_poly *poly,
591	unsigned int *roots)
592	{
593	int n = `0`, i, l0, l1, l2;
594	unsigned int u, v, r;
595
596	if (poly->c[`0`] && poly->c[`1`]) {
597
598	l0 = bch->a_log_tab[poly->c[`0`]];
599	l1 = bch->a_log_tab[poly->c[`1`]];
600	l2 = bch->a_log_tab[poly->c[`2`]];
601
602	/ using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) /
603	u = a_pow(bch, i: l0+l2+`2`*(GF_N(bch)-l1));
604	/*
605	* let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
606	* r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
607	* u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
608	* i.e. r and r+1 are roots iff Tr(u)=0
609	*/
610	r = `0`;
611	v = u;
612	while (v) {
613	i = deg(poly: v);
614	r ^= bch->xi_tab[i];
615	v ^= (`1` << i);
616	}
617	/ verify root /
618	if ((gf_sqr(bch, a: r)^r) == u) {
619	/ reverse z=a/bX transformation and compute log(1/r) /
620	roots[n++] = modulo(bch, v: `2`*GF_N(bch)-l1-
621	bch->a_log_tab[r]+l2);
622	roots[n++] = modulo(bch, v: `2`*GF_N(bch)-l1-
623	bch->a_log_tab[r^`1`]+l2);
624	}
625	}
626	return n;
627	}
628
629	/*
630	* compute roots of a degree 3 polynomial over GF(2^m)
631	*/
632	static int find_poly_deg3_roots(struct bch_control bch, struct* gf_poly *poly,
633	unsigned int *roots)
634	{
635	int i, n = `0`;
636	unsigned int a, b, c, a2, b2, c2, e3, tmp[`4`];
637
638	if (poly->c[`0`]) {
639	/ transform polynomial into monic X^3 + a2X^2 + b2X + c2 /
640	e3 = poly->c[`3`];
641	c2 = gf_div(bch, a: poly->c[`0`], b: e3);
642	b2 = gf_div(bch, a: poly->c[`1`], b: e3);
643	a2 = gf_div(bch, a: poly->c[`2`], b: e3);
644
645	/ (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) /
646	c = gf_mul(bch, a: a2, b: c2); / c = a2c2 /
647	b = gf_mul(bch, a: a2, b: b2)^c2; / b = a2b2 + c2 /
648	a = gf_sqr(bch, a: a2)^b2; / a = a2^2 + b2 /
649
650	/ find the 4 roots of this affine polynomial /
651	if (find_affine4_roots(bch, a, b, c, roots: tmp) == `4`) {
652	/ remove a2 from final list of roots /
653	for (i = `0`; i < `4`; i++) {
654	if (tmp[i] != a2)
655	roots[n++] = a_ilog(bch, x: tmp[i]);
656	}
657	}
658	}
659	return n;
660	}
661
662	/*
663	* compute roots of a degree 4 polynomial over GF(2^m)
664	*/
665	static int find_poly_deg4_roots(struct bch_control bch, struct* gf_poly *poly,
666	unsigned int *roots)
667	{
668	int i, l, n = `0`;
669	unsigned int a, b, c, d, e = `0`, f, a2, b2, c2, e4;
670
671	if (poly->c[`0`] == `0`)
672	return `0`;
673
674	/ transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d /
675	e4 = poly->c[`4`];
676	d = gf_div(bch, a: poly->c[`0`], b: e4);
677	c = gf_div(bch, a: poly->c[`1`], b: e4);
678	b = gf_div(bch, a: poly->c[`2`], b: e4);
679	a = gf_div(bch, a: poly->c[`3`], b: e4);
680
681	/ use Y=1/X transformation to get an affine polynomial /
682	if (a) {
683	/ first, eliminate cX by using z=X+e with ae^2+c=0 /
684	if (c) {
685	/ compute e such that e^2 = c/a /
686	f = gf_div(bch, a: c, b: a);
687	l = a_log(bch, x: f);
688	l += (l & `1`) ? GF_N(bch) : `0`;
689	e = a_pow(bch, i: l/`2`);
690	/*
691	* use transformation z=X+e:
692	* z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
693	* z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
694	* z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
695	* z^4 + az^3 + b'z^2 + d'
696	*/
697	d = a_pow(bch, i: `2`*l)^gf_mul(bch, a: b, b: f)^d;
698	b = gf_mul(bch, a, b: e)^b;
699	}
700	/ now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d /
701	if (d == `0`)
702	/ assume all roots have multiplicity 1 /
703	return `0`;
704
705	c2 = gf_inv(bch, a: d);
706	b2 = gf_div(bch, a, b: d);
707	a2 = gf_div(bch, a: b, b: d);
708	} else {
709	/ polynomial is already affine /
710	c2 = d;
711	b2 = c;
712	a2 = b;
713	}
714	/ find the 4 roots of this affine polynomial /
715	if (find_affine4_roots(bch, a: a2, b: b2, c: c2, roots) == `4`) {
716	for (i = `0`; i < `4`; i++) {
717	/ post-process roots (reverse transformations) /
718	f = a ? gf_inv(bch, a: roots[i]) : roots[i];
719	roots[i] = a_ilog(bch, x: f^e);
720	}
721	n = `4`;
722	}
723	return n;
724	}
725
726	/*
727	* build monic, log-based representation of a polynomial
728	*/
729	static void gf_poly_logrep(struct bch_control *bch,
730	const struct gf_poly a, int* *rep)
731	{
732	int i, d = a->deg, l = GF_N(bch)-a_log(bch, x: a->c[a->deg]);
733
734	/ represent 0 values with -1; warning, rep[d] is not set to 1 /
735	for (i = `0`; i < d; i++)
736	rep[i] = a->c[i] ? mod_s(bch, v: a_log(bch, x: a->c[i])+l) : -`1`;
737	}
738
739	/*
740	* compute polynomial Euclidean division remainder in GF(2^m)[X]
741	*/
742	static void gf_poly_mod(struct bch_control bch, struct* gf_poly *a,
743	const struct gf_poly b, int* *rep)
744	{
745	int la, p, m;
746	unsigned int i, j, *c = a->c;
747	const unsigned int d = b->deg;
748
749	if (a->deg < d)
750	return;
751
752	/ reuse or compute log representation of denominator /
753	if (!rep) {
754	rep = bch->cache;
755	gf_poly_logrep(bch, a: b, rep);
756	}
757
758	for (j = a->deg; j >= d; j--) {
759	if (c[j]) {
760	la = a_log(bch, x: c[j]);
761	p = j-d;
762	for (i = `0`; i < d; i++, p++) {
763	m = rep[i];
764	if (m >= `0`)
765	c[p] ^= bch->a_pow_tab[mod_s(bch,
766	v: m+la)];
767	}
768	}
769	}
770	a->deg = d-`1`;
771	while (!c[a->deg] && a->deg)
772	a->deg--;
773	}
774
775	/*
776	* compute polynomial Euclidean division quotient in GF(2^m)[X]
777	*/
778	static void gf_poly_div(struct bch_control bch, struct* gf_poly *a,
779	const struct gf_poly b, struct* gf_poly *q)
780	{
781	if (a->deg >= b->deg) {
782	q->deg = a->deg-b->deg;
783	/ compute a mod b (modifies a) /
784	gf_poly_mod(bch, a, b, NULL);
785	/ quotient is stored in upper part of polynomial a /
786	memcpy(q->c, &a->c[b->deg], (`1`+q->deg)*sizeof(unsigned int));
787	} else {
788	q->deg = `0`;
789	q->c[`0`] = `0`;
790	}
791	}
792
793	/*
794	* compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
795	*/
796	static struct gf_poly gf_poly_gcd(struct* bch_control bch, struct* gf_poly *a,
797	struct gf_poly *b)
798	{
799	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
800
801	if (a->deg < b->deg)
802	swap(a, b);
803
804	while (b->deg > `0`) {
805	gf_poly_mod(bch, a, b, NULL);
806	swap(a, b);
807	}
808
809	dbg("%s\n", gf_poly_str(a));
810
811	return a;
812	}
813
814	/*
815	* Given a polynomial f and an integer k, compute Tr(a^kX) mod f
816	* This is used in Berlekamp Trace algorithm for splitting polynomials
817	*/
818	static void compute_trace_bk_mod(struct bch_control bch, int* k,
819	const struct gf_poly f, struct* gf_poly *z,
820	struct gf_poly *out)
821	{
822	const int m = GF_M(bch);
823	int i, j;
824
825	/ z contains z^2j mod f /
826	z->deg = `1`;
827	z->c[`0`] = `0`;
828	z->c[`1`] = bch->a_pow_tab[k];
829
830	out->deg = `0`;
831	memset(out, `0`, GF_POLY_SZ(f->deg));
832
833	/ compute f log representation only once /
834	gf_poly_logrep(bch, a: f, rep: bch->cache);
835
836	for (i = `0`; i < m; i++) {
837	/ add a^(k2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 /*
838	for (j = z->deg; j >= `0`; j--) {
839	out->c[j] ^= z->c[j];
840	z->c[`2`*j] = gf_sqr(bch, a: z->c[j]);
841	z->c[`2`*j+`1`] = `0`;
842	}
843	if (z->deg > out->deg)
844	out->deg = z->deg;
845
846	if (i < m-`1`) {
847	z->deg *= `2`;
848	/ z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f /
849	gf_poly_mod(bch, a: z, b: f, rep: bch->cache);
850	}
851	}
852	while (!out->c[out->deg] && out->deg)
853	out->deg--;
854
855	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
856	}
857
858	/*
859	* factor a polynomial using Berlekamp Trace algorithm (BTA)
860	*/
861	static void factor_polynomial(struct bch_control bch, int* k, struct gf_poly *f,
862	struct gf_poly g, struct gf_poly h)
863	{
864	struct gf_poly *f2 = bch->poly_2t[`0`];
865	struct gf_poly *q = bch->poly_2t[`1`];
866	struct gf_poly *tk = bch->poly_2t[`2`];
867	struct gf_poly *z = bch->poly_2t[`3`];
868	struct gf_poly *gcd;
869
870	dbg("factoring %s...\n", gf_poly_str(f));
871
872	*g = f;
873	*h = NULL;
874
875	/ tk = Tr(a^k.X) mod f /
876	compute_trace_bk_mod(bch, k, f, z, out: tk);
877
878	if (tk->deg > `0`) {
879	/ compute g = gcd(f, tk) (destructive operation) /
880	gf_poly_copy(dst: f2, src: f);
881	gcd = gf_poly_gcd(bch, a: f2, b: tk);
882	if (gcd->deg < f->deg) {
883	/ compute h=f/gcd(f,tk); this will modify f and q /
884	gf_poly_div(bch, a: f, b: gcd, q);
885	/ store g and h in-place (clobbering f) /
886	h = &((struct* gf_poly_deg1 *)f)[gcd->deg].poly;
887	gf_poly_copy(dst: *g, src: gcd);
888	gf_poly_copy(dst: *h, src: q);
889	}
890	}
891	}
892
893	/*
894	* find roots of a polynomial, using BTZ algorithm; see the beginning of this
895	* file for details
896	*/
897	static int find_poly_roots(struct bch_control bch, unsigned* int k,
898	struct gf_poly poly, unsigned* int *roots)
899	{
900	int cnt;
901	struct gf_poly f1, f2;
902
903	switch (poly->deg) {
904	/ handle low degree polynomials with ad hoc techniques /
905	case `1`:
906	cnt = find_poly_deg1_roots(bch, poly, roots);
907	break;
908	case `2`:
909	cnt = find_poly_deg2_roots(bch, poly, roots);
910	break;
911	case `3`:
912	cnt = find_poly_deg3_roots(bch, poly, roots);
913	break;
914	case `4`:
915	cnt = find_poly_deg4_roots(bch, poly, roots);
916	break;
917	default:
918	/ factor polynomial using Berlekamp Trace Algorithm (BTA) /
919	cnt = `0`;
920	if (poly->deg && (k <= GF_M(bch))) {
921	factor_polynomial(bch, k, f: poly, g: &f1, h: &f2);
922	if (f1)
923	cnt += find_poly_roots(bch, k: k+`1`, poly: f1, roots);
924	if (f2)
925	cnt += find_poly_roots(bch, k: k+`1`, poly: f2, roots: roots+cnt);
926	}
927	break;
928	}
929	return cnt;
930	}
931
#if defined(USE_CHIEN_SEARCH)
/*
 * exhaustive root search (Chien) implementation - not used, included only for
 * reference/comparison tests
 *
 * Returns the number of roots found when it equals the polynomial degree,
 * 0 otherwise. bch->cache is overwritten.
 */
static int chien_search(struct bch_control *bch, unsigned int len,
			struct gf_poly *p, unsigned int *roots)
{
	int m;
	unsigned int i, j, syn, syn0, count = 0;
	const unsigned int k = 8*len+bch->ecc_bits;

	/* use a log-based representation of polynomial */
	gf_poly_logrep(bch, p, bch->cache);
	bch->cache[p->deg] = 0;
	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);

	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
		/* compute elp(a^i) */
		for (j = 1, syn = syn0; j <= p->deg; j++) {
			m = bch->cache[j];
			if (m >= 0)
				syn ^= a_pow(bch, m+j*i);
		}
		if (syn == 0) {
			roots[count++] = GF_N(bch)-i;
			if (count == p->deg)
				break;
		}
	}
	return (count == p->deg) ? count : 0;
}
/* note: the replacement expands to a use of a variable named len in the caller */
#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
#endif /* USE_CHIEN_SEARCH */
966
967	/**
968	* bch_decode - decode received codeword and find bit error locations
969	* @bch: BCH control structure
970	* @data: received data, ignored if @calc_ecc is provided
971	* @len: data length in bytes, must always be provided
972	* @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
973	* @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
974	* @syn: hw computed syndrome data (if NULL, syndrome is calculated)
975	* @errloc: output array of error locations
976	*
977	* Returns:
978	* The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
979	* invalid parameters were provided
980	*
981	* Depending on the available hw BCH support and the need to compute @calc_ecc
982	* separately (using bch_encode()), this function should be called with one of
983	* the following parameter configurations -
984	*
985	* by providing @data and @recv_ecc only:
986	* bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
987	*
988	* by providing @recv_ecc and @calc_ecc:
989	* bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
990	*
991	* by providing ecc = recv_ecc XOR calc_ecc:
992	* bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
993	*
994	* by providing syndrome results @syn:
995	* bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
996	*
997	* Once bch_decode() has successfully returned with a positive value, error
998	* locations returned in array @errloc should be interpreted as follows -
999	*
1000	* if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1001	* data correction)
1002	*
1003	* if (errloc[n] < 8*len), then n-th error is located in data and can be
1004	* corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1005	*
1006	* Note that this function does not perform any data correction by itself, it
1007	* merely indicates error locations.
1008	*/
1009	int bch_decode(struct bch_control bch, const* uint8_t data, unsigned* int len,
1010	const uint8_t recv_ecc, const* uint8_t *calc_ecc,
1011	const unsigned int syn, unsigned* int *errloc)
1012	{
1013	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1014	unsigned int nbits;
1015	int i, err, nroots;
1016	uint32_t sum;
1017
1018	/ sanity check: make sure data length can be handled /
1019	if (`8`*len > (bch->n-bch->ecc_bits))
1020	return -EINVAL;
1021
1022	/ if caller does not provide syndromes, compute them /
1023	if (!syn) {
1024	if (!calc_ecc) {
1025	/ compute received data ecc into an internal buffer /
1026	if (!data \|\| !recv_ecc)
1027	return -EINVAL;
1028	bch_encode(bch, data, len, NULL);
1029	} else {
1030	/ load provided calculated ecc /
1031	load_ecc8(bch, dst: bch->ecc_buf, src: calc_ecc);
1032	}
1033	/ load received ecc or assume it was XORed in calc_ecc /
1034	if (recv_ecc) {
1035	load_ecc8(bch, dst: bch->ecc_buf2, src: recv_ecc);
1036	/ XOR received and calculated ecc /
1037	for (i = `0`, sum = `0`; i < (int)ecc_words; i++) {
1038	bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1039	sum \|= bch->ecc_buf[i];
1040	}
1041	if (!sum)
1042	/ no error found /
1043	return `0`;
1044	}
1045	compute_syndromes(bch, ecc: bch->ecc_buf, syn: bch->syn);
1046	syn = bch->syn;
1047	}
1048
1049	err = compute_error_locator_polynomial(bch, syn);
1050	if (err > `0`) {
1051	nroots = find_poly_roots(bch, k: `1`, poly: bch->elp, roots: errloc);
1052	if (err != nroots)
1053	err = -`1`;
1054	}
1055	if (err > `0`) {
1056	/ post-process raw error locations for easier correction /
1057	nbits = (len*`8`)+bch->ecc_bits;
1058	for (i = `0`; i < err; i++) {
1059	if (errloc[i] >= nbits) {
1060	err = -`1`;
1061	break;
1062	}
1063	errloc[i] = nbits-`1`-errloc[i];
1064	if (!bch->swap_bits)
1065	errloc[i] = (errloc[i] & ~`7`) \|
1066	(`7`-(errloc[i] & `7`));
1067	}
1068	}
1069	return (err >= `0`) ? err : -EBADMSG;
1070	}
1071	EXPORT_SYMBOL_GPL(bch_decode);
1072
1073	/*
1074	* generate Galois field lookup tables
1075	*/
1076	static int build_gf_tables(struct bch_control bch, unsigned* int poly)
1077	{
1078	unsigned int i, x = `1`;
1079	const unsigned int k = `1` << deg(poly);
1080
1081	/ primitive polynomial must be of degree m /
1082	if (k != (`1u` << GF_M(bch)))
1083	return -`1`;
1084
1085	for (i = `0`; i < GF_N(bch); i++) {
1086	bch->a_pow_tab[i] = x;
1087	bch->a_log_tab[x] = i;
1088	if (i && (x == `1`))
1089	/ polynomial is not primitive (a^i=1 with 0<i<2^m-1) /
1090	return -`1`;
1091	x <<= `1`;
1092	if (x & k)
1093	x ^= poly;
1094	}
1095	bch->a_pow_tab[GF_N(bch)] = `1`;
1096	bch->a_log_tab[`0`] = `0`;
1097
1098	return `0`;
1099	}
1100
1101	/*
1102	* compute generator polynomial remainder tables for fast encoding
1103	*/
1104	static void build_mod8_tables(struct bch_control bch, const* uint32_t *g)
1105	{
1106	int i, j, b, d;
1107	uint32_t data, hi, lo, *tab;
1108	const int l = BCH_ECC_WORDS(bch);
1109	const int plen = DIV_ROUND_UP(bch->ecc_bits+`1`, `32`);
1110	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, `32`);
1111
1112	memset(bch->mod8_tab, `0`, `4``256`l*sizeof(*bch->mod8_tab));
1113
1114	for (i = `0`; i < `256`; i++) {
1115	/ p(X)=i is a small polynomial of weight <= 8 /
1116	for (b = `0`; b < `4`; b++) {
1117	/ we want to compute (p(X).X^(8b+deg(g))) mod g(X) /*
1118	tab = bch->mod8_tab + (b`256`+i)l;
1119	data = i << (`8`*b);
1120	while (data) {
1121	d = deg(poly: data);
1122	/ subtract X^d.g(X) from p(X).X^(8b+deg(g)) /*
1123	data ^= g[`0`] >> (`31`-d);
1124	for (j = `0`; j < ecclen; j++) {
1125	hi = (d < `31`) ? g[j] << (d+`1`) : `0`;
1126	lo = (j+`1` < plen) ?
1127	g[j+`1`] >> (`31`-d) : `0`;
1128	tab[j] ^= hi\|lo;
1129	}
1130	}
1131	}
1132	}
1133	}
1134
1135	/*
1136	* build a base for factoring degree 2 polynomials
1137	*/
1138	static int build_deg2_base(struct bch_control *bch)
1139	{
1140	const int m = GF_M(bch);
1141	int i, j, r;
1142	unsigned int sum, x, y, remaining, ak = `0`, xi[BCH_MAX_M];
1143
1144	/ find k s.t. Tr(a^k) = 1 and 0 <= k < m /
1145	for (i = `0`; i < m; i++) {
1146	for (j = `0`, sum = `0`; j < m; j++)
1147	sum ^= a_pow(bch, i: i*(`1` << j));
1148
1149	if (sum) {
1150	ak = bch->a_pow_tab[i];
1151	break;
1152	}
1153	}
1154	/ find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k /
1155	remaining = m;
1156	memset(xi, `0`, sizeof(xi));
1157
1158	for (x = `0`; (x <= GF_N(bch)) && remaining; x++) {
1159	y = gf_sqr(bch, a: x)^x;
1160	for (i = `0`; i < `2`; i++) {
1161	r = a_log(bch, x: y);
1162	if (y && (r < m) && !xi[r]) {
1163	bch->xi_tab[r] = x;
1164	xi[r] = `1`;
1165	remaining--;
1166	dbg("x%d = %x\n", r, x);
1167	break;
1168	}
1169	y ^= ak;
1170	}
1171	}
1172	/ should not happen but check anyway /
1173	return remaining ? -`1` : `0`;
1174	}
1175
1176	static void bch_alloc(size_t size, int* *err)
1177	{
1178	void *ptr;
1179
1180	ptr = kmalloc(size, GFP_KERNEL);
1181	if (ptr == NULL)
1182	*err = `1`;
1183	return ptr;
1184	}
1185
1186	/*
1187	* compute generator polynomial for given (m,t) parameters.
1188	*/
1189	static uint32_t compute_generator_polynomial(struct* bch_control *bch)
1190	{
1191	const unsigned int m = GF_M(bch);
1192	const unsigned int t = GF_T(bch);
1193	int n, err = `0`;
1194	unsigned int i, j, nbits, r, word, *roots;
1195	struct gf_poly *g;
1196	uint32_t *genpoly;
1197
1198	g = bch_alloc(GF_POLY_SZ(m*t), err: &err);
1199	roots = bch_alloc(size: (bch->n+`1`)*sizeof(*roots), err: &err);
1200	genpoly = bch_alloc(DIV_ROUND_UP(mt+`1`, `32`)sizeof(*genpoly), err: &err);
1201
1202	if (err) {
1203	kfree(objp: genpoly);
1204	genpoly = NULL;
1205	goto finish;
1206	}
1207
1208	/ enumerate all roots of g(X) /
1209	memset(roots , `0`, (bch->n+`1`)*sizeof(*roots));
1210	for (i = `0`; i < t; i++) {
1211	for (j = `0`, r = `2`*i+`1`; j < m; j++) {
1212	roots[r] = `1`;
1213	r = mod_s(bch, v: `2`*r);
1214	}
1215	}
1216	/ build generator polynomial g(X) /
1217	g->deg = `0`;
1218	g->c[`0`] = `1`;
1219	for (i = `0`; i < GF_N(bch); i++) {
1220	if (roots[i]) {
1221	/ multiply g(X) by (X+root) /
1222	r = bch->a_pow_tab[i];
1223	g->c[g->deg+`1`] = `1`;
1224	for (j = g->deg; j > `0`; j--)
1225	g->c[j] = gf_mul(bch, a: g->c[j], b: r)^g->c[j-`1`];
1226
1227	g->c[`0`] = gf_mul(bch, a: g->c[`0`], b: r);
1228	g->deg++;
1229	}
1230	}
1231	/ store left-justified binary representation of g(X) /
1232	n = g->deg+`1`;
1233	i = `0`;
1234
1235	while (n > `0`) {
1236	nbits = (n > `32`) ? `32` : n;
1237	for (j = `0`, word = `0`; j < nbits; j++) {
1238	if (g->c[n-`1`-j])
1239	word \|= `1u` << (`31`-j);
1240	}
1241	genpoly[i++] = word;
1242	n -= nbits;
1243	}
1244	bch->ecc_bits = g->deg;
1245
1246	finish:
1247	kfree(objp: g);
1248	kfree(objp: roots);
1249
1250	return genpoly;
1251	}
1252
1253	/**
1254	* bch_init - initialize a BCH encoder/decoder
1255	* @m: Galois field order, should be in the range 5-15
1256	* @t: maximum error correction capability, in bits
1257	* @prim_poly: user-provided primitive polynomial (or 0 to use default)
1258	* @swap_bits: swap bits within data and syndrome bytes
1259	*
1260	* Returns:
1261	* a newly allocated BCH control structure if successful, NULL otherwise
1262	*
1263	* This initialization can take some time, as lookup tables are built for fast
1264	* encoding/decoding; make sure not to call this function from a time critical
1265	* path. Usually, bch_init() should be called on module/driver init and
1266	* bch_free() should be called to release memory on exit.
1267	*
1268	* You may provide your own primitive polynomial of degree @m in argument
1269	* @prim_poly, or let bch_init() use its default polynomial.
1270	*
1271	* Once bch_init() has successfully returned a pointer to a newly allocated
1272	* BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1273	* the structure.
1274	*/
1275	struct bch_control bch_init(int* m, int t, unsigned int prim_poly,
1276	bool swap_bits)
1277	{
1278	int err = `0`;
1279	unsigned int i, words;
1280	uint32_t *genpoly;
1281	struct bch_control *bch = NULL;
1282
1283	const int min_m = `5`;
1284
1285	/ default primitive polynomials /
1286	static const unsigned int prim_poly_tab[] = {
1287	`0x25`, `0x43`, `0x83`, `0x11d`, `0x211`, `0x409`, `0x805`, `0x1053`, `0x201b`,
1288	`0x402b`, `0x8003`,
1289	};
1290
1291	#if defined(CONFIG_BCH_CONST_PARAMS)
1292	if ((m != (CONFIG_BCH_CONST_M)) \|\| (t != (CONFIG_BCH_CONST_T))) {
1293	printk(KERN_ERR "bch encoder/decoder was configured to support "
1294	"parameters m=%d, t=%d only!\n",
1295	CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1296	goto fail;
1297	}
1298	#endif
1299	if ((m < min_m) \|\| (m > BCH_MAX_M))
1300	/*
1301	* values of m greater than 15 are not currently supported;
1302	* supporting m > 15 would require changing table base type
1303	* (uint16_t) and a small patch in matrix transposition
1304	*/
1305	goto fail;
1306
1307	if (t > BCH_MAX_T)
1308	/*
1309	* we can support larger than 64 bits if necessary, at the
1310	* cost of higher stack usage.
1311	*/
1312	goto fail;
1313
1314	/ sanity checks /
1315	if ((t < `1`) \|\| (m*t >= ((`1` << m)-`1`)))
1316	/ invalid t value /
1317	goto fail;
1318
1319	/ select a primitive polynomial for generating GF(2^m) /
1320	if (prim_poly == `0`)
1321	prim_poly = prim_poly_tab[m-min_m];
1322
1323	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1324	if (bch == NULL)
1325	goto fail;
1326
1327	bch->m = m;
1328	bch->t = t;
1329	bch->n = (`1` << m)-`1`;
1330	words = DIV_ROUND_UP(m*t, `32`);
1331	bch->ecc_bytes = DIV_ROUND_UP(m*t, `8`);
1332	bch->a_pow_tab = bch_alloc(size: (`1`+bch->n)*sizeof(*bch->a_pow_tab), err: &err);
1333	bch->a_log_tab = bch_alloc(size: (`1`+bch->n)*sizeof(*bch->a_log_tab), err: &err);
1334	bch->mod8_tab = bch_alloc(size: words`1024`sizeof(*bch->mod8_tab), err: &err);
1335	bch->ecc_buf = bch_alloc(size: words*sizeof(*bch->ecc_buf), err: &err);
1336	bch->ecc_buf2 = bch_alloc(size: words*sizeof(*bch->ecc_buf2), err: &err);
1337	bch->xi_tab = bch_alloc(size: m*sizeof(*bch->xi_tab), err: &err);
1338	bch->syn = bch_alloc(size: `2`tsizeof(*bch->syn), err: &err);
1339	bch->cache = bch_alloc(size: `2`tsizeof(*bch->cache), err: &err);
1340	bch->elp = bch_alloc(size: (t+`1`)*sizeof(struct gf_poly_deg1), err: &err);
1341	bch->swap_bits = swap_bits;
1342
1343	for (i = `0`; i < ARRAY_SIZE(bch->poly_2t); i++)
1344	bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(`2`*t), err: &err);
1345
1346	if (err)
1347	goto fail;
1348
1349	err = build_gf_tables(bch, poly: prim_poly);
1350	if (err)
1351	goto fail;
1352
1353	/ use generator polynomial for computing encoding tables /
1354	genpoly = compute_generator_polynomial(bch);
1355	if (genpoly == NULL)
1356	goto fail;
1357
1358	build_mod8_tables(bch, g: genpoly);
1359	kfree(objp: genpoly);
1360
1361	err = build_deg2_base(bch);
1362	if (err)
1363	goto fail;
1364
1365	return bch;
1366
1367	fail:
1368	bch_free(bch);
1369	return NULL;
1370	}
1371	EXPORT_SYMBOL_GPL(bch_init);
1372
1373	/**
1374	* bch_free - free the BCH control structure
1375	* @bch: BCH control structure to release
1376	*/
1377	void bch_free(struct bch_control *bch)
1378	{
1379	unsigned int i;
1380
1381	if (bch) {
1382	kfree(objp: bch->a_pow_tab);
1383	kfree(objp: bch->a_log_tab);
1384	kfree(objp: bch->mod8_tab);
1385	kfree(objp: bch->ecc_buf);
1386	kfree(objp: bch->ecc_buf2);
1387	kfree(objp: bch->xi_tab);
1388	kfree(objp: bch->syn);
1389	kfree(objp: bch->cache);
1390	kfree(objp: bch->elp);
1391
1392	for (i = `0`; i < ARRAY_SIZE(bch->poly_2t); i++)
1393	kfree(objp: bch->poly_2t[i]);
1394
1395	kfree(objp: bch);
1396	}
1397	}
1398	EXPORT_SYMBOL_GPL(bch_free);
1399
1400	MODULE_LICENSE("GPL");
1401	MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1402	MODULE_DESCRIPTION("Binary BCH encoder/decoder");
1403

source code of linux/lib/bch.c