/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/videoio/container_avi.private.hpp"

#include <vector>
#include <deque>
#include <iostream>
#include <cstdlib>

#if CV_NEON
#define WITH_NEON
#endif

namespace cv
{

static const unsigned bit_mask[] =
{
    0,
    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};

static const uchar huff_val_shift = 20;
static const int huff_code_mask = (1 << huff_val_shift) - 1;

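// Builds a Huffman encoding lookup table from the intermediate representation produced
// by createSourceHuffmanTable(): table[0] stores the smallest symbol value, table[1] the
// number of entries, and table[val - min_val + 2] packs the canonical code in the upper
// bits with the code length in the lowest 8 bits ((code << 8) | length).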
static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
{
    int i, k;
    int min_val = INT_MAX, max_val = INT_MIN;
    int size;

    /* calc min and max values in the table */
    for( i = 1, k = 1; src[k] >= 0; i++ )
    {
        int code_count = src[k++];

        for( code_count += k; k < code_count; k++ )
        {
            int val = src[k] >> huff_val_shift;
            if( val < min_val )
                min_val = val;
            if( val > max_val )
                max_val = val;
        }
    }

    size = max_val - min_val + 3;

    if( size > max_size )
    {
        CV_Error(cv::Error::StsOutOfRange, "too big maximum Huffman code size");
    }

    memset( table, 0, size*sizeof(table[0]));

    table[0] = min_val;
    table[1] = size - 2;

    for( i = 1, k = 1; src[k] >= 0; i++ )
    {
        int code_count = src[k++];

        for( code_count += k; k < code_count; k++ )
        {
            int val = src[k] >> huff_val_shift;
            int code = src[k] & huff_code_mask;

            table[val - min_val + 2] = (code << 8) | i;
        }
    }
    return true;
}

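// Expands a JPEG Huffman table specification (16 code-length counts followed by the
// symbol values, as stored in jpegTableK3..K6) into an intermediate array: the first
// element holds first_bits, then for each code length the count of codes followed by
// entries packing (symbol << huff_val_shift) | canonical code; the list ends with -1.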
static int* createSourceHuffmanTable(const uchar* src, int* dst,
                                     int max_bits, int first_bits)
{
    int i, val_idx, code = 0;
    int* table = dst;
    *dst++ = first_bits;
    for (i = 1, val_idx = max_bits; i <= max_bits; i++)
    {
        int code_count = src[i - 1];
        dst[0] = code_count;
        code <<= 1;
        for (int k = 0; k < code_count; k++)
        {
            dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
        }
        code += code_count;
        dst += code_count + 1;
        val_idx += code_count;
    }
    dst[0] = -1;
    return table;
}


namespace mjpeg
{

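// Bit-level output buffer: accumulates variable-length Huffman codes into 32-bit words
// (most significant bit first) and grows automatically when it runs out of space.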
class mjpeg_buffer
{
public:
    mjpeg_buffer()
    {
        reset();
    }

    void resize(int size)
    {
        data.resize(size);
    }

    inline void put_bits(unsigned bits, int len)
    {
        CV_Assert(len >= 0 && len < 32);
        if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())
        {
            resize(int(2*data.size()));
        }

        bits_free -= (len);
        unsigned int tempval = (bits) & bit_mask[(len)];

        if( bits_free <= 0 )
        {
            data[m_pos] |= ((unsigned)tempval >> -bits_free);

            bits_free += 32;
            ++m_pos;
            data[m_pos] = bits_free < 32 ? (tempval << bits_free) : 0;
        }
        else
        {
            data[m_pos] |= (bits_free == 32) ? tempval : (tempval << bits_free);
        }
    }

    inline void put_val(int val, const unsigned * table)
    {
        unsigned code = table[(val) + 2];
        put_bits(code >> 8, (int)(code & 255));
    }

    void finish()
    {
        if(bits_free == 32)
        {
            bits_free = 0;
            m_data_len = m_pos;
        }
        else
        {
            m_data_len = m_pos + 1;
        }
    }

    void reset()
    {
        bits_free = 32;
        m_pos = 0;
        m_data_len = 0;
    }

    void clear()
    {
        // we only need to clear the first element; the rest will be overwritten
        data[0] = 0;
    }

    int get_bits_free()
    {
        return bits_free;
    }

    unsigned* get_data()
    {
        return &data[0];
    }

    unsigned get_len()
    {
        return m_data_len;
    }

private:
    std::vector<unsigned> data;
    int bits_free;
    unsigned m_pos;
    unsigned m_data_len;
};


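// Holds one mjpeg_buffer per parallel stripe and concatenates them bit-exactly into a
// single output buffer once encoding is finished.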
class mjpeg_buffer_keeper
{
public:
    mjpeg_buffer_keeper()
    {
        reset();
    }

    mjpeg_buffer& operator[](int i)
    {
        return m_buffer_list[i];
    }

    void allocate_buffers(int count, int size)
    {
        for(int i = (int)m_buffer_list.size(); i < count; ++i)
        {
            m_buffer_list.push_back(mjpeg_buffer());
            m_buffer_list.back().resize(size);
        }
    }

    unsigned* get_data()
    {
        // if there is only one buffer (single thread), there is no need to stack buffers
        if(m_buffer_list.size() == 1)
        {
            m_buffer_list[0].finish();

            m_data_len = m_buffer_list[0].get_len();
            m_last_bit_len = 32 - m_buffer_list[0].get_bits_free();

            return m_buffer_list[0].get_data();
        }

        allocate_output_buffer();

        int bits = 0;
        unsigned currval = 0;
        m_data_len = 0;

        for(unsigned j = 0; j < m_buffer_list.size(); ++j)
        {
            mjpeg_buffer& buffer = m_buffer_list[j];

            // if no bit shift is required, we can use memcpy
            if(bits == 0)
            {
                size_t current_pos = m_data_len;

                if(buffer.get_bits_free() == 0)
                {
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*buffer.get_len());
                    m_data_len += buffer.get_len();
                    currval = 0;
                }
                else
                {
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*(buffer.get_len() - 1));
                    m_data_len += buffer.get_len() - 1;
                    currval = buffer.get_data()[buffer.get_len() - 1];
                }
            }
            else
            {
                for(unsigned i = 0; i < buffer.get_len() - 1; ++i)
                {
                    currval |= ( (unsigned)buffer.get_data()[i] >> (31 & (-bits)) );

                    m_output_buffer[m_data_len++] = currval;

                    currval = buffer.get_data()[i] << (bits + 32);
                }

                currval |= ( (unsigned)buffer.get_data()[buffer.get_len() - 1] >> (31 & (-bits)) );

                if( buffer.get_bits_free() <= -bits)
                {
                    m_output_buffer[m_data_len++] = currval;

                    currval = buffer.get_data()[buffer.get_len() - 1] << (bits + 32);
                }
            }

            bits += buffer.get_bits_free();

            if(bits > 0)
            {
                bits -= 32;
            }
        }

        // bits == 0 means that the last element shouldn't be used
        if (bits != 0)
        {
            m_output_buffer[m_data_len++] = currval;
            m_last_bit_len = -bits;
        }
        else
        {
            m_last_bit_len = 32;
        }

        return &m_output_buffer[0];
    }

    int get_last_bit_len()
    {
        return m_last_bit_len;
    }

    int get_data_size()
    {
        return m_data_len;
    }

    void reset()
    {
        m_last_bit_len = 0;
        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
        {
            m_buffer_list[i].reset();
        }

        // there is no need to erase the output buffer since it will be overwritten
        m_data_len = 0;
    }

private:

    void allocate_output_buffer()
    {
        unsigned total_size = 0;

        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
        {
            m_buffer_list[i].finish();
            total_size += m_buffer_list[i].get_len();
        }

        if(total_size > m_output_buffer.size())
        {
            m_output_buffer.clear();
            m_output_buffer.resize(total_size);
        }
    }

    std::deque<mjpeg_buffer> m_buffer_list;
    std::vector<unsigned> m_output_buffer;
    int m_data_len;
    int m_last_bit_len;
};

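// IVideoWriter implementation that produces an AVI container with a single MJPEG video
// stream; each frame is written as a standalone baseline JPEG inside a '00dc' chunk.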
class MotionJpegWriter : public IVideoWriter
{
public:
    MotionJpegWriter()
    {
        rawstream = false;
        nstripes = -1;
        quality = 0;
    }

    MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
    {
        rawstream = false;
        open(filename, fps, size, iscolor);
        nstripes = -1;
    }
    ~MotionJpegWriter() { close(); }

    virtual int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_OPENCV_MJPEG; }

    void close()
    {
        if( !container.isOpenedStream() )
            return;

        if( !container.isEmptyFrameOffset() && !rawstream )
        {
            container.endWriteChunk(); // end LIST 'movi'
            container.writeIndex(0, dc);
            container.finishWriteAVI();
        }
    }

    bool open(const String& filename, double fps, Size size, bool iscolor)
    {
        close();

        if( filename.empty() )
            return false;
        const char* ext = strrchr(filename.c_str(), '.');
        if( !ext )
            return false;
        if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
            return false;

        if( !container.initContainer(filename, fps, size, iscolor) )
            return false;

        CV_Assert(fps >= 1);
        quality = 75;
        rawstream = false;

        if( !rawstream )
        {
            container.startWriteAVI(1); // count stream
            container.writeStreamHeader(MJPEG);
        }
        //printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
        return true;
    }

    bool isOpened() const CV_OVERRIDE { return container.isOpenedStream(); }

    void write(InputArray _img) CV_OVERRIDE
    {
        Mat img = _img.getMat();
        size_t chunkPointer = container.getStreamPos();
        int input_channels = img.channels();
        int colorspace = -1;
        int imgWidth = img.cols;
        int frameWidth = container.getWidth();
        int imgHeight = img.rows;
        int frameHeight = container.getHeight();
        int channels = container.getChannels();

        if( input_channels == 1 && channels == 1 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight );
            colorspace = COLORSPACE_GRAY;
        }
        else if( input_channels == 4 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
            colorspace = COLORSPACE_RGBA;
        }
        else if( input_channels == 3 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
            colorspace = COLORSPACE_BGR;
        }
        else if( input_channels == 1 && channels == 3 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight*3 );
            colorspace = COLORSPACE_YUV444P;
        }
        else
            CV_Error(cv::Error::StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");

        if( !rawstream ) {
            int avi_index = container.getAVIIndex(0, dc);
            container.startWriteChunk(avi_index);
        }

        writeFrameData(img.data, (int)img.step, colorspace, input_channels);

        if( !rawstream )
        {
            size_t tempChunkPointer = container.getStreamPos();
            size_t moviPointer = container.getMoviPointer();
            container.pushFrameOffset(chunkPointer - moviPointer);
            container.pushFrameSize(tempChunkPointer - chunkPointer - 8); // Size excludes '00dc' and size field
            container.endWriteChunk(); // end '00dc'
        }
    }

    double getProperty(int propId) const CV_OVERRIDE
    {
        if( propId == VIDEOWRITER_PROP_QUALITY )
            return quality;
        if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
        {
            bool isEmpty = container.isEmptyFrameSize();
            return isEmpty ? 0. : container.atFrameSize(container.countFrameSize() - 1);
        }
        if( propId == VIDEOWRITER_PROP_NSTRIPES )
            return nstripes;
        return 0.;
    }

    bool setProperty(int propId, double value) CV_OVERRIDE
    {
        if( propId == VIDEOWRITER_PROP_QUALITY )
        {
            quality = value;
            return true;
        }

        if( propId == VIDEOWRITER_PROP_NSTRIPES)
        {
            nstripes = value;
            return true;
        }

        return false;
    }

    void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );

protected:
    double quality;
    bool rawstream;
    mjpeg_buffer_keeper buffers_list;
    double nstripes;

    AVIWriteContainer container;
};

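// Fixed-point helpers: DCT_DESCALE rounds and shifts an intermediate value down by n
// bits, while fix(x, n) converts a floating-point constant into an n-bit fixed-point
// integer used by the FDCT constants and the RGB->YUV conversion below.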
#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
#define fix(x, n) (int)((x)*(1 << (n)) + .5);

enum
{
    fixb = 14,
    fixc = 12,
    postshift = 14
};

static const int C0_707 = fix(0.707106781f, fixb);
static const int C0_541 = fix(0.541196100f, fixb);
static const int C0_382 = fix(0.382683432f, fixb);
static const int C1_306 = fix(1.306562965f, fixb);

static const int y_r = fix(0.299, fixc);
static const int y_g = fix(0.587, fixc);
static const int y_b = fix(0.114, fixc);

static const int cb_r = -fix(0.1687, fixc);
static const int cb_g = -fix(0.3313, fixc);
static const int cb_b = fix(0.5, fixc);

static const int cr_r = fix(0.5, fixc);
static const int cr_g = -fix(0.4187, fixc);
static const int cr_b = -fix(0.0813, fixc);

// Standard JPEG quantization tables
static const uchar jpegTableK1_T[] =
{
    16, 12, 14, 14, 18, 24, 49, 72,
    11, 12, 13, 17, 22, 35, 64, 92,
    10, 14, 16, 22, 37, 55, 78, 95,
    16, 19, 24, 29, 56, 64, 87, 98,
    24, 26, 40, 51, 68, 81, 103, 112,
    40, 58, 57, 87, 109, 104, 121, 100,
    51, 60, 69, 80, 103, 113, 120, 103,
    61, 55, 56, 62, 77, 92, 101, 99
};

static const uchar jpegTableK2_T[] =
{
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};

// Standard Huffman tables

// ... for luma DCs.
static const uchar jpegTableK3[] =
{
    0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for chroma DCs.
static const uchar jpegTableK4[] =
{
    0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for luma ACs.
static const uchar jpegTableK5[] =
{
    0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};

// ... for chroma ACs
static const uchar jpegTableK6[] =
{
    0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};

static const uchar zigzag[] =
{
    0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
    33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
};


static const int idct_prescale[] =
{
    16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
    16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
    12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
    8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
    4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
};

static const char jpegHeader[] =
    "\xFF\xD8"          // SOI  - start of image
    "\xFF\xE0"          // APP0 - jfif extension
    "\x00\x10"          // 2 bytes: length of APP0 segment
    "JFIF\x00"          // JFIF signature
    "\x01\x02"          // version of JFIF
    "\x00"              // units = pixels ( 1 - inch, 2 - cm )
    "\x00\x01\x00\x01"  // 2 2-byte values: x density & y density
    "\x00\x00";         // width & height of thumbnail: ( 0x0 means no thumbnail)

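// Forward 8x8 DCT based on an Arai-Agui-Nakajima (AAN) style factorization. The final
// scaling is folded into the per-coefficient 'postscale' table, which also carries the
// quantization divisors, so the output coefficients come out already quantized. A NEON
// version is used when available; the scalar fallback follows the same butterflies.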
#ifdef WITH_NEON
// FDCT with postscaling
static void aan_fdct8x8( const short *src, short *dst,
                         int step, const short *postscale )
{
    // Pass 1: process rows
    int16x8_t x0 = vld1q_s16(src);          int16x8_t x1 = vld1q_s16(src + step*7);
    int16x8_t x2 = vld1q_s16(src + step*3); int16x8_t x3 = vld1q_s16(src + step*4);

    int16x8_t x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    int16x8_t t1 = x0; int16x8_t t2 = x2;

    x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);

    x0 = vld1q_s16(src + step); x3 = vld1q_s16(src + step*6);

    x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);
    int16x8_t t3 = x0;

    x0 = vld1q_s16(src + step*2); x3 = vld1q_s16(src + step*5);

    int16x8_t t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);
    x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    int16x8_t res0 = x1;
    int16x8_t res4 = x2;
    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
    x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);

    int16x8_t res2 = x4;
    int16x8_t res6 = x1;

    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;
    x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);
    x1 = vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);
    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);

    int16x8_t res1 = x0;
    int16x8_t res3 = x3;
    int16x8_t res5 = x1;
    int16x8_t res7 = x4;

    // transpose the matrix
    /*
       res0 00 01 02 03 04 05 06 07
       res1 10 11 12 13 14 15 16 17
       res2 20 21 22 23 24 25 26 27
       res3 30 31 32 33 34 35 36 37
       res4 40 41 42 43 44 45 46 47
       res5 50 51 52 53 54 55 56 57
       res6 60 61 62 63 64 65 66 67
       res7 70 71 72 73 74 75 76 77
    */

    // transpose elements 00-33
    int16x4_t res0_0 = vget_low_s16(res0);
    int16x4_t res1_0 = vget_low_s16(res1);
    int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res2);
    res1_0 = vget_low_s16(res3);
    tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres1 = vtrnq_s32(l0, l1);

    // transpose elements 40-73
    res0_0 = vget_low_s16(res4);
    res1_0 = vget_low_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res6);
    res1_0 = vget_low_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres2 = vtrnq_s32(l0, l1);

    // combine into rows 0-3
    int16x8_t transp_res0 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res1 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res2 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res3 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    // transpose elements 04-37
    res0_0 = vget_high_s16(res0);
    res1_0 = vget_high_s16(res1);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res2);
    res1_0 = vget_high_s16(res3);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    tres1 = vtrnq_s32(l0, l1);

    // transpose elements 44-77
    res0_0 = vget_high_s16(res4);
    res1_0 = vget_high_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res6);
    res1_0 = vget_high_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    tres2 = vtrnq_s32(l0, l1);

    // combine into rows 4-7
    int16x8_t transp_res4 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res5 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res6 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res7 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    // special hack for the vqdmulhq_s16 instruction, which produces -1 instead of 0
#define STORE_DESCALED(addr, reg, mul_addr) postscale_line = vld1q_s16((mul_addr)); \
    mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
    reg = vabsq_s16(reg); \
    reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
    reg = vsubq_s16(veorq_s16(reg, mask), mask); \
    vst1q_s16((addr), reg);

    int16x8_t z = vdupq_n_s16(0), postscale_line, mask;

    // pass 2: process columns
    x0 = transp_res0; x1 = transp_res7;
    x2 = transp_res3; x3 = transp_res4;

    x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    t1 = x0; t2 = x2;

    x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);

    x0 = transp_res1;
    x3 = transp_res6;

    x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);

    t3 = x0;

    x0 = transp_res2; x3 = transp_res5;

    t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);

    x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    STORE_DESCALED(dst, x1, postscale);
    STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);

    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));

    x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);

    STORE_DESCALED(dst + 2*8, x4, postscale + 2*8);
    STORE_DESCALED(dst + 6*8, x1, postscale + 6*8);

    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;

    x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);

    x1 = vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);

    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);

    STORE_DESCALED(dst + 5*8, x1, postscale + 5*8);
    STORE_DESCALED(dst + 1*8, x0, postscale + 1*8);
    STORE_DESCALED(dst + 7*8, x4, postscale + 7*8);
    STORE_DESCALED(dst + 3*8, x3, postscale + 3*8);
}

#else
// FDCT with postscaling
static void aan_fdct8x8( const short *src, short *dst,
                         int step, const short *postscale )
{
    int workspace[64], *work = workspace;
    int i;

    // Pass 1: process rows
    for( i = 8; i > 0; i--, src += step, work += 8 )
    {
        int x0 = src[0], x1 = src[7];
        int x2 = src[3], x3 = src[4];

        int x4 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[7] = x0; work[1] = x2;
        x2 = x4 + x1; x4 -= x1;

        x0 = src[1]; x3 = src[6];
        x1 = x0 + x3; x0 -= x3;
        work[5] = x0;

        x0 = src[2]; x3 = src[5];
        work[3] = x0 - x3; x0 += x3;

        x3 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[0] = x1; work[4] = x2;

        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
        x1 = x4 + x0; x4 -= x0;
        work[2] = x4; work[6] = x1;

        x0 = work[1]; x1 = work[3];
        x2 = work[5]; x3 = work[7];

        x0 += x1; x1 += x2; x2 += x3;
        x1 = DCT_DESCALE(x1*C0_707, fixb);

        x4 = x1 + x3; x3 -= x1;
        x1 = (x0 - x2)*C0_382;
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);

        x1 = x0 + x3; x3 -= x0;
        x0 = x4 + x2; x4 -= x2;

        work[5] = x1; work[1] = x0;
        work[7] = x4; work[3] = x3;
    }

    work = workspace;
    // pass 2: process columns
    for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
    {
        int x0 = work[8*0], x1 = work[8*7];
        int x2 = work[8*3], x3 = work[8*4];

        int x4 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[8*7] = x0; work[8*0] = x2;
        x2 = x4 + x1; x4 -= x1;

        x0 = work[8*1]; x3 = work[8*6];
        x1 = x0 + x3; x0 -= x3;
        work[8*4] = x0;

        x0 = work[8*2]; x3 = work[8*5];
        work[8*3] = x0 - x3; x0 += x3;

        x3 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
        dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);

        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
        x1 = x4 + x0; x4 -= x0;

        dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
        dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);

        x0 = work[8*0]; x1 = work[8*3];
        x2 = work[8*4]; x3 = work[8*7];

        x0 += x1; x1 += x2; x2 += x3;
        x1 = DCT_DESCALE(x1*C0_707, fixb);

        x4 = x1 + x3; x3 -= x1;
        x1 = (x0 - x2)*C0_382;
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);

        x1 = x0 + x3; x3 -= x0;
        x0 = x4 + x2; x4 -= x2;

        dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
        dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
        dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
        dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
    }
}
#endif


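// Converts one macroblock of the input image into level-shifted Y and subsampled U/V
// 8x8 blocks ready for the FDCT. Supports BGR, RGBA, planar YUV444 and grayscale
// inputs; for color inputs the chroma of each 2x2 pixel group is summed, and the
// resulting 4x scale is compensated in the chroma quantization table.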
inline void convertToYUV(int colorspace, int channels, int input_channels, short* UV_data, short* Y_data, const uchar* pix_data, int y_limit, int x_limit, int step, int u_plane_ofs, int v_plane_ofs)
{
    int i, j;
    const int UV_step = 16;
    int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
    int Y_step = x_scale*8;

    if( channels > 1 )
    {
        if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
        {
            for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
            {
#ifdef WITH_NEON
                {
                    uint16x8_t masklo = vdupq_n_u16(255);
                    uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
                    uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
                    uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    t1 = vaddq_u16(t1, t2);
                    vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));

                    lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
                    t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
                    t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    t1 = vaddq_u16(t1, t2);
                    vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
                }

                {
                    int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
                    int16x8_t delta = vdupq_n_s16(128);
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data, lane);

                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data + 8, lane);

                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data+Y_step, lane);

                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data+Y_step + 8, lane);
                }
#else
                for( j = 0; j < x_limit; j += 2, pix_data += 2 )
                {
                    Y_data[j] = pix_data[0] - 128;
                    Y_data[j+1] = pix_data[1] - 128;
                    Y_data[j+Y_step] = pix_data[step] - 128;
                    Y_data[j+Y_step+1] = pix_data[step+1] - 128;

                    UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
                        pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
                    UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
                        pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
                }

                pix_data -= x_limit*input_channels;
#endif
            }
        }
        else
        {
            for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
            {
                for( j = 0; j < x_limit; j++, pix_data += input_channels )
                {
                    int Y, U, V;

                    if( colorspace == COLORSPACE_BGR )
                    {
                        int r = pix_data[2];
                        int g = pix_data[1];
                        int b = pix_data[0];

                        Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
                        U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
                        V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
                    }
                    else if( colorspace == COLORSPACE_RGBA )
                    {
                        int r = pix_data[0];
                        int g = pix_data[1];
                        int b = pix_data[2];

                        Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
                        U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
                        V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
                    }
                    else
                    {
                        Y = pix_data[0] - 128;
                        U = pix_data[v_plane_ofs] - 128;
                        V = pix_data[u_plane_ofs] - 128;
                    }

                    int j2 = j >> (x_scale - 1);
                    Y_data[j] = (short)Y;
                    UV_data[j2] = (short)(UV_data[j2] + U);
                    UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
                }

                pix_data -= x_limit*input_channels;
                if( ((i+1) & (y_scale - 1)) == 0 )
                {
                    UV_data += UV_step;
                }
            }
        }
    }
    else
    {
        for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
        {
            for( j = 0; j < x_limit; j++ )
                Y_data[j] = (short)(pix_data[j]*4 - 128*4);
        }
    }
}

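// ParallelLoopBody that Huffman-encodes the image in horizontal stripes. Each stripe
// writes into its own mjpeg_buffer; the DC predictors for a stripe are re-derived by
// re-running the FDCT on the last macroblock row of the previous stripe, so stripes
// can be encoded independently and concatenated afterwards.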
class MjpegEncoder : public ParallelLoopBody
{
public:
    MjpegEncoder(int _height,
                 int _width,
                 int _step,
                 const uchar* _data,
                 int _input_channels,
                 int _channels,
                 int _colorspace,
                 unsigned (&_huff_dc_tab)[2][16],
                 unsigned (&_huff_ac_tab)[2][256],
                 short (&_fdct_qtab)[2][64],
                 uchar* _cat_table,
                 mjpeg_buffer_keeper& _buffer_list,
                 double nstripes
                 ) :
        m_buffer_list(_buffer_list),
        height(_height),
        width(_width),
        step(_step),
        in_data(_data),
        input_channels(_input_channels),
        channels(_channels),
        colorspace(_colorspace),
        huff_dc_tab(_huff_dc_tab),
        huff_ac_tab(_huff_ac_tab),
        fdct_qtab(_fdct_qtab),
        cat_table(_cat_table)
    {
        // empirically found value: if the number of pixels is below it, parallelization does not pay off
        const int min_pixels_count = 96*96;

        stripes_count = 1;

        if(nstripes < 0)
        {
            if(height*width > min_pixels_count)
            {
                const int default_stripes_count = 4;
                stripes_count = default_stripes_count;
            }
        }
        else
        {
            stripes_count = cvCeil(nstripes);
        }

        int y_scale = channels > 1 ? 2 : 1;
        int y_step = y_scale * 8;

        int max_stripes = (height - 1)/y_step + 1;

        stripes_count = std::min(stripes_count, max_stripes);

        m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count);
    }

    void operator()( const cv::Range& range ) const CV_OVERRIDE
    {
        const int CAT_TAB_SIZE = 4096;

        int x, y;
        int i, j;

        short buffer[4096];
        int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
        int dc_pred[] = { 0, 0, 0 };
        int x_step = x_scale * 8;
        int y_step = y_scale * 8;
        short block[6][64];
        int luma_count = x_scale*y_scale;
        int block_count = luma_count + channels - 1;
        int u_plane_ofs = step*height;
        int v_plane_ofs = u_plane_ofs + step*height;
        const uchar* data = in_data;
        const uchar* init_data = data;

        int num_steps = (height - 1)/y_step + 1;

        // if this is not the first stripe, we need to calculate dc_pred from the previous step
        if(range.start > 0)
        {
            y = y_step*int(num_steps*range.start/stripes_count - 1);
            data = init_data + y*step;

            for( x = 0; x < width; x += x_step )
            {
                int x_limit = x_step;
                int y_limit = y_step;
                const uchar* pix_data = data + x*input_channels;
                short* Y_data = block[0];
                short* UV_data = block[luma_count];

                if( x + x_limit > width ) x_limit = width - x;
                if( y + y_limit > height ) y_limit = height - y;

                memset( block, 0, block_count*64*sizeof(block[0][0]));

                convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);

                for( i = 0; i < block_count; i++ )
                {
                    int is_chroma = i >= luma_count;
                    int src_step = x_scale * 8;
                    const short* src_ptr = block[i & -2] + (i & 1)*8;

                    aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );

                    j = is_chroma + (i > luma_count);
                    dc_pred[j] = buffer[0];
                }
            }
        }

        for(int k = range.start; k < range.end; ++k)
        {
            mjpeg_buffer& output_buffer = m_buffer_list[k];
            output_buffer.clear();

            int y_min = y_step*int(num_steps*k/stripes_count);
            int y_max = y_step*int(num_steps*(k+1)/stripes_count);

            if(k == stripes_count - 1)
            {
                y_max = height;
            }

            data = init_data + y_min*step;

            for( y = y_min; y < y_max; y += y_step, data += y_step*step )
            {
                for( x = 0; x < width; x += x_step )
                {
                    int x_limit = x_step;
                    int y_limit = y_step;
                    const uchar* pix_data = data + x*input_channels;
                    short* Y_data = block[0];
                    short* UV_data = block[luma_count];

                    if( x + x_limit > width ) x_limit = width - x;
                    if( y + y_limit > height ) y_limit = height - y;

                    memset( block, 0, block_count*64*sizeof(block[0][0]));

                    convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);

                    for( i = 0; i < block_count; i++ )
                    {
                        int is_chroma = i >= luma_count;
                        int src_step = x_scale * 8;
                        int run = 0, val;
                        const short* src_ptr = block[i & -2] + (i & 1)*8;
                        const unsigned* htable = huff_ac_tab[is_chroma];

                        aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );

                        j = is_chroma + (i > luma_count);
                        val = buffer[0] - dc_pred[j];
                        dc_pred[j] = buffer[0];

                        {
                            int cat = cat_table[val + CAT_TAB_SIZE];

                            //CV_Assert( cat <= 11 );
                            output_buffer.put_val(cat, huff_dc_tab[is_chroma] );
                            output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
                        }

                        for( j = 1; j < 64; j++ )
                        {
                            val = buffer[zigzag[j]];

                            if( val == 0 )
                            {
                                run++;
                            }
                            else
                            {
                                while( run >= 16 )
                                {
                                    output_buffer.put_val( 0xF0, htable ); // encode 16 zeros
                                    run -= 16;
                                }

                                {
                                    int cat = cat_table[val + CAT_TAB_SIZE];
                                    //CV_Assert( cat <= 10 );
                                    output_buffer.put_val( cat + run*16, htable );
                                    output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
                                }

                                run = 0;
                            }
                        }

                        if( run )
                        {
                            output_buffer.put_val( 0x00, htable ); // encode EOB
                        }
                    }
                }
            }
        }
    }

    cv::Range getRange()
    {
        return cv::Range(0, stripes_count);
    }

    double getNStripes()
    {
        return stripes_count;
    }

    mjpeg_buffer_keeper& m_buffer_list;
private:

    MjpegEncoder& operator=( const MjpegEncoder & ) { return *this; }

    const int height;
    const int width;
    const int step;
    const uchar* in_data;
    const int input_channels;
    const int channels;
    const int colorspace;
    const unsigned (&huff_dc_tab)[2][16];
    const unsigned (&huff_ac_tab)[2][256];
    const short (&fdct_qtab)[2][64];
    const uchar* cat_table;
    int stripes_count;
};

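// Writes one frame as a baseline JPEG: emits the JFIF header, quantization and Huffman
// tables, frame and scan headers, then runs the stripe encoders in parallel, flushes
// the concatenated entropy-coded data, and pads the stream to a 4-byte boundary.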
void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
{
    //double total_cvt = 0, total_dct = 0;
    static bool init_cat_table = false;
    const int CAT_TAB_SIZE = 4096;
    static uchar cat_table[CAT_TAB_SIZE*2+1];
    if( !init_cat_table )
    {
        for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
        {
            Cv32suf a;
            a.f = (float)i;
            cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
        }
        init_cat_table = true;
    }

    //double total_dct = 0, total_cvt = 0;
    int width = container.getWidth();
    int height = container.getHeight();
    int channels = container.getChannels();

    CV_Assert( data && width > 0 && height > 0 );

    // encode the header and tables
    // for each mcu:
    //   convert rgb to yuv with downsampling (if color).
    //   for every block:
    //     calc dct and quantize
    //     encode block.
    int i, j;
    const int max_quality = 12;
    short fdct_qtab[2][64];
    unsigned huff_dc_tab[2][16];
    unsigned huff_ac_tab[2][256];

    int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
    short buffer[4096];
    int* hbuffer = (int*)buffer;
    int luma_count = x_scale*y_scale;
    double _quality = quality*0.01*max_quality;

    if( _quality < 1. ) _quality = 1.;
    if( _quality > max_quality ) _quality = max_quality;

    double inv_quality = 1./_quality;

    // Encode header
    container.putStreamBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );

    // Encode quantization tables
    for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
    {
        const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
        int chroma_scale = i > 0 ? luma_count : 1;

        container.jputStreamShort( 0xffdb );   // DQT marker
        container.jputStreamShort( 2 + 65*1 ); // put single qtable
        container.putStreamByte( 0*16 + i );   // 8-bit table

        // put coefficients
        for( j = 0; j < 64; j++ )
        {
            int idx = zigzag[j];
            int qval = cvRound(qtable[idx]*inv_quality);
            if( qval < 1 )
                qval = 1;
            if( qval > 255 )
                qval = 255;
            fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
                                        (qval*chroma_scale*idct_prescale[idx]));
            container.putStreamByte( qval );
        }
    }

    // Encode huffman tables
    for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
    {
        const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
                              i == 2 ? jpegTableK4 : jpegTableK6;
        int is_ac_tab = i & 1;
        int idx = i >= 2;
        int tableSize = 16 + (is_ac_tab ? 162 : 12);

        container.jputStreamShort( 0xFFC4 );           // DHT marker
        container.jputStreamShort( 3 + tableSize );    // define one huffman table
        container.putStreamByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
        container.putStreamBytes( htable, tableSize ); // put table

        createEncodeHuffmanTable(createSourceHuffmanTable( htable, hbuffer, 16, 9 ),
                                 is_ac_tab ? huff_ac_tab[idx] : huff_dc_tab[idx],
                                 is_ac_tab ? 256 : 16 );
    }

    // put frame header
    container.jputStreamShort( 0xFFC0 );         // SOF0 marker
    container.jputStreamShort( 8 + 3*channels ); // length of frame header
    container.putStreamByte( 8 );                // sample precision
    container.jputStreamShort( height );
    container.jputStreamShort( width );
    container.putStreamByte( channels );         // number of components

    for( i = 0; i < channels; i++ )
    {
        container.putStreamByte( i + 1 );  // (i+1)-th component id (Y,U or V)
        if( i == 0 )
            container.putStreamByte(x_scale*16 + y_scale); // chroma scale factors
        else
            container.putStreamByte(1*16 + 1);
        container.putStreamByte( i > 0 ); // quantization table idx
    }

    // put scan header
    container.jputStreamShort( 0xFFDA );         // SOS marker
    container.jputStreamShort( 6 + 2*channels ); // length of scan header
    container.putStreamByte( channels );         // number of components in the scan

    for( i = 0; i < channels; i++ )
    {
        container.putStreamByte( i+1 );              // component id
        container.putStreamByte( (i>0)*16 + (i>0) ); // selection of DC & AC tables
    }

    container.jputStreamShort(0*256 + 63); // start and end of spectral selection - for
                                           // sequential DCT start is 0 and end is 63

    container.putStreamByte( 0 ); // successive approximation bit position
                                  // high & low - (0,0) for sequential DCT

    buffers_list.reset();

    MjpegEncoder parallel_encoder(height, width, step, data, input_channels, channels, colorspace, huff_dc_tab, huff_ac_tab, fdct_qtab, cat_table, buffers_list, nstripes);

    cv::parallel_for_(parallel_encoder.getRange(), parallel_encoder, parallel_encoder.getNStripes());

    //std::vector<unsigned>& v = parallel_encoder.m_buffer_list.get_data();
    unsigned* v = buffers_list.get_data();
    unsigned last_data_elem = buffers_list.get_data_size() - 1;

    for(unsigned k = 0; k < last_data_elem; ++k)
    {
        container.jputStream(v[k]);
    }
    container.jflushStream(v[last_data_elem], 32 - buffers_list.get_last_bit_len());
    container.jputStreamShort( 0xFFD9 ); // EOI marker
    /*printf("total dct = %.1fms, total cvt = %.1fms\n",
        total_dct*1000./cv::getTickFrequency(),
        total_cvt*1000./cv::getTickFrequency());*/

    size_t pos = container.getStreamPos();
    size_t pos1 = (pos + 3) & ~3;
    for( ; pos < pos1; pos++ )
        container.putStreamByte(0);
}

}

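// Factory for the built-in OpenCV MJPEG writer: it only handles the 'MJPG' FOURCC and
// returns an empty pointer if the underlying AVI container could not be opened.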
Ptr<IVideoWriter> createMotionJpegWriter(const std::string& filename, int fourcc,
                                         double fps, const Size& frameSize,
                                         const VideoWriterParameters& params)
{
    if (fourcc != CV_FOURCC('M', 'J', 'P', 'G'))
        return Ptr<IVideoWriter>();

    const bool isColor = params.get(VIDEOWRITER_PROP_IS_COLOR, true);
    Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, isColor);
    if( !iwriter->isOpened() )
        iwriter.release();
    return iwriter;
}

}
