/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/videoio/container_avi.private.hpp"

#include <vector>
#include <deque>
#include <iostream>
#include <cstdlib>

#if CV_NEON
#define WITH_NEON
#endif

namespace cv
{

static const unsigned bit_mask[] =
{
    0,
    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};

static const uchar huff_val_shift = 20;
static const int huff_code_mask = (1 << huff_val_shift) - 1;

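// Builds a Huffman encoding lookup table from the intermediate representation produced
// by createSourceHuffmanTable(): table[0] stores the smallest symbol value, table[1] the
// number of entries, and table[val - min_val + 2] packs the canonical code in the upper
// bits with the code length in the lowest 8 bits ((code << 8) | length).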
static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
{
    int i, k;
    int min_val = INT_MAX, max_val = INT_MIN;
    int size;

    /* calc min and max values in the table */
    for( i = 1, k = 1; src[k] >= 0; i++ )
    {
        int code_count = src[k++];

        for( code_count += k; k < code_count; k++ )
        {
            int val = src[k] >> huff_val_shift;
            if( val < min_val )
                min_val = val;
            if( val > max_val )
                max_val = val;
        }
    }

    size = max_val - min_val + 3;

    if( size > max_size )
    {
        CV_Error(cv::Error::StsOutOfRange, "too big maximum Huffman code size");
    }

    memset( table, 0, size*sizeof(table[0]));

    table[0] = min_val;
    table[1] = size - 2;

    for( i = 1, k = 1; src[k] >= 0; i++ )
    {
        int code_count = src[k++];

        for( code_count += k; k < code_count; k++ )
        {
            int val = src[k] >> huff_val_shift;
            int code = src[k] & huff_code_mask;

            table[val - min_val + 2] = (code << 8) | i;
        }
    }
    return true;
}

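// Expands a JPEG Huffman table specification (16 code-length counts followed by the
// symbol values, as stored in jpegTableK3..K6) into an intermediate array: the first
// element holds first_bits, then for each code length the count of codes followed by
// entries packing (symbol << huff_val_shift) | canonical code; the list ends with -1.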
static int* createSourceHuffmanTable(const uchar* src, int* dst,
                                     int max_bits, int first_bits)
{
    int i, val_idx, code = 0;
    int* table = dst;
    *dst++ = first_bits;
    for (i = 1, val_idx = max_bits; i <= max_bits; i++)
    {
        int code_count = src[i - 1];
        dst[0] = code_count;
        code <<= 1;
        for (int k = 0; k < code_count; k++)
        {
            dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
        }
        code += code_count;
        dst += code_count + 1;
        val_idx += code_count;
    }
    dst[0] = -1;
    return table;
}


namespace mjpeg
{

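// Bit-level output buffer: accumulates variable-length Huffman codes into 32-bit words
// (most significant bit first) and grows automatically when it runs out of space.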
class mjpeg_buffer
{
public:
    mjpeg_buffer()
    {
        reset();
    }

    void resize(int size)
    {
        data.resize(size);
    }

    inline void put_bits(unsigned bits, int len)
    {
        CV_Assert(len >= 0 && len < 32);
        if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())
        {
            resize(int(2*data.size()));
        }

        bits_free -= (len);
        unsigned int tempval = (bits) & bit_mask[(len)];

        if( bits_free <= 0 )
        {
            data[m_pos] |= ((unsigned)tempval >> -bits_free);

            bits_free += 32;
            ++m_pos;
            data[m_pos] = bits_free < 32 ? (tempval << bits_free) : 0;
        }
        else
        {
            data[m_pos] |= (bits_free == 32) ? tempval : (tempval << bits_free);
        }
    }

    inline void put_val(int val, const unsigned * table)
    {
        unsigned code = table[(val) + 2];
        put_bits(code >> 8, (int)(code & 255));
    }

    void finish()
    {
        if(bits_free == 32)
        {
            bits_free = 0;
            m_data_len = m_pos;
        }
        else
        {
            m_data_len = m_pos + 1;
        }
    }

    void reset()
    {
        bits_free = 32;
        m_pos = 0;
        m_data_len = 0;
    }

    void clear()
    {
        // we only need to clear the first element; the rest will be overwritten
        data[0] = 0;
    }

    int get_bits_free()
    {
        return bits_free;
    }

    unsigned* get_data()
    {
        return &data[0];
    }

    unsigned get_len()
    {
        return m_data_len;
    }

private:
    std::vector<unsigned> data;
    int bits_free;
    unsigned m_pos;
    unsigned m_data_len;
};


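// Holds one mjpeg_buffer per parallel stripe and concatenates them bit-exactly into a
// single output buffer once encoding is finished.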
class mjpeg_buffer_keeper
{
public:
    mjpeg_buffer_keeper()
    {
        reset();
    }

    mjpeg_buffer& operator[](int i)
    {
        return m_buffer_list[i];
    }

    void allocate_buffers(int count, int size)
    {
        for(int i = (int)m_buffer_list.size(); i < count; ++i)
        {
            m_buffer_list.push_back(mjpeg_buffer());
            m_buffer_list.back().resize(size);
        }
    }

    unsigned* get_data()
    {
        // if there is only one buffer (single thread), there is no need to stack buffers
        if(m_buffer_list.size() == 1)
        {
            m_buffer_list[0].finish();

            m_data_len = m_buffer_list[0].get_len();
            m_last_bit_len = 32 - m_buffer_list[0].get_bits_free();

            return m_buffer_list[0].get_data();
        }

        allocate_output_buffer();

        int bits = 0;
        unsigned currval = 0;
        m_data_len = 0;

        for(unsigned j = 0; j < m_buffer_list.size(); ++j)
        {
            mjpeg_buffer& buffer = m_buffer_list[j];

            // if no bit shift is required, we can use memcpy
            if(bits == 0)
            {
                size_t current_pos = m_data_len;

                if(buffer.get_bits_free() == 0)
                {
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*buffer.get_len());
                    m_data_len += buffer.get_len();
                    currval = 0;
                }
                else
                {
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*(buffer.get_len() - 1));
                    m_data_len += buffer.get_len() - 1;
                    currval = buffer.get_data()[buffer.get_len() - 1];
                }
            }
            else
            {
                for(unsigned i = 0; i < buffer.get_len() - 1; ++i)
                {
                    currval |= ( (unsigned)buffer.get_data()[i] >> (31 & (-bits)) );

                    m_output_buffer[m_data_len++] = currval;

                    currval = buffer.get_data()[i] << (bits + 32);
                }

                currval |= ( (unsigned)buffer.get_data()[buffer.get_len() - 1] >> (31 & (-bits)) );

                if( buffer.get_bits_free() <= -bits)
                {
                    m_output_buffer[m_data_len++] = currval;

                    currval = buffer.get_data()[buffer.get_len() - 1] << (bits + 32);
                }
            }

            bits += buffer.get_bits_free();

            if(bits > 0)
            {
                bits -= 32;
            }
        }

        // bits == 0 means that the last element shouldn't be used
        if (bits != 0)
        {
            m_output_buffer[m_data_len++] = currval;
            m_last_bit_len = -bits;
        }
        else
        {
            m_last_bit_len = 32;
        }

        return &m_output_buffer[0];
    }

    int get_last_bit_len()
    {
        return m_last_bit_len;
    }

    int get_data_size()
    {
        return m_data_len;
    }

    void reset()
    {
        m_last_bit_len = 0;
        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
        {
            m_buffer_list[i].reset();
        }

        // there is no need to erase the output buffer since it will be overwritten
        m_data_len = 0;
    }

private:

    void allocate_output_buffer()
    {
        unsigned total_size = 0;

        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
        {
            m_buffer_list[i].finish();
            total_size += m_buffer_list[i].get_len();
        }

        if(total_size > m_output_buffer.size())
        {
            m_output_buffer.clear();
            m_output_buffer.resize(total_size);
        }
    }

    std::deque<mjpeg_buffer> m_buffer_list;
    std::vector<unsigned> m_output_buffer;
    int m_data_len;
    int m_last_bit_len;
};

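// IVideoWriter implementation that produces an AVI container with a single MJPEG video
// stream; each frame is written as a standalone baseline JPEG inside a '00dc' chunk.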
class MotionJpegWriter : public IVideoWriter
{
public:
    MotionJpegWriter()
    {
        rawstream = false;
        nstripes = -1;
        quality = 0;
    }

    MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
    {
        rawstream = false;
        open(filename, fps, size, iscolor);
        nstripes = -1;
    }
    ~MotionJpegWriter() { close(); }

    virtual int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_OPENCV_MJPEG; }

    void close()
    {
        if( !container.isOpenedStream() )
            return;

        if( !container.isEmptyFrameOffset() && !rawstream )
        {
            container.endWriteChunk(); // end LIST 'movi'
            container.writeIndex(0, dc);
            container.finishWriteAVI();
        }
    }

    bool open(const String& filename, double fps, Size size, bool iscolor)
    {
        close();

        if( filename.empty() )
            return false;
        const char* ext = strrchr(filename.c_str(), '.');
        if( !ext )
            return false;
        if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
            return false;

        if( !container.initContainer(filename, fps, size, iscolor) )
            return false;

        CV_Assert(fps >= 1);
        quality = 75;
        rawstream = false;

        if( !rawstream )
        {
            container.startWriteAVI(1); // count stream
            container.writeStreamHeader(MJPEG);
        }
        //printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
        return true;
    }

    bool isOpened() const CV_OVERRIDE { return container.isOpenedStream(); }

    void write(InputArray _img) CV_OVERRIDE
    {
        Mat img = _img.getMat();
        size_t chunkPointer = container.getStreamPos();
        int input_channels = img.channels();
        int colorspace = -1;
        int imgWidth = img.cols;
        int frameWidth = container.getWidth();
        int imgHeight = img.rows;
        int frameHeight = container.getHeight();
        int channels = container.getChannels();

        if( input_channels == 1 && channels == 1 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight );
            colorspace = COLORSPACE_GRAY;
        }
        else if( input_channels == 4 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
            colorspace = COLORSPACE_RGBA;
        }
        else if( input_channels == 3 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
            colorspace = COLORSPACE_BGR;
        }
        else if( input_channels == 1 && channels == 3 )
        {
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight*3 );
            colorspace = COLORSPACE_YUV444P;
        }
        else
            CV_Error(cv::Error::StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");

        if( !rawstream ) {
            int avi_index = container.getAVIIndex(0, dc);
            container.startWriteChunk(avi_index);
        }

        writeFrameData(img.data, (int)img.step, colorspace, input_channels);

        if( !rawstream )
        {
            size_t tempChunkPointer = container.getStreamPos();
            size_t moviPointer = container.getMoviPointer();
            container.pushFrameOffset(chunkPointer - moviPointer);
            container.pushFrameSize(tempChunkPointer - chunkPointer - 8); // Size excludes '00dc' and size field
            container.endWriteChunk(); // end '00dc'
        }
    }

    double getProperty(int propId) const CV_OVERRIDE
    {
        if( propId == VIDEOWRITER_PROP_QUALITY )
            return quality;
        if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
        {
            bool isEmpty = container.isEmptyFrameSize();
            return isEmpty ? 0. : container.atFrameSize(container.countFrameSize() - 1);
        }
        if( propId == VIDEOWRITER_PROP_NSTRIPES )
            return nstripes;
        return 0.;
    }

    bool setProperty(int propId, double value) CV_OVERRIDE
    {
        if( propId == VIDEOWRITER_PROP_QUALITY )
        {
            quality = value;
            return true;
        }

        if( propId == VIDEOWRITER_PROP_NSTRIPES)
        {
            nstripes = value;
            return true;
        }

        return false;
    }

    void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );

protected:
    double quality;
    bool rawstream;
    mjpeg_buffer_keeper buffers_list;
    double nstripes;

    AVIWriteContainer container;
};

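// Fixed-point helpers: DCT_DESCALE rounds and shifts an intermediate value down by n
// bits, while fix(x, n) converts a floating-point constant into an n-bit fixed-point
// integer used by the FDCT constants and the RGB->YUV conversion below.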
#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
#define fix(x, n) (int)((x)*(1 << (n)) + .5);

enum
{
    fixb = 14,
    fixc = 12,
    postshift = 14
};

static const int C0_707 = fix(0.707106781f, fixb);
static const int C0_541 = fix(0.541196100f, fixb);
static const int C0_382 = fix(0.382683432f, fixb);
static const int C1_306 = fix(1.306562965f, fixb);

static const int y_r = fix(0.299, fixc);
static const int y_g = fix(0.587, fixc);
static const int y_b = fix(0.114, fixc);

static const int cb_r = -fix(0.1687, fixc);
static const int cb_g = -fix(0.3313, fixc);
static const int cb_b = fix(0.5, fixc);

static const int cr_r = fix(0.5, fixc);
static const int cr_g = -fix(0.4187, fixc);
static const int cr_b = -fix(0.0813, fixc);

// Standard JPEG quantization tables
static const uchar jpegTableK1_T[] =
{
    16, 12, 14, 14, 18, 24, 49, 72,
    11, 12, 13, 17, 22, 35, 64, 92,
    10, 14, 16, 22, 37, 55, 78, 95,
    16, 19, 24, 29, 56, 64, 87, 98,
    24, 26, 40, 51, 68, 81, 103, 112,
    40, 58, 57, 87, 109, 104, 121, 100,
    51, 60, 69, 80, 103, 113, 120, 103,
    61, 55, 56, 62, 77, 92, 101, 99
};

static const uchar jpegTableK2_T[] =
{
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};

// Standard Huffman tables

// ... for luma DCs.
static const uchar jpegTableK3[] =
{
    0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for chroma DCs.
static const uchar jpegTableK4[] =
{
    0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for luma ACs.
static const uchar jpegTableK5[] =
{
    0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};

// ... for chroma ACs
static const uchar jpegTableK6[] =
{
    0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};

static const uchar zigzag[] =
{
    0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
    33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
};


static const int idct_prescale[] =
{
    16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
    16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
    12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
    8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
    4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
};

static const char jpegHeader[] =
    "\xFF\xD8"          // SOI  - start of image
    "\xFF\xE0"          // APP0 - jfif extension
    "\x00\x10"          // 2 bytes: length of APP0 segment
    "JFIF\x00"          // JFIF signature
    "\x01\x02"          // version of JFIF
    "\x00"              // units = pixels ( 1 - inch, 2 - cm )
    "\x00\x01\x00\x01"  // 2 2-byte values: x density & y density
    "\x00\x00";         // width & height of thumbnail: ( 0x0 means no thumbnail)

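// Forward 8x8 DCT based on an Arai-Agui-Nakajima (AAN) style factorization. The final
// scaling is folded into the per-coefficient 'postscale' table, which also carries the
// quantization divisors, so the output coefficients come out already quantized. A NEON
// version is used when available; the scalar fallback follows the same butterflies.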
#ifdef WITH_NEON
// FDCT with postscaling
static void aan_fdct8x8( const short *src, short *dst,
                         int step, const short *postscale )
{
    // Pass 1: process rows
    int16x8_t x0 = vld1q_s16(src);          int16x8_t x1 = vld1q_s16(src + step*7);
    int16x8_t x2 = vld1q_s16(src + step*3); int16x8_t x3 = vld1q_s16(src + step*4);

    int16x8_t x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    int16x8_t t1 = x0; int16x8_t t2 = x2;

    x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);

    x0 = vld1q_s16(src + step); x3 = vld1q_s16(src + step*6);

    x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);
    int16x8_t t3 = x0;

    x0 = vld1q_s16(src + step*2); x3 = vld1q_s16(src + step*5);

    int16x8_t t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);
    x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    int16x8_t res0 = x1;
    int16x8_t res4 = x2;
    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
    x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);

    int16x8_t res2 = x4;
    int16x8_t res6 = x1;

    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;
    x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);
    x1 = vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);
    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);

    int16x8_t res1 = x0;
    int16x8_t res3 = x3;
    int16x8_t res5 = x1;
    int16x8_t res7 = x4;

    // transpose the matrix
    /*
       res0 00 01 02 03 04 05 06 07
       res1 10 11 12 13 14 15 16 17
       res2 20 21 22 23 24 25 26 27
       res3 30 31 32 33 34 35 36 37
       res4 40 41 42 43 44 45 46 47
       res5 50 51 52 53 54 55 56 57
       res6 60 61 62 63 64 65 66 67
       res7 70 71 72 73 74 75 76 77
    */

    // transpose elements 00-33
    int16x4_t res0_0 = vget_low_s16(res0);
    int16x4_t res1_0 = vget_low_s16(res1);
    int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res2);
    res1_0 = vget_low_s16(res3);
    tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres1 = vtrnq_s32(l0, l1);

    // transpose elements 40-73
    res0_0 = vget_low_s16(res4);
    res1_0 = vget_low_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res6);
    res1_0 = vget_low_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres2 = vtrnq_s32(l0, l1);

    // combine into rows 0-3
    int16x8_t transp_res0 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res1 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res2 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res3 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    // transpose elements 04-37
    res0_0 = vget_high_s16(res0);
    res1_0 = vget_high_s16(res1);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res2);
    res1_0 = vget_high_s16(res3);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    tres1 = vtrnq_s32(l0, l1);

    // transpose elements 44-77
    res0_0 = vget_high_s16(res4);
    res1_0 = vget_high_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res6);
    res1_0 = vget_high_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]), vreinterpret_s32_s16(tres.val[1]));

    tres2 = vtrnq_s32(l0, l1);

    // combine into rows 4-7
    int16x8_t transp_res4 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res5 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res6 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res7 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    // special hack for the vqdmulhq_s16 instruction, which produces -1 instead of 0
#define STORE_DESCALED(addr, reg, mul_addr) postscale_line = vld1q_s16((mul_addr)); \
    mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
    reg = vabsq_s16(reg); \
    reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
    reg = vsubq_s16(veorq_s16(reg, mask), mask); \
    vst1q_s16((addr), reg);

    int16x8_t z = vdupq_n_s16(0), postscale_line, mask;

    // pass 2: process columns
    x0 = transp_res0; x1 = transp_res7;
    x2 = transp_res3; x3 = transp_res4;

    x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    t1 = x0; t2 = x2;

    x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);

    x0 = transp_res1;
    x3 = transp_res6;

    x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);

    t3 = x0;

    x0 = transp_res2; x3 = transp_res5;

    t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);

    x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    STORE_DESCALED(dst, x1, postscale);
    STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);

    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));

    x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);

    STORE_DESCALED(dst + 2*8, x4, postscale + 2*8);
    STORE_DESCALED(dst + 6*8, x1, postscale + 6*8);

    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;

    x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);

    x1 = vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);

    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);

    STORE_DESCALED(dst + 5*8, x1, postscale + 5*8);
    STORE_DESCALED(dst + 1*8, x0, postscale + 1*8);
    STORE_DESCALED(dst + 7*8, x4, postscale + 7*8);
    STORE_DESCALED(dst + 3*8, x3, postscale + 3*8);
}

#else
// FDCT with postscaling
static void aan_fdct8x8( const short *src, short *dst,
                         int step, const short *postscale )
{
    int workspace[64], *work = workspace;
    int i;

    // Pass 1: process rows
    for( i = 8; i > 0; i--, src += step, work += 8 )
    {
        int x0 = src[0], x1 = src[7];
        int x2 = src[3], x3 = src[4];

        int x4 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[7] = x0; work[1] = x2;
        x2 = x4 + x1; x4 -= x1;

        x0 = src[1]; x3 = src[6];
        x1 = x0 + x3; x0 -= x3;
        work[5] = x0;

        x0 = src[2]; x3 = src[5];
        work[3] = x0 - x3; x0 += x3;

        x3 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[0] = x1; work[4] = x2;

        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
        x1 = x4 + x0; x4 -= x0;
        work[2] = x4; work[6] = x1;

        x0 = work[1]; x1 = work[3];
        x2 = work[5]; x3 = work[7];

        x0 += x1; x1 += x2; x2 += x3;
        x1 = DCT_DESCALE(x1*C0_707, fixb);

        x4 = x1 + x3; x3 -= x1;
        x1 = (x0 - x2)*C0_382;
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);

        x1 = x0 + x3; x3 -= x0;
        x0 = x4 + x2; x4 -= x2;

        work[5] = x1; work[1] = x0;
        work[7] = x4; work[3] = x3;
    }

    work = workspace;
    // pass 2: process columns
    for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
    {
        int x0 = work[8*0], x1 = work[8*7];
        int x2 = work[8*3], x3 = work[8*4];

        int x4 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[8*7] = x0; work[8*0] = x2;
        x2 = x4 + x1; x4 -= x1;

        x0 = work[8*1]; x3 = work[8*6];
        x1 = x0 + x3; x0 -= x3;
        work[8*4] = x0;

        x0 = work[8*2]; x3 = work[8*5];
        work[8*3] = x0 - x3; x0 += x3;

        x3 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
        dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);

        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
        x1 = x4 + x0; x4 -= x0;

        dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
        dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);

        x0 = work[8*0]; x1 = work[8*3];
        x2 = work[8*4]; x3 = work[8*7];

        x0 += x1; x1 += x2; x2 += x3;
        x1 = DCT_DESCALE(x1*C0_707, fixb);

        x4 = x1 + x3; x3 -= x1;
        x1 = (x0 - x2)*C0_382;
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);

        x1 = x0 + x3; x3 -= x0;
        x0 = x4 + x2; x4 -= x2;

        dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
        dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
        dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
        dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
    }
}
#endif


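// Converts one macroblock of the input image into level-shifted Y and subsampled U/V
// 8x8 blocks ready for the FDCT. Supports BGR, RGBA, planar YUV444 and grayscale
// inputs; for color inputs the chroma of each 2x2 pixel group is summed, and the
// resulting 4x scale is compensated in the chroma quantization table.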
inline void convertToYUV(int colorspace, int channels, int input_channels, short* UV_data, short* Y_data, const uchar* pix_data, int y_limit, int x_limit, int step, int u_plane_ofs, int v_plane_ofs)
{
    int i, j;
    const int UV_step = 16;
    int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
    int Y_step = x_scale*8;

    if( channels > 1 )
    {
        if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
        {
            for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
            {
#ifdef WITH_NEON
                {
                    uint16x8_t masklo = vdupq_n_u16(255);
                    uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
                    uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
                    uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    t1 = vaddq_u16(t1, t2);
                    vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));

                    lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
                    t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
                    t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
                    t1 = vaddq_u16(t1, t2);
                    vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
                }

                {
                    int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
                    int16x8_t delta = vdupq_n_s16(128);
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data, lane);

                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data + 8, lane);

                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data+Y_step, lane);

                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
                    lane = vsubq_s16(lane, delta);
                    vst1q_s16(Y_data+Y_step + 8, lane);
                }
#else
                for( j = 0; j < x_limit; j += 2, pix_data += 2 )
                {
                    Y_data[j] = pix_data[0] - 128;
                    Y_data[j+1] = pix_data[1] - 128;
                    Y_data[j+Y_step] = pix_data[step] - 128;
                    Y_data[j+Y_step+1] = pix_data[step+1] - 128;

                    UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
                        pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
                    UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
                        pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
                }

                pix_data -= x_limit*input_channels;
#endif
            }
        }
        else
        {
            for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
            {
                for( j = 0; j < x_limit; j++, pix_data += input_channels )
                {
                    int Y, U, V;

                    if( colorspace == COLORSPACE_BGR )
                    {
                        int r = pix_data[2];
                        int g = pix_data[1];
                        int b = pix_data[0];

                        Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
                        U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
                        V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
                    }
                    else if( colorspace == COLORSPACE_RGBA )
                    {
                        int r = pix_data[0];
                        int g = pix_data[1];
                        int b = pix_data[2];

                        Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
                        U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
                        V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
                    }
                    else
                    {
                        Y = pix_data[0] - 128;
                        U = pix_data[v_plane_ofs] - 128;
                        V = pix_data[u_plane_ofs] - 128;
                    }

                    int j2 = j >> (x_scale - 1);
                    Y_data[j] = (short)Y;
                    UV_data[j2] = (short)(UV_data[j2] + U);
                    UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
                }

                pix_data -= x_limit*input_channels;
                if( ((i+1) & (y_scale - 1)) == 0 )
                {
                    UV_data += UV_step;
                }
            }
        }
    }
    else
    {
        for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
        {
            for( j = 0; j < x_limit; j++ )
                Y_data[j] = (short)(pix_data[j]*4 - 128*4);
        }
    }
}

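// ParallelLoopBody that Huffman-encodes the image in horizontal stripes. Each stripe
// writes into its own mjpeg_buffer; the DC predictors for a stripe are re-derived by
// re-running the FDCT on the last macroblock row of the previous stripe, so stripes
// can be encoded independently and concatenated afterwards.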
class MjpegEncoder : public ParallelLoopBody
{
public:
    MjpegEncoder(int _height,
                 int _width,
                 int _step,
                 const uchar* _data,
                 int _input_channels,
                 int _channels,
                 int _colorspace,
                 unsigned (&_huff_dc_tab)[2][16],
                 unsigned (&_huff_ac_tab)[2][256],
                 short (&_fdct_qtab)[2][64],
                 uchar* _cat_table,
                 mjpeg_buffer_keeper& _buffer_list,
                 double nstripes
                 ) :
        m_buffer_list(_buffer_list),
        height(_height),
        width(_width),
        step(_step),
        in_data(_data),
        input_channels(_input_channels),
        channels(_channels),
        colorspace(_colorspace),
        huff_dc_tab(_huff_dc_tab),
        huff_ac_tab(_huff_ac_tab),
        fdct_qtab(_fdct_qtab),
        cat_table(_cat_table)
    {
        // empirically found value: if the number of pixels is below it, parallelization does not pay off
        const int min_pixels_count = 96*96;

        stripes_count = 1;

        if(nstripes < 0)
        {
            if(height*width > min_pixels_count)
            {
                const int default_stripes_count = 4;
                stripes_count = default_stripes_count;
            }
        }
        else
        {
            stripes_count = cvCeil(nstripes);
        }

        int y_scale = channels > 1 ? 2 : 1;
        int y_step = y_scale * 8;

        int max_stripes = (height - 1)/y_step + 1;

        stripes_count = std::min(stripes_count, max_stripes);

        m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count);
    }

    void operator()( const cv::Range& range ) const CV_OVERRIDE
    {
        const int CAT_TAB_SIZE = 4096;

        int x, y;
        int i, j;

        short buffer[4096];
        int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
        int dc_pred[] = { 0, 0, 0 };
        int x_step = x_scale * 8;
        int y_step = y_scale * 8;
        short block[6][64];
        int luma_count = x_scale*y_scale;
        int block_count = luma_count + channels - 1;
        int u_plane_ofs = step*height;
        int v_plane_ofs = u_plane_ofs + step*height;
        const uchar* data = in_data;
        const uchar* init_data = data;

        int num_steps = (height - 1)/y_step + 1;

        // if this is not the first stripe, we need to calculate dc_pred from the previous step
        if(range.start > 0)
        {
            y = y_step*int(num_steps*range.start/stripes_count - 1);
            data = init_data + y*step;

            for( x = 0; x < width; x += x_step )
            {
                int x_limit = x_step;
                int y_limit = y_step;
                const uchar* pix_data = data + x*input_channels;
                short* Y_data = block[0];
                short* UV_data = block[luma_count];

                if( x + x_limit > width ) x_limit = width - x;
                if( y + y_limit > height ) y_limit = height - y;

                memset( block, 0, block_count*64*sizeof(block[0][0]));

                convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);

                for( i = 0; i < block_count; i++ )
                {
                    int is_chroma = i >= luma_count;
                    int src_step = x_scale * 8;
                    const short* src_ptr = block[i & -2] + (i & 1)*8;

                    aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );

                    j = is_chroma + (i > luma_count);
                    dc_pred[j] = buffer[0];
                }
            }
        }

        for(int k = range.start; k < range.end; ++k)
        {
            mjpeg_buffer& output_buffer = m_buffer_list[k];
            output_buffer.clear();

            int y_min = y_step*int(num_steps*k/stripes_count);
            int y_max = y_step*int(num_steps*(k+1)/stripes_count);

            if(k == stripes_count - 1)
            {
                y_max = height;
            }

            data = init_data + y_min*step;

            for( y = y_min; y < y_max; y += y_step, data += y_step*step )
            {
                for( x = 0; x < width; x += x_step )
                {
                    int x_limit = x_step;
                    int y_limit = y_step;
                    const uchar* pix_data = data + x*input_channels;
                    short* Y_data = block[0];
                    short* UV_data = block[luma_count];

                    if( x + x_limit > width ) x_limit = width - x;
                    if( y + y_limit > height ) y_limit = height - y;

                    memset( block, 0, block_count*64*sizeof(block[0][0]));

                    convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);

                    for( i = 0; i < block_count; i++ )
                    {
                        int is_chroma = i >= luma_count;
                        int src_step = x_scale * 8;
                        int run = 0, val;
                        const short* src_ptr = block[i & -2] + (i & 1)*8;
                        const unsigned* htable = huff_ac_tab[is_chroma];

                        aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );

                        j = is_chroma + (i > luma_count);
                        val = buffer[0] - dc_pred[j];
                        dc_pred[j] = buffer[0];

                        {
                            int cat = cat_table[val + CAT_TAB_SIZE];

                            //CV_Assert( cat <= 11 );
                            output_buffer.put_val(cat, huff_dc_tab[is_chroma] );
                            output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
                        }

                        for( j = 1; j < 64; j++ )
                        {
                            val = buffer[zigzag[j]];

                            if( val == 0 )
                            {
                                run++;
                            }
                            else
                            {
                                while( run >= 16 )
                                {
                                    output_buffer.put_val( 0xF0, htable ); // encode 16 zeros
                                    run -= 16;
                                }

                                {
                                    int cat = cat_table[val + CAT_TAB_SIZE];
                                    //CV_Assert( cat <= 10 );
                                    output_buffer.put_val( cat + run*16, htable );
                                    output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
                                }

                                run = 0;
                            }
                        }

                        if( run )
                        {
                            output_buffer.put_val( 0x00, htable ); // encode EOB
                        }
                    }
                }
            }
        }
    }

    cv::Range getRange()
    {
        return cv::Range(0, stripes_count);
    }

    double getNStripes()
    {
        return stripes_count;
    }

    mjpeg_buffer_keeper& m_buffer_list;
private:

    MjpegEncoder& operator=( const MjpegEncoder & ) { return *this; }

    const int height;
    const int width;
    const int step;
    const uchar* in_data;
    const int input_channels;
    const int channels;
    const int colorspace;
    const unsigned (&huff_dc_tab)[2][16];
    const unsigned (&huff_ac_tab)[2][256];
    const short (&fdct_qtab)[2][64];
    const uchar* cat_table;
    int stripes_count;
};

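// Writes one frame as a baseline JPEG: emits the JFIF header, quantization and Huffman
// tables, frame and scan headers, then runs the stripe encoders in parallel, flushes
// the concatenated entropy-coded data, and pads the stream to a 4-byte boundary.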
void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
{
    //double total_cvt = 0, total_dct = 0;
    static bool init_cat_table = false;
    const int CAT_TAB_SIZE = 4096;
    static uchar cat_table[CAT_TAB_SIZE*2+1];
    if( !init_cat_table )
    {
        for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
        {
            Cv32suf a;
            a.f = (float)i;
            cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
        }
        init_cat_table = true;
    }

    //double total_dct = 0, total_cvt = 0;
    int width = container.getWidth();
    int height = container.getHeight();
    int channels = container.getChannels();

    CV_Assert( data && width > 0 && height > 0 );

    // encode the header and tables
    // for each mcu:
    //   convert rgb to yuv with downsampling (if color).
    //   for every block:
    //     calc dct and quantize
    //     encode block.
    int i, j;
    const int max_quality = 12;
    short fdct_qtab[2][64];
    unsigned huff_dc_tab[2][16];
    unsigned huff_ac_tab[2][256];

    int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
    short buffer[4096];
    int* hbuffer = (int*)buffer;
    int luma_count = x_scale*y_scale;
    double _quality = quality*0.01*max_quality;

    if( _quality < 1. ) _quality = 1.;
    if( _quality > max_quality ) _quality = max_quality;

    double inv_quality = 1./_quality;

    // Encode header
    container.putStreamBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );

    // Encode quantization tables
    for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
    {
        const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
        int chroma_scale = i > 0 ? luma_count : 1;

        container.jputStreamShort( 0xffdb );   // DQT marker
        container.jputStreamShort( 2 + 65*1 ); // put single qtable
        container.putStreamByte( 0*16 + i );   // 8-bit table

        // put coefficients
        for( j = 0; j < 64; j++ )
        {
            int idx = zigzag[j];
            int qval = cvRound(qtable[idx]*inv_quality);
            if( qval < 1 )
                qval = 1;
            if( qval > 255 )
                qval = 255;
            fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
                                        (qval*chroma_scale*idct_prescale[idx]));
            container.putStreamByte( qval );
        }
    }

    // Encode huffman tables
    for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
    {
        const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
                              i == 2 ? jpegTableK4 : jpegTableK6;
        int is_ac_tab = i & 1;
        int idx = i >= 2;
        int tableSize = 16 + (is_ac_tab ? 162 : 12);

        container.jputStreamShort( 0xFFC4 );           // DHT marker
        container.jputStreamShort( 3 + tableSize );    // define one huffman table
        container.putStreamByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
        container.putStreamBytes( htable, tableSize ); // put table

        createEncodeHuffmanTable(createSourceHuffmanTable( htable, hbuffer, 16, 9 ),
                                 is_ac_tab ? huff_ac_tab[idx] : huff_dc_tab[idx],
                                 is_ac_tab ? 256 : 16 );
    }

    // put frame header
    container.jputStreamShort( 0xFFC0 );         // SOF0 marker
    container.jputStreamShort( 8 + 3*channels ); // length of frame header
    container.putStreamByte( 8 );                // sample precision
    container.jputStreamShort( height );
    container.jputStreamShort( width );
    container.putStreamByte( channels );         // number of components

    for( i = 0; i < channels; i++ )
    {
        container.putStreamByte( i + 1 );  // (i+1)-th component id (Y,U or V)
        if( i == 0 )
            container.putStreamByte(x_scale*16 + y_scale); // chroma scale factors
        else
            container.putStreamByte(1*16 + 1);
        container.putStreamByte( i > 0 ); // quantization table idx
    }

    // put scan header
    container.jputStreamShort( 0xFFDA );         // SOS marker
    container.jputStreamShort( 6 + 2*channels ); // length of scan header
    container.putStreamByte( channels );         // number of components in the scan

    for( i = 0; i < channels; i++ )
    {
        container.putStreamByte( i+1 );              // component id
        container.putStreamByte( (i>0)*16 + (i>0) ); // selection of DC & AC tables
    }

    container.jputStreamShort(0*256 + 63); // start and end of spectral selection - for
                                           // sequential DCT start is 0 and end is 63

    container.putStreamByte( 0 ); // successive approximation bit position
                                  // high & low - (0,0) for sequential DCT

    buffers_list.reset();

    MjpegEncoder parallel_encoder(height, width, step, data, input_channels, channels, colorspace, huff_dc_tab, huff_ac_tab, fdct_qtab, cat_table, buffers_list, nstripes);

    cv::parallel_for_(parallel_encoder.getRange(), parallel_encoder, parallel_encoder.getNStripes());

    //std::vector<unsigned>& v = parallel_encoder.m_buffer_list.get_data();
    unsigned* v = buffers_list.get_data();
    unsigned last_data_elem = buffers_list.get_data_size() - 1;

    for(unsigned k = 0; k < last_data_elem; ++k)
    {
        container.jputStream(v[k]);
    }
    container.jflushStream(v[last_data_elem], 32 - buffers_list.get_last_bit_len());
    container.jputStreamShort( 0xFFD9 ); // EOI marker
    /*printf("total dct = %.1fms, total cvt = %.1fms\n",
        total_dct*1000./cv::getTickFrequency(),
        total_cvt*1000./cv::getTickFrequency());*/

    size_t pos = container.getStreamPos();
    size_t pos1 = (pos + 3) & ~3;
    for( ; pos < pos1; pos++ )
        container.putStreamByte(0);
}

}

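// Factory for the built-in OpenCV MJPEG writer: it only handles the 'MJPG' FOURCC and
// returns an empty pointer if the underlying AVI container could not be opened.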
Ptr<IVideoWriter> createMotionJpegWriter(const std::string& filename, int fourcc,
                                         double fps, const Size& frameSize,
                                         const VideoWriterParameters& params)
{
    if (fourcc != CV_FOURCC('M', 'J', 'P', 'G'))
        return Ptr<IVideoWriter>();

    const bool isColor = params.get(VIDEOWRITER_PROP_IS_COLOR, true);
    Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, isColor);
    if( !iwriter->isOpened() )
        iwriter.release();
    return iwriter;
}

}
