1/*
2Open Asset Import Library (assimp)
3----------------------------------------------------------------------
4
5Copyright (c) 2006-2019, assimp team
6
7
8All rights reserved.
9
10Redistribution and use of this software in source and binary forms,
11with or without modification, are permitted provided that the
12following conditions are met:
13
14* Redistributions of source code must retain the above
15 copyright notice, this list of conditions and the
16 following disclaimer.
17
18* Redistributions in binary form must reproduce the above
19 copyright notice, this list of conditions and the
20 following disclaimer in the documentation and/or other
21 materials provided with the distribution.
22
23* Neither the name of the assimp team, nor the names of its
24 contributors may be used to endorse or promote products
25 derived from this software without specific prior
26 written permission of the assimp team.
27
28THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40----------------------------------------------------------------------
41*/
42/** @file FBXBinaryTokenizer.cpp
43 * @brief Implementation of a fake lexer for binary fbx files -
44 * we emit tokens so the parser needs almost no special handling
45 * for binary files.
46 */
47
48#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
49
#include "FBXTokenizer.h"
#include "FBXUtil.h"

#include <cstring>   // ::memcpy, strncmp
#include <stdint.h>

#include <assimp/ByteSwapper.h>
#include <assimp/defs.h>
#include <assimp/Exceptional.h>
56
57namespace Assimp {
58namespace FBX {
59
60//enum Flag
61//{
62// e_unknown_0 = 1 << 0,
63// e_unknown_1 = 1 << 1,
64// e_unknown_2 = 1 << 2,
65// e_unknown_3 = 1 << 3,
66// e_unknown_4 = 1 << 4,
67// e_unknown_5 = 1 << 5,
68// e_unknown_6 = 1 << 6,
69// e_unknown_7 = 1 << 7,
70// e_unknown_8 = 1 << 8,
71// e_unknown_9 = 1 << 9,
72// e_unknown_10 = 1 << 10,
73// e_unknown_11 = 1 << 11,
74// e_unknown_12 = 1 << 12,
75// e_unknown_13 = 1 << 13,
76// e_unknown_14 = 1 << 14,
77// e_unknown_15 = 1 << 15,
78// e_unknown_16 = 1 << 16,
79// e_unknown_17 = 1 << 17,
80// e_unknown_18 = 1 << 18,
81// e_unknown_19 = 1 << 19,
82// e_unknown_20 = 1 << 20,
83// e_unknown_21 = 1 << 21,
84// e_unknown_22 = 1 << 22,
85// e_unknown_23 = 1 << 23,
// e_flag_field_size_64_bit = 1 << 24, // Not sure what this flag means
87// e_unknown_25 = 1 << 25,
88// e_unknown_26 = 1 << 26,
89// e_unknown_27 = 1 << 27,
90// e_unknown_28 = 1 << 28,
91// e_unknown_29 = 1 << 29,
92// e_unknown_30 = 1 << 30,
93// e_unknown_31 = 1 << 31
94//};
95//
96//bool check_flag(uint32_t flags, Flag to_check)
97//{
98// return (flags & to_check) != 0;
99//}
// ------------------------------------------------------------------------------------------------
// Construct a token covering the byte range [sbegin, send) of a binary FBX file.
// 'offset' is the token's byte offset within the file; it is stored in the 'line'
// member, and 'column' is set to BINARY_MARKER to flag the token as binary-sourced.
Token::Token(const char* sbegin, const char* send, TokenType type, size_t offset)
    :
    #ifdef DEBUG
    // debug builds keep a copy of the token text for easier inspection in a debugger
    contents(sbegin, static_cast<size_t>(send-sbegin)),
    #endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(offset)
    , column(BINARY_MARKER)
{
    ai_assert(sbegin);
    ai_assert(send);

    // binary tokens may have zero length because they are sometimes dummies
    // inserted by TokenizeBinary()
    ai_assert(send >= sbegin);
}
119
120
121namespace {
122
123// ------------------------------------------------------------------------------------------------
124// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
125AI_WONT_RETURN void TokenizeError(const std::string& message, size_t offset) AI_WONT_RETURN_SUFFIX;
126AI_WONT_RETURN void TokenizeError(const std::string& message, size_t offset)
127{
128 throw DeadlyImportError(Util::AddOffset(prefix: "FBX-Tokenize",text: message,offset));
129}
130
131
132// ------------------------------------------------------------------------------------------------
133size_t Offset(const char* begin, const char* cursor) {
134 ai_assert(begin <= cursor);
135
136 return cursor - begin;
137}
138
139// ------------------------------------------------------------------------------------------------
140void TokenizeError(const std::string& message, const char* begin, const char* cursor) {
141 TokenizeError(message, offset: Offset(begin, cursor));
142}
143
144// ------------------------------------------------------------------------------------------------
145uint32_t ReadWord(const char* input, const char*& cursor, const char* end) {
146 const size_t k_to_read = sizeof( uint32_t );
147 if(Offset(begin: cursor, cursor: end) < k_to_read ) {
148 TokenizeError(message: "cannot ReadWord, out of bounds",begin: input, cursor);
149 }
150
151 uint32_t word;
152 ::memcpy(dest: &word, src: cursor, n: 4);
153 AI_SWAP4(word);
154
155 cursor += k_to_read;
156
157 return word;
158}
159
160// ------------------------------------------------------------------------------------------------
161uint64_t ReadDoubleWord(const char* input, const char*& cursor, const char* end) {
162 const size_t k_to_read = sizeof(uint64_t);
163 if(Offset(begin: cursor, cursor: end) < k_to_read) {
164 TokenizeError(message: "cannot ReadDoubleWord, out of bounds",begin: input, cursor);
165 }
166
167 uint64_t dword /*= *reinterpret_cast<const uint64_t*>(cursor)*/;
168 ::memcpy( dest: &dword, src: cursor, n: sizeof( uint64_t ) );
169 AI_SWAP8(dword);
170
171 cursor += k_to_read;
172
173 return dword;
174}
175
176// ------------------------------------------------------------------------------------------------
177uint8_t ReadByte(const char* input, const char*& cursor, const char* end) {
178 if(Offset(begin: cursor, cursor: end) < sizeof( uint8_t ) ) {
179 TokenizeError(message: "cannot ReadByte, out of bounds",begin: input, cursor);
180 }
181
182 uint8_t word;/* = *reinterpret_cast< const uint8_t* >( cursor )*/
183 ::memcpy( dest: &word, src: cursor, n: sizeof( uint8_t ) );
184 ++cursor;
185
186 return word;
187}
188
189// ------------------------------------------------------------------------------------------------
190unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input,
191 const char*& cursor, const char* end, bool long_length = false, bool allow_null = false) {
192 const uint32_t len_len = long_length ? 4 : 1;
193 if(Offset(begin: cursor, cursor: end) < len_len) {
194 TokenizeError(message: "cannot ReadString, out of bounds reading length",begin: input, cursor);
195 }
196
197 const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
198
199 if (Offset(begin: cursor, cursor: end) < length) {
200 TokenizeError(message: "cannot ReadString, length is out of bounds",begin: input, cursor);
201 }
202
203 sbegin_out = cursor;
204 cursor += length;
205
206 send_out = cursor;
207
208 if(!allow_null) {
209 for (unsigned int i = 0; i < length; ++i) {
210 if(sbegin_out[i] == '\0') {
211 TokenizeError(message: "failed ReadString, unexpected NUL character in string",begin: input, cursor);
212 }
213 }
214 }
215
216 return length;
217}
218
219// ------------------------------------------------------------------------------------------------
220void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end) {
221 if(Offset(begin: cursor, cursor: end) < 1) {
222 TokenizeError(message: "cannot ReadData, out of bounds reading length",begin: input, cursor);
223 }
224
225 const char type = *cursor;
226 sbegin_out = cursor++;
227
228 switch(type)
229 {
230 // 16 bit int
231 case 'Y':
232 cursor += 2;
233 break;
234
235 // 1 bit bool flag (yes/no)
236 case 'C':
237 cursor += 1;
238 break;
239
240 // 32 bit int
241 case 'I':
242 // <- fall through
243
244 // float
245 case 'F':
246 cursor += 4;
247 break;
248
249 // double
250 case 'D':
251 cursor += 8;
252 break;
253
254 // 64 bit int
255 case 'L':
256 cursor += 8;
257 break;
258
259 // note: do not write cursor += ReadWord(...cursor) as this would be UB
260
261 // raw binary data
262 case 'R':
263 {
264 const uint32_t length = ReadWord(input, cursor, end);
265 cursor += length;
266 break;
267 }
268
269 case 'b':
270 // TODO: what is the 'b' type code? Right now we just skip over it /
271 // take the full range we could get
272 cursor = end;
273 break;
274
275 // array of *
276 case 'f':
277 case 'd':
278 case 'l':
279 case 'i':
280 case 'c': {
281 const uint32_t length = ReadWord(input, cursor, end);
282 const uint32_t encoding = ReadWord(input, cursor, end);
283
284 const uint32_t comp_len = ReadWord(input, cursor, end);
285
286 // compute length based on type and check against the stored value
287 if(encoding == 0) {
288 uint32_t stride = 0;
289 switch(type)
290 {
291 case 'f':
292 case 'i':
293 stride = 4;
294 break;
295
296 case 'd':
297 case 'l':
298 stride = 8;
299 break;
300
301 case 'c':
302 stride = 1;
303 break;
304
305 default:
306 ai_assert(false);
307 };
308 ai_assert(stride > 0);
309 if(length * stride != comp_len) {
310 TokenizeError(message: "cannot ReadData, calculated data stride differs from what the file claims",begin: input, cursor);
311 }
312 }
313 // zip/deflate algorithm (encoding==1)? take given length. anything else? die
314 else if (encoding != 1) {
315 TokenizeError(message: "cannot ReadData, unknown encoding",begin: input, cursor);
316 }
317 cursor += comp_len;
318 break;
319 }
320
321 // string
322 case 'S': {
323 const char* sb, *se;
324 // 0 characters can legally happen in such strings
325 ReadString(sbegin_out&: sb, send_out&: se, input, cursor, end, long_length: true, allow_null: true);
326 break;
327 }
328 default:
329 TokenizeError(message: "cannot ReadData, unexpected type code: " + std::string(&type, 1),begin: input, cursor);
330 }
331
332 if(cursor > end) {
333 TokenizeError(message: "cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),begin: input, cursor);
334 }
335
336 // the type code is contained in the returned range
337 send_out = cursor;
338}
339
340
341// ------------------------------------------------------------------------------------------------
342bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end, bool const is64bits)
343{
344 // the first word contains the offset at which this block ends
345 const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
346
347 // we may get 0 if reading reached the end of the file -
348 // fbx files have a mysterious extra footer which I don't know
349 // how to extract any information from, but at least it always
350 // starts with a 0.
351 if(!end_offset) {
352 return false;
353 }
354
355 if(end_offset > Offset(begin: input, cursor: end)) {
356 TokenizeError(message: "block offset is out of range",begin: input, cursor);
357 }
358 else if(end_offset < Offset(begin: input, cursor)) {
359 TokenizeError(message: "block offset is negative out of range",begin: input, cursor);
360 }
361
362 // the second data word contains the number of properties in the scope
363 const uint64_t prop_count = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
364
365 // the third data word contains the length of the property list
366 const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
367
368 // now comes the name of the scope/key
369 const char* sbeg, *send;
370 ReadString(sbegin_out&: sbeg, send_out&: send, input, cursor, end);
371
372 output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(begin: input, cursor) ));
373
374 // now come the individual properties
375 const char* begin_cursor = cursor;
376 for (unsigned int i = 0; i < prop_count; ++i) {
377 ReadData(sbegin_out&: sbeg, send_out&: send, input, cursor, end: begin_cursor + prop_length);
378
379 output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(begin: input, cursor) ));
380
381 if(i != prop_count-1) {
382 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(begin: input, cursor) ));
383 }
384 }
385
386 if (Offset(begin: begin_cursor, cursor) != prop_length) {
387 TokenizeError(message: "property length not reached, something is wrong",begin: input, cursor);
388 }
389
390 // at the end of each nested block, there is a NUL record to indicate
391 // that the sub-scope exists (i.e. to distinguish between P: and P : {})
392 // this NUL record is 13 bytes long on 32 bit version and 25 bytes long on 64 bit.
393 const size_t sentinel_block_length = is64bits ? (sizeof(uint64_t)* 3 + 1) : (sizeof(uint32_t)* 3 + 1);
394
395 if (Offset(begin: input, cursor) < end_offset) {
396 if (end_offset - Offset(begin: input, cursor) < sentinel_block_length) {
397 TokenizeError(message: "insufficient padding bytes at block end",begin: input, cursor);
398 }
399
400 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(begin: input, cursor) ));
401
402 // XXX this is vulnerable to stack overflowing ..
403 while(Offset(begin: input, cursor) < end_offset - sentinel_block_length) {
404 ReadScope(output_tokens, input, cursor, end: input + end_offset - sentinel_block_length, is64bits);
405 }
406 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(begin: input, cursor) ));
407
408 for (unsigned int i = 0; i < sentinel_block_length; ++i) {
409 if(cursor[i] != '\0') {
410 TokenizeError(message: "failed to read nested block sentinel, expected all bytes to be 0",begin: input, cursor);
411 }
412 }
413 cursor += sentinel_block_length;
414 }
415
416 if (Offset(begin: input, cursor) != end_offset) {
417 TokenizeError(message: "scope length not reached, something is wrong",begin: input, cursor);
418 }
419
420 return true;
421}
422
423} // anonymous namespace
424
425// ------------------------------------------------------------------------------------------------
426// TODO: Test FBX Binary files newer than the 7500 version to check if the 64 bits address behaviour is consistent
427void TokenizeBinary(TokenList& output_tokens, const char* input, size_t length)
428{
429 ai_assert(input);
430
431 if(length < 0x1b) {
432 TokenizeError(message: "file is too short",offset: 0);
433 }
434
435 //uint32_t offset = 0x15;
436/* const char* cursor = input + 0x15;
437
438 const uint32_t flags = ReadWord(input, cursor, input + length);
439
440 const uint8_t padding_0 = ReadByte(input, cursor, input + length); // unused
441 const uint8_t padding_1 = ReadByte(input, cursor, input + length); // unused*/
442
443 if (strncmp(s1: input,s2: "Kaydara FBX Binary",n: 18)) {
444 TokenizeError(message: "magic bytes not found",offset: 0);
445 }
446
447 const char* cursor = input + 18;
448 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
449 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
450 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
451 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
452 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
453 const uint32_t version = ReadWord(input, cursor, end: input + length);
454 const bool is64bits = version >= 7500;
455 const char *end = input + length;
456 while (cursor < end ) {
457 if (!ReadScope(output_tokens, input, cursor, end: input + length, is64bits)) {
458 break;
459 }
460 }
461}
462
463} // !FBX
464} // !Assimp
465
466#endif
467

// Source: qt3d/src/3rdparty/assimp/src/code/FBX/FBXBinaryTokenizer.cpp