1/*
2Open Asset Import Library (assimp)
3----------------------------------------------------------------------
4
5Copyright (c) 2006-2025, assimp team
6
7All rights reserved.
8
9Redistribution and use of this software in source and binary forms,
10with or without modification, are permitted provided that the
11following conditions are met:
12
13* Redistributions of source code must retain the above
14 copyright notice, this list of conditions and the
15 following disclaimer.
16
17* Redistributions in binary form must reproduce the above
18 copyright notice, this list of conditions and the
19 following disclaimer in the documentation and/or other
20 materials provided with the distribution.
21
22* Neither the name of the assimp team, nor the names of its
23 contributors may be used to endorse or promote products
24 derived from this software without specific prior
25 written permission of the assimp team.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
39----------------------------------------------------------------------
40*/
41/** @file FBXBinaryTokenizer.cpp
42 * @brief Implementation of a fake lexer for binary fbx files -
43 * we emit tokens so the parser needs almost no special handling
44 * for binary files.
45 */
46
47#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
48
49#include "FBXTokenizer.h"
50#include "FBXUtil.h"
51#include <assimp/defs.h>
52#include <stdint.h>
53#include <cstdint>
54#include <assimp/Exceptional.h>
55#include <assimp/ByteSwapper.h>
56#include <assimp/DefaultLogger.hpp>
57#include <assimp/StringUtils.h>
58
59namespace Assimp {
60namespace FBX {
61
62// ------------------------------------------------------------------------------------------------
63Token::Token(const char* sbegin, const char* send, TokenType type, size_t offset) :
64 #ifdef DEBUG
65 contents(sbegin, static_cast<size_t>(send-sbegin)),
66 #endif
67 sbegin(sbegin),
68 send(send),
69 type(type),
70 line(offset),
71 column(BINARY_MARKER) {
72 ai_assert(sbegin);
73 ai_assert(send);
74
75 // binary tokens may have zero length because they are sometimes dummies
76 // inserted by TokenizeBinary()
77 ai_assert(send >= sbegin);
78}
79
80
81namespace {
82
83// ------------------------------------------------------------------------------------------------
84// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
85AI_WONT_RETURN void TokenizeError(const std::string& message, size_t offset) AI_WONT_RETURN_SUFFIX;
86AI_WONT_RETURN void TokenizeError(const std::string& message, size_t offset)
87{
88 throw DeadlyImportError("FBX-Tokenize", Util::GetOffsetText(offset), message);
89}
90
91
92// ------------------------------------------------------------------------------------------------
93size_t Offset(const char* begin, const char* cursor) {
94 ai_assert(begin <= cursor);
95
96 return cursor - begin;
97}
98
99// ------------------------------------------------------------------------------------------------
100AI_WONT_RETURN void TokenizeError(const std::string& message, const char* begin, const char* cursor) AI_WONT_RETURN_SUFFIX;
101void TokenizeError(const std::string& message, const char* begin, const char* cursor) {
102 TokenizeError(message, offset: Offset(begin, cursor));
103}
104
105// ------------------------------------------------------------------------------------------------
106uint32_t ReadWord(const char* input, const char*& cursor, const char* end) {
107 const size_t k_to_read = sizeof( uint32_t );
108 if(Offset(begin: cursor, cursor: end) < k_to_read ) {
109 TokenizeError(message: "cannot ReadWord, out of bounds",begin: input, cursor);
110 }
111
112 uint32_t word;
113 ::memcpy(dest: &word, src: cursor, n: 4);
114 AI_SWAP4(word);
115
116 cursor += k_to_read;
117
118 return word;
119}
120
121// ------------------------------------------------------------------------------------------------
122uint64_t ReadDoubleWord(const char* input, const char*& cursor, const char* end) {
123 const size_t k_to_read = sizeof(uint64_t);
124 if(Offset(begin: cursor, cursor: end) < k_to_read) {
125 TokenizeError(message: "cannot ReadDoubleWord, out of bounds",begin: input, cursor);
126 }
127
128 uint64_t dword /*= *reinterpret_cast<const uint64_t*>(cursor)*/;
129 ::memcpy( dest: &dword, src: cursor, n: sizeof( uint64_t ) );
130 AI_SWAP8(dword);
131
132 cursor += k_to_read;
133
134 return dword;
135}
136
137// ------------------------------------------------------------------------------------------------
138uint8_t ReadByte(const char* input, const char*& cursor, const char* end) {
139 if(Offset(begin: cursor, cursor: end) < sizeof( uint8_t ) ) {
140 TokenizeError(message: "cannot ReadByte, out of bounds",begin: input, cursor);
141 }
142
143 uint8_t word;/* = *reinterpret_cast< const uint8_t* >( cursor )*/
144 ::memcpy( dest: &word, src: cursor, n: sizeof( uint8_t ) );
145 ++cursor;
146
147 return word;
148}
149
150// ------------------------------------------------------------------------------------------------
151unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input,
152 const char*& cursor, const char* end, bool long_length = false, bool allow_null = false) {
153 const uint32_t len_len = long_length ? 4 : 1;
154 if(Offset(begin: cursor, cursor: end) < len_len) {
155 TokenizeError(message: "cannot ReadString, out of bounds reading length",begin: input, cursor);
156 }
157
158 const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
159
160 if (Offset(begin: cursor, cursor: end) < length) {
161 TokenizeError(message: "cannot ReadString, length is out of bounds",begin: input, cursor);
162 }
163
164 sbegin_out = cursor;
165 cursor += length;
166
167 send_out = cursor;
168
169 if(!allow_null) {
170 for (unsigned int i = 0; i < length; ++i) {
171 if(sbegin_out[i] == '\0') {
172 TokenizeError(message: "failed ReadString, unexpected NUL character in string",begin: input, cursor);
173 }
174 }
175 }
176
177 return length;
178}
179
180// ------------------------------------------------------------------------------------------------
181void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end) {
182 if(Offset(begin: cursor, cursor: end) < 1) {
183 TokenizeError(message: "cannot ReadData, out of bounds reading length",begin: input, cursor);
184 }
185
186 const char type = *cursor;
187 sbegin_out = cursor++;
188
189 switch(type)
190 {
191 // 16 bit int
192 case 'Y':
193 cursor += 2;
194 break;
195
196 // 1 bit bool flag (yes/no)
197 case 'C':
198 cursor += 1;
199 break;
200
201 // 32 bit int
202 case 'I':
203 // <- fall through
204
205 // float
206 case 'F':
207 cursor += 4;
208 break;
209
210 // double
211 case 'D':
212 cursor += 8;
213 break;
214
215 // 64 bit int
216 case 'L':
217 cursor += 8;
218 break;
219
220 // note: do not write cursor += ReadWord(...cursor) as this would be UB
221
222 // raw binary data
223 case 'R':
224 {
225 const uint32_t length = ReadWord(input, cursor, end);
226 cursor += length;
227 break;
228 }
229
230 case 'b':
231 // TODO: what is the 'b' type code? Right now we just skip over it /
232 // take the full range we could get
233 cursor = end;
234 break;
235
236 // array of *
237 case 'f':
238 case 'd':
239 case 'l':
240 case 'i':
241 case 'c': {
242 const uint32_t length = ReadWord(input, cursor, end);
243 const uint32_t encoding = ReadWord(input, cursor, end);
244
245 const uint32_t comp_len = ReadWord(input, cursor, end);
246
247 // compute length based on type and check against the stored value
248 if(encoding == 0) {
249 uint32_t stride = 0;
250 switch(type)
251 {
252 case 'f':
253 case 'i':
254 stride = 4;
255 break;
256
257 case 'd':
258 case 'l':
259 stride = 8;
260 break;
261
262 case 'c':
263 stride = 1;
264 break;
265
266 default:
267 ai_assert(false);
268 };
269 ai_assert(stride > 0);
270 if(length * stride != comp_len) {
271 TokenizeError(message: "cannot ReadData, calculated data stride differs from what the file claims",begin: input, cursor);
272 }
273 }
274 // zip/deflate algorithm (encoding==1)? take given length. anything else? die
275 else if (encoding != 1) {
276 TokenizeError(message: "cannot ReadData, unknown encoding",begin: input, cursor);
277 }
278 cursor += comp_len;
279 break;
280 }
281
282 // string
283 case 'S': {
284 const char* sb, *se;
285 // 0 characters can legally happen in such strings
286 ReadString(sbegin_out&: sb, send_out&: se, input, cursor, end, long_length: true, allow_null: true);
287 break;
288 }
289 default:
290 TokenizeError(message: "cannot ReadData, unexpected type code: " + std::string(&type, 1),begin: input, cursor);
291 }
292
293 if(cursor > end) {
294 TokenizeError(message: "cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),begin: input, cursor);
295 }
296
297 // the type code is contained in the returned range
298 send_out = cursor;
299}
300
301
302// ------------------------------------------------------------------------------------------------
303bool ReadScope(TokenList &output_tokens, StackAllocator &token_allocator, const char *input, const char *&cursor, const char *end, bool const is64bits) {
304 // the first word contains the offset at which this block ends
305 const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
306
307 // we may get 0 if reading reached the end of the file -
308 // fbx files have a mysterious extra footer which I don't know
309 // how to extract any information from, but at least it always
310 // starts with a 0.
311 if(!end_offset) {
312 return false;
313 }
314
315 if(end_offset > Offset(begin: input, cursor: end)) {
316 TokenizeError(message: "block offset is out of range",begin: input, cursor);
317 }
318 else if(end_offset < Offset(begin: input, cursor)) {
319 TokenizeError(message: "block offset is negative out of range",begin: input, cursor);
320 }
321
322 // the second data word contains the number of properties in the scope
323 const uint64_t prop_count = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
324
325 // the third data word contains the length of the property list
326 const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
327
328 // now comes the name of the scope/key
329 const char* sbeg, *send;
330 ReadString(sbegin_out&: sbeg, send_out&: send, input, cursor, end);
331
332 output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(begin: input, cursor) ));
333
334 // now come the individual properties
335 const char* begin_cursor = cursor;
336
337 if ((begin_cursor + prop_length) > end) {
338 TokenizeError(message: "property length out of bounds reading length ", begin: input, cursor);
339 }
340
341 for (unsigned int i = 0; i < prop_count; ++i) {
342 ReadData(sbegin_out&: sbeg, send_out&: send, input, cursor, end: begin_cursor + prop_length);
343
344 output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(begin: input, cursor) ));
345
346 if(i != prop_count-1) {
347 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(begin: input, cursor) ));
348 }
349 }
350
351 if (Offset(begin: begin_cursor, cursor) != prop_length) {
352 TokenizeError(message: "property length not reached, something is wrong",begin: input, cursor);
353 }
354
355 // at the end of each nested block, there is a NUL record to indicate
356 // that the sub-scope exists (i.e. to distinguish between P: and P : {})
357 // this NUL record is 13 bytes long on 32 bit version and 25 bytes long on 64 bit.
358 const size_t sentinel_block_length = is64bits ? (sizeof(uint64_t)* 3 + 1) : (sizeof(uint32_t)* 3 + 1);
359
360 if (Offset(begin: input, cursor) < end_offset) {
361 if (end_offset - Offset(begin: input, cursor) < sentinel_block_length) {
362 TokenizeError(message: "insufficient padding bytes at block end",begin: input, cursor);
363 }
364
365 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(begin: input, cursor) ));
366
367 // XXX this is vulnerable to stack overflowing ..
368 while(Offset(begin: input, cursor) < end_offset - sentinel_block_length) {
369 ReadScope(output_tokens, token_allocator, input, cursor, end: input + end_offset - sentinel_block_length, is64bits);
370 }
371 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(begin: input, cursor) ));
372
373 for (unsigned int i = 0; i < sentinel_block_length; ++i) {
374 if(cursor[i] != '\0') {
375 TokenizeError(message: "failed to read nested block sentinel, expected all bytes to be 0",begin: input, cursor);
376 }
377 }
378 cursor += sentinel_block_length;
379 }
380
381 if (Offset(begin: input, cursor) != end_offset) {
382 TokenizeError(message: "scope length not reached, something is wrong",begin: input, cursor);
383 }
384
385 return true;
386}
387
388} // anonymous namespace
389
390// ------------------------------------------------------------------------------------------------
391// TODO: Test FBX Binary files newer than the 7500 version to check if the 64 bits address behaviour is consistent
392void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length, StackAllocator &token_allocator) {
393 ai_assert(input);
394 ASSIMP_LOG_DEBUG("Tokenizing binary FBX file");
395
396 if(length < 0x1b) {
397 TokenizeError(message: "file is too short",offset: 0);
398 }
399
400 //uint32_t offset = 0x15;
401/* const char* cursor = input + 0x15;
402
403 const uint32_t flags = ReadWord(input, cursor, input + length);
404
405 const uint8_t padding_0 = ReadByte(input, cursor, input + length); // unused
406 const uint8_t padding_1 = ReadByte(input, cursor, input + length); // unused*/
407
408 if (strncmp(s1: input,s2: "Kaydara FBX Binary",n: 18)) {
409 TokenizeError(message: "magic bytes not found",offset: 0);
410 }
411
412 const char* cursor = input + 18;
413 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
414 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
415 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
416 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
417 /*Result ignored*/ ReadByte(input, cursor, end: input + length);
418 const uint32_t version = ReadWord(input, cursor, end: input + length);
419 ASSIMP_LOG_DEBUG("FBX version: ", version);
420 const bool is64bits = version >= 7500;
421 const char *end = input + length;
422 try
423 {
424 while (cursor < end ) {
425 if (!ReadScope(output_tokens, token_allocator, input, cursor, end: input + length, is64bits)) {
426 break;
427 }
428 }
429 }
430 catch (const DeadlyImportError& e)
431 {
432 if (!is64bits && (length > std::numeric_limits<uint32_t>::max())) {
433 throw DeadlyImportError("The FBX file is invalid. This may be because the content is too big for this older version (", ai_to_string(value: version), ") of the FBX format. (", e.what(), ")");
434 }
435 throw;
436 }
437}
438
439} // !FBX
440} // !Assimp
441
442#endif
443

// source code of qtquick3d/src/3rdparty/assimp/src/code/AssetLib/FBX/FBXBinaryTokenizer.cpp