1 | //======================================================================== |
2 | // |
3 | // PSTokenizer.cc |
4 | // |
5 | // Copyright 2002-2003 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2006 Scott Turner <scotty1024@mac.com> |
17 | // Copyright (C) 2008 Albert Astals Cid <aacid@kde.org> |
18 | // Copyright (C) 2017 Vincent Le Garrec <legarrec.vincent@gmail.com> |
19 | // |
20 | // To see a description of the changes please see the Changelog file that |
21 | // came with your tarball or type make ChangeLog if you are building from git |
22 | // |
23 | //======================================================================== |
24 | |
25 | #include <config.h> |
26 | |
27 | #include <cstdio> |
28 | #include <cstdlib> |
29 | #include "PSTokenizer.h" |
30 | |
31 | //------------------------------------------------------------------------ |
32 | |
// Character classification table, indexed by byte value:
//   0 - ordinary character
//   1 - white space (also terminates a name or command)
//   2 - delimiter (terminates a name or command, but is not white space)
static const char specialChars[256] = {
    /* 0x00 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // NUL TAB LF FF CR
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // SP % ( ) /
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // < >
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // [ ]
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // { }
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
53 | |
54 | //------------------------------------------------------------------------ |
55 | |
56 | PSTokenizer::PSTokenizer(int (*getCharFuncA)(void *), void *dataA) |
57 | { |
58 | getCharFunc = getCharFuncA; |
59 | data = dataA; |
60 | charBuf = -1; |
61 | } |
62 | |
63 | PSTokenizer::~PSTokenizer() { } |
64 | |
65 | bool PSTokenizer::getToken(char *buf, int size, int *length) |
66 | { |
67 | bool , backslash; |
68 | int c; |
69 | int i; |
70 | |
71 | // skip leading whitespace and comments |
72 | comment = false; |
73 | while (true) { |
74 | if ((c = getChar()) == EOF) { |
75 | buf[0] = '\0'; |
76 | *length = 0; |
77 | return false; |
78 | } |
79 | if (comment) { |
80 | if (c == '\x0a' || c == '\x0d') { |
81 | comment = false; |
82 | } |
83 | } else if (c == '%') { |
84 | comment = true; |
85 | } else if (specialChars[static_cast<unsigned char>(c)] != 1) { |
86 | break; |
87 | } |
88 | } |
89 | |
90 | // Reserve room for terminating '\0' |
91 | size--; |
92 | |
93 | // read a token |
94 | i = 0; |
95 | buf[i++] = c; |
96 | if (c == '(') { |
97 | backslash = false; |
98 | while ((c = lookChar()) != EOF) { |
99 | consumeChar(); |
100 | if (i < size) { |
101 | buf[i++] = c; |
102 | } |
103 | if (c == '\\') { |
104 | backslash = true; |
105 | } else if (!backslash && c == ')') { |
106 | break; |
107 | } else { |
108 | backslash = false; |
109 | } |
110 | } |
111 | } else if (c == '<') { |
112 | while ((c = lookChar()) != EOF) { |
113 | consumeChar(); |
114 | if (i < size && specialChars[static_cast<unsigned char>(c)] != 1) { |
115 | buf[i++] = c; |
116 | } |
117 | if (c == '>') { |
118 | break; |
119 | } |
120 | } |
121 | } else if (c != '[' && c != ']') { |
122 | while ((c = lookChar()) != EOF && !specialChars[static_cast<unsigned char>(c)]) { |
123 | consumeChar(); |
124 | if (i < size) { |
125 | buf[i++] = c; |
126 | } |
127 | } |
128 | } |
129 | // Zero terminate token string |
130 | buf[i] = '\0'; |
131 | // Return length of token |
132 | *length = i; |
133 | |
134 | return true; |
135 | } |
136 | |
137 | int PSTokenizer::lookChar() |
138 | { |
139 | if (charBuf < 0) { |
140 | charBuf = (*getCharFunc)(data); |
141 | } |
142 | return charBuf; |
143 | } |
144 | |
145 | void PSTokenizer::consumeChar() |
146 | { |
147 | charBuf = -1; |
148 | } |
149 | |
150 | int PSTokenizer::getChar() |
151 | { |
152 | int c = charBuf; |
153 | |
154 | if (c < 0) { |
155 | c = (*getCharFunc)(data); |
156 | } else { |
157 | charBuf = -1; |
158 | } |
159 | return c; |
160 | } |
161 | |