1 | /* |
2 | * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) |
3 | * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. |
4 | * Copyright (C) 2009 Torch Mobile, Inc. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with this library; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | * |
20 | */ |
21 | |
22 | #include "config.h" |
23 | #include "RegExp.h" |
24 | #include "Lexer.h" |
25 | #include <stdio.h> |
26 | #include <stdlib.h> |
27 | #include <string.h> |
28 | #include <wtf/Assertions.h> |
29 | #include <wtf/OwnArrayPtr.h> |
30 | |
31 | |
32 | #if ENABLE(YARR) |
33 | |
34 | #include "yarr/RegexCompiler.h" |
35 | #if ENABLE(YARR_JIT) |
36 | #include "yarr/RegexJIT.h" |
37 | #else |
38 | #include "yarr/RegexInterpreter.h" |
39 | #endif |
40 | |
41 | #else |
42 | |
43 | #if ENABLE(WREC) |
44 | #include "JIT.h" |
45 | #include "WRECGenerator.h" |
46 | #endif |
47 | #include <pcre/pcre.h> |
48 | |
49 | #endif |
50 | |
51 | namespace JSC { |
52 | |
53 | #if ENABLE(WREC) |
54 | using namespace WREC; |
55 | #endif |
56 | |
57 | inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern) |
58 | : m_pattern(pattern) |
59 | , m_flagBits(0) |
60 | , m_constructionError(0) |
61 | , m_numSubpatterns(0) |
62 | { |
63 | compile(globalData); |
64 | } |
65 | |
66 | inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags) |
67 | : m_pattern(pattern) |
68 | , m_flagBits(0) |
69 | , m_constructionError(0) |
70 | , m_numSubpatterns(0) |
71 | { |
72 | // NOTE: The global flag is handled on a case-by-case basis by functions like |
73 | // String::match and RegExpObject::match. |
74 | #ifndef QT_BUILD_SCRIPT_LIB |
75 | if (flags.find('g') != -1) |
76 | m_flagBits |= Global; |
77 | if (flags.find('i') != -1) |
78 | m_flagBits |= IgnoreCase; |
79 | if (flags.find('m') != -1) |
80 | m_flagBits |= Multiline; |
81 | #else //Invalid flags should throw a SyntaxError (ECMA Script 15.10.4.1) |
82 | static const char flagError[] = "invalid regular expression flag" ; |
83 | for (int i = 0; i < flags.size(); i++) { |
84 | switch (flags.data()[i]) { |
85 | case 'g': |
86 | m_flagBits |= Global; |
87 | break; |
88 | case 'i': |
89 | m_flagBits |= IgnoreCase; |
90 | break; |
91 | case 'm': |
92 | m_flagBits |= Multiline; |
93 | break; |
94 | default: |
95 | m_constructionError = flagError; |
96 | #if !ENABLE(YARR) |
97 | m_regExp = 0; |
98 | #endif |
99 | return; |
100 | } |
101 | } |
102 | #endif |
103 | |
104 | compile(globalData); |
105 | } |
106 | |
107 | #if !ENABLE(YARR) |
108 | RegExp::~RegExp() |
109 | { |
110 | jsRegExpFree(m_regExp); |
111 | } |
112 | #endif |
113 | |
114 | PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern) |
115 | { |
116 | return adoptRef(p: new RegExp(globalData, pattern)); |
117 | } |
118 | |
119 | PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags) |
120 | { |
121 | return adoptRef(p: new RegExp(globalData, pattern, flags)); |
122 | } |
123 | |
124 | #if ENABLE(YARR) |
125 | |
126 | void RegExp::compile(JSGlobalData* globalData) |
127 | { |
128 | #if ENABLE(YARR_JIT) |
129 | Yarr::jitCompileRegex(globalData, jitObject&: m_regExpJITCode, pattern: m_pattern, numSubpatterns&: m_numSubpatterns, error&: m_constructionError, ignoreCase: ignoreCase(), multiline: multiline()); |
130 | #else |
131 | UNUSED_PARAM(globalData); |
132 | m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline())); |
133 | #endif |
134 | } |
135 | |
136 | int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
137 | { |
138 | if (startOffset < 0) |
139 | startOffset = 0; |
140 | if (ovector) |
141 | ovector->clear(); |
142 | |
143 | if (startOffset > s.size() || s.isNull()) |
144 | return -1; |
145 | |
146 | #if ENABLE(YARR_JIT) |
147 | if (!!m_regExpJITCode) { |
148 | #else |
149 | if (m_regExpBytecode) { |
150 | #endif |
151 | int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre. |
152 | int* offsetVector; |
153 | Vector<int, 32> nonReturnedOvector; |
154 | if (ovector) { |
155 | ovector->resize(size: offsetVectorSize); |
156 | offsetVector = ovector->data(); |
157 | } else { |
158 | nonReturnedOvector.resize(size: offsetVectorSize); |
159 | offsetVector = nonReturnedOvector.data(); |
160 | } |
161 | |
162 | ASSERT(offsetVector); |
163 | for (int j = 0; j < offsetVectorSize; ++j) |
164 | offsetVector[j] = -1; |
165 | |
166 | |
167 | #if ENABLE(YARR_JIT) |
168 | int result = Yarr::executeRegex(jitObject&: m_regExpJITCode, input: s.data(), start: startOffset, length: s.size(), output: offsetVector, outputArraySize: offsetVectorSize); |
169 | #else |
170 | int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector); |
171 | #endif |
172 | |
173 | if (result < 0) { |
174 | #ifndef NDEBUG |
175 | // TODO: define up a symbol, rather than magic -1 |
176 | if (result != -1) |
177 | fprintf(stderr, format: "jsRegExpExecute failed with result %d\n" , result); |
178 | #endif |
179 | if (ovector) |
180 | ovector->clear(); |
181 | } |
182 | return result; |
183 | } |
184 | |
185 | return -1; |
186 | } |
187 | |
188 | #else |
189 | |
190 | void RegExp::compile(JSGlobalData* globalData) |
191 | { |
192 | m_regExp = 0; |
193 | #if ENABLE(WREC) |
194 | m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline()); |
195 | if (m_wrecFunction || m_constructionError) |
196 | return; |
197 | // Fall through to non-WREC case. |
198 | #else |
199 | UNUSED_PARAM(globalData); |
200 | #endif |
201 | |
202 | JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; |
203 | JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine; |
204 | m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError); |
205 | } |
206 | |
207 | int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
208 | { |
209 | if (startOffset < 0) |
210 | startOffset = 0; |
211 | if (ovector) |
212 | ovector->clear(); |
213 | |
214 | if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) |
215 | return -1; |
216 | |
217 | #if ENABLE(WREC) |
218 | if (m_wrecFunction) { |
219 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
220 | int* offsetVector; |
221 | Vector<int, 32> nonReturnedOvector; |
222 | if (ovector) { |
223 | ovector->resize(offsetVectorSize); |
224 | offsetVector = ovector->data(); |
225 | } else { |
226 | nonReturnedOvector.resize(offsetVectorSize); |
227 | offsetVector = nonReturnedOvector.data(); |
228 | } |
229 | ASSERT(offsetVector); |
230 | for (int j = 0; j < offsetVectorSize; ++j) |
231 | offsetVector[j] = -1; |
232 | |
233 | int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector); |
234 | |
235 | if (result < 0) { |
236 | #ifndef NDEBUG |
237 | // TODO: define up a symbol, rather than magic -1 |
238 | if (result != -1) |
239 | fprintf(stderr, "jsRegExpExecute failed with result %d\n" , result); |
240 | #endif |
241 | if (ovector) |
242 | ovector->clear(); |
243 | } |
244 | return result; |
245 | } else |
246 | #endif |
247 | if (m_regExp) { |
248 | // Set up the offset vector for the result. |
249 | // First 2/3 used for result, the last third used by PCRE. |
250 | int* offsetVector; |
251 | int offsetVectorSize; |
252 | int fixedSizeOffsetVector[3]; |
253 | if (!ovector) { |
254 | offsetVectorSize = 3; |
255 | offsetVector = fixedSizeOffsetVector; |
256 | } else { |
257 | offsetVectorSize = (m_numSubpatterns + 1) * 3; |
258 | ovector->resize(offsetVectorSize); |
259 | offsetVector = ovector->data(); |
260 | } |
261 | |
262 | int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize); |
263 | |
264 | if (numMatches < 0) { |
265 | #ifndef NDEBUG |
266 | if (numMatches != JSRegExpErrorNoMatch) |
267 | fprintf(stderr, "jsRegExpExecute failed with result %d\n" , numMatches); |
268 | #endif |
269 | if (ovector) |
270 | ovector->clear(); |
271 | return -1; |
272 | } |
273 | |
274 | return offsetVector[0]; |
275 | } |
276 | |
277 | return -1; |
278 | } |
279 | |
280 | #endif |
281 | |
282 | } // namespace JSC |
283 | |