1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22#include "config.h"
23#include "RegExp.h"
24#include "Lexer.h"
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <wtf/Assertions.h>
29#include <wtf/OwnArrayPtr.h>
30
31
32#if ENABLE(YARR)
33
34#include "yarr/RegexCompiler.h"
35#if ENABLE(YARR_JIT)
36#include "yarr/RegexJIT.h"
37#else
38#include "yarr/RegexInterpreter.h"
39#endif
40
41#else
42
43#if ENABLE(WREC)
44#include "JIT.h"
45#include "WRECGenerator.h"
46#endif
47#include <pcre/pcre.h>
48
49#endif
50
51namespace JSC {
52
53#if ENABLE(WREC)
54using namespace WREC;
55#endif
56
57inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
58 : m_pattern(pattern)
59 , m_flagBits(0)
60 , m_constructionError(0)
61 , m_numSubpatterns(0)
62{
63 compile(globalData);
64}
65
66inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
67 : m_pattern(pattern)
68 , m_flagBits(0)
69 , m_constructionError(0)
70 , m_numSubpatterns(0)
71{
72 // NOTE: The global flag is handled on a case-by-case basis by functions like
73 // String::match and RegExpObject::match.
74#ifndef QT_BUILD_SCRIPT_LIB
75 if (flags.find('g') != -1)
76 m_flagBits |= Global;
77 if (flags.find('i') != -1)
78 m_flagBits |= IgnoreCase;
79 if (flags.find('m') != -1)
80 m_flagBits |= Multiline;
81#else //Invalid flags should throw a SyntaxError (ECMA Script 15.10.4.1)
82 static const char flagError[] = "invalid regular expression flag";
83 for (int i = 0; i < flags.size(); i++) {
84 switch (flags.data()[i]) {
85 case 'g':
86 m_flagBits |= Global;
87 break;
88 case 'i':
89 m_flagBits |= IgnoreCase;
90 break;
91 case 'm':
92 m_flagBits |= Multiline;
93 break;
94 default:
95 m_constructionError = flagError;
96#if !ENABLE(YARR)
97 m_regExp = 0;
98#endif
99 return;
100 }
101 }
102#endif
103
104 compile(globalData);
105}
106
107#if !ENABLE(YARR)
108RegExp::~RegExp()
109{
110 jsRegExpFree(m_regExp);
111}
112#endif
113
114PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
115{
116 return adoptRef(p: new RegExp(globalData, pattern));
117}
118
119PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
120{
121 return adoptRef(p: new RegExp(globalData, pattern, flags));
122}
123
124#if ENABLE(YARR)
125
126void RegExp::compile(JSGlobalData* globalData)
127{
128#if ENABLE(YARR_JIT)
129 Yarr::jitCompileRegex(globalData, jitObject&: m_regExpJITCode, pattern: m_pattern, numSubpatterns&: m_numSubpatterns, error&: m_constructionError, ignoreCase: ignoreCase(), multiline: multiline());
130#else
131 UNUSED_PARAM(globalData);
132 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
133#endif
134}
135
136int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
137{
138 if (startOffset < 0)
139 startOffset = 0;
140 if (ovector)
141 ovector->clear();
142
143 if (startOffset > s.size() || s.isNull())
144 return -1;
145
146#if ENABLE(YARR_JIT)
147 if (!!m_regExpJITCode) {
148#else
149 if (m_regExpBytecode) {
150#endif
151 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
152 int* offsetVector;
153 Vector<int, 32> nonReturnedOvector;
154 if (ovector) {
155 ovector->resize(size: offsetVectorSize);
156 offsetVector = ovector->data();
157 } else {
158 nonReturnedOvector.resize(size: offsetVectorSize);
159 offsetVector = nonReturnedOvector.data();
160 }
161
162 ASSERT(offsetVector);
163 for (int j = 0; j < offsetVectorSize; ++j)
164 offsetVector[j] = -1;
165
166
167#if ENABLE(YARR_JIT)
168 int result = Yarr::executeRegex(jitObject&: m_regExpJITCode, input: s.data(), start: startOffset, length: s.size(), output: offsetVector, outputArraySize: offsetVectorSize);
169#else
170 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
171#endif
172
173 if (result < 0) {
174#ifndef NDEBUG
175 // TODO: define up a symbol, rather than magic -1
176 if (result != -1)
177 fprintf(stderr, format: "jsRegExpExecute failed with result %d\n", result);
178#endif
179 if (ovector)
180 ovector->clear();
181 }
182 return result;
183 }
184
185 return -1;
186}
187
188#else
189
190void RegExp::compile(JSGlobalData* globalData)
191{
192 m_regExp = 0;
193#if ENABLE(WREC)
194 m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
195 if (m_wrecFunction || m_constructionError)
196 return;
197 // Fall through to non-WREC case.
198#else
199 UNUSED_PARAM(globalData);
200#endif
201
202 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
203 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
204 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
205}
206
207int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
208{
209 if (startOffset < 0)
210 startOffset = 0;
211 if (ovector)
212 ovector->clear();
213
214 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull())
215 return -1;
216
217#if ENABLE(WREC)
218 if (m_wrecFunction) {
219 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
220 int* offsetVector;
221 Vector<int, 32> nonReturnedOvector;
222 if (ovector) {
223 ovector->resize(offsetVectorSize);
224 offsetVector = ovector->data();
225 } else {
226 nonReturnedOvector.resize(offsetVectorSize);
227 offsetVector = nonReturnedOvector.data();
228 }
229 ASSERT(offsetVector);
230 for (int j = 0; j < offsetVectorSize; ++j)
231 offsetVector[j] = -1;
232
233 int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
234
235 if (result < 0) {
236#ifndef NDEBUG
237 // TODO: define up a symbol, rather than magic -1
238 if (result != -1)
239 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
240#endif
241 if (ovector)
242 ovector->clear();
243 }
244 return result;
245 } else
246#endif
247 if (m_regExp) {
248 // Set up the offset vector for the result.
249 // First 2/3 used for result, the last third used by PCRE.
250 int* offsetVector;
251 int offsetVectorSize;
252 int fixedSizeOffsetVector[3];
253 if (!ovector) {
254 offsetVectorSize = 3;
255 offsetVector = fixedSizeOffsetVector;
256 } else {
257 offsetVectorSize = (m_numSubpatterns + 1) * 3;
258 ovector->resize(offsetVectorSize);
259 offsetVector = ovector->data();
260 }
261
262 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
263
264 if (numMatches < 0) {
265#ifndef NDEBUG
266 if (numMatches != JSRegExpErrorNoMatch)
267 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
268#endif
269 if (ovector)
270 ovector->clear();
271 return -1;
272 }
273
274 return offsetVector[0];
275 }
276
277 return -1;
278}
279
280#endif
281
282} // namespace JSC
283

// Source: qtscript/src/3rdparty/javascriptcore/JavaScriptCore/runtime/RegExp.cpp