1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qv4regexp_p.h" |
5 | #include "qv4engine_p.h" |
6 | #include "qv4scopedvalue_p.h" |
7 | #include <private/qv4mm_p.h> |
8 | #include <runtime/VM.h> |
9 | |
10 | using namespace QV4; |
11 | |
#if ENABLE(YARR_JIT)
// RegExp::match() compares a regexp's matchCount against this value to decide
// when to attempt JIT compilation of the pattern (inputs longer than 1024
// characters trigger the attempt regardless of the count).
static constexpr quint8 RegexpJitThreshold = 5;
#endif
15 | |
16 | static JSC::RegExpFlags jscFlags(uint flags) |
17 | { |
18 | JSC::RegExpFlags jscFlags = JSC::NoFlags; |
19 | if (flags & CompiledData::RegExp::RegExp_Global) |
20 | jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagGlobal); |
21 | if (flags & CompiledData::RegExp::RegExp_IgnoreCase) |
22 | jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagIgnoreCase); |
23 | if (flags & CompiledData::RegExp::RegExp_Multiline) |
24 | jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagMultiline); |
25 | if (flags & CompiledData::RegExp::RegExp_Unicode) |
26 | jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagUnicode); |
27 | if (flags & CompiledData::RegExp::RegExp_Sticky) |
28 | jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagSticky); |
29 | return jscFlags; |
30 | } |
31 | |
32 | RegExpCache::~RegExpCache() |
33 | { |
34 | for (RegExpCache::Iterator it = begin(), e = end(); it != e; ++it) { |
35 | if (RegExp *re = it.value().as<RegExp>()) |
36 | re->d()->cache = nullptr; |
37 | } |
38 | } |
39 | |
// Invokes the managed-object vtable definition macro for RegExp.
// NOTE(review): the macro's expansion is defined outside this file.
DEFINE_MANAGED_VTABLE(RegExp);
41 | |
42 | uint RegExp::match(const QString &string, int start, uint *matchOffsets) |
43 | { |
44 | if (!isValid()) |
45 | return JSC::Yarr::offsetNoMatch; |
46 | |
47 | #if ENABLE(YARR_JIT) |
48 | auto *priv = d(); |
49 | |
50 | auto regenerateByteCode = [](Heap::RegExp *regexp) { |
51 | JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError; |
52 | JSC::Yarr::YarrPattern yarrPattern(WTF::String(*regexp->pattern), jscFlags(flags: regexp->flags), |
53 | error); |
54 | |
55 | // As we successfully parsed the pattern before, we should still be able to. |
56 | Q_ASSERT(error == JSC::Yarr::ErrorCode::NoError); |
57 | |
58 | regexp->byteCode = JSC::Yarr::byteCompile( |
59 | yarrPattern, |
60 | regexp->internalClass->engine->bumperPointerAllocator).release(); |
61 | }; |
62 | |
63 | auto removeJitCode = [](Heap::RegExp *regexp) { |
64 | delete regexp->jitCode; |
65 | regexp->jitCode = nullptr; |
66 | regexp->jitFailed = true; |
67 | }; |
68 | |
69 | auto removeByteCode = [](Heap::RegExp *regexp) { |
70 | delete regexp->byteCode; |
71 | regexp->byteCode = nullptr; |
72 | }; |
73 | |
74 | if (!priv->jitCode && !priv->jitFailed && priv->internalClass->engine->canJIT() |
75 | && (string.length() > 1024 || priv->matchCount++ == RegexpJitThreshold)) { |
76 | removeByteCode(priv); |
77 | |
78 | JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError; |
79 | JSC::Yarr::YarrPattern yarrPattern( |
80 | WTF::String(*priv->pattern), jscFlags(flags: priv->flags), error); |
81 | if (!yarrPattern.m_containsBackreferences) { |
82 | priv->jitCode = new JSC::Yarr::YarrCodeBlock; |
83 | JSC::VM *vm = static_cast<JSC::VM *>(priv->internalClass->engine); |
84 | JSC::Yarr::jitCompile(yarrPattern, JSC::Yarr::Char16, vm, jitObject&: *priv->jitCode); |
85 | } |
86 | |
87 | if (!priv->hasValidJITCode()) { |
88 | removeJitCode(priv); |
89 | regenerateByteCode(priv); |
90 | } |
91 | } |
92 | #endif |
93 | |
94 | WTF::String s(string); |
95 | |
96 | #if ENABLE(YARR_JIT) |
97 | if (priv->hasValidJITCode()) { |
98 | static const uint offsetJITFail = std::numeric_limits<unsigned>::max() - 1; |
99 | uint ret = JSC::Yarr::offsetNoMatch; |
100 | #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) |
101 | char buffer[8192]; |
102 | ret = uint(priv->jitCode->execute(input: s.characters16(), start, length: s.size(), |
103 | output: (int*)matchOffsets, freeParenContext: buffer, parenContextSize: 8192).start); |
104 | #else |
105 | ret = uint(priv->jitCode->execute(s.characters16(), start, s.length(), |
106 | (int*)matchOffsets).start); |
107 | #endif |
108 | if (ret != offsetJITFail) |
109 | return ret; |
110 | |
111 | removeJitCode(priv); |
112 | // JIT failed. We need byteCode to run the interpreter. |
113 | Q_ASSERT(!priv->byteCode); |
114 | regenerateByteCode(priv); |
115 | } |
116 | #endif // ENABLE(YARR_JIT) |
117 | |
118 | return JSC::Yarr::interpret(byteCode(), input: s.characters16(), length: string.size(), start, output: matchOffsets); |
119 | } |
120 | |
121 | QString RegExp::getSubstitution(const QString &matched, const QString &str, int position, const Value *captures, int nCaptures, const QString &replacement) |
122 | { |
123 | QString result; |
124 | |
125 | int matchedLength = matched.size(); |
126 | Q_ASSERT(position >= 0 && position <= str.size()); |
127 | int tailPos = position + matchedLength; |
128 | int seenDollar = -1; |
129 | for (int i = 0; i < replacement.size(); ++i) { |
130 | QChar ch = replacement.at(i); |
131 | if (seenDollar >= 0) { |
132 | if (ch.unicode() == '$') { |
133 | result += QLatin1Char('$'); |
134 | } else if (ch.unicode() == '&') { |
135 | result += matched; |
136 | } else if (ch.unicode() == '`') { |
137 | result += str.left(n: position); |
138 | } else if (ch.unicode() == '\'') { |
139 | result += str.mid(position: tailPos); |
140 | } else if (ch.unicode() >= '0' && ch.unicode() <= '9') { |
141 | int n = ch.unicode() - '0'; |
142 | if (i + 1 < replacement.size()) { |
143 | ch = replacement.at(i: i + 1); |
144 | if (ch.unicode() >= '0' && ch.unicode() <= '9') { |
145 | n = n*10 + (ch.unicode() - '0'); |
146 | ++i; |
147 | } |
148 | } |
149 | if (n > 0 && n <= nCaptures) { |
150 | String *s = captures[n].stringValue(); |
151 | if (s) |
152 | result += s->toQString(); |
153 | } else { |
154 | for (int j = seenDollar; j <= i; ++j) |
155 | result += replacement.at(i: j); |
156 | } |
157 | } else { |
158 | result += QLatin1Char('$'); |
159 | result += ch; |
160 | } |
161 | seenDollar = -1; |
162 | } else { |
163 | if (ch == QLatin1Char('$')) { |
164 | seenDollar = i; |
165 | continue; |
166 | } |
167 | result += ch; |
168 | } |
169 | } |
170 | if (seenDollar >= 0) |
171 | result += QLatin1Char('$'); |
172 | return result; |
173 | } |
174 | |
175 | QString Heap::RegExp::flagsAsString() const |
176 | { |
177 | QString result; |
178 | if (flags & CompiledData::RegExp::RegExp_Global) |
179 | result += QLatin1Char('g'); |
180 | if (flags & CompiledData::RegExp::RegExp_IgnoreCase) |
181 | result += QLatin1Char('i'); |
182 | if (flags & CompiledData::RegExp::RegExp_Multiline) |
183 | result += QLatin1Char('m'); |
184 | if (flags & CompiledData::RegExp::RegExp_Unicode) |
185 | result += QLatin1Char('u'); |
186 | if (flags & CompiledData::RegExp::RegExp_Sticky) |
187 | result += QLatin1Char('y'); |
188 | return result; |
189 | } |
190 | |
191 | Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, uint flags) |
192 | { |
193 | RegExpCacheKey key(pattern, flags); |
194 | |
195 | RegExpCache *cache = engine->regExpCache; |
196 | if (!cache) |
197 | cache = engine->regExpCache = new RegExpCache; |
198 | |
199 | QV4::WeakValue &cachedValue = (*cache)[key]; |
200 | if (QV4::RegExp *result = cachedValue.as<RegExp>()) |
201 | return result->d(); |
202 | |
203 | Scope scope(engine); |
204 | Scoped<RegExp> result(scope, engine->memoryManager->alloc<RegExp>(args&: engine, args: pattern, args&: flags)); |
205 | |
206 | result->d()->cache = cache; |
207 | cachedValue.set(engine, value: result); |
208 | |
209 | return result->d(); |
210 | } |
211 | |
212 | void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, uint flags) |
213 | { |
214 | Base::init(); |
215 | this->pattern = new QString(pattern); |
216 | this->flags = flags; |
217 | |
218 | valid = false; |
219 | jitFailed = false; |
220 | matchCount = 0; |
221 | |
222 | JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError; |
223 | JSC::Yarr::YarrPattern yarrPattern(WTF::String(pattern), jscFlags(flags), error); |
224 | if (error != JSC::Yarr::ErrorCode::NoError) |
225 | return; |
226 | subPatternCount = yarrPattern.m_numSubpatterns; |
227 | Q_UNUSED(engine); |
228 | byteCode = JSC::Yarr::byteCompile(yarrPattern, internalClass->engine->bumperPointerAllocator).release(); |
229 | if (byteCode) |
230 | valid = true; |
231 | } |
232 | |
233 | void Heap::RegExp::destroy() |
234 | { |
235 | if (cache) { |
236 | RegExpCacheKey key(this); |
237 | cache->remove(key); |
238 | } |
239 | #if ENABLE(YARR_JIT) |
240 | delete jitCode; |
241 | #endif |
242 | delete byteCode; |
243 | delete pattern; |
244 | Base::destroy(); |
245 | } |
246 | |