1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qv4regexp_p.h"
5#include "qv4engine_p.h"
6#include "qv4scopedvalue_p.h"
7#include <private/qv4mm_p.h>
8#include <runtime/VM.h>
9
10using namespace QV4;
11
12#if ENABLE(YARR_JIT)
13static constexpr qsizetype LongStringJitThreshold = 1024;
14static constexpr int LongStringJitBoost = 3;
15#endif
16
17static JSC::RegExpFlags jscFlags(quint8 flags)
18{
19 JSC::RegExpFlags jscFlags = JSC::NoFlags;
20 if (flags & CompiledData::RegExp::RegExp_Global)
21 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagGlobal);
22 if (flags & CompiledData::RegExp::RegExp_IgnoreCase)
23 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagIgnoreCase);
24 if (flags & CompiledData::RegExp::RegExp_Multiline)
25 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagMultiline);
26 if (flags & CompiledData::RegExp::RegExp_Unicode)
27 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagUnicode);
28 if (flags & CompiledData::RegExp::RegExp_Sticky)
29 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagSticky);
30 return jscFlags;
31}
32
33RegExpCache::~RegExpCache()
34{
35 for (RegExpCache::Iterator it = begin(), e = end(); it != e; ++it) {
36 if (RegExp *re = it.value().as<RegExp>())
37 re->d()->cache = nullptr;
38 }
39}
40
// Expands DEFINE_MANAGED_VTABLE for RegExp. The macro is defined outside this
// file. NOTE(review): presumably it emits the static vtable used for RegExp
// heap objects - confirm against the managed-object headers.
DEFINE_MANAGED_VTABLE(RegExp);
42
43uint RegExp::match(const QString &string, int start, uint *matchOffsets)
44{
45 if (!isValid())
46 return JSC::Yarr::offsetNoMatch;
47
48#if ENABLE(YARR_JIT)
49 auto *priv = d();
50
51 auto regenerateByteCode = [](Heap::RegExp *regexp) {
52 JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
53 JSC::Yarr::YarrPattern yarrPattern(WTF::String(*regexp->pattern), jscFlags(flags: regexp->flags),
54 error);
55
56 // As we successfully parsed the pattern before, we should still be able to.
57 Q_ASSERT(error == JSC::Yarr::ErrorCode::NoError);
58
59 regexp->byteCode = JSC::Yarr::byteCompile(
60 yarrPattern,
61 regexp->internalClass->engine->bumperPointerAllocator).release();
62 };
63
64 auto removeJitCode = [](Heap::RegExp *regexp) {
65 delete regexp->jitCode;
66 regexp->jitCode = nullptr;
67 regexp->jitFailed = true;
68 };
69
70 auto removeByteCode = [](Heap::RegExp *regexp) {
71 delete regexp->byteCode;
72 regexp->byteCode = nullptr;
73 };
74
75 if (!priv->jitCode) {
76
77 // Long strings count as more calls. We want the JIT to run earlier.
78 const bool longString = string.length() > LongStringJitThreshold;
79 if (longString)
80 priv->interpreterCallCount += LongStringJitBoost;
81
82 if (priv->internalClass->engine->canJIT(jittable: priv)) {
83 removeByteCode(priv);
84
85 JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
86 JSC::Yarr::YarrPattern yarrPattern(
87 WTF::String(*priv->pattern), jscFlags(flags: priv->flags), error);
88 if (!yarrPattern.m_containsBackreferences) {
89 priv->jitCode = new JSC::Yarr::YarrCodeBlock;
90 JSC::VM *vm = static_cast<JSC::VM *>(priv->internalClass->engine);
91 JSC::Yarr::jitCompile(yarrPattern, JSC::Yarr::Char16, vm, jitObject&: *priv->jitCode);
92 }
93
94 if (!priv->hasValidJITCode()) {
95 removeJitCode(priv);
96 regenerateByteCode(priv);
97 }
98 } else if (!longString) {
99 // Short strings do the regular post-increment to honor
100 // QV4_JIT_CALL_THRESHOLD.
101 ++priv->interpreterCallCount;
102 }
103 }
104#endif
105
106 WTF::String s(string);
107
108#if ENABLE(YARR_JIT)
109 if (priv->hasValidJITCode()) {
110 static const uint offsetJITFail = std::numeric_limits<unsigned>::max() - 1;
111 uint ret = JSC::Yarr::offsetNoMatch;
112#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
113 char buffer[8192];
114 ret = uint(priv->jitCode->execute(input: s.characters16(), start, length: s.size(),
115 output: (int*)matchOffsets, freeParenContext: buffer, parenContextSize: 8192).start);
116#else
117 ret = uint(priv->jitCode->execute(s.characters16(), start, s.length(),
118 (int*)matchOffsets).start);
119#endif
120 if (ret != offsetJITFail)
121 return ret;
122
123 removeJitCode(priv);
124 // JIT failed. We need byteCode to run the interpreter.
125 Q_ASSERT(!priv->byteCode);
126 regenerateByteCode(priv);
127 }
128#endif // ENABLE(YARR_JIT)
129
130 return JSC::Yarr::interpret(byteCode(), input: s.characters16(), length: string.size(), start, output: matchOffsets);
131}
132
133QString RegExp::getSubstitution(const QString &matched, const QString &str, int position, const Value *captures, int nCaptures, const QString &replacement)
134{
135 QString result;
136
137 int matchedLength = matched.size();
138 Q_ASSERT(position >= 0 && position <= str.size());
139 int tailPos = position + matchedLength;
140 int seenDollar = -1;
141 for (int i = 0; i < replacement.size(); ++i) {
142 QChar ch = replacement.at(i);
143 if (seenDollar >= 0) {
144 if (ch.unicode() == '$') {
145 result += QLatin1Char('$');
146 } else if (ch.unicode() == '&') {
147 result += matched;
148 } else if (ch.unicode() == '`') {
149 result += str.left(n: position);
150 } else if (ch.unicode() == '\'') {
151 result += str.mid(position: tailPos);
152 } else if (ch.unicode() >= '0' && ch.unicode() <= '9') {
153 int n = ch.unicode() - '0';
154 if (i + 1 < replacement.size()) {
155 ch = replacement.at(i: i + 1);
156 if (ch.unicode() >= '0' && ch.unicode() <= '9') {
157 n = n*10 + (ch.unicode() - '0');
158 ++i;
159 }
160 }
161 if (n > 0 && n <= nCaptures) {
162 String *s = captures[n].stringValue();
163 if (s)
164 result += s->toQString();
165 } else {
166 for (int j = seenDollar; j <= i; ++j)
167 result += replacement.at(i: j);
168 }
169 } else {
170 result += QLatin1Char('$');
171 result += ch;
172 }
173 seenDollar = -1;
174 } else {
175 if (ch == QLatin1Char('$')) {
176 seenDollar = i;
177 continue;
178 }
179 result += ch;
180 }
181 }
182 if (seenDollar >= 0)
183 result += QLatin1Char('$');
184 return result;
185}
186
187Heap::RegExp *RegExp::create(
188 ExecutionEngine *engine, const QString &pattern, CompiledData::RegExp::Flags flags)
189{
190 RegExpCacheKey key(pattern, flags);
191
192 RegExpCache *cache = engine->regExpCache;
193 if (!cache)
194 cache = engine->regExpCache = new RegExpCache;
195
196 QV4::WeakValue &cachedValue = (*cache)[key];
197 if (QV4::RegExp *result = cachedValue.as<RegExp>())
198 return result->d();
199
200 Scope scope(engine);
201 Scoped<RegExp> result(scope, engine->memoryManager->alloc<RegExp>(args&: engine, args: pattern, args&: flags));
202
203 result->d()->cache = cache;
204 cachedValue.set(engine, value: result);
205
206 return result->d();
207}
208
209void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, uint flags)
210{
211 Base::init();
212 this->pattern = new QString(pattern);
213 this->flags = flags;
214
215 JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
216 JSC::Yarr::YarrPattern yarrPattern(WTF::String(pattern), jscFlags(flags), error);
217 if (error != JSC::Yarr::ErrorCode::NoError)
218 return;
219 subPatternCount = yarrPattern.m_numSubpatterns;
220 Q_UNUSED(engine);
221 byteCode = JSC::Yarr::byteCompile(yarrPattern, internalClass->engine->bumperPointerAllocator).release();
222 if (byteCode)
223 valid = true;
224}
225
226void Heap::RegExp::destroy()
227{
228 if (cache) {
229 RegExpCacheKey key(this);
230 cache->remove(key);
231 }
232#if ENABLE(YARR_JIT)
233 delete jitCode;
234#endif
235 delete byteCode;
236 delete pattern;
237 Base::destroy();
238}
239

// Source: qtdeclarative/src/qml/jsruntime/qv4regexp.cpp