1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qv4regexp_p.h" |
5 | #include "qv4engine_p.h" |
6 | #include "qv4scopedvalue_p.h" |
7 | #include <private/qv4mm_p.h> |
8 | #include <runtime/VM.h> |
9 | |
10 | using namespace QV4; |
11 | |
12 | static JSC::RegExpFlags jscFlags(uint flags) |
13 | { |
14 | JSC::RegExpFlags jscFlags = JSC::NoFlags; |
15 | if (flags & CompiledData::RegExp::RegExp_Global) |
16 | jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagGlobal); |
17 | if (flags & CompiledData::RegExp::RegExp_IgnoreCase) |
18 | jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagIgnoreCase); |
19 | if (flags & CompiledData::RegExp::RegExp_Multiline) |
20 | jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagMultiline); |
21 | if (flags & CompiledData::RegExp::RegExp_Unicode) |
22 | jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagUnicode); |
23 | if (flags & CompiledData::RegExp::RegExp_Sticky) |
24 | jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagSticky); |
25 | return jscFlags; |
26 | } |
27 | |
28 | RegExpCache::~RegExpCache() |
29 | { |
30 | for (RegExpCache::Iterator it = begin(), e = end(); it != e; ++it) { |
31 | if (RegExp *re = it.value().as<RegExp>()) |
32 | re->d()->cache = nullptr; |
33 | } |
34 | } |
35 | |
// Expands the managed-vtable definition macro for RegExp. The macro itself is
// defined outside this file; presumably it emits the static vtable the memory
// manager uses for this type - confirm against the macro's definition.
DEFINE_MANAGED_VTABLE(RegExp);
37 | |
38 | uint RegExp::match(const QString &string, int start, uint *matchOffsets) |
39 | { |
40 | if (!isValid()) |
41 | return JSC::Yarr::offsetNoMatch; |
42 | |
43 | WTF::String s(string); |
44 | |
45 | #if ENABLE(YARR_JIT) |
46 | static const uint offsetJITFail = std::numeric_limits<unsigned>::max() - 1; |
47 | auto *priv = d(); |
48 | if (priv->hasValidJITCode()) { |
49 | uint ret = JSC::Yarr::offsetNoMatch; |
50 | #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS) |
51 | char buffer[8192]; |
52 | ret = uint(priv->jitCode->execute(input: s.characters16(), start, length: s.size(), |
53 | output: (int*)matchOffsets, freeParenContext: buffer, parenContextSize: 8192).start); |
54 | #else |
55 | ret = uint(priv->jitCode->execute(s.characters16(), start, s.length(), |
56 | (int*)matchOffsets).start); |
57 | #endif |
58 | if (ret != offsetJITFail) |
59 | return ret; |
60 | |
61 | // JIT failed. We need byteCode to run the interpreter. |
62 | if (!priv->byteCode) { |
63 | JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError; |
64 | JSC::Yarr::YarrPattern yarrPattern(WTF::String(*priv->pattern), jscFlags(flags: priv->flags), |
65 | error); |
66 | |
67 | // As we successfully parsed the pattern before, we should still be able to. |
68 | Q_ASSERT(error == JSC::Yarr::ErrorCode::NoError); |
69 | |
70 | priv->byteCode = JSC::Yarr::byteCompile( |
71 | yarrPattern, |
72 | priv->internalClass->engine->bumperPointerAllocator).release(); |
73 | } |
74 | } |
75 | #endif // ENABLE(YARR_JIT) |
76 | |
77 | return JSC::Yarr::interpret(byteCode(), input: s.characters16(), length: string.size(), start, output: matchOffsets); |
78 | } |
79 | |
80 | QString RegExp::getSubstitution(const QString &matched, const QString &str, int position, const Value *captures, int nCaptures, const QString &replacement) |
81 | { |
82 | QString result; |
83 | |
84 | int matchedLength = matched.size(); |
85 | Q_ASSERT(position >= 0 && position <= str.size()); |
86 | int tailPos = position + matchedLength; |
87 | int seenDollar = -1; |
88 | for (int i = 0; i < replacement.size(); ++i) { |
89 | QChar ch = replacement.at(i); |
90 | if (seenDollar >= 0) { |
91 | if (ch.unicode() == '$') { |
92 | result += QLatin1Char('$'); |
93 | } else if (ch.unicode() == '&') { |
94 | result += matched; |
95 | } else if (ch.unicode() == '`') { |
96 | result += str.left(n: position); |
97 | } else if (ch.unicode() == '\'') { |
98 | result += str.mid(position: tailPos); |
99 | } else if (ch.unicode() >= '0' && ch.unicode() <= '9') { |
100 | int n = ch.unicode() - '0'; |
101 | if (i + 1 < replacement.size()) { |
102 | ch = replacement.at(i: i + 1); |
103 | if (ch.unicode() >= '0' && ch.unicode() <= '9') { |
104 | n = n*10 + (ch.unicode() - '0'); |
105 | ++i; |
106 | } |
107 | } |
108 | if (n > 0 && n <= nCaptures) { |
109 | String *s = captures[n].stringValue(); |
110 | if (s) |
111 | result += s->toQString(); |
112 | } else { |
113 | for (int j = seenDollar; j <= i; ++j) |
114 | result += replacement.at(i: j); |
115 | } |
116 | } else { |
117 | result += QLatin1Char('$'); |
118 | result += ch; |
119 | } |
120 | seenDollar = -1; |
121 | } else { |
122 | if (ch == QLatin1Char('$')) { |
123 | seenDollar = i; |
124 | continue; |
125 | } |
126 | result += ch; |
127 | } |
128 | } |
129 | if (seenDollar >= 0) |
130 | result += QLatin1Char('$'); |
131 | return result; |
132 | } |
133 | |
134 | QString Heap::RegExp::flagsAsString() const |
135 | { |
136 | QString result; |
137 | if (flags & CompiledData::RegExp::RegExp_Global) |
138 | result += QLatin1Char('g'); |
139 | if (flags & CompiledData::RegExp::RegExp_IgnoreCase) |
140 | result += QLatin1Char('i'); |
141 | if (flags & CompiledData::RegExp::RegExp_Multiline) |
142 | result += QLatin1Char('m'); |
143 | if (flags & CompiledData::RegExp::RegExp_Unicode) |
144 | result += QLatin1Char('u'); |
145 | if (flags & CompiledData::RegExp::RegExp_Sticky) |
146 | result += QLatin1Char('y'); |
147 | return result; |
148 | } |
149 | |
150 | Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, uint flags) |
151 | { |
152 | RegExpCacheKey key(pattern, flags); |
153 | |
154 | RegExpCache *cache = engine->regExpCache; |
155 | if (!cache) |
156 | cache = engine->regExpCache = new RegExpCache; |
157 | |
158 | QV4::WeakValue &cachedValue = (*cache)[key]; |
159 | if (QV4::RegExp *result = cachedValue.as<RegExp>()) |
160 | return result->d(); |
161 | |
162 | Scope scope(engine); |
163 | Scoped<RegExp> result(scope, engine->memoryManager->alloc<RegExp>(args&: engine, args: pattern, args&: flags)); |
164 | |
165 | result->d()->cache = cache; |
166 | cachedValue.set(engine, value: result); |
167 | |
168 | return result->d(); |
169 | } |
170 | |
171 | void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, uint flags) |
172 | { |
173 | Base::init(); |
174 | this->pattern = new QString(pattern); |
175 | this->flags = flags; |
176 | |
177 | valid = false; |
178 | |
179 | JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError; |
180 | JSC::Yarr::YarrPattern yarrPattern(WTF::String(pattern), jscFlags(flags), error); |
181 | if (error != JSC::Yarr::ErrorCode::NoError) |
182 | return; |
183 | subPatternCount = yarrPattern.m_numSubpatterns; |
184 | #if ENABLE(YARR_JIT) |
185 | if (!yarrPattern.m_containsBackreferences && engine->canJIT()) { |
186 | jitCode = new JSC::Yarr::YarrCodeBlock; |
187 | JSC::VM *vm = static_cast<JSC::VM *>(engine); |
188 | JSC::Yarr::jitCompile(yarrPattern, JSC::Yarr::Char16, vm, jitObject&: *jitCode); |
189 | } |
190 | #else |
191 | Q_UNUSED(engine); |
192 | #endif |
193 | if (hasValidJITCode()) { |
194 | valid = true; |
195 | return; |
196 | } |
197 | byteCode = JSC::Yarr::byteCompile(yarrPattern, internalClass->engine->bumperPointerAllocator).release(); |
198 | if (byteCode) |
199 | valid = true; |
200 | } |
201 | |
202 | void Heap::RegExp::destroy() |
203 | { |
204 | if (cache) { |
205 | RegExpCacheKey key(this); |
206 | cache->remove(key); |
207 | } |
208 | #if ENABLE(YARR_JIT) |
209 | delete jitCode; |
210 | #endif |
211 | delete byteCode; |
212 | delete pattern; |
213 | Base::destroy(); |
214 | } |
215 | |