/*
    SPDX-FileCopyrightText: 2002-2008 The Kopete developers <kopete-devel@kde.org>
    SPDX-FileCopyrightText: 2008 Carlo Segato <brandon.ml@gmail.com>
    SPDX-FileCopyrightText: 2002-2003 Stefan Gehn <metz@gehn.net>
    SPDX-FileCopyrightText: 2005 Engin AYDOGAN <engin@bzzzt.biz>

    SPDX-License-Identifier: LGPL-2.1-or-later
*/

#include "kemoticonsparser_p.h"

#include <QDebug>
#include <QString>

#include <algorithm>
#include <cstring>
#include <iterator>

/**
 * One entry of the emoticon table: the ASCII emoticon text to look for and
 * the string that replaces it in the output.
 */
struct Emoticon {
    /// Emoticon text as it appears in the (HTML) message, HTML entity-encoded.
    const char *match;
    /// Text substituted for the emoticon; decoded with QString::fromUtf8().
    const char *replacement;
};

// ### Keep sorted by the first column (plain byte order of the encoded text)
// ### and keep the first column HTML entity-encoded ('<' -> "&lt;",
// ### '>' -> "&gt;", '&' -> "&amp;")!
//
// findEmoticon() runs std::lower_bound over this table, so an entry that is
// out of order silently stops matching. parseEmoticons() treats a raw '<' as
// the start of an HTML tag, so a key containing a raw '<' can never match.
//
// NOTE(review): the replacement literals below are reproduced exactly as found.
// They look like UTF-8 emoji that were mis-decoded at some point; restore them
// from a known-good copy of this file before shipping.
// clang-format off
static constexpr const Emoticon emoticons_map[] = {
    {"&gt;-(",      "đ "},
    {"&gt;:(",      "đ "},
    {"&gt;:)",      "đ"},
    {"&gt;:-(",     "đ "},
    {"&gt;w&lt;",   "đ"},
    {"&lt;-.-&gt;", "đ´"},
    {"&lt;3",       "âĨī¸"},
    {"&lt;]:o){",   "đ¤Ą"},
    {"&lt;|:^0|",   "đ¤Ą"},
    {"()-()",       "đ¤"},
    {"(-_o)zzZ",    "đ´"},
    {"(:|",         "đĨą"},
    {"(@_@)",       "đ"},
    {"(c:&gt;*",    "đ¤Ą"},
    {"({)",         "đ¤"},
    {"(})",         "đ¤"},
    {"*&lt;:^)",    "đ¤Ą"},
    {"*&lt;:o)",    "đ¤Ą"},
    {"*:o)",        "đ¤Ą"},
    {"*:oB",        "đ¤Ą"},
    {"*:oP",        "đ¤Ą"},
    {"+o(",         "đ¤ĸ"},
    {",':(",        "đ"},
    {"-_-",         "đ´"},
    {"-_-+",        "đ "},
    {"-o-o-",       "đ¤"},
    {"/00\\",       "đ"},
    {"0:)",         "đ"},
    {"0:-)",        "đ"},
    {"0;)",         "đ"},
    {"0=)",         "đ"},
    {"3:)",         "đ"},
    {"8)",          "đ"},
    {"8-)",         "đ"},
    {"8:::(",       "đ"},
    {":\"-(",       "đĸ"},
    {":'(",         "đĸ"},
    {":'-(",        "đĸ"},
    {":'D",         "đ"},
    {":(",          "đ"},
    {":((",         "đĸ"},
    {":)",          "đ"},
    {":))",         "đ"},
    {":*",          "đ"},
    {":*(",         "đĸ"},
    {":*)",         "đ"},
    {":-$",         "đ¯"},
    {":-&amp;",     "đ¤ĸ"},
    {":-&gt;",      "âēī¸"},
    {":-&gt;&gt;",  "âēī¸"},
    {":-(",         "đ"},
    {":-)",         "đ"},
    {":-))",        "đ"},
    {":-)*",        "đ"},
    {":-*",         "đ"},
    {":-/",         "đ"},
    {":-@",         "đ "},
    {":-D",         "đ"},
    {":-O",         "đŽ"},
    {":-P",         "đ"},
    {":-Q",         "đ"},
    {":-S",         "đ"},
    {":-X",         "đ¤Ģ"},
    {":-[",         "đ¯"},
    {":-o",         "đŽ"},
    {":-p",         "đ"},
    {":-s",         "đ"},
    {":-t",         "đ"},
    {":-x",         "đ¤Ģ"},
    {":-|",         "đ"},
    {":-||",        "đ "},
    {":/",          "đĢ¤"},
    {":@",          "đ "},
    {":C",          "âšī¸"},
    {":D",          "đ"},
    {":O",          "đŽ"},
    {":P",          "đ"},
    {":S",          "đ"},
    {":X",          "đ¤Ģ"},
    {":\\",         "đĢ¤"},
    {":_(",         "đĸ"},
    {":c",          "âšī¸"},
    {":o",          "đŽ"},
    {":o)",         "đ¤Ą"},
    {":p",          "đ"},
    {":s",          "đ"},
    {":x",          "đ¤Ģ"},
    {":|))",        "đ"},
    {";(",          "đĸ"},
    {";)",          "đ"},
    {";-(",         "đĸ"},
    {";-(!)",       "đ"},
    {";-)",         "đ"},
    {";_;",         "đĸ"},
    {"= #",         "đ"},
    {"='(",         "đĸ"},
    {"=(",          "đ"},
    {"=[",          "đ"},
    {"=^D",         "đ"},
    {"B-)",         "đ"},
    {"D:",          "đ"},
    {"D=",          "đ"},
    {"O-)",         "đ"},
    {"O.o",         "đ¤"},
    {"O.o?",        "đ¤"},
    {"O:)",         "đ"},
    {"O:-)",        "đ"},
    {"O;",          "đ"},
    {"T.T",         "đ"},
    {"T_T",         "đ"},
    {"X-(",         "đ "},
    {"Y_Y",         "đ"},
    {"Z_Z",         "đ´"},
    {"\\o-o/",      "đ¤"},
    {"\\~/",        "đ¤"},
    {"]:-&gt;",     "đ"},
    {"^j^",         "đ"},
    {"i_i",         "đ"},
    {"t.t",         "đ"},
    {"y_y",         "đ"},
    {"|-O",         "đĨą"},
    {"}:-)",        "đ"},
};
// clang-format on

149 | static const Emoticon *findEmoticon(QStringView s) |
150 | { |
151 | auto it = std::lower_bound(first: std::begin(arr: emoticons_map), last: std::end(arr: emoticons_map), val: s, comp: [](const auto &emoticon, auto s) { |
152 | return QLatin1String(emoticon.match) < s; |
153 | }); |
154 | if (it != std::end(arr: emoticons_map) && s.startsWith(s: QLatin1String((*it).match))) { |
155 | return it; |
156 | } |
157 | // if we don't have an exact match but a prefix, that will be in the item before the one returned by lower_bound |
158 | if (it != std::begin(arr: emoticons_map)) { |
159 | it = std::prev(x: it); |
160 | if (s.startsWith(s: QLatin1String((*it).match))) { |
161 | return it; |
162 | } |
163 | } |
164 | return nullptr; |
165 | } |
166 | |
167 | QString KEmoticonsParser::parseEmoticons(const QString &message) |
168 | { |
169 | QString result; |
170 | |
171 | /* previous char, in the firs iteration assume that it is space since we want |
172 | * to let emoticons at the beginning, the very first previous QChar must be a space. */ |
173 | QChar p = QLatin1Char(' '); |
174 | |
175 | int pos = 0; |
176 | int previousPos = 0; |
177 | |
178 | bool inHTMLTag = false; |
179 | bool inHTMLLink = false; |
180 | bool inHTMLEntity = false; |
181 | |
182 | for (; pos < message.length(); ++pos) { |
183 | const QChar c = message[pos]; |
184 | |
185 | if (!inHTMLTag) { // Are we already in an HTML tag ? |
186 | if (c == QLatin1Char('<')) { // If not check if are going into one |
187 | inHTMLTag = true; // If we are, change the state to inHTML |
188 | p = c; |
189 | continue; |
190 | } |
191 | } else { // We are already in a HTML tag |
192 | if (c == QLatin1Char('>')) { // Check if it ends |
193 | inHTMLTag = false; // If so, change the state |
194 | |
195 | if (p == QLatin1Char('a')) { |
196 | inHTMLLink = false; |
197 | } |
198 | } else if (c == QLatin1Char('a') && p == QLatin1Char('<')) { // check if we just entered an anchor tag |
199 | inHTMLLink = true; // don't put smileys in urls |
200 | } |
201 | p = c; |
202 | continue; |
203 | } |
204 | |
205 | if (!inHTMLEntity) { // are we |
206 | if (c == QLatin1Char('&')) { |
207 | inHTMLEntity = true; |
208 | } |
209 | } |
210 | |
211 | if (inHTMLLink) { // i can't think of any situation where a link address might need emoticons |
212 | p = c; |
213 | continue; |
214 | } |
215 | |
216 | if (!p.isSpace() && p != QLatin1Char('>')) { // '>' may mark the end of an html tag |
217 | p = c; |
218 | continue; |
219 | } /* strict requires space before the emoticon */ |
220 | |
221 | const auto emoticon = findEmoticon(s: QStringView(message).mid(pos)); |
222 | if (emoticon) { |
223 | bool found = true; |
224 | /* check if the character after this match is space or end of string*/ |
225 | const int matchLen = std::strlen(s: emoticon->match); |
226 | if (message.length() > pos + matchLen) { |
227 | const QChar n = message[pos + matchLen]; |
228 | //<br/> marks the end of a line |
229 | if (n != QLatin1Char('<') && !n.isSpace() && !n.isNull() && n != QLatin1Char('&')) { |
230 | found = false; |
231 | } |
232 | } |
233 | |
234 | if (found) { |
235 | result += QStringView(message).mid(pos: previousPos, n: pos - previousPos); |
236 | result += QString::fromUtf8(utf8: emoticon->replacement); |
237 | |
238 | /* Skip the matched emoticon's matchText */ |
239 | pos += matchLen - 1; |
240 | previousPos = pos + 1; |
241 | } else { |
242 | if (inHTMLEntity) { |
243 | // If we are in an HTML entity such as > |
244 | const int htmlEnd = message.indexOf(c: QLatin1Char(';'), from: pos); |
245 | // Search for where it ends |
246 | if (htmlEnd == -1) { |
247 | // Apparently this HTML entity isn't ended, something is wrong, try skip the '&' |
248 | // and continue |
249 | // qCDebug(KEMOTICONS_CORE) << "Broken HTML entity, trying to recover."; |
250 | inHTMLEntity = false; |
251 | pos++; |
252 | } else { |
253 | pos = htmlEnd; |
254 | inHTMLEntity = false; |
255 | } |
256 | } |
257 | } |
258 | } /* else no emoticons begin with this character, so don't do anything */ |
259 | p = c; |
260 | } |
261 | |
262 | if (result.isEmpty()) { |
263 | return message; |
264 | } |
265 | if (previousPos < message.length()) { |
266 | result += QStringView(message).mid(pos: previousPos); |
267 | } |
268 | return result; |
269 | } |
270 | |