1/*
2 SPDX-FileCopyrightText: 2002-2008 The Kopete developers <kopete-devel@kde.org>
3 SPDX-FileCopyrightText: 2008 Carlo Segato <brandon.ml@gmail.com>
4 SPDX-FileCopyrightText: 2002-2003 Stefan Gehn <metz@gehn.net>
5 SPDX-FileCopyrightText: 2005 Engin AYDOGAN <engin@bzzzt.biz>
6
7 SPDX-License-Identifier: LGPL-2.1-or-later
8*/
9
10#include "kemoticonsparser_p.h"
11
12#include <QDebug>
13#include <QString>
14
15#include <cstring>
16
/*
 * One entry of the emoticon replacement table.
 */
struct Emoticon {
    const char *match; // textual emoticon, ASCII, written in its HTML entity-encoded form (e.g. "&lt;3")
    const char *replacement; // UTF-8 encoded emoji that replaces the match
};

// ### keep sorted by first column and HTML entity-encoded!
// findEmoticon() runs a binary search over this table, so the entries must be in
// strictly ascending byte order of `match`. Note that a string sorts *before* every
// longer string it is a prefix of (";-(" comes before ";-(!)").
// The static_assert below the table enforces this at compile time.
// clang-format off
static constexpr const Emoticon emoticons_map[] = {
    {"&gt;-(", "😠"},
    {"&gt;:(", "😠"},
    {"&gt;:)", "😈"},
    {"&gt;:-(", "😠"},
    {"&gt;w&lt;", "😟"},
    {"&lt;-.-&gt;", "😴"},
    {"&lt;3", "♥️"},
    {"&lt;]:o){", "🤡"},
    {"&lt;|:^0|", "🤡"},
    {"()-()", "🤓"},
    {"(-_o)zzZ", "😴"},
    {"(:|", "🥱"},
    {"(@_@)", "😕"},
    {"(c:&gt;*", "🤡"},
    {"({)", "🤗"},
    {"(})", "🤗"},
    {"*&lt;:^)", "🤡"},
    {"*&lt;:o)", "🤡"},
    {"*:o)", "🤡"},
    {"*:oB", "🤡"},
    {"*:oP", "🤡"},
    {"+o(", "🤢"},
    {",':(", "😕"},
    {"-_-", "😴"},
    {"-_-+", "😠"},
    {"-o-o-", "🤓"},
    {"/00\\", "😟"},
    {"0:)", "😇"},
    {"0:-)", "😇"},
    {"0;)", "😇"},
    {"0=)", "😇"},
    {"3:)", "😈"},
    {"8)", "😎"},
    {"8-)", "😎"},
    {"8:::(", "😭"},
    {":\"-(", "😢"},
    {":'(", "😢"},
    {":'-(", "😢"},
    {":'D", "😆"},
    {":(", "🙁"},
    {":((", "😢"},
    {":)", "🙂"},
    {":))", "😆"},
    {":*", "😗"},
    {":*(", "😢"},
    {":*)", "😗"},
    {":-$", "😯"},
    {":-&amp;", "🤢"},
    {":-&gt;", "☺️"},
    {":-&gt;&gt;", "☺️"},
    {":-(", "🙁"},
    {":-)", "🙂"},
    {":-))", "😀"},
    {":-)*", "😗"},
    {":-*", "😗"},
    {":-/", "😕"},
    {":-@", "😠"},
    {":-D", "😀"},
    {":-O", "😮"},
    {":-P", "😛"},
    {":-Q", "😕"},
    {":-S", "😕"},
    {":-X", "🤫"},
    {":-[", "😯"},
    {":-o", "😮"},
    {":-p", "😛"},
    {":-s", "😕"},
    {":-t", "😛"},
    {":-x", "🤫"},
    {":-|", "😐"},
    {":-||", "😠"},
    {":/", "🫤"},
    {":@", "😠"},
    {":C", "☹️"},
    {":D", "😀"},
    {":O", "😮"},
    {":P", "😛"},
    {":S", "😕"},
    {":X", "🤫"},
    {":\\", "🫤"},
    {":_(", "😢"},
    {":c", "☹️"},
    {":o", "😮"},
    {":o)", "🤡"},
    {":p", "😛"},
    {":s", "😕"},
    {":x", "🤫"},
    {":|))", "😀"},
    {";(", "😢"},
    {";)", "😉"},
    {";-(", "😢"},
    {";-(!)", "😗"},
    {";-)", "😉"},
    {";_;", "😢"},
    {"= #", "😗"},
    {"='(", "😢"},
    {"=(", "🙁"},
    {"=[", "🙁"},
    {"=^D", "😆"},
    {"B-)", "😎"},
    {"D:", "🙁"},
    {"D=", "🙁"},
    {"O-)", "😇"},
    {"O.o", "🤔"},
    {"O.o?", "🤔"},
    {"O:)", "😇"},
    {"O:-)", "😇"},
    {"O;", "😇"},
    {"T.T", "🙁"},
    {"T_T", "😭"},
    {"X-(", "😠"},
    {"Y_Y", "🙁"},
    {"Z_Z", "😴"},
    {"\\o-o/", "🤓"},
    {"\\~/", "🤓"},
    {"]:-&gt;", "😈"},
    {"^j^", "😇"},
    {"i_i", "😭"},
    {"t.t", "🙁"},
    {"y_y", "🙁"},
    {"|-O", "🥱"},
    {"}:-)", "😈"},
};
// clang-format on

// Compile-time byte-wise "less than" for two NUL-terminated strings.
static constexpr bool emoticonMatchLess(const char *lhs, const char *rhs)
{
    while (*lhs != '\0' && *lhs == *rhs) {
        ++lhs;
        ++rhs;
    }
    return static_cast<unsigned char>(*lhs) < static_cast<unsigned char>(*rhs);
}

// True when every `match` is strictly smaller than the one that follows it.
static constexpr bool emoticonsMapIsSorted()
{
    for (std::size_t i = 1; i < sizeof(emoticons_map) / sizeof(emoticons_map[0]); ++i) {
        if (!emoticonMatchLess(emoticons_map[i - 1].match, emoticons_map[i].match)) {
            return false;
        }
    }
    return true;
}
static_assert(emoticonsMapIsSorted(), "emoticons_map must be sorted by its first column without duplicates");
148
149static const Emoticon *findEmoticon(QStringView s)
150{
151 auto it = std::lower_bound(first: std::begin(arr: emoticons_map), last: std::end(arr: emoticons_map), val: s, comp: [](const auto &emoticon, auto s) {
152 return QLatin1String(emoticon.match) < s;
153 });
154 if (it != std::end(arr: emoticons_map) && s.startsWith(s: QLatin1String((*it).match))) {
155 return it;
156 }
157 // if we don't have an exact match but a prefix, that will be in the item before the one returned by lower_bound
158 if (it != std::begin(arr: emoticons_map)) {
159 it = std::prev(x: it);
160 if (s.startsWith(s: QLatin1String((*it).match))) {
161 return it;
162 }
163 }
164 return nullptr;
165}
166
167QString KEmoticonsParser::parseEmoticons(const QString &message)
168{
169 QString result;
170
171 /* previous char, in the firs iteration assume that it is space since we want
172 * to let emoticons at the beginning, the very first previous QChar must be a space. */
173 QChar p = QLatin1Char(' ');
174
175 int pos = 0;
176 int previousPos = 0;
177
178 bool inHTMLTag = false;
179 bool inHTMLLink = false;
180 bool inHTMLEntity = false;
181
182 for (; pos < message.length(); ++pos) {
183 const QChar c = message[pos];
184
185 if (!inHTMLTag) { // Are we already in an HTML tag ?
186 if (c == QLatin1Char('<')) { // If not check if are going into one
187 inHTMLTag = true; // If we are, change the state to inHTML
188 p = c;
189 continue;
190 }
191 } else { // We are already in a HTML tag
192 if (c == QLatin1Char('>')) { // Check if it ends
193 inHTMLTag = false; // If so, change the state
194
195 if (p == QLatin1Char('a')) {
196 inHTMLLink = false;
197 }
198 } else if (c == QLatin1Char('a') && p == QLatin1Char('<')) { // check if we just entered an anchor tag
199 inHTMLLink = true; // don't put smileys in urls
200 }
201 p = c;
202 continue;
203 }
204
205 if (!inHTMLEntity) { // are we
206 if (c == QLatin1Char('&')) {
207 inHTMLEntity = true;
208 }
209 }
210
211 if (inHTMLLink) { // i can't think of any situation where a link address might need emoticons
212 p = c;
213 continue;
214 }
215
216 if (!p.isSpace() && p != QLatin1Char('>')) { // '>' may mark the end of an html tag
217 p = c;
218 continue;
219 } /* strict requires space before the emoticon */
220
221 const auto emoticon = findEmoticon(s: QStringView(message).mid(pos));
222 if (emoticon) {
223 bool found = true;
224 /* check if the character after this match is space or end of string*/
225 const int matchLen = std::strlen(s: emoticon->match);
226 if (message.length() > pos + matchLen) {
227 const QChar n = message[pos + matchLen];
228 //<br/> marks the end of a line
229 if (n != QLatin1Char('<') && !n.isSpace() && !n.isNull() && n != QLatin1Char('&')) {
230 found = false;
231 }
232 }
233
234 if (found) {
235 result += QStringView(message).mid(pos: previousPos, n: pos - previousPos);
236 result += QString::fromUtf8(utf8: emoticon->replacement);
237
238 /* Skip the matched emoticon's matchText */
239 pos += matchLen - 1;
240 previousPos = pos + 1;
241 } else {
242 if (inHTMLEntity) {
243 // If we are in an HTML entity such as &gt;
244 const int htmlEnd = message.indexOf(c: QLatin1Char(';'), from: pos);
245 // Search for where it ends
246 if (htmlEnd == -1) {
247 // Apparently this HTML entity isn't ended, something is wrong, try skip the '&'
248 // and continue
249 // qCDebug(KEMOTICONS_CORE) << "Broken HTML entity, trying to recover.";
250 inHTMLEntity = false;
251 pos++;
252 } else {
253 pos = htmlEnd;
254 inHTMLEntity = false;
255 }
256 }
257 }
258 } /* else no emoticons begin with this character, so don't do anything */
259 p = c;
260 }
261
262 if (result.isEmpty()) {
263 return message;
264 }
265 if (previousPos < message.length()) {
266 result += QStringView(message).mid(pos: previousPos);
267 }
268 return result;
269}
270

source code of kcoreaddons/src/lib/text/kemoticonsparser.cpp