1 | /* |
2 | This file is part of the KDE libraries |
3 | |
4 | SPDX-FileCopyrightText: 1999 Ian Zepp <icszepp@islc.net> |
5 | SPDX-FileCopyrightText: 2006 Dominic Battre <dominic@battre.de> |
6 | SPDX-FileCopyrightText: 2006 Martin Pool <mbp@canonical.com> |
7 | |
8 | SPDX-License-Identifier: LGPL-2.0-or-later |
9 | */ |
10 | |
11 | #include "kstringhandler.h" |
12 | |
13 | #include <stdlib.h> // random() |
14 | |
15 | #include <QList> |
16 | #include <QRegularExpression> |
17 | |
18 | // |
19 | // Capitalization routines |
20 | // |
21 | QString KStringHandler::capwords(const QString &text) |
22 | { |
23 | if (text.isEmpty()) { |
24 | return text; |
25 | } |
26 | |
27 | const QString strippedText = text.trimmed(); |
28 | const QString space = QString(QLatin1Char(' ')); |
29 | const QStringList words = capwords(list: strippedText.split(sep: space)); |
30 | |
31 | QString result = text; |
32 | result.replace(before: strippedText, after: words.join(sep: space)); |
33 | return result; |
34 | } |
35 | |
36 | QStringList KStringHandler::capwords(const QStringList &list) |
37 | { |
38 | QStringList tmp = list; |
39 | for (auto &str : tmp) { |
40 | str[0] = str.at(i: 0).toUpper(); |
41 | } |
42 | return tmp; |
43 | } |
44 | |
45 | QString KStringHandler::lsqueeze(const QString &str, const int maxlen) |
46 | { |
47 | if (str.length() > maxlen) { |
48 | const int part = maxlen - 3; |
49 | return QLatin1String("..." ) + QStringView(str).right(n: part); |
50 | } else { |
51 | return str; |
52 | } |
53 | } |
54 | |
55 | QString KStringHandler::csqueeze(const QString &str, const int maxlen) |
56 | { |
57 | if (str.length() > maxlen && maxlen > 3) { |
58 | const int part = (maxlen - 3) / 2; |
59 | const QStringView strView{str}; |
60 | return strView.left(n: part) + QLatin1String("..." ) + strView.right(n: part); |
61 | } else { |
62 | return str; |
63 | } |
64 | } |
65 | |
66 | QString KStringHandler::rsqueeze(const QString &str, const int maxlen) |
67 | { |
68 | if (str.length() > maxlen) { |
69 | const int part = maxlen - 3; |
70 | return QStringView(str).left(n: part) + QLatin1String("..." ); |
71 | } else { |
72 | return str; |
73 | } |
74 | } |
75 | |
76 | QStringList KStringHandler::perlSplit(const QStringView sep, const QStringView str, int max) |
77 | { |
78 | const bool ignoreMax = max == 0; |
79 | |
80 | const int sepLength = sep.size(); |
81 | |
82 | QStringList list; |
83 | int searchStart = 0; |
84 | int sepIndex = str.indexOf(s: sep, from: searchStart); |
85 | |
86 | while (sepIndex != -1 && (ignoreMax || list.count() < max - 1)) { |
87 | const auto chunk = str.mid(pos: searchStart, n: sepIndex - searchStart); |
88 | if (!chunk.isEmpty()) { |
89 | list.append(t: chunk.toString()); |
90 | } |
91 | |
92 | searchStart = sepIndex + sepLength; |
93 | sepIndex = str.indexOf(s: sep, from: searchStart); |
94 | } |
95 | |
96 | const auto lastChunk = str.mid(pos: searchStart, n: str.length() - searchStart); |
97 | if (!lastChunk.isEmpty()) { |
98 | list.append(t: lastChunk.toString()); |
99 | } |
100 | |
101 | return list; |
102 | } |
103 | |
104 | QStringList KStringHandler::perlSplit(const QString &sep, const QString &s, int max) |
105 | { |
106 | return perlSplit(sep: QStringView(sep), str: QStringView(s), max); |
107 | } |
108 | |
109 | QStringList KStringHandler::perlSplit(const QChar &sep, const QString &str, int max) |
110 | { |
111 | return perlSplit(sep: QStringView(&sep, 1), str: QStringView(str), max); |
112 | } |
113 | |
114 | QStringList KStringHandler::perlSplit(const QRegularExpression &sep, const QString &str, int max) |
115 | { |
116 | // nothing to split |
117 | if (str.isEmpty()) { |
118 | return QStringList(); |
119 | } |
120 | |
121 | const bool ignoreMax = max == 0; |
122 | |
123 | QStringList list; |
124 | |
125 | int start = 0; |
126 | |
127 | const QStringView strView(str); |
128 | |
129 | QRegularExpression separator(sep); |
130 | separator.setPatternOptions(QRegularExpression::UseUnicodePropertiesOption); |
131 | |
132 | QRegularExpressionMatchIterator iter = separator.globalMatch(subjectView: strView); |
133 | QRegularExpressionMatch match; |
134 | while (iter.hasNext() && (ignoreMax || list.count() < max - 1)) { |
135 | match = iter.next(); |
136 | const QStringView chunk = strView.mid(pos: start, n: match.capturedStart() - start); |
137 | if (!chunk.isEmpty()) { |
138 | list.append(t: chunk.toString()); |
139 | } |
140 | |
141 | start = match.capturedEnd(); |
142 | } |
143 | |
144 | // catch the remainder |
145 | const QStringView lastChunk = strView.mid(pos: start, n: strView.size() - start); |
146 | if (!lastChunk.isEmpty()) { |
147 | list.append(t: lastChunk.toString()); |
148 | } |
149 | |
150 | return list; |
151 | } |
152 | |
153 | QString KStringHandler::tagUrls(const QString &text) |
154 | { |
155 | QString richText(text); |
156 | |
157 | static const QRegularExpression urlEx(QStringLiteral(R"((www\.(?!\.)|(fish|ftp|http|https)://[\d\w./,:_~?=&;#@\-+%$()]+))" ), |
158 | QRegularExpression::UseUnicodePropertiesOption); |
159 | // The reference \1 is going to be replaced by the matched url |
160 | richText.replace(re: urlEx, QStringLiteral("<a href=\"\\1\">\\1</a>" )); |
161 | return richText; |
162 | } |
163 | |
164 | QString KStringHandler::obscure(const QString &str) |
165 | { |
166 | QString result; |
167 | for (const QChar ch : str) { |
168 | // yes, no typo. can't encode ' ' or '!' because |
169 | // they're the unicode BOM. stupid scrambling. stupid. |
170 | const ushort uc = ch.unicode(); |
171 | result += (uc <= 0x21) ? ch : QChar(0x1001F - uc); |
172 | } |
173 | |
174 | return result; |
175 | } |
176 | |
177 | static inline bool containsSpaces(const QString &text) |
178 | { |
179 | for (int i = 0; i < text.length(); i++) { |
180 | const QChar c = text[i]; |
181 | if (c.isSpace()) { |
182 | return true; |
183 | } |
184 | } |
185 | return false; |
186 | } |
187 | |
188 | QString KStringHandler::preProcessWrap(const QString &text) |
189 | { |
190 | const QChar zwsp(0x200b); |
191 | |
192 | QString result; |
193 | result.reserve(asize: text.length()); |
194 | |
195 | const bool containsSpaces = ::containsSpaces(text); |
196 | |
197 | for (int i = 0; i < text.length(); i++) { |
198 | const QChar c = text[i]; |
199 | |
200 | const bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('[')); |
201 | const bool singleQuote = (c == QLatin1Char('\'')); |
202 | const bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']')); |
203 | const bool breakAfter = (closingParens || c.isPunct() || c.isSymbol()); |
204 | const bool isLastChar = i == (text.length() - 1); |
205 | const bool isLower = c.isLower(); |
206 | const bool nextIsUpper = !isLastChar && text[i + 1].isUpper(); // false by default |
207 | const bool nextIsSpace = isLastChar || text[i + 1].isSpace(); // true by default |
208 | const bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp); |
209 | |
210 | // Provide a breaking opportunity before opening parenthesis |
211 | if (openingParens && !prevIsSpace) { |
212 | result += zwsp; |
213 | } |
214 | |
215 | // Provide a word joiner before the single quote |
216 | if (singleQuote && !prevIsSpace) { |
217 | result += QChar(0x2060); |
218 | } |
219 | |
220 | result += c; |
221 | |
222 | // Provide a breaking opportunity between camelCase and PascalCase sub-words; |
223 | // but if source string contains whitespaces, then it should be sufficiently wrappable on its own |
224 | const bool isCamelCase = !containsSpaces && isLower && nextIsUpper; |
225 | |
226 | if (isCamelCase || (breakAfter && !openingParens && !nextIsSpace && !singleQuote)) { |
227 | result += zwsp; |
228 | } |
229 | } |
230 | |
231 | return result; |
232 | } |
233 | |
234 | int KStringHandler::logicalLength(const QString &text) |
235 | { |
236 | int length = 0; |
237 | const auto chrs = text.toUcs4(); |
238 | for (const auto chr : chrs) { |
239 | const auto script = QChar::script(ucs4: chr); |
240 | /* clang-format off */ |
241 | if (script == QChar::Script_Han |
242 | || script == QChar::Script_Hangul |
243 | || script == QChar::Script_Hiragana |
244 | || script == QChar::Script_Katakana |
245 | || script == QChar::Script_Yi |
246 | || QChar::isHighSurrogate(ucs4: chr)) { /* clang-format on */ |
247 | length += 2; |
248 | } else { |
249 | length += 1; |
250 | } |
251 | } |
252 | return length; |
253 | } |
254 | |