1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39#include <QtCore/qtextboundaryfinder.h>
40#include <QtCore/qvarlengtharray.h>
41
42#include <private/qunicodetools_p.h>
43
44QT_BEGIN_NAMESPACE
45
46class QTextBoundaryFinderPrivate
47{
48public:
49 QCharAttributes attributes[1];
50};
51
52static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, QCharAttributes *attributes)
53{
54 const ushort *string = reinterpret_cast<const ushort *>(chars);
55
56 QVarLengthArray<QUnicodeTools::ScriptItem> scriptItems;
57 {
58 QVarLengthArray<uchar> scripts(length);
59
60 QUnicodeTools::initScripts(string, length, scripts: scripts.data());
61
62 int start = 0;
63 for (int i = start + 1; i <= length; ++i) {
64 if (i == length || scripts[i] != scripts[start]) {
65 QUnicodeTools::ScriptItem item;
66 item.position = start;
67 item.script = scripts[start];
68 scriptItems.append(t: item);
69 start = i;
70 }
71 }
72 }
73
74 QUnicodeTools::CharAttributeOptions options;
75 switch (type) {
76 case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break;
77 case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break;
78 case QTextBoundaryFinder::Sentence: options |= QUnicodeTools::SentenceBreaks; break;
79 case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break;
80 default: break;
81 }
82 QUnicodeTools::initCharAttributes(string, length, items: scriptItems.data(), numItems: scriptItems.count(), attributes, options);
83}
84
85/*!
86 \class QTextBoundaryFinder
87 \inmodule QtCore
88
89 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
90
91 \since 4.4
92 \ingroup tools
93 \ingroup shared
94 \ingroup string-processing
95 \reentrant
96
    QTextBoundaryFinder allows you to find Unicode text boundaries in a
    string, according to the Unicode text boundary specifications (see
    \l{https://www.unicode.org/reports/tr14/}{Unicode Standard Annex #14} and
    \l{https://www.unicode.org/reports/tr29/}{Unicode Standard Annex #29}).
101
102 QTextBoundaryFinder can operate on a QString in four possible
103 modes depending on the value of \a BoundaryType.
104
105 Units of Unicode characters that make up what the user thinks of
106 as a character or basic unit of the language are here called
107 Grapheme clusters. The two unicode characters 'A' + diaeresis do
108 for example form one grapheme cluster as the user thinks of them
109 as one character, yet it is in this case represented by two
110 unicode code points
111 (see \l{https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries}).
112
113 Word boundaries are there to locate the start and end of what a
114 language considers to be a word
115 (see \l{https://www.unicode.org/reports/tr29/#Word_Boundaries}).
116
117 Line break boundaries give possible places where a line break
118 might happen and sentence boundaries will show the beginning and
119 end of whole sentences
120 (see \l{https://www.unicode.org/reports/tr29/#Sentence_Boundaries} and
121 \l{https://www.unicode.org/reports/tr14/}).
122
123 The first position in a string is always a valid boundary and
124 refers to the position before the first character. The last
125 position at the length of the string is also valid and refers
126 to the position after the last character.
127*/
128
129/*!
130 \enum QTextBoundaryFinder::BoundaryType
131
    \value Grapheme Finds a grapheme, which is the smallest boundary. Graphemes
    include letters, punctuation marks, numerals and more.
134 \value Word Finds a word.
135 \value Line Finds possible positions for breaking the text into multiple
136 lines.
137 \value Sentence Finds sentence boundaries. These include periods, question
138 marks etc.
139*/
140
141/*!
142 \enum QTextBoundaryFinder::BoundaryReason
143
144 \value NotAtBoundary The boundary finder is not at a boundary position.
145 \value BreakOpportunity The boundary finder is at a break opportunity position.
146 Such a break opportunity might also be an item boundary
147 (either StartOfItem, EndOfItem, or combination of both),
148 a mandatory line break, or a soft hyphen.
149 \value StartOfItem Since 5.0. The boundary finder is at the start of
150 a grapheme, a word, a sentence, or a line.
151 \value EndOfItem Since 5.0. The boundary finder is at the end of
152 a grapheme, a word, a sentence, or a line.
153 \value MandatoryBreak Since 5.0. The boundary finder is at the end of line
154 (can occur for a Line boundary type only).
155 \value SoftHyphen The boundary finder is at the soft hyphen
156 (can occur for a Line boundary type only).
157*/
158
159/*!
160 Constructs an invalid QTextBoundaryFinder object.
161*/
162QTextBoundaryFinder::QTextBoundaryFinder()
163 : t(Grapheme)
164 , chars(nullptr)
165 , length(0)
166 , freePrivate(true)
167 , d(nullptr)
168{
169}
170
171/*!
172 Copies the QTextBoundaryFinder object, \a other.
173*/
174QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
175 : t(other.t)
176 , s(other.s)
177 , chars(other.chars)
178 , length(other.length)
179 , pos(other.pos)
180 , freePrivate(true)
181 , d(nullptr)
182{
183 if (other.d) {
184 Q_ASSERT(length > 0);
185 d = (QTextBoundaryFinderPrivate *) malloc(size: (length + 1) * sizeof(QCharAttributes));
186 Q_CHECK_PTR(d);
187 memcpy(dest: d, src: other.d, n: (length + 1) * sizeof(QCharAttributes));
188 }
189}
190
191/*!
    Assigns the object, \a other, to this QTextBoundaryFinder object.
193*/
194QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
195{
196 if (&other == this)
197 return *this;
198
199 if (other.d) {
200 Q_ASSERT(other.length > 0);
201 uint newCapacity = (other.length + 1) * sizeof(QCharAttributes);
202 QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *) realloc(ptr: freePrivate ? d : nullptr, size: newCapacity);
203 Q_CHECK_PTR(newD);
204 freePrivate = true;
205 d = newD;
206 }
207
208 t = other.t;
209 s = other.s;
210 chars = other.chars;
211 length = other.length;
212 pos = other.pos;
213
214 if (other.d) {
215 memcpy(dest: d, src: other.d, n: (length + 1) * sizeof(QCharAttributes));
216 } else {
217 if (freePrivate)
218 free(ptr: d);
219 d = nullptr;
220 }
221
222 return *this;
223}
224
225/*!
226 Destructs the QTextBoundaryFinder object.
227*/
228QTextBoundaryFinder::~QTextBoundaryFinder()
229{
230 Q_UNUSED(unused);
231 if (freePrivate)
232 free(ptr: d);
233}
234
235/*!
236 Creates a QTextBoundaryFinder object of \a type operating on \a string.
237*/
238QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
239 : t(type)
240 , s(string)
241 , chars(string.unicode())
242 , length(string.length())
243 , pos(0)
244 , freePrivate(true)
245 , d(nullptr)
246{
247 if (length > 0) {
248 d = (QTextBoundaryFinderPrivate *) malloc(size: (length + 1) * sizeof(QCharAttributes));
249 Q_CHECK_PTR(d);
250 init(type: t, chars, length, attributes: d->attributes);
251 }
252}
253
254/*!
255 Creates a QTextBoundaryFinder object of \a type operating on \a chars
256 with \a length.
257
258 \a buffer is an optional working buffer of size \a bufferSize you can pass to
259 the QTextBoundaryFinder. If the buffer is large enough to hold the working
260 data required (bufferSize >= length + 1), it will use this
261 instead of allocating its own buffer.
262
263 \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
264 application programmer's responsibility to ensure the array is allocated for
265 as long as the QTextBoundaryFinder object stays alive. The same applies to
266 \a buffer.
267*/
268QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
269 : t(type)
270 , chars(chars)
271 , length(length)
272 , pos(0)
273 , freePrivate(true)
274 , d(nullptr)
275{
276 if (!chars) {
277 length = 0;
278 } else if (length > 0) {
279 if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) {
280 d = (QTextBoundaryFinderPrivate *)buffer;
281 freePrivate = false;
282 } else {
283 d = (QTextBoundaryFinderPrivate *) malloc(size: (length + 1) * sizeof(QCharAttributes));
284 Q_CHECK_PTR(d);
285 }
286 init(type: t, chars, length, attributes: d->attributes);
287 }
288}
289
290/*!
291 Moves the finder to the start of the string. This is equivalent to setPosition(0).
292
293 \sa setPosition(), position()
294*/
295void QTextBoundaryFinder::toStart()
296{
297 pos = 0;
298}
299
300/*!
301 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
302
303 \sa setPosition(), position()
304*/
305void QTextBoundaryFinder::toEnd()
306{
307 pos = length;
308}
309
310/*!
311 Returns the current position of the QTextBoundaryFinder.
312
313 The range is from 0 (the beginning of the string) to the length of
314 the string inclusive.
315
316 \sa setPosition()
317*/
318int QTextBoundaryFinder::position() const
319{
320 return pos;
321}
322
323/*!
324 Sets the current position of the QTextBoundaryFinder to \a position.
325
326 If \a position is out of bounds, it will be bound to only valid
327 positions. In this case, valid positions are from 0 to the length of
328 the string inclusive.
329
330 \sa position()
331*/
332void QTextBoundaryFinder::setPosition(int position)
333{
334 pos = qBound(min: 0, val: position, max: length);
335}
336
337/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
338
339 Returns the type of the QTextBoundaryFinder.
340*/
341
342/*! \fn bool QTextBoundaryFinder::isValid() const
343
344 Returns \c true if the text boundary finder is valid; otherwise returns \c false.
345 A default QTextBoundaryFinder is invalid.
346*/
347
348/*!
349 Returns the string the QTextBoundaryFinder object operates on.
350*/
351QString QTextBoundaryFinder::string() const
352{
353 if (chars == s.unicode() && length == s.length())
354 return s;
355 return QString(chars, length);
356}
357
358
359/*!
360 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
361
362 Returns -1 if there is no next boundary.
363*/
364int QTextBoundaryFinder::toNextBoundary()
365{
366 if (!d || pos < 0 || pos >= length) {
367 pos = -1;
368 return pos;
369 }
370
371 ++pos;
372 switch(t) {
373 case Grapheme:
374 while (pos < length && !d->attributes[pos].graphemeBoundary)
375 ++pos;
376 break;
377 case Word:
378 while (pos < length && !d->attributes[pos].wordBreak)
379 ++pos;
380 break;
381 case Sentence:
382 while (pos < length && !d->attributes[pos].sentenceBoundary)
383 ++pos;
384 break;
385 case Line:
386 while (pos < length && !d->attributes[pos].lineBreak)
387 ++pos;
388 break;
389 }
390
391 return pos;
392}
393
394/*!
395 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
396
397 Returns -1 if there is no previous boundary.
398*/
399int QTextBoundaryFinder::toPreviousBoundary()
400{
401 if (!d || pos <= 0 || pos > length) {
402 pos = -1;
403 return pos;
404 }
405
406 --pos;
407 switch(t) {
408 case Grapheme:
409 while (pos > 0 && !d->attributes[pos].graphemeBoundary)
410 --pos;
411 break;
412 case Word:
413 while (pos > 0 && !d->attributes[pos].wordBreak)
414 --pos;
415 break;
416 case Sentence:
417 while (pos > 0 && !d->attributes[pos].sentenceBoundary)
418 --pos;
419 break;
420 case Line:
421 while (pos > 0 && !d->attributes[pos].lineBreak)
422 --pos;
423 break;
424 }
425
426 return pos;
427}
428
429/*!
430 Returns \c true if the object's position() is currently at a valid text boundary.
431*/
432bool QTextBoundaryFinder::isAtBoundary() const
433{
434 if (!d || pos < 0 || pos > length)
435 return false;
436
437 switch(t) {
438 case Grapheme:
439 return d->attributes[pos].graphemeBoundary;
440 case Word:
441 return d->attributes[pos].wordBreak;
442 case Sentence:
443 return d->attributes[pos].sentenceBoundary;
444 case Line:
445 // ### TR#14 LB2 prohibits break at sot
446 return d->attributes[pos].lineBreak || pos == 0;
447 }
448 return false;
449}
450
451/*!
452 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
453*/
454QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
455{
456 BoundaryReasons reasons = NotAtBoundary;
457 if (!d || pos < 0 || pos > length)
458 return reasons;
459
460 const QCharAttributes attr = d->attributes[pos];
461 switch (t) {
462 case Grapheme:
463 if (attr.graphemeBoundary) {
464 reasons |= BreakOpportunity | StartOfItem | EndOfItem;
465 if (pos == 0)
466 reasons &= (~EndOfItem);
467 else if (pos == length)
468 reasons &= (~StartOfItem);
469 }
470 break;
471 case Word:
472 if (attr.wordBreak) {
473 reasons |= BreakOpportunity;
474 if (attr.wordStart)
475 reasons |= StartOfItem;
476 if (attr.wordEnd)
477 reasons |= EndOfItem;
478 }
479 break;
480 case Sentence:
481 if (attr.sentenceBoundary) {
482 reasons |= BreakOpportunity | StartOfItem | EndOfItem;
483 if (pos == 0)
484 reasons &= (~EndOfItem);
485 else if (pos == length)
486 reasons &= (~StartOfItem);
487 }
488 break;
489 case Line:
490 // ### TR#14 LB2 prohibits break at sot
491 if (attr.lineBreak || pos == 0) {
492 reasons |= BreakOpportunity;
493 if (attr.mandatoryBreak || pos == 0) {
494 reasons |= MandatoryBreak | StartOfItem | EndOfItem;
495 if (pos == 0)
496 reasons &= (~EndOfItem);
497 else if (pos == length)
498 reasons &= (~StartOfItem);
499 } else if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) {
500 reasons |= SoftHyphen;
501 }
502 }
503 break;
504 default:
505 break;
506 }
507
508 return reasons;
509}
510
511QT_END_NAMESPACE
512

source code of qtbase/src/corelib/text/qtextboundaryfinder.cpp