1// Copyright (C) 2022 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3#include <QtCore/qtextboundaryfinder.h>
4#include <QtCore/qvarlengtharray.h>
5
6#include <private/qunicodetools_p.h>
7
8QT_BEGIN_NAMESPACE
9
10static void init(QTextBoundaryFinder::BoundaryType type, QStringView str, QCharAttributes *attributes)
11{
12 QUnicodeTools::ScriptItemArray scriptItems;
13 QUnicodeTools::initScripts(str, scripts: &scriptItems);
14
15 QUnicodeTools::CharAttributeOptions options;
16 switch (type) {
17 case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break;
18 case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break;
19 case QTextBoundaryFinder::Sentence: options |= QUnicodeTools::SentenceBreaks; break;
20 case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break;
21 default: break;
22 }
23 QUnicodeTools::initCharAttributes(str, items: scriptItems.data(), numItems: scriptItems.size(), attributes, options);
24}
25
26/*!
27 \class QTextBoundaryFinder
28 \inmodule QtCore
29
30 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
31
32 \since 4.4
33 \ingroup tools
34 \ingroup shared
35 \ingroup string-processing
36 \reentrant
37
    QTextBoundaryFinder finds Unicode text boundaries in a
    string, according to the Unicode text boundary specification (see
40 \l{https://www.unicode.org/reports/tr14/}{Unicode Standard Annex #14} and
41 \l{https://www.unicode.org/reports/tr29/}{Unicode Standard Annex #29}).
42
    QTextBoundaryFinder can operate on a QString in four possible
    modes depending on the value of \l BoundaryType.
45
46 Units of Unicode characters that make up what the user thinks of
47 as a character or basic unit of the language are here called
48 Grapheme clusters. The two unicode characters 'A' + diaeresis do
49 for example form one grapheme cluster as the user thinks of them
50 as one character, yet it is in this case represented by two
51 unicode code points
52 (see \l{https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries}).
53
54 Word boundaries are there to locate the start and end of what a
55 language considers to be a word
56 (see \l{https://www.unicode.org/reports/tr29/#Word_Boundaries}).
57
58 Line break boundaries give possible places where a line break
59 might happen and sentence boundaries will show the beginning and
60 end of whole sentences
61 (see \l{https://www.unicode.org/reports/tr29/#Sentence_Boundaries} and
62 \l{https://www.unicode.org/reports/tr14/}).
63
64 The first position in a string is always a valid boundary and
65 refers to the position before the first character. The last
66 position at the length of the string is also valid and refers
67 to the position after the last character.
68*/
69
70/*!
71 \enum QTextBoundaryFinder::BoundaryType
72
    \value Grapheme Finds a grapheme which is the smallest boundary. It
    includes letters, punctuation marks, numerals and more.
75 \value Word Finds a word.
76 \value Line Finds possible positions for breaking the text into multiple
77 lines.
78 \value Sentence Finds sentence boundaries. These include periods, question
79 marks etc.
80*/
81
82/*!
83 \enum QTextBoundaryFinder::BoundaryReason
84
85 \value NotAtBoundary The boundary finder is not at a boundary position.
86 \value BreakOpportunity The boundary finder is at a break opportunity position.
87 Such a break opportunity might also be an item boundary
88 (either StartOfItem, EndOfItem, or combination of both),
89 a mandatory line break, or a soft hyphen.
90 \value [since 5.0] StartOfItem The boundary finder is at the start of
91 a grapheme, a word, a sentence, or a line.
92 \value [since 5.0] EndOfItem The boundary finder is at the end of
93 a grapheme, a word, a sentence, or a line.
94 \value [since 5.0] MandatoryBreak The boundary finder is at the end of line
95 (can occur for a Line boundary type only).
96 \value SoftHyphen The boundary finder is at the soft hyphen
97 (can occur for a Line boundary type only).
98*/
99
100/*!
101 Constructs an invalid QTextBoundaryFinder object.
102*/
103QTextBoundaryFinder::QTextBoundaryFinder()
104 : freeBuffer(true)
105{
106}
107
108/*!
109 Copies the QTextBoundaryFinder object, \a other.
110*/
111QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
112 : t(other.t)
113 , s(other.s)
114 , sv(other.sv)
115 , pos(other.pos)
116 , freeBuffer(true)
117{
118 if (other.attributes) {
119 Q_ASSERT(sv.size() > 0);
120 attributes = (QCharAttributes *) malloc(size: (sv.size() + 1) * sizeof(QCharAttributes));
121 Q_CHECK_PTR(attributes);
122 memcpy(dest: attributes, src: other.attributes, n: (sv.size() + 1) * sizeof(QCharAttributes));
123 }
124}
125
126/*!
127 Assigns the object, \a other, to another QTextBoundaryFinder object.
128*/
129QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
130{
131 if (&other == this)
132 return *this;
133
134 if (other.attributes) {
135 Q_ASSERT(other.sv.size() > 0);
136 size_t newCapacity = (size_t(other.sv.size()) + 1) * sizeof(QCharAttributes);
137 QCharAttributes *newD = (QCharAttributes *) realloc(ptr: freeBuffer ? attributes : nullptr, size: newCapacity);
138 Q_CHECK_PTR(newD);
139 freeBuffer = true;
140 attributes = newD;
141 }
142
143 t = other.t;
144 s = other.s;
145 sv = other.sv;
146 pos = other.pos;
147
148 if (other.attributes) {
149 memcpy(dest: attributes, src: other.attributes, n: (sv.size() + 1) * sizeof(QCharAttributes));
150 } else {
151 if (freeBuffer)
152 free(ptr: attributes);
153 attributes = nullptr;
154 }
155
156 return *this;
157}
158
159/*!
160 Destructs the QTextBoundaryFinder object.
161*/
162QTextBoundaryFinder::~QTextBoundaryFinder()
163{
164 Q_UNUSED(unused);
165 if (freeBuffer)
166 free(ptr: attributes);
167}
168
169/*!
170 Creates a QTextBoundaryFinder object of \a type operating on \a string.
171*/
172QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
173 : t(type)
174 , s(string)
175 , sv(s)
176 , freeBuffer(true)
177{
178 if (sv.size() > 0) {
179 attributes = (QCharAttributes *) malloc(size: (sv.size() + 1) * sizeof(QCharAttributes));
180 Q_CHECK_PTR(attributes);
181 init(type: t, str: sv, attributes);
182 }
183}
184
185/*!
186 \fn QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, qsizetype length, unsigned char *buffer, qsizetype bufferSize)
187 \overload
188
189 The same as QTextBoundaryFinder(type, QStringView(chars, length), buffer, bufferSize).
190*/
191
192/*!
193 Creates a QTextBoundaryFinder object of \a type operating on \a string.
194 \since 6.0
195
196 \a buffer is an optional working buffer of size \a bufferSize you can pass to
197 the QTextBoundaryFinder. If the buffer is large enough to hold the working
198 data required (bufferSize >= length + 1), it will use this
199 instead of allocating its own buffer.
200
201 \warning QTextBoundaryFinder does not create a copy of \a string. It is the
202 application programmer's responsibility to ensure the array is allocated for
203 as long as the QTextBoundaryFinder object stays alive. The same applies to
204 \a buffer.
205*/
206QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, QStringView string, unsigned char *buffer, qsizetype bufferSize)
207 : t(type)
208 , sv(string)
209 , freeBuffer(true)
210{
211 if (!sv.isEmpty()) {
212 if (buffer && bufferSize / int(sizeof(QCharAttributes)) >= sv.size() + 1) {
213 attributes = reinterpret_cast<QCharAttributes *>(buffer);
214 freeBuffer = false;
215 } else {
216 attributes = (QCharAttributes *) malloc(size: (sv.size() + 1) * sizeof(QCharAttributes));
217 Q_CHECK_PTR(attributes);
218 }
219 init(type: t, str: sv, attributes);
220 }
221}
222
223/*!
224 Moves the finder to the start of the string. This is equivalent to setPosition(0).
225
226 \sa setPosition(), position()
227*/
228void QTextBoundaryFinder::toStart()
229{
230 pos = 0;
231}
232
233/*!
234 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
235
236 \sa setPosition(), position()
237*/
238void QTextBoundaryFinder::toEnd()
239{
240 pos = sv.size();
241}
242
243/*!
244 Returns the current position of the QTextBoundaryFinder.
245
246 The range is from 0 (the beginning of the string) to the length of
247 the string inclusive.
248
249 \sa setPosition()
250*/
251qsizetype QTextBoundaryFinder::position() const
252{
253 return pos;
254}
255
256/*!
257 Sets the current position of the QTextBoundaryFinder to \a position.
258
259 If \a position is out of bounds, it will be bound to only valid
260 positions. In this case, valid positions are from 0 to the length of
261 the string inclusive.
262
263 \sa position()
264*/
265void QTextBoundaryFinder::setPosition(qsizetype position)
266{
267 pos = qBound(min: 0, val: position, max: sv.size());
268}
269
270/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
271
272 Returns the type of the QTextBoundaryFinder.
273*/
274
275/*! \fn bool QTextBoundaryFinder::isValid() const
276
277 Returns \c true if the text boundary finder is valid; otherwise returns \c false.
278 A default QTextBoundaryFinder is invalid.
279*/
280
281/*!
282 Returns the string the QTextBoundaryFinder object operates on.
283*/
284QString QTextBoundaryFinder::string() const
285{
286 if (sv.data() == s.unicode() && sv.size() == s.size())
287 return s;
288 return sv.toString();
289}
290
291
292/*!
293 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
294
295 Returns -1 if there is no next boundary.
296*/
297qsizetype QTextBoundaryFinder::toNextBoundary()
298{
299 if (!attributes || pos < 0 || pos >= sv.size()) {
300 pos = -1;
301 return pos;
302 }
303
304 ++pos;
305 switch(t) {
306 case Grapheme:
307 while (pos < sv.size() && !attributes[pos].graphemeBoundary)
308 ++pos;
309 break;
310 case Word:
311 while (pos < sv.size() && !attributes[pos].wordBreak)
312 ++pos;
313 break;
314 case Sentence:
315 while (pos < sv.size() && !attributes[pos].sentenceBoundary)
316 ++pos;
317 break;
318 case Line:
319 while (pos < sv.size() && !attributes[pos].lineBreak)
320 ++pos;
321 break;
322 }
323
324 return pos;
325}
326
327/*!
328 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
329
330 Returns -1 if there is no previous boundary.
331*/
332qsizetype QTextBoundaryFinder::toPreviousBoundary()
333{
334 if (!attributes || pos <= 0 || pos > sv.size()) {
335 pos = -1;
336 return pos;
337 }
338
339 --pos;
340 switch(t) {
341 case Grapheme:
342 while (pos > 0 && !attributes[pos].graphemeBoundary)
343 --pos;
344 break;
345 case Word:
346 while (pos > 0 && !attributes[pos].wordBreak)
347 --pos;
348 break;
349 case Sentence:
350 while (pos > 0 && !attributes[pos].sentenceBoundary)
351 --pos;
352 break;
353 case Line:
354 while (pos > 0 && !attributes[pos].lineBreak)
355 --pos;
356 break;
357 }
358
359 return pos;
360}
361
362/*!
363 Returns \c true if the object's position() is currently at a valid text boundary.
364*/
365bool QTextBoundaryFinder::isAtBoundary() const
366{
367 if (!attributes || pos < 0 || pos > sv.size())
368 return false;
369
370 switch(t) {
371 case Grapheme:
372 return attributes[pos].graphemeBoundary;
373 case Word:
374 return attributes[pos].wordBreak;
375 case Sentence:
376 return attributes[pos].sentenceBoundary;
377 case Line:
378 // ### TR#14 LB2 prohibits break at sot
379 return attributes[pos].lineBreak || pos == 0;
380 }
381 return false;
382}
383
384/*!
385 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
386*/
387QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
388{
389 BoundaryReasons reasons = NotAtBoundary;
390 if (!attributes || pos < 0 || pos > sv.size())
391 return reasons;
392
393 const QCharAttributes attr = attributes[pos];
394 switch (t) {
395 case Grapheme:
396 if (attr.graphemeBoundary) {
397 reasons |= BreakOpportunity | StartOfItem | EndOfItem;
398 if (pos == 0)
399 reasons &= (~EndOfItem);
400 else if (pos == sv.size())
401 reasons &= (~StartOfItem);
402 }
403 break;
404 case Word:
405 if (attr.wordBreak) {
406 reasons |= BreakOpportunity;
407 if (attr.wordStart)
408 reasons |= StartOfItem;
409 if (attr.wordEnd)
410 reasons |= EndOfItem;
411 }
412 break;
413 case Sentence:
414 if (attr.sentenceBoundary) {
415 reasons |= BreakOpportunity | StartOfItem | EndOfItem;
416 if (pos == 0)
417 reasons &= (~EndOfItem);
418 else if (pos == sv.size())
419 reasons &= (~StartOfItem);
420 }
421 break;
422 case Line:
423 // ### TR#14 LB2 prohibits break at sot
424 if (attr.lineBreak || pos == 0) {
425 reasons |= BreakOpportunity;
426 if (attr.mandatoryBreak || pos == 0) {
427 reasons |= MandatoryBreak | StartOfItem | EndOfItem;
428 if (pos == 0)
429 reasons &= (~EndOfItem);
430 else if (pos == sv.size())
431 reasons &= (~StartOfItem);
432 } else if (pos > 0 && sv[pos - 1].unicode() == QChar::SoftHyphen) {
433 reasons |= SoftHyphen;
434 }
435 }
436 break;
437 default:
438 break;
439 }
440
441 return reasons;
442}
443
444QT_END_NAMESPACE
445

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of qtbase/src/corelib/text/qtextboundaryfinder.cpp