1 | /* This file is part of the KDE libraries |
2 | SPDX-FileCopyrightText: 2006 Jacob R Rideout <kde@jacobrideout.net> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #ifndef TEXTBREAKS_H |
8 | #define TEXTBREAKS_H |
9 | |
10 | class QString; |
11 | |
12 | #include "sonnetcore_export.h" |
13 | |
14 | #include <memory> |
15 | |
16 | namespace Sonnet |
17 | { |
18 | class TextBreaksPrivate; |
19 | |
20 | /** |
21 | * @short TextBreaks determines the barriers between linguistic structures in any given text. |
22 | * |
23 | * TextBreaks is a class that determines the boundaries between graphemes |
24 | * (characters as per the unicode definition,) words and sentences. The |
25 | * default implementation conforms to Unicode Standard Annex #29 https://unicode.org/reports/tr29/. |
26 | * You can subclass TextBreaks to create the correct behaviour for languages that require it. |
27 | * |
28 | * @author Jacob Rideout <kde@jacobrideout.net> |
29 | * @since 4.3 |
30 | */ |
31 | class SONNETCORE_EXPORT TextBreaks |
32 | { |
33 | public: |
34 | struct Position { |
35 | int start, length; |
36 | }; |
37 | |
38 | /** |
39 | * This structure abstracts the positions of breaks in the test. As per the |
40 | * unicode annex, both the start and end of the text are returned. |
41 | */ |
42 | typedef QList<Position> Positions; |
43 | |
44 | /** Constructor |
45 | * Creates a new TextBreaks instance. If @p text is specified, |
46 | * it sets the text to be checked. |
47 | * @param text the text that is to be checked |
48 | */ |
49 | explicit TextBreaks(const QString &text = QString()); |
50 | |
51 | /** Virtual Destructor |
52 | */ |
53 | virtual ~TextBreaks(); |
54 | |
55 | /** |
56 | * Returns the text to be checked |
57 | * @return text |
58 | */ |
59 | QString text() const; |
60 | |
61 | /** |
62 | * Sets the text to @p text |
63 | * @param text to be set |
64 | * @return true if the word is misspelled. false otherwise |
65 | */ |
66 | void setText(const QString &text); |
67 | |
68 | /** |
69 | * Return the Positions of each word for the given @p text. |
70 | * @param text to be checked |
71 | * @return positions of breaks |
72 | */ |
73 | static Positions wordBreaks(const QString &text); |
74 | |
75 | /** |
76 | * Return the Positions of each sentence for the given @p text. |
77 | * @param text to be checked |
78 | * @return positions of breaks |
79 | */ |
80 | static Positions sentenceBreaks(const QString &text); |
81 | |
82 | /** |
83 | * Return the Positions of each word for the text previously set. |
84 | * @return positions of breaks |
85 | */ |
86 | virtual Positions wordBreaks() const; |
87 | |
88 | /** |
89 | * Return the Positions of each sentence for the text previously set. |
90 | * @return positions of breaks |
91 | */ |
92 | virtual Positions sentenceBreaks() const; |
93 | |
94 | private: |
95 | std::unique_ptr<TextBreaksPrivate> const d; |
96 | }; |
97 | } |
98 | |
// Declares Position (a struct of two plain ints) to Qt's type-info system as
// Q_PRIMITIVE_TYPE. The declaration sits after the Sonnet namespace has been
// closed, which is why the type is spelled with its fully qualified name.
Q_DECLARE_TYPEINFO(Sonnet::TextBreaks::Position, Q_PRIMITIVE_TYPE);
100 | |
101 | #endif |
102 | |