1 | #include <QtTest/QTest> |
2 | |
3 | #include <poppler-qt6.h> |
4 | |
5 | class TestSearch : public QObject |
6 | { |
7 | Q_OBJECT |
8 | public: |
9 | explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { } |
10 | private slots: |
11 | void testAcrossLinesSearch(); // leave it first |
12 | void testAcrossLinesSearchDoubleColumn(); |
13 | void bug7063(); |
14 | void testNextAndPrevious(); |
15 | void testWholeWordsOnly(); |
16 | void testIgnoreDiacritics(); |
17 | void testRussianSearch(); // Issue #743 |
18 | void testDeseretSearch(); // Issue #853 |
19 | }; |
20 | |
21 | void TestSearch::bug7063() |
22 | { |
23 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/bug7063.pdf" ); |
24 | QVERIFY(document); |
25 | |
26 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
27 | QVERIFY(page); |
28 | |
29 | double rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); |
30 | |
31 | QCOMPARE(page->search(QStringLiteral("non-ascii:" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); |
32 | |
33 | QCOMPARE(page->search(QStringLiteral("Ascii" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false); |
34 | QCOMPARE(page->search(QStringLiteral("Ascii" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::IgnoreCase), true); |
35 | |
36 | QCOMPARE(page->search(QStringLiteral("latin1:" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false); |
37 | |
38 | QCOMPARE(page->search(QString::fromUtf8("é" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
39 | QCOMPARE(page->search(QString::fromUtf8("à" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
40 | QCOMPARE(page->search(QString::fromUtf8("ç" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
41 | QCOMPARE(page->search(QString::fromUtf8("search \"é\", \"à\" or \"ç\"" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
42 | QCOMPARE(page->search(QString::fromUtf8("¥µ©" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
43 | QCOMPARE(page->search(QString::fromUtf8("¥©" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false); // clazy:exclude=qstring-allocations |
44 | |
45 | QCOMPARE(page->search(QStringLiteral("non-ascii:" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); |
46 | |
47 | QCOMPARE(page->search(QStringLiteral("Ascii" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false); |
48 | QCOMPARE(page->search(QStringLiteral("Ascii" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::IgnoreCase), true); |
49 | |
50 | QCOMPARE(page->search(QStringLiteral("latin1:" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false); |
51 | |
52 | QCOMPARE(page->search(QString::fromUtf8("é" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
53 | QCOMPARE(page->search(QString::fromUtf8("à" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
54 | QCOMPARE(page->search(QString::fromUtf8("ç" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
55 | QCOMPARE(page->search(QString::fromUtf8("search \"é\", \"à\" or \"ç\"" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
56 | QCOMPARE(page->search(QString::fromUtf8("¥µ©" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // clazy:exclude=qstring-allocations |
57 | QCOMPARE(page->search(QString::fromUtf8("¥©" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false); // clazy:exclude=qstring-allocations |
58 | } |
59 | |
60 | void TestSearch::testNextAndPrevious() |
61 | { |
62 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/xr01.pdf" ); |
63 | QVERIFY(document); |
64 | |
65 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
66 | QVERIFY(page); |
67 | |
68 | double rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); |
69 | |
70 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); |
71 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
72 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
73 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
74 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
75 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true); |
76 | QVERIFY(qAbs(rectLeft - 171.46) < 0.01); |
77 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
78 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
79 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
80 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true); |
81 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
82 | QVERIFY(qAbs(rectTop - 139.81) < 0.01); |
83 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
84 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
85 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true); |
86 | QVERIFY(qAbs(rectLeft - 171.46) < 0.01); |
87 | QVERIFY(qAbs(rectTop - 139.81) < 0.01); |
88 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
89 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
90 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), false); |
91 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true); |
92 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
93 | QVERIFY(qAbs(rectTop - 139.81) < 0.01); |
94 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
95 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
96 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true); |
97 | QVERIFY(qAbs(rectLeft - 171.46) < 0.01); |
98 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
99 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
100 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
101 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true); |
102 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
103 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
104 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
105 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
106 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), false); |
107 | |
108 | rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); |
109 | |
110 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); |
111 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
112 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
113 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
114 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
115 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true); |
116 | QVERIFY(qAbs(rectLeft - 171.46) < 0.01); |
117 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
118 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
119 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
120 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true); |
121 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
122 | QVERIFY(qAbs(rectTop - 139.81) < 0.01); |
123 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
124 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
125 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true); |
126 | QVERIFY(qAbs(rectLeft - 171.46) < 0.01); |
127 | QVERIFY(qAbs(rectTop - 139.81) < 0.01); |
128 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
129 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
130 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), false); |
131 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true); |
132 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
133 | QVERIFY(qAbs(rectTop - 139.81) < 0.01); |
134 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
135 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
136 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true); |
137 | QVERIFY(qAbs(rectLeft - 171.46) < 0.01); |
138 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
139 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
140 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
141 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true); |
142 | QVERIFY(qAbs(rectLeft - 161.44) < 0.01); |
143 | QVERIFY(qAbs(rectTop - 127.85) < 0.01); |
144 | QVERIFY(qAbs(rectRight - rectLeft - 6.70) < 0.01); |
145 | QVERIFY(qAbs(rectBottom - rectTop - 8.85) < 0.01); |
146 | QCOMPARE(page->search(QStringLiteral("is" ), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), false); |
147 | } |
148 | |
149 | void TestSearch::testWholeWordsOnly() |
150 | { |
151 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/WithActualText.pdf" ); |
152 | QVERIFY(document); |
153 | |
154 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
155 | QVERIFY(page); |
156 | |
157 | const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop; |
158 | |
159 | const Poppler::Page::SearchFlags mode0; |
160 | const Poppler::Page::SearchFlags mode1 = Poppler::Page::IgnoreCase; |
161 | const Poppler::Page::SearchFlags mode2 = Poppler::Page::WholeWords; |
162 | const Poppler::Page::SearchFlags mode3 = Poppler::Page::IgnoreCase | Poppler::Page::WholeWords; |
163 | |
164 | double left, top, right, bottom; |
165 | |
166 | QCOMPARE(page->search(QStringLiteral("brown" ), left, top, right, bottom, direction, mode0), true); |
167 | QCOMPARE(page->search(QStringLiteral("brOwn" ), left, top, right, bottom, direction, mode0), false); |
168 | |
169 | QCOMPARE(page->search(QStringLiteral("brOwn" ), left, top, right, bottom, direction, mode1), true); |
170 | QCOMPARE(page->search(QStringLiteral("brawn" ), left, top, right, bottom, direction, mode1), false); |
171 | |
172 | QCOMPARE(page->search(QStringLiteral("brown" ), left, top, right, bottom, direction, mode2), true); |
173 | QCOMPARE(page->search(QStringLiteral("own" ), left, top, right, bottom, direction, mode2), false); |
174 | |
175 | QCOMPARE(page->search(QStringLiteral("brOwn" ), left, top, right, bottom, direction, mode3), true); |
176 | QCOMPARE(page->search(QStringLiteral("Own" ), left, top, right, bottom, direction, mode3), false); |
177 | } |
178 | |
179 | void TestSearch::testIgnoreDiacritics() |
180 | { |
181 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/Issue637.pdf" ); |
182 | QVERIFY(document); |
183 | |
184 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
185 | QVERIFY(page); |
186 | |
187 | const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop; |
188 | |
189 | const Poppler::Page::SearchFlags mode0; |
190 | const Poppler::Page::SearchFlags mode1 = Poppler::Page::IgnoreDiacritics; |
191 | const Poppler::Page::SearchFlags mode2 = Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase; |
192 | const Poppler::Page::SearchFlags mode3 = Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase | Poppler::Page::WholeWords; |
193 | const Poppler::Page::SearchFlags mode4 = Poppler::Page::IgnoreCase | Poppler::Page::WholeWords; |
194 | |
195 | double left, top, right, bottom; |
196 | |
197 | // Test pdf (Issue637.pdf) just contains the following three lines: |
198 | // La cigüeña voló sobre nuestras cabezas. |
199 | // La cigogne a survolé nos têtes. |
200 | // Der Storch flog über unsere Köpfe hinweg. |
201 | |
202 | QCOMPARE(page->search(QString(), left, top, right, bottom, direction, mode0), false); |
203 | QCOMPARE(page->search(QStringLiteral("ciguena" ), left, top, right, bottom, direction, mode0), false); |
204 | QCOMPARE(page->search(QStringLiteral("Ciguena" ), left, top, right, bottom, direction, mode1), false); |
205 | QCOMPARE(page->search(QStringLiteral("ciguena" ), left, top, right, bottom, direction, mode1), true); |
206 | QCOMPARE(page->search(QString::fromUtf8("cigüeña" ), left, top, right, bottom, direction, mode1), true); // clazy:exclude=qstring-allocations |
207 | QCOMPARE(page->search(QString::fromUtf8("cigüena" ), left, top, right, bottom, direction, mode1), false); // clazy:exclude=qstring-allocations |
208 | QCOMPARE(page->search(QString::fromUtf8("Cigüeña" ), left, top, right, bottom, direction, mode1), false); // clazy:exclude=qstring-allocations |
209 | QCOMPARE(page->search(QStringLiteral("Ciguena" ), left, top, right, bottom, direction, mode2), true); |
210 | QCOMPARE(page->search(QStringLiteral("ciguena" ), left, top, right, bottom, direction, mode2), true); |
211 | QCOMPARE(page->search(QStringLiteral("Ciguena" ), left, top, right, bottom, direction, mode3), true); |
212 | QCOMPARE(page->search(QStringLiteral("ciguena" ), left, top, right, bottom, direction, mode3), true); |
213 | |
214 | QCOMPARE(page->search(QString::fromUtf8("cigüeña" ), left, top, right, bottom, direction, mode4), true); // clazy:exclude=qstring-allocations |
215 | QCOMPARE(page->search(QString::fromUtf8("Cigüeña" ), left, top, right, bottom, direction, mode4), true); // clazy:exclude=qstring-allocations |
216 | QCOMPARE(page->search(QString::fromUtf8("cigüena" ), left, top, right, bottom, direction, mode4), false); // clazy:exclude=qstring-allocations |
217 | QCOMPARE(page->search(QStringLiteral("Ciguena" ), left, top, right, bottom, direction, mode4), false); |
218 | |
219 | QCOMPARE(page->search(QStringLiteral("kopfe" ), left, top, right, bottom, direction, mode2), true); |
220 | QCOMPARE(page->search(QStringLiteral("kopfe" ), left, top, right, bottom, direction, mode3), true); |
221 | QCOMPARE(page->search(QStringLiteral("uber" ), left, top, right, bottom, direction, mode0), false); |
222 | QCOMPARE(page->search(QStringLiteral("uber" ), left, top, right, bottom, direction, mode1), true); |
223 | QCOMPARE(page->search(QStringLiteral("uber" ), left, top, right, bottom, direction, mode2), true); |
224 | QCOMPARE(page->search(QStringLiteral("uber" ), left, top, right, bottom, direction, mode3), true); |
225 | |
226 | QCOMPARE(page->search(QStringLiteral("vole" ), left, top, right, bottom, direction, mode2), true); |
227 | QCOMPARE(page->search(QStringLiteral("vole" ), left, top, right, bottom, direction, mode3), false); |
228 | QCOMPARE(page->search(QStringLiteral("survole" ), left, top, right, bottom, direction, mode3), true); |
229 | QCOMPARE(page->search(QStringLiteral("tete" ), left, top, right, bottom, direction, mode3), false); |
230 | QCOMPARE(page->search(QStringLiteral("tete" ), left, top, right, bottom, direction, mode2), true); |
231 | |
232 | QCOMPARE(page->search(QStringLiteral("La Ciguena Volo" ), left, top, right, bottom, direction, mode2), true); |
233 | QCOMPARE(page->search(QStringLiteral("Survole Nos Tetes" ), left, top, right, bottom, direction, mode2), true); |
234 | QCOMPARE(page->search(QStringLiteral("Uber Unsere Kopfe" ), left, top, right, bottom, direction, mode2), true); |
235 | } |
236 | |
237 | void TestSearch::testRussianSearch() |
238 | { |
239 | // Test for issue #743 |
240 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/russian.pdf" ); |
241 | QVERIFY(document); |
242 | |
243 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
244 | QVERIFY(page); |
245 | |
246 | const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop; |
247 | |
248 | const Poppler::Page::SearchFlags mode0 = Poppler::Page::NoSearchFlags; |
249 | const Poppler::Page::SearchFlags mode1 = Poppler::Page::IgnoreDiacritics; |
250 | const Poppler::Page::SearchFlags mode2 = Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase; |
251 | const Poppler::Page::SearchFlags mode0W = mode0 | Poppler::Page::WholeWords; |
252 | const Poppler::Page::SearchFlags mode1W = mode1 | Poppler::Page::WholeWords; |
253 | const Poppler::Page::SearchFlags mode2W = mode2 | Poppler::Page::WholeWords; |
254 | |
255 | double l, t, r, b; // left, top, right, bottom |
256 | |
257 | // In the searched page 5, these two words do exist: простой and Простой |
258 | const QString str = QString::fromUtf8(utf8: "простой" ); // clazy:exclude=qstring-allocations |
259 | QCOMPARE(page->search(str, l, t, r, b, direction, mode0), true); |
260 | QCOMPARE(page->search(str, l, t, r, b, direction, mode1), true); |
261 | QCOMPARE(page->search(str, l, t, r, b, direction, mode2), true); |
262 | QCOMPARE(page->search(str, l, t, r, b, direction, mode0W), true); |
263 | QCOMPARE(page->search(str, l, t, r, b, direction, mode1W), true); |
264 | QCOMPARE(page->search(str, l, t, r, b, direction, mode2W), true); |
265 | } |
266 | |
267 | void TestSearch::testDeseretSearch() |
268 | { |
269 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/deseret.pdf" ); |
270 | QVERIFY(document); |
271 | |
272 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
273 | QVERIFY(page); |
274 | |
275 | double l, t, r, b; // left, top, right, bottom |
276 | |
277 | const QString str = QString::fromUtf8(utf8: "𐐐𐐯𐑊𐐬" ); // clazy:exclude=qstring-allocations |
278 | QCOMPARE(page->search(str, l, t, r, b, Poppler::Page::FromTop, Poppler::Page::NoSearchFlags), true); |
279 | |
280 | const QString str2 = QString::fromUtf8(utf8: "𐐸𐐯𐑊𐐬" ); // clazy:exclude=qstring-allocations |
281 | QCOMPARE(page->search(str2, l, t, r, b, Poppler::Page::FromTop, Poppler::Page::IgnoreCase), true); |
282 | } |
283 | |
284 | void TestSearch::testAcrossLinesSearch() |
285 | { |
286 | // Test for searching across lines with new flag Poppler::Page::AcrossLines |
287 | // and its automatic features like ignoring hyphen at end of line or allowing |
288 | // whitespace in the search term to match on newline character. |
289 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLines.pdf" ); |
290 | QVERIFY(document); |
291 | |
292 | std::unique_ptr<Poppler::Page> page = document->page(index: 1); |
293 | QVERIFY(page); |
294 | |
295 | const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop; |
296 | |
297 | const Poppler::Page::SearchFlags empty = Poppler::Page::NoSearchFlags; |
298 | const Poppler::Page::SearchFlags mode0 = Poppler::Page::AcrossLines; |
299 | const Poppler::Page::SearchFlags mode1 = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics; |
300 | const Poppler::Page::SearchFlags mode2 = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase; |
301 | const Poppler::Page::SearchFlags mode2W = mode2 | Poppler::Page::WholeWords; |
302 | |
303 | double l, t, r, b; // left, top, right, bottom |
304 | |
305 | // In the searched page, each of "re-conocimiento" "PRUE-BA" "imáge-nes" happen split across lines |
306 | const QString str1 = QString::fromUtf8(utf8: "reconocimiento" ); // clazy:exclude=qstring-allocations |
307 | const QString str2 = QString::fromUtf8(utf8: "IMagenes" ); // clazy:exclude=qstring-allocations |
308 | // Test it cannot be found with empty search flags |
309 | QCOMPARE(page->search(str1, l, t, r, b, direction, empty), false); |
310 | // Test it is found with AcrossLines option |
311 | QCOMPARE(page->search(str1, l, t, r, b, direction, mode0), true); |
312 | // Test AcrossLines with IgnoreDiacritics and IgnoreCase options |
313 | QCOMPARE(page->search(str2, l, t, r, b, direction, mode0), false); |
314 | QCOMPARE(page->search(str2, l, t, r, b, direction, mode1), false); |
315 | QCOMPARE(page->search(str2, l, t, r, b, direction, mode2), true); |
316 | // Test with WholeWords too |
317 | QCOMPARE(page->search(str2, l, t, r, b, direction, mode2W), true); |
318 | |
319 | // Now test that AcrossLines also allows whitespace in the search term to match on newline char. |
320 | // In the searched page, "podrá" ends a line and "acordar" starts the next line, so we |
321 | // now test we match it with "podrá acordar" |
322 | const QString str3 = QString::fromUtf8(utf8: "podrá acordar," ); // clazy:exclude=qstring-allocations |
323 | QCOMPARE(page->search(str3, l, t, r, b, direction, mode0), true); |
324 | QCOMPARE(page->search(str3, l, t, r, b, direction, mode1), true); |
325 | QCOMPARE(page->search(str3, l, t, r, b, direction, mode2), true); |
326 | QCOMPARE(page->search(str3, l, t, r, b, direction, mode2W), true); |
327 | // now test it also works with IgnoreDiacritics and IgnoreCase |
328 | const QString str4 = QString::fromUtf8(utf8: "PODRA acordar" ); // clazy:exclude=qstring-allocations |
329 | QCOMPARE(page->search(str4, l, t, r, b, direction, mode0), false); |
330 | QCOMPARE(page->search(str4, l, t, r, b, direction, mode1), false); |
331 | QCOMPARE(page->search(str4, l, t, r, b, direction, mode2), true); |
332 | QCOMPARE(page->search(str4, l, t, r, b, direction, mode2W), false); // false as it lacks ending comma |
333 | |
334 | // Now test that when a hyphen char in the search term matches a hyphen at end of line, |
335 | // then we don't automatically ignore it, but treat it as a normal char. |
336 | // In the searched page, "CC BY-NC-SA 4.0" is split across two lines on the second hyphen |
337 | const QString str5 = QString::fromUtf8(utf8: "CC BY-NC-SA 4.0" ); // clazy:exclude=qstring-allocations |
338 | std::unique_ptr<Poppler::Page> page0 = document->page(index: 0); |
339 | QVERIFY(page0); |
340 | QCOMPARE(page0->search(str5, l, t, r, b, direction, mode0), true); |
341 | QCOMPARE(page0->search(str5, l, t, r, b, direction, mode1), true); |
342 | QCOMPARE(page0->search(str5, l, t, r, b, direction, mode2), true); |
343 | QCOMPARE(page0->search(str5, l, t, r, b, direction, mode2W), true); |
344 | QCOMPARE(page0->search(QString::fromUtf8("NC-SA" ), l, t, r, b, direction, mode2W), false); // clazy:exclude=qstring-allocations |
345 | // Searching for "CC BY-NCSA 4.0" should also match, because hyphen is now ignored at end of line |
346 | const QString str6 = QString::fromUtf8(utf8: "CC BY-NCSA 4.0" ); // clazy:exclude=qstring-allocations |
347 | QCOMPARE(page0->search(str6, l, t, r, b, direction, mode0), true); |
348 | QCOMPARE(page0->search(str6, l, t, r, b, direction, mode1), true); |
349 | QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2), true); |
350 | QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2W), true); |
351 | // Check for the case when next line falls in next paragraph. Issue #1475 |
352 | const QString across_block = QString::fromUtf8(utf8: "emacs jose" ); // clazy:exclude=qstring-allocations |
353 | QCOMPARE(page0->search(across_block, l, t, r, b, direction, empty), false); |
354 | QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode0), false); |
355 | QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode1), false); |
356 | QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2), true); |
357 | QCOMPARE(page0->search(across_block, l, t, r, b, direction, mode2W), true); |
358 | |
359 | // Now for completeness, we will match the full text of two lines |
360 | const QString full2lines = QString::fromUtf8( // clazy:exclude=qstring-allocations |
361 | utf8: "Las pruebas se practicarán en vista pública, si bien, excepcionalmente, el Tribunal podrá acordar, mediante providencia, que determinadas pruebas se celebren fuera del acto de juicio" ); |
362 | QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode0), true); |
363 | QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode1), true); |
364 | QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2), true); |
365 | QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2W), true); |
366 | // And now the full text of two lines split by a hyphenated word |
367 | const QString full2linesHyphenated = QString::fromUtf8(utf8: "Consiste básicamente en información digitalizada, codificados y alojados en un elemento contenedor digital (equipos, dispositivos periféricos, unidades de memoria, unidades " |
368 | "virtualizadas, tramas" ); // clazy:exclude=qstring-allocations |
369 | QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode0), true); |
370 | QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true); |
371 | QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true); |
372 | QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true); |
373 | |
374 | // BUG about false positives at start of a line. |
375 | const QString bug_str = QString::fromUtf8(utf8: "nes y" ); // clazy:exclude=qstring-allocations |
376 | // there's only 1 match, check for that |
377 | QCOMPARE(page->search(bug_str, mode2).size(), 1); |
378 | } |
379 | |
380 | void TestSearch::testAcrossLinesSearchDoubleColumn() |
381 | { |
382 | // Test for searching across lines with new flag Poppler::Page::AcrossLines |
383 | // in a document with two columns of text. |
384 | std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf" ); |
385 | QVERIFY(document); |
386 | |
387 | std::unique_ptr<Poppler::Page> page = document->page(index: 0); |
388 | QVERIFY(page); |
389 | |
390 | const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase; |
391 | |
392 | // Test for a bug in double column documents where single line matches are |
393 | // wrongly returned as being multiline matches. |
394 | const QString bug_str = QString::fromUtf8(utf8: "betw" ); // clazy:exclude=qstring-allocations |
395 | |
396 | // there's only 3 matches for 'betw' in document, where only the last |
397 | // one is a multiline match, so that's a total of 4 rects returned |
398 | QCOMPARE(page->search(bug_str, mode).size(), 4); |
399 | } |
400 | |
// Qt Test macro invoked with the TestSearch fixture; presumably this is what
// provides the test executable's entry point (no main() is written in this file).
QTEST_GUILESS_MAIN(TestSearch)
// Presumably the moc-generated code for the Q_OBJECT class declared above in
// this .cpp file — confirm against the build setup.
#include "check_search.moc"
403 | |