#include <QtTest/QTest>

#include <poppler-qt6.h>

#include <QtCore/QFile>

#include <memory>
6
7class TestActualText : public QObject
8{
9 Q_OBJECT
10public:
11 explicit TestActualText(QObject *parent = nullptr) : QObject(parent) { }
12private slots:
13 void checkActualText1();
14 void checkActualText2();
15 void checkActualText2_data();
16 void checkAllOrientations();
17 void checkAllOrientations_data();
18 void checkFakeboldText();
19 void checkFakeboldText_data();
20
21private:
22 void checkActualText(Poppler::Document &doc, const QRectF &area, const QString &text);
23};
24
25void TestActualText::checkActualText(Poppler::Document &doc, const QRectF &area, const QString &text)
26{
27 std::unique_ptr<Poppler::Page> page = doc.page(index: 0);
28 QVERIFY(page);
29
30 QCOMPARE(page->text(area), text);
31}
32
33void TestActualText::checkActualText1()
34{
35 std::unique_ptr<Poppler::Document> doc = Poppler::Document::load(TESTDATADIR "/unittestcases/WithActualText.pdf");
36 QVERIFY(doc);
37
38 checkActualText(doc&: *doc, area: QRectF {}, QStringLiteral("The slow brown fox jumps over the black dog."));
39}
40
41void TestActualText::checkActualText2()
42{
43 QFETCH(QRectF, area);
44 QFETCH(QString, text);
45
46 QFile file(TESTDATADIR "/unittestcases/WithActualText.pdf");
47 QVERIFY(file.open(QIODevice::ReadOnly));
48
49 std::unique_ptr<Poppler::Document> doc = Poppler::Document::load(device: &file);
50 QVERIFY(doc);
51
52 checkActualText(doc&: *doc, area, text);
53}
54
55void TestActualText::checkActualText2_data()
56{
57 QTest::addColumn<QRectF>(name: "area");
58 QTest::addColumn<QString>(name: "text");
59
60 // Line bounding box is [100.000 90.720 331.012110 102.350]
61
62 QTest::newRow(dataTag: "full page") << QRectF {} << QStringLiteral("The slow brown fox jumps over the black dog.");
63 QTest::newRow(dataTag: "full line") << QRectF { 50.0, 90.0, 290.0, 20.0 } << QStringLiteral("The slow brown fox jumps over the black dog.");
64 QTest::newRow(dataTag: "full line [narrow]") << QRectF { 50.0, 95.0, 290.0, 5.0 } << QStringLiteral("The slow brown fox jumps over the black dog.");
65 QTest::newRow(dataTag: "above line") << QRectF { 50.0, 85.0, 290.0, 10.0 } << QString {};
66 QTest::newRow(dataTag: "above line mid") << QRectF { 50.0, 90.0, 290.0, 5.0 } << QString {};
67 QTest::newRow(dataTag: "first two words") << QRectF { 50.0, 90.0, 100.0, 20.0 } << QStringLiteral("The slow");
68 QTest::newRow(dataTag: "first two words [narrow]") << QRectF { 50.0, 95.0, 100.0, 5.0 } << QStringLiteral("The slow");
69 QTest::newRow(dataTag: "first character") << QRectF { 103.0, 95.0, 1.0, 5.0 } << QStringLiteral("T");
70 QTest::newRow(dataTag: "last two words") << QRectF { 285.0, 90.0, 100.0, 20.0 } << QStringLiteral("black dog.");
71 QTest::newRow(dataTag: "last character") << QRectF { 320.0, 90.0, 8.0, 20.0 } << QStringLiteral("g");
72 QTest::newRow(dataTag: "middle 'fox'") << QRectF { 190.0, 90.0, 15.0, 20.0 } << QStringLiteral("fox");
73 QTest::newRow(dataTag: "middle 'x'") << QRectF { 200.0, 90.0, 5.0, 20.0 } << QStringLiteral("x");
74}
75
76void TestActualText::checkAllOrientations()
77{
78 QFETCH(int, pageNr);
79 QFETCH(QRectF, area);
80 QFETCH(QString, text);
81
82 QString path { TESTDATADIR "/unittestcases/orientation.pdf" };
83 std::unique_ptr<Poppler::Document> doc { Poppler::Document::load(filePath: path) };
84 QVERIFY(doc);
85
86 std::unique_ptr<Poppler::Page> page { doc->page(index: pageNr) };
87 QVERIFY(page);
88
89 QCOMPARE(page->text(area), text);
90}
91
92void TestActualText::checkAllOrientations_data()
93{
94 QTest::addColumn<int>(name: "pageNr");
95 QTest::addColumn<QRectF>(name: "area");
96 QTest::addColumn<QString>(name: "text");
97
98 QTest::newRow(dataTag: "Portrait") << 0 << QRectF {} << QStringLiteral("Portrait");
99 QTest::newRow(dataTag: "Landscape") << 1 << QRectF {} << QStringLiteral("Landscape");
100 QTest::newRow(dataTag: "Upside down") << 2 << QRectF {} << QStringLiteral("Upside down");
101 QTest::newRow(dataTag: "Seacape") << 3 << QRectF {} << QStringLiteral("Seascape");
102
103 QTest::newRow(dataTag: "Portrait A4 rect") << 0 << QRectF { 0, 0, 595, 842 } << QStringLiteral("Portrait");
104 QTest::newRow(dataTag: "Landscape A4 rect") << 1 << QRectF { 0, 0, 842, 595 } << QStringLiteral("Landscape");
105 QTest::newRow(dataTag: "Upside down A4 rect") << 2 << QRectF { 0, 0, 595, 842 } << QStringLiteral("Upside down");
106 QTest::newRow(dataTag: "Seacape A4 rect") << 3 << QRectF { 0, 0, 842, 595 } << QStringLiteral("Seascape");
107
108 QTest::newRow(dataTag: "Portrait line rect") << 0 << QRectF { 30, 30, 60, 20 } << QStringLiteral("Portrait");
109 QTest::newRow(dataTag: "Landscape line rect") << 1 << QRectF { 790, 30, 20, 80 } << QStringLiteral("Landscape");
110 QTest::newRow(dataTag: "Upside down line rect") << 2 << QRectF { 485, 790, 75, 20 } << QStringLiteral("Upside down");
111 QTest::newRow(dataTag: "Seacape line rect") << 3 << QRectF { 30, 500, 20, 70 } << QStringLiteral("Seascape");
112
113 QTest::newRow(dataTag: "Portrait small rect B") << 0 << QRectF { 30, 35, 10, 10 } << QStringLiteral("P");
114 QTest::newRow(dataTag: "Portrait small rect E") << 0 << QRectF { 80, 35, 10, 10 } << QStringLiteral("t");
115 QTest::newRow(dataTag: "Landscape small rect B") << 1 << QRectF { 800, 30, 10, 10 } << QStringLiteral("L");
116 QTest::newRow(dataTag: "Landscape small rect E") << 1 << QRectF { 800, 90, 10, 10 } << QStringLiteral("e");
117 QTest::newRow(dataTag: "Upside down small rect B") << 2 << QRectF { 550, 800, 10, 10 } << QStringLiteral("U");
118 QTest::newRow(dataTag: "Upside down small rect E") << 2 << QRectF { 485, 800, 10, 10 } << QStringLiteral("n");
119 QTest::newRow(dataTag: "Seacape small rect B") << 3 << QRectF { 40, 550, 10, 10 } << QStringLiteral("S");
120 QTest::newRow(dataTag: "Seacape small rect E") << 3 << QRectF { 40, 510, 10, 10 } << QStringLiteral("p");
121}
122
123void TestActualText::checkFakeboldText()
124{
125 QFETCH(int, pageNr);
126 QFETCH(QRectF, area);
127 QFETCH(QString, text);
128
129 QString path { TESTDATADIR "/unittestcases/fakebold.pdf" };
130 std::unique_ptr<Poppler::Document> doc { Poppler::Document::load(filePath: path) };
131 QVERIFY(doc);
132
133 std::unique_ptr<Poppler::Page> page { doc->page(index: pageNr) };
134 QVERIFY(page);
135
136 QEXPECT_FAIL("Upright line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
137 QEXPECT_FAIL("Upright line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
138 QEXPECT_FAIL("Upright line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
139 QEXPECT_FAIL("Rotated 90' line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
140 QEXPECT_FAIL("Rotated 90' line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
141 QEXPECT_FAIL("Rotated 90' line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
142 QEXPECT_FAIL("Rotated 180' line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
143 QEXPECT_FAIL("Rotated 180' line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
144 QEXPECT_FAIL("Rotated 180' line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
145 QEXPECT_FAIL("Rotated 270' line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
146 QEXPECT_FAIL("Rotated 270' line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
147 QEXPECT_FAIL("Rotated 270' line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
148 QCOMPARE(page->text(area), text);
149}
150
151void TestActualText::checkFakeboldText_data()
152{
153 QTest::addColumn<int>(name: "pageNr");
154 QTest::addColumn<QRectF>(name: "area");
155 QTest::addColumn<QString>(name: "text");
156
157 QTest::newRow(dataTag: "Upright line 1") << 0 << QRectF { 0, 0, 595, 80 } << QStringLiteral("1 This is fakebold text.");
158 QTest::newRow(dataTag: "Upright line 2") << 0 << QRectF { 0, 80, 595, 80 } << QStringLiteral("2 This is a fakebold word.");
159 QTest::newRow(dataTag: "Upright line 3") << 0 << QRectF { 0, 140, 595, 80 } << QStringLiteral("3 The last word is in fakebold.");
160 QTest::newRow(dataTag: "Upright line 4") << 0 << QRectF { 0, 220, 595, 80 } << QStringLiteral("4 Hyphenated-fakebold word.");
161 QTest::newRow(dataTag: "Upright line 5") << 0 << QRectF { 0, 300, 595, 80 } << QStringLiteral("5 Quoted \"fakebold\" word.");
162
163 QTest::newRow(dataTag: "Rotated 90' line 1") << 1 << QRectF { 510, 0, 80, 842 } << QStringLiteral("1 This is fakebold text.");
164 QTest::newRow(dataTag: "Rotated 90' line 2") << 1 << QRectF { 430, 0, 80, 842 } << QStringLiteral("2 This is a fakebold word.");
165 QTest::newRow(dataTag: "Rotated 90' line 3") << 1 << QRectF { 350, 0, 80, 842 } << QStringLiteral("3 The last word is in fakebold.");
166 QTest::newRow(dataTag: "Rotated 90' line 4") << 1 << QRectF { 270, 0, 80, 842 } << QStringLiteral("4 Hyphenated-fakebold word.");
167 QTest::newRow(dataTag: "Rotated 90' line 5") << 1 << QRectF { 190, 0, 80, 842 } << QStringLiteral("5 Quoted \"fakebold\" word.");
168
169 QTest::newRow(dataTag: "Rotated 180' line 1") << 2 << QRectF { 0, 760, 595, 80 } << QStringLiteral("1 This is fakebold text.");
170 QTest::newRow(dataTag: "Rotated 180' line 2") << 2 << QRectF { 0, 680, 595, 80 } << QStringLiteral("2 This is a fakebold word.");
171 QTest::newRow(dataTag: "Rotated 180' line 3") << 2 << QRectF { 0, 600, 595, 80 } << QStringLiteral("3 The last word is in fakebold.");
172 QTest::newRow(dataTag: "Rotated 180' line 4") << 2 << QRectF { 0, 520, 595, 80 } << QStringLiteral("4 Hyphenated-fakebold word.");
173 QTest::newRow(dataTag: "Rotated 180' line 5") << 2 << QRectF { 0, 440, 595, 80 } << QStringLiteral("5 Quoted \"fakebold\" word.");
174
175 QTest::newRow(dataTag: "Rotated 270' line 1") << 3 << QRectF { 20, 0, 80, 842 } << QStringLiteral("1 This is fakebold text.");
176 QTest::newRow(dataTag: "Rotated 270' line 2") << 3 << QRectF { 100, 0, 80, 842 } << QStringLiteral("2 This is a fakebold word.");
177 QTest::newRow(dataTag: "Rotated 270' line 3") << 3 << QRectF { 160, 0, 80, 842 } << QStringLiteral("3 The last word is in fakebold.");
178 QTest::newRow(dataTag: "Rotated 270' line 4") << 3 << QRectF { 240, 0, 80, 842 } << QStringLiteral("4 Hyphenated-fakebold word.");
179 QTest::newRow(dataTag: "Rotated 270' line 5") << 3 << QRectF { 320, 0, 80, 842 } << QStringLiteral("5 Quoted \"fakebold\" word.");
180}
181
182QTEST_GUILESS_MAIN(TestActualText)
183
184#include "check_actualtext.moc"
185

// Source: poppler/qt6/tests/check_actualtext.cpp