#include <QtTest/QTest>

#include <poppler-qt6.h>

#include <QtCore/QFile>

#include <memory>
6 | |
7 | class TestActualText : public QObject |
8 | { |
9 | Q_OBJECT |
10 | public: |
11 | explicit TestActualText(QObject *parent = nullptr) : QObject(parent) { } |
12 | private slots: |
13 | void checkActualText1(); |
14 | void checkActualText2(); |
15 | void checkActualText2_data(); |
16 | void checkAllOrientations(); |
17 | void checkAllOrientations_data(); |
18 | void checkFakeboldText(); |
19 | void checkFakeboldText_data(); |
20 | |
21 | private: |
22 | void checkActualText(Poppler::Document &doc, const QRectF &area, const QString &text); |
23 | }; |
24 | |
25 | void TestActualText::checkActualText(Poppler::Document &doc, const QRectF &area, const QString &text) |
26 | { |
27 | std::unique_ptr<Poppler::Page> page = doc.page(index: 0); |
28 | QVERIFY(page); |
29 | |
30 | QCOMPARE(page->text(area), text); |
31 | } |
32 | |
33 | void TestActualText::checkActualText1() |
34 | { |
35 | std::unique_ptr<Poppler::Document> doc = Poppler::Document::load(TESTDATADIR "/unittestcases/WithActualText.pdf" ); |
36 | QVERIFY(doc); |
37 | |
38 | checkActualText(doc&: *doc, area: QRectF {}, QStringLiteral("The slow brown fox jumps over the black dog." )); |
39 | } |
40 | |
41 | void TestActualText::checkActualText2() |
42 | { |
43 | QFETCH(QRectF, area); |
44 | QFETCH(QString, text); |
45 | |
46 | QFile file(TESTDATADIR "/unittestcases/WithActualText.pdf" ); |
47 | QVERIFY(file.open(QIODevice::ReadOnly)); |
48 | |
49 | std::unique_ptr<Poppler::Document> doc = Poppler::Document::load(device: &file); |
50 | QVERIFY(doc); |
51 | |
52 | checkActualText(doc&: *doc, area, text); |
53 | } |
54 | |
55 | void TestActualText::checkActualText2_data() |
56 | { |
57 | QTest::addColumn<QRectF>(name: "area" ); |
58 | QTest::addColumn<QString>(name: "text" ); |
59 | |
60 | // Line bounding box is [100.000 90.720 331.012110 102.350] |
61 | |
62 | QTest::newRow(dataTag: "full page" ) << QRectF {} << QStringLiteral("The slow brown fox jumps over the black dog." ); |
63 | QTest::newRow(dataTag: "full line" ) << QRectF { 50.0, 90.0, 290.0, 20.0 } << QStringLiteral("The slow brown fox jumps over the black dog." ); |
64 | QTest::newRow(dataTag: "full line [narrow]" ) << QRectF { 50.0, 95.0, 290.0, 5.0 } << QStringLiteral("The slow brown fox jumps over the black dog." ); |
65 | QTest::newRow(dataTag: "above line" ) << QRectF { 50.0, 85.0, 290.0, 10.0 } << QString {}; |
66 | QTest::newRow(dataTag: "above line mid" ) << QRectF { 50.0, 90.0, 290.0, 5.0 } << QString {}; |
67 | QTest::newRow(dataTag: "first two words" ) << QRectF { 50.0, 90.0, 100.0, 20.0 } << QStringLiteral("The slow" ); |
68 | QTest::newRow(dataTag: "first two words [narrow]" ) << QRectF { 50.0, 95.0, 100.0, 5.0 } << QStringLiteral("The slow" ); |
69 | QTest::newRow(dataTag: "first character" ) << QRectF { 103.0, 95.0, 1.0, 5.0 } << QStringLiteral("T" ); |
70 | QTest::newRow(dataTag: "last two words" ) << QRectF { 285.0, 90.0, 100.0, 20.0 } << QStringLiteral("black dog." ); |
71 | QTest::newRow(dataTag: "last character" ) << QRectF { 320.0, 90.0, 8.0, 20.0 } << QStringLiteral("g" ); |
72 | QTest::newRow(dataTag: "middle 'fox'" ) << QRectF { 190.0, 90.0, 15.0, 20.0 } << QStringLiteral("fox" ); |
73 | QTest::newRow(dataTag: "middle 'x'" ) << QRectF { 200.0, 90.0, 5.0, 20.0 } << QStringLiteral("x" ); |
74 | } |
75 | |
76 | void TestActualText::checkAllOrientations() |
77 | { |
78 | QFETCH(int, pageNr); |
79 | QFETCH(QRectF, area); |
80 | QFETCH(QString, text); |
81 | |
82 | QString path { TESTDATADIR "/unittestcases/orientation.pdf" }; |
83 | std::unique_ptr<Poppler::Document> doc { Poppler::Document::load(filePath: path) }; |
84 | QVERIFY(doc); |
85 | |
86 | std::unique_ptr<Poppler::Page> page { doc->page(index: pageNr) }; |
87 | QVERIFY(page); |
88 | |
89 | QCOMPARE(page->text(area), text); |
90 | } |
91 | |
92 | void TestActualText::checkAllOrientations_data() |
93 | { |
94 | QTest::addColumn<int>(name: "pageNr" ); |
95 | QTest::addColumn<QRectF>(name: "area" ); |
96 | QTest::addColumn<QString>(name: "text" ); |
97 | |
98 | QTest::newRow(dataTag: "Portrait" ) << 0 << QRectF {} << QStringLiteral("Portrait" ); |
99 | QTest::newRow(dataTag: "Landscape" ) << 1 << QRectF {} << QStringLiteral("Landscape" ); |
100 | QTest::newRow(dataTag: "Upside down" ) << 2 << QRectF {} << QStringLiteral("Upside down" ); |
101 | QTest::newRow(dataTag: "Seacape" ) << 3 << QRectF {} << QStringLiteral("Seascape" ); |
102 | |
103 | QTest::newRow(dataTag: "Portrait A4 rect" ) << 0 << QRectF { 0, 0, 595, 842 } << QStringLiteral("Portrait" ); |
104 | QTest::newRow(dataTag: "Landscape A4 rect" ) << 1 << QRectF { 0, 0, 842, 595 } << QStringLiteral("Landscape" ); |
105 | QTest::newRow(dataTag: "Upside down A4 rect" ) << 2 << QRectF { 0, 0, 595, 842 } << QStringLiteral("Upside down" ); |
106 | QTest::newRow(dataTag: "Seacape A4 rect" ) << 3 << QRectF { 0, 0, 842, 595 } << QStringLiteral("Seascape" ); |
107 | |
108 | QTest::newRow(dataTag: "Portrait line rect" ) << 0 << QRectF { 30, 30, 60, 20 } << QStringLiteral("Portrait" ); |
109 | QTest::newRow(dataTag: "Landscape line rect" ) << 1 << QRectF { 790, 30, 20, 80 } << QStringLiteral("Landscape" ); |
110 | QTest::newRow(dataTag: "Upside down line rect" ) << 2 << QRectF { 485, 790, 75, 20 } << QStringLiteral("Upside down" ); |
111 | QTest::newRow(dataTag: "Seacape line rect" ) << 3 << QRectF { 30, 500, 20, 70 } << QStringLiteral("Seascape" ); |
112 | |
113 | QTest::newRow(dataTag: "Portrait small rect B" ) << 0 << QRectF { 30, 35, 10, 10 } << QStringLiteral("P" ); |
114 | QTest::newRow(dataTag: "Portrait small rect E" ) << 0 << QRectF { 80, 35, 10, 10 } << QStringLiteral("t" ); |
115 | QTest::newRow(dataTag: "Landscape small rect B" ) << 1 << QRectF { 800, 30, 10, 10 } << QStringLiteral("L" ); |
116 | QTest::newRow(dataTag: "Landscape small rect E" ) << 1 << QRectF { 800, 90, 10, 10 } << QStringLiteral("e" ); |
117 | QTest::newRow(dataTag: "Upside down small rect B" ) << 2 << QRectF { 550, 800, 10, 10 } << QStringLiteral("U" ); |
118 | QTest::newRow(dataTag: "Upside down small rect E" ) << 2 << QRectF { 485, 800, 10, 10 } << QStringLiteral("n" ); |
119 | QTest::newRow(dataTag: "Seacape small rect B" ) << 3 << QRectF { 40, 550, 10, 10 } << QStringLiteral("S" ); |
120 | QTest::newRow(dataTag: "Seacape small rect E" ) << 3 << QRectF { 40, 510, 10, 10 } << QStringLiteral("p" ); |
121 | } |
122 | |
123 | void TestActualText::checkFakeboldText() |
124 | { |
125 | QFETCH(int, pageNr); |
126 | QFETCH(QRectF, area); |
127 | QFETCH(QString, text); |
128 | |
129 | QString path { TESTDATADIR "/unittestcases/fakebold.pdf" }; |
130 | std::unique_ptr<Poppler::Document> doc { Poppler::Document::load(filePath: path) }; |
131 | QVERIFY(doc); |
132 | |
133 | std::unique_ptr<Poppler::Page> page { doc->page(index: pageNr) }; |
134 | QVERIFY(page); |
135 | |
136 | QEXPECT_FAIL("Upright line 3" , "Fakebold not matched when bold word is followed with non-bold glyph" , Continue); |
137 | QEXPECT_FAIL("Upright line 4" , "Fakebold not matched when bold word follows non-bold glyph" , Continue); |
138 | QEXPECT_FAIL("Upright line 5" , "Fakebold not matched when bold word is enclosed by non-bold glyphs" , Continue); |
139 | QEXPECT_FAIL("Rotated 90' line 3" , "Fakebold not matched when bold word is followed with non-bold glyph" , Continue); |
140 | QEXPECT_FAIL("Rotated 90' line 4" , "Fakebold not matched when bold word follows non-bold glyph" , Continue); |
141 | QEXPECT_FAIL("Rotated 90' line 5" , "Fakebold not matched when bold word is enclosed by non-bold glyphs" , Continue); |
142 | QEXPECT_FAIL("Rotated 180' line 3" , "Fakebold not matched when bold word is followed with non-bold glyph" , Continue); |
143 | QEXPECT_FAIL("Rotated 180' line 4" , "Fakebold not matched when bold word follows non-bold glyph" , Continue); |
144 | QEXPECT_FAIL("Rotated 180' line 5" , "Fakebold not matched when bold word is enclosed by non-bold glyphs" , Continue); |
145 | QEXPECT_FAIL("Rotated 270' line 3" , "Fakebold not matched when bold word is followed with non-bold glyph" , Continue); |
146 | QEXPECT_FAIL("Rotated 270' line 4" , "Fakebold not matched when bold word follows non-bold glyph" , Continue); |
147 | QEXPECT_FAIL("Rotated 270' line 5" , "Fakebold not matched when bold word is enclosed by non-bold glyphs" , Continue); |
148 | QCOMPARE(page->text(area), text); |
149 | } |
150 | |
151 | void TestActualText::checkFakeboldText_data() |
152 | { |
153 | QTest::addColumn<int>(name: "pageNr" ); |
154 | QTest::addColumn<QRectF>(name: "area" ); |
155 | QTest::addColumn<QString>(name: "text" ); |
156 | |
157 | QTest::newRow(dataTag: "Upright line 1" ) << 0 << QRectF { 0, 0, 595, 80 } << QStringLiteral("1 This is fakebold text." ); |
158 | QTest::newRow(dataTag: "Upright line 2" ) << 0 << QRectF { 0, 80, 595, 80 } << QStringLiteral("2 This is a fakebold word." ); |
159 | QTest::newRow(dataTag: "Upright line 3" ) << 0 << QRectF { 0, 140, 595, 80 } << QStringLiteral("3 The last word is in fakebold." ); |
160 | QTest::newRow(dataTag: "Upright line 4" ) << 0 << QRectF { 0, 220, 595, 80 } << QStringLiteral("4 Hyphenated-fakebold word." ); |
161 | QTest::newRow(dataTag: "Upright line 5" ) << 0 << QRectF { 0, 300, 595, 80 } << QStringLiteral("5 Quoted \"fakebold\" word." ); |
162 | |
163 | QTest::newRow(dataTag: "Rotated 90' line 1" ) << 1 << QRectF { 510, 0, 80, 842 } << QStringLiteral("1 This is fakebold text." ); |
164 | QTest::newRow(dataTag: "Rotated 90' line 2" ) << 1 << QRectF { 430, 0, 80, 842 } << QStringLiteral("2 This is a fakebold word." ); |
165 | QTest::newRow(dataTag: "Rotated 90' line 3" ) << 1 << QRectF { 350, 0, 80, 842 } << QStringLiteral("3 The last word is in fakebold." ); |
166 | QTest::newRow(dataTag: "Rotated 90' line 4" ) << 1 << QRectF { 270, 0, 80, 842 } << QStringLiteral("4 Hyphenated-fakebold word." ); |
167 | QTest::newRow(dataTag: "Rotated 90' line 5" ) << 1 << QRectF { 190, 0, 80, 842 } << QStringLiteral("5 Quoted \"fakebold\" word." ); |
168 | |
169 | QTest::newRow(dataTag: "Rotated 180' line 1" ) << 2 << QRectF { 0, 760, 595, 80 } << QStringLiteral("1 This is fakebold text." ); |
170 | QTest::newRow(dataTag: "Rotated 180' line 2" ) << 2 << QRectF { 0, 680, 595, 80 } << QStringLiteral("2 This is a fakebold word." ); |
171 | QTest::newRow(dataTag: "Rotated 180' line 3" ) << 2 << QRectF { 0, 600, 595, 80 } << QStringLiteral("3 The last word is in fakebold." ); |
172 | QTest::newRow(dataTag: "Rotated 180' line 4" ) << 2 << QRectF { 0, 520, 595, 80 } << QStringLiteral("4 Hyphenated-fakebold word." ); |
173 | QTest::newRow(dataTag: "Rotated 180' line 5" ) << 2 << QRectF { 0, 440, 595, 80 } << QStringLiteral("5 Quoted \"fakebold\" word." ); |
174 | |
175 | QTest::newRow(dataTag: "Rotated 270' line 1" ) << 3 << QRectF { 20, 0, 80, 842 } << QStringLiteral("1 This is fakebold text." ); |
176 | QTest::newRow(dataTag: "Rotated 270' line 2" ) << 3 << QRectF { 100, 0, 80, 842 } << QStringLiteral("2 This is a fakebold word." ); |
177 | QTest::newRow(dataTag: "Rotated 270' line 3" ) << 3 << QRectF { 160, 0, 80, 842 } << QStringLiteral("3 The last word is in fakebold." ); |
178 | QTest::newRow(dataTag: "Rotated 270' line 4" ) << 3 << QRectF { 240, 0, 80, 842 } << QStringLiteral("4 Hyphenated-fakebold word." ); |
179 | QTest::newRow(dataTag: "Rotated 270' line 5" ) << 3 << QRectF { 320, 0, 80, 842 } << QStringLiteral("5 Quoted \"fakebold\" word." ); |
180 | } |
181 | |
182 | QTEST_GUILESS_MAIN(TestActualText) |
183 | |
184 | #include "check_actualtext.moc" |
185 | |