1/*
2 * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org>
3 * Copyright (C) 2017-2020, Albert Astals Cid <aacid@kde.org>
4 * Copyright (C) 2017, Jason Alan Palmer <jalanpalmer@gmail.com>
5 * Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
6 * Copyright (C) 2018, 2020, Adam Reichold <adam.reichold@t-online.de>
7 * Copyright (C) 2018, Zsombor Hollay-Horvath <hollay.horvath@gmail.com>
8 * Copyright (C) 2018, Aleksey Nikolaev <nae202@gmail.com>
9 * Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2, or (at your option)
14 * any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
24 */
25
26/**
27 \file poppler-page.h
28 */
29#include "poppler-page.h"
30#include "poppler-page-transition.h"
31
32#include "poppler-document-private.h"
33#include "poppler-page-private.h"
34#include "poppler-private.h"
35#include "poppler-font-private.h"
36#include "poppler-font.h"
37
38#include "TextOutputDev.h"
39
40#include <algorithm>
41#include <memory>
42#include <utility>
43
44using namespace poppler;
45
46page_private::page_private(document_private *_doc, int _index) : doc(_doc), page(doc->doc->getCatalog()->getPage(i: _index + 1)), index(_index), transition(nullptr), font_info_cache_initialized(false) { }
47
48page_private::~page_private()
49{
50 delete transition;
51}
52
53void page_private::init_font_info_cache()
54{
55 if (font_info_cache_initialized) {
56 return;
57 }
58
59 poppler::font_iterator it(index, doc);
60
61 if (it.has_next()) {
62 font_info_cache = it.next();
63 }
64
65 font_info_cache_initialized = true;
66 return;
67}
68
69/**
70 \class poppler::page poppler-page.h "poppler/cpp/poppler-page.h"
71
72 A page in a PDF %document.
73 */
74
75/**
76 \enum poppler::page::orientation_enum
77
 The possible orientations of a page.
79*/
80
81/**
82 \enum poppler::page::search_direction_enum
83
84 The direction/action to follow when performing a text search.
85*/
86
87/**
88 \enum poppler::page::text_layout_enum
89
90 A layout of the text of a page.
91*/
92
93page::page(document_private *doc, int index) : d(new page_private(doc, index)) { }
94
95/**
96 Destructor.
97 */
98page::~page()
99{
100 delete d;
101}
102
103/**
104 \returns the orientation of the page
105 */
106page::orientation_enum page::orientation() const
107{
108 const int rotation = d->page->getRotate();
109 switch (rotation) {
110 case 90:
111 return landscape;
112 break;
113 case 180:
114 return upside_down;
115 break;
116 case 270:
117 return seascape;
118 break;
119 default:
120 return portrait;
121 }
122}
123
124/**
125 The eventual duration the page can be hinted to be shown in a presentation.
126
127 If this value is positive (usually different than -1) then a PDF viewer, when
128 showing the page in a presentation, should show the page for at most for this
129 number of seconds, and then switch to the next page (if any). Note this is
130 purely a presentation attribute, it has no influence on the behaviour.
131
132 \returns the duration time (in seconds) of the page
133 */
134double page::duration() const
135{
136 return d->page->getDuration();
137}
138
139/**
140 Returns the size of one rect of the page.
141
142 \returns the size of the specified page rect
143 */
144rectf page::page_rect(page_box_enum box) const
145{
146 const PDFRectangle *r = nullptr;
147 switch (box) {
148 case media_box:
149 r = d->page->getMediaBox();
150 break;
151 case crop_box:
152 r = d->page->getCropBox();
153 break;
154 case bleed_box:
155 r = d->page->getBleedBox();
156 break;
157 case trim_box:
158 r = d->page->getTrimBox();
159 break;
160 case art_box:
161 r = d->page->getArtBox();
162 break;
163 }
164 if (r) {
165 return detail::pdfrectangle_to_rectf(pdfrect: *r);
166 }
167 return rectf();
168}
169
170/**
171 \returns the label of the page, if any
172 */
173ustring page::label() const
174{
175 GooString goo;
176 if (!d->doc->doc->getCatalog()->indexToLabel(index: d->index, label: &goo)) {
177 return ustring();
178 }
179
180 return detail::unicode_GooString_to_ustring(str: &goo);
181}
182
183/**
184 The transition from this page to the next one.
185
186 If it is set, then a PDF viewer in a presentation should perform the
187 specified transition effect when switching from this page to the next one.
188
189 \returns the transition effect for the switch to the next page, if any
190 */
191page_transition *page::transition() const
192{
193 if (!d->transition) {
194 Object o = d->page->getTrans();
195 if (o.isDict()) {
196 d->transition = new page_transition(&o);
197 }
198 }
199 return d->transition;
200}
201
202/**
203 Search the page for some text.
204
205 \param text the text to search
206 \param[in,out] r the area where to start search, which will be set to the area
207 of the match (if any)
208 \param direction in which direction search for text
209 \param case_sensitivity whether search in a case sensitive way
210 \param rotation the rotation assumed for the page
211 */
212bool page::search(const ustring &text, rectf &r, search_direction_enum direction, case_sensitivity_enum case_sensitivity, rotation_enum rotation) const
213{
214 const size_t len = text.length();
215
216 if (len == 0) {
217 return false;
218 }
219
220 std::vector<Unicode> u(len);
221 for (size_t i = 0; i < len; ++i) {
222 u[i] = text[i];
223 }
224
225 const bool sCase = case_sensitivity == case_sensitive;
226 const int rotation_value = (int)rotation * 90;
227
228 bool found = false;
229 double rect_left = r.left();
230 double rect_top = r.top();
231 double rect_right = r.right();
232 double rect_bottom = r.bottom();
233
234 TextOutputDev td(nullptr, true, 0, false, false);
235 d->doc->doc->displayPage(out: &td, page: d->index + 1, hDPI: 72, vDPI: 72, rotate: rotation_value, useMediaBox: false, crop: true, printing: false);
236 TextPage *text_page = td.takeText();
237
238 switch (direction) {
239 case search_from_top:
240 found = text_page->findText(s: &u[0], len, startAtTop: true, stopAtBottom: true, startAtLast: false, stopAtLast: false, caseSensitive: sCase, backward: false, wholeWord: false, xMin: &rect_left, yMin: &rect_top, xMax: &rect_right, yMax: &rect_bottom);
241 break;
242 case search_next_result:
243 found = text_page->findText(s: &u[0], len, startAtTop: false, stopAtBottom: true, startAtLast: true, stopAtLast: false, caseSensitive: sCase, backward: false, wholeWord: false, xMin: &rect_left, yMin: &rect_top, xMax: &rect_right, yMax: &rect_bottom);
244 break;
245 case search_previous_result:
246 found = text_page->findText(s: &u[0], len, startAtTop: false, stopAtBottom: true, startAtLast: true, stopAtLast: false, caseSensitive: sCase, backward: true, wholeWord: false, xMin: &rect_left, yMin: &rect_top, xMax: &rect_right, yMax: &rect_bottom);
247 break;
248 }
249
250 text_page->decRefCnt();
251 r.set_left(rect_left);
252 r.set_top(rect_top);
253 r.set_right(rect_right);
254 r.set_bottom(rect_bottom);
255
256 return found;
257}
258
259/**
260 Returns the text in the page, in its physical layout.
261
262 \param r if not empty, it will be extracted the text in it; otherwise, the
263 text of the whole page
264
265 \returns the text of the page in the specified rect or in the whole page
266 */
267ustring page::text(const rectf &r) const
268{
269 return text(r, layout_mode: physical_layout);
270}
271
272static void appendToGooString(void *stream, const char *text, int len)
273{
274 ((GooString *)stream)->append(str: text, lengthA: len);
275}
276
277/**
278 Returns the text in the page.
279
280 \param rect if not empty, it will be extracted the text in it; otherwise, the
281 text of the whole page
282 \param layout_mode the layout of the text
283
284 \returns the text of the page in the specified rect or in the whole page
285
286 \since 0.16
287 */
288ustring page::text(const rectf &r, text_layout_enum layout_mode) const
289{
290 std::unique_ptr<GooString> out(new GooString());
291 const bool use_raw_order = (layout_mode == raw_order_layout);
292 const bool use_physical_layout = (layout_mode == physical_layout);
293 TextOutputDev td(&appendToGooString, out.get(), use_physical_layout, 0, use_raw_order, false);
294 if (r.is_empty()) {
295 d->doc->doc->displayPage(out: &td, page: d->index + 1, hDPI: 72, vDPI: 72, rotate: 0, useMediaBox: false, crop: true, printing: false);
296 } else {
297 d->doc->doc->displayPageSlice(out: &td, page: d->index + 1, hDPI: 72, vDPI: 72, rotate: 0, useMediaBox: false, crop: true, printing: false, sliceX: r.left(), sliceY: r.top(), sliceW: r.width(), sliceH: r.height());
298 }
299 return ustring::from_utf8(str: out->c_str());
300}
301
302/*
303 * text_box_font_info object for text_box
304 */
305text_box_font_info_data::~text_box_font_info_data() = default;
306
307/*
308 * text_box object for page::text_list()
309 */
310text_box_data::~text_box_data() = default;
311
312text_box::~text_box() = default;
313
314text_box &text_box::operator=(text_box &&a) noexcept = default;
315text_box::text_box(text_box &&a) noexcept = default;
316
317text_box::text_box(text_box_data *data) : m_data { data } { }
318
319ustring text_box::text() const
320{
321 return m_data->text;
322}
323
324rectf text_box::bbox() const
325{
326 return m_data->bbox;
327}
328
329int text_box::rotation() const
330{
331 return m_data->rotation;
332}
333
334rectf text_box::char_bbox(size_t i) const
335{
336 if (i < m_data->char_bboxes.size()) {
337 return m_data->char_bboxes[i];
338 }
339 return rectf(0, 0, 0, 0);
340}
341
342bool text_box::has_space_after() const
343{
344 return m_data->has_space_after;
345}
346
347bool text_box::has_font_info() const
348{
349 return (m_data->text_box_font != nullptr);
350}
351
352text_box::writing_mode_enum text_box::get_wmode(int i) const
353{
354 if (this->has_font_info()) {
355 return m_data->text_box_font->wmodes[i];
356 } else {
357 return text_box::invalid_wmode;
358 }
359}
360
361double text_box::get_font_size() const
362{
363 if (this->has_font_info()) {
364 return m_data->text_box_font->font_size;
365 } else {
366 return -1;
367 }
368}
369
370std::string text_box::get_font_name(int i) const
371{
372 if (!this->has_font_info()) {
373 return std::string("*ignored*");
374 }
375
376 int j = m_data->text_box_font->glyph_to_cache_index[i];
377 if (j < 0) {
378 return std::string("");
379 }
380 return m_data->text_box_font->font_info_cache[j].name();
381}
382
383std::vector<text_box> page::text_list(int opt_flag) const
384{
385 std::vector<text_box> output_list;
386
387 /* config values are same with Qt5 Page::TextList() */
388 auto output_dev = std::make_unique<TextOutputDev>(args: nullptr, /* char* fileName */
389 args: false, /* bool physLayoutA */
390 args: 0, /* double fixedPitchA */
391 args: false, /* bool rawOrderA */
392 args: false /* bool append */
393 );
394
395 /*
396 * config values are same with Qt5 Page::TextList(),
397 * but rotation is fixed to zero.
398 * Few people use non-zero values.
399 */
400 d->doc->doc->displayPageSlice(out: output_dev.get(), page: d->index + 1, /* page */
401 hDPI: 72, vDPI: 72, rotate: 0, /* hDPI, vDPI, rot */
402 useMediaBox: false, crop: false, printing: false, /* useMediaBox, crop, printing */
403 sliceX: -1, sliceY: -1, sliceW: -1, sliceH: -1, /* sliceX, sliceY, sliceW, sliceH */
404 abortCheckCbk: nullptr, abortCheckCbkData: nullptr, /* abortCheckCbk(), abortCheckCbkData */
405 annotDisplayDecideCbk: nullptr, annotDisplayDecideCbkData: nullptr, /* annotDisplayDecideCbk(), annotDisplayDecideCbkData */
406 copyXRef: true); /* copyXRef */
407
408 if (std::unique_ptr<TextWordList> word_list { output_dev->makeWordList() }) {
409
410 output_list.reserve(n: word_list->getLength());
411 for (int i = 0; i < word_list->getLength(); i++) {
412 TextWord *word = word_list->get(idx: i);
413
414 std::unique_ptr<GooString> gooWord { word->getText() };
415 ustring ustr = ustring::from_utf8(str: gooWord->c_str());
416
417 double xMin, yMin, xMax, yMax;
418 word->getBBox(xMinA: &xMin, yMinA: &yMin, xMaxA: &xMax, yMaxA: &yMax);
419
420 text_box tb { new text_box_data { .text: ustr, .bbox: { xMin, yMin, xMax - xMin, yMax - yMin }, .rotation: word->getRotation(), .char_bboxes: {}, .has_space_after: word->hasSpaceAfter() == true, .text_box_font: nullptr } };
421
422 std::unique_ptr<text_box_font_info_data> tb_font_info = nullptr;
423 if (opt_flag & page::text_list_include_font) {
424 d->init_font_info_cache();
425
426 std::unique_ptr<text_box_font_info_data> tb_font { new text_box_font_info_data {
427 .font_size: word->getFontSize(), // double font_size
428 .wmodes: {}, // std::vector<text_box::writing_mode> wmodes;
429 .font_info_cache: d->font_info_cache, // std::vector<font_info> font_info_cache;
430 .glyph_to_cache_index: {} // std::vector<int> glyph_to_cache_index;
431 } };
432
433 tb_font_info = std::move(tb_font);
434 };
435
436 tb.m_data->char_bboxes.reserve(n: word->getLength());
437 for (int j = 0; j < word->getLength(); j++) {
438 word->getCharBBox(charIdx: j, xMinA: &xMin, yMinA: &yMin, xMaxA: &xMax, yMaxA: &yMax);
439 tb.m_data->char_bboxes.emplace_back(args&: xMin, args&: yMin, args: xMax - xMin, args: yMax - yMin);
440 }
441
442 if (tb_font_info && d->font_info_cache_initialized) {
443 tb_font_info->glyph_to_cache_index.reserve(n: word->getLength());
444 for (int j = 0; j < word->getLength(); j++) {
445 const TextFontInfo *cur_text_font_info = word->getFontInfo(idx: j);
446
447 // filter-out the invalid WMode value here.
448 switch (cur_text_font_info->getWMode()) {
449 case 0:
450 tb_font_info->wmodes.push_back(x: text_box::horizontal_wmode);
451 break;
452 case 1:
453 tb_font_info->wmodes.push_back(x: text_box::vertical_wmode);
454 break;
455 default:
456 tb_font_info->wmodes.push_back(x: text_box::invalid_wmode);
457 };
458
459 tb_font_info->glyph_to_cache_index.push_back(x: -1);
460 for (size_t k = 0; k < tb_font_info->font_info_cache.size(); k++) {
461 if (cur_text_font_info->matches(ref: &(tb_font_info->font_info_cache[k].d->ref))) {
462 tb_font_info->glyph_to_cache_index[j] = k;
463 break;
464 }
465 }
466 }
467 tb.m_data->text_box_font = std::move(tb_font_info);
468 }
469
470 output_list.push_back(x: std::move(tb));
471 }
472 }
473
474 return output_list;
475}
476
477std::vector<text_box> page::text_list() const
478{
479 return text_list(opt_flag: 0);
480}
481

source code of poppler/cpp/poppler-page.cpp