/*
 * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org>
 * Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
 * Copyright (C) 2018-2022, Albert Astals Cid <aacid@kde.org>
 * Copyright (C) 2018, Zsombor Hollay-Horvath <hollay.horvath@gmail.com>
 * Copyright (C) 2018, Aleksey Nikolaev <nae202@gmail.com>
 * Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu>
 * Copyright (C) 2020, Adam Reichold <adam.reichold@t-online.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
 */
24
25#ifndef POPPLER_PAGE_H
26#define POPPLER_PAGE_H
27
#include "poppler-global.h"
#include "poppler-rectangle.h"

#include <memory>
#include <string>
#include <vector>
32
33namespace poppler {
34
35struct text_box_data;
36class POPPLER_CPP_EXPORT text_box
37{
38 friend class page;
39
40public:
41 text_box(text_box &&) noexcept;
42 text_box &operator=(text_box &&) noexcept;
43
44 ~text_box();
45
46 ustring text() const;
47 rectf bbox() const;
48
49 /**
50 \since 0.68
51 */
52 int rotation() const;
53
54 /**
55 Get a bbox for the i-th glyph
56
57 This method returns a rectf of the bounding box for
58 the i-th glyph in the text_box.
59
60 \note The text_box object owns the rectf objects,
61 the caller is not needed to free them.
62
63 \warning For too large glyph index, rectf(0,0,0,0)
64 is returned. The number of the glyphs and ustring
65 codepoints might be different in some complex scripts.
66 */
67 rectf char_bbox(size_t i) const;
68 bool has_space_after() const;
69
70 /**
71 \since 0.89
72 */
73 bool has_font_info() const;
74
75 /**
76 Get a writing mode for the i-th glyph
77
78 This method returns an enum of the writing mode
79 for the i-th glyph in the text_box.
80
81 \note Usually all glyphs in one text_box have the
82 same writing mode. Thus the default value of the
83 glyph index is 0.
84 */
85 enum writing_mode_enum
86 {
87 invalid_wmode = -1,
88 horizontal_wmode = 0,
89 vertical_wmode = 1
90 };
91
92 /**
93 \since 0.89
94 */
95 writing_mode_enum get_wmode(int i = 0) const;
96
97 /**
98 Get a font size of this text_box instance.
99
100 This method return a double floating value of the
101 font size from the text_box instance.
102 */
103
104 /**
105 \since 0.89
106 */
107 double get_font_size() const;
108
109 /**
110 Get a font name for the i-th glyph
111
112 This method returns a std::string object holding
113 the font name for the i-th glyph.
114
115 \note The randomization prefix of the embedded fonts
116 are not removed. The font names including these
117 prefixes are insuffucient to determine whether the
118 two fonts are same or different.
119
120 \note The clients should not assume that the
121 encoding of the font name is one of the ASCII,
122 Latin1 or UTF-8. Some legacy PDF producers used
123 in CJK market use GBK, Big5, Wansung or Shift-JIS.
124 */
125
126 /**
127 \since 0.89
128 */
129 std::string get_font_name(int i = 0) const;
130
131private:
132 explicit text_box(text_box_data *data);
133
134 std::unique_ptr<text_box_data> m_data;
135};
136
137class document;
138class document_private;
139class page_private;
140class page_transition;
141
142class POPPLER_CPP_EXPORT page : public poppler::noncopyable
143{
144public:
145 enum orientation_enum
146 {
147 landscape,
148 portrait,
149 seascape,
150 upside_down
151 };
152 enum search_direction_enum
153 {
154 search_from_top,
155 search_next_result,
156 search_previous_result
157 };
158 enum text_layout_enum
159 {
160 physical_layout,
161 raw_order_layout,
162 non_raw_non_physical_layout ///< \since 0.88
163 };
164
165 ~page();
166
167 orientation_enum orientation() const;
168 double duration() const;
169 rectf page_rect(page_box_enum box = crop_box) const;
170 ustring label() const;
171
172 page_transition *transition() const;
173
174 bool search(const ustring &text, rectf &r, search_direction_enum direction, case_sensitivity_enum case_sensitivity, rotation_enum rotation = rotate_0) const;
175 ustring text(const rectf &r = rectf()) const;
176 ustring text(const rectf &r, text_layout_enum layout_mode) const;
177
178 /**
179 Returns a list of text of the page
180
181 This method returns a std::vector of text_box that contain all
182 the text of the page, with roughly one text word of text
183 per text_box item.
184
185 For text written in western languages (left-to-right and
186 up-to-down), the std::vector contains the text in the proper
187 order.
188
189 \since 0.63
190
191 \note The page object owns the text_box objects as unique_ptr,
192 the caller is not needed to free them.
193
194 \warning This method is not tested with Asian scripts
195 */
196 std::vector<text_box> text_list() const;
197
198 /*
199 * text_list_option_enum is a bitmask-style flags for text_list(),
200 * 0 means the default & simplest behaviour.
201 */
202 enum text_list_option_enum
203 {
204 text_list_include_font = 1 // \since 0.89
205 };
206
207 /**
208 Extended version of text_list() taking an option flag.
209 The option flag should be the multiple of text_list_option_enum.
210
211 \since 0.89
212 */
213 std::vector<text_box> text_list(int opt_flag) const;
214
215private:
216 page(document_private *doc, int index);
217
218 page_private *d;
219 friend class page_private;
220 friend class document;
221};
222
223}
224
225#endif
// Source: poppler/cpp/poppler-page.h