1 | /* |
2 | * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org> |
3 | * Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> |
4 | * Copyright (C) 2018-2022, Albert Astals Cid <aacid@kde.org> |
5 | * Copyright (C) 2018, Zsombor Hollay-Horvath <hollay.horvath@gmail.com> |
6 | * Copyright (C) 2018, Aleksey Nikolaev <nae202@gmail.com> |
7 | * Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu> |
8 | * Copyright (C) 2020, Adam Reichold <adam.reichold@t-online.de> |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License as published by |
12 | * the Free Software Foundation; either version 2, or (at your option) |
13 | * any later version. |
14 | * |
15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | * GNU General Public License for more details. |
19 | * |
20 | * You should have received a copy of the GNU General Public License |
21 | * along with this program; if not, write to the Free Software |
22 | * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. |
23 | */ |
24 | |
25 | #ifndef POPPLER_PAGE_H |
26 | #define POPPLER_PAGE_H |
27 | |
28 | #include "poppler-global.h" |
29 | #include "poppler-rectangle.h" |
30 | |
31 | #include <memory> |
32 | |
33 | namespace poppler { |
34 | |
35 | struct text_box_data; |
36 | class POPPLER_CPP_EXPORT text_box |
37 | { |
38 | friend class page; |
39 | |
40 | public: |
41 | text_box(text_box &&) noexcept; |
42 | text_box &operator=(text_box &&) noexcept; |
43 | |
44 | ~text_box(); |
45 | |
46 | ustring text() const; |
47 | rectf bbox() const; |
48 | |
49 | /** |
50 | \since 0.68 |
51 | */ |
52 | int rotation() const; |
53 | |
54 | /** |
55 | Get a bbox for the i-th glyph |
56 | |
57 | This method returns a rectf of the bounding box for |
58 | the i-th glyph in the text_box. |
59 | |
60 | \note The text_box object owns the rectf objects, |
61 | the caller is not needed to free them. |
62 | |
63 | \warning For too large glyph index, rectf(0,0,0,0) |
64 | is returned. The number of the glyphs and ustring |
65 | codepoints might be different in some complex scripts. |
66 | */ |
67 | rectf char_bbox(size_t i) const; |
68 | bool has_space_after() const; |
69 | |
70 | /** |
71 | \since 0.89 |
72 | */ |
73 | bool has_font_info() const; |
74 | |
75 | /** |
76 | Get a writing mode for the i-th glyph |
77 | |
78 | This method returns an enum of the writing mode |
79 | for the i-th glyph in the text_box. |
80 | |
81 | \note Usually all glyphs in one text_box have the |
82 | same writing mode. Thus the default value of the |
83 | glyph index is 0. |
84 | */ |
85 | enum writing_mode_enum |
86 | { |
87 | invalid_wmode = -1, |
88 | horizontal_wmode = 0, |
89 | vertical_wmode = 1 |
90 | }; |
91 | |
92 | /** |
93 | \since 0.89 |
94 | */ |
95 | writing_mode_enum get_wmode(int i = 0) const; |
96 | |
97 | /** |
98 | Get a font size of this text_box instance. |
99 | |
100 | This method return a double floating value of the |
101 | font size from the text_box instance. |
102 | */ |
103 | |
104 | /** |
105 | \since 0.89 |
106 | */ |
107 | double get_font_size() const; |
108 | |
109 | /** |
110 | Get a font name for the i-th glyph |
111 | |
112 | This method returns a std::string object holding |
113 | the font name for the i-th glyph. |
114 | |
115 | \note The randomization prefix of the embedded fonts |
116 | are not removed. The font names including these |
117 | prefixes are insuffucient to determine whether the |
118 | two fonts are same or different. |
119 | |
120 | \note The clients should not assume that the |
121 | encoding of the font name is one of the ASCII, |
122 | Latin1 or UTF-8. Some legacy PDF producers used |
123 | in CJK market use GBK, Big5, Wansung or Shift-JIS. |
124 | */ |
125 | |
126 | /** |
127 | \since 0.89 |
128 | */ |
129 | std::string get_font_name(int i = 0) const; |
130 | |
131 | private: |
132 | explicit text_box(text_box_data *data); |
133 | |
134 | std::unique_ptr<text_box_data> m_data; |
135 | }; |
136 | |
137 | class document; |
138 | class document_private; |
139 | class page_private; |
140 | class page_transition; |
141 | |
142 | class POPPLER_CPP_EXPORT page : public poppler::noncopyable |
143 | { |
144 | public: |
145 | enum orientation_enum |
146 | { |
147 | landscape, |
148 | portrait, |
149 | seascape, |
150 | upside_down |
151 | }; |
152 | enum search_direction_enum |
153 | { |
154 | search_from_top, |
155 | search_next_result, |
156 | search_previous_result |
157 | }; |
158 | enum text_layout_enum |
159 | { |
160 | physical_layout, |
161 | raw_order_layout, |
162 | non_raw_non_physical_layout ///< \since 0.88 |
163 | }; |
164 | |
165 | ~page(); |
166 | |
167 | orientation_enum orientation() const; |
168 | double duration() const; |
169 | rectf page_rect(page_box_enum box = crop_box) const; |
170 | ustring label() const; |
171 | |
172 | page_transition *transition() const; |
173 | |
174 | bool search(const ustring &text, rectf &r, search_direction_enum direction, case_sensitivity_enum case_sensitivity, rotation_enum rotation = rotate_0) const; |
175 | ustring text(const rectf &r = rectf()) const; |
176 | ustring text(const rectf &r, text_layout_enum layout_mode) const; |
177 | |
178 | /** |
179 | Returns a list of text of the page |
180 | |
181 | This method returns a std::vector of text_box that contain all |
182 | the text of the page, with roughly one text word of text |
183 | per text_box item. |
184 | |
185 | For text written in western languages (left-to-right and |
186 | up-to-down), the std::vector contains the text in the proper |
187 | order. |
188 | |
189 | \since 0.63 |
190 | |
191 | \note The page object owns the text_box objects as unique_ptr, |
192 | the caller is not needed to free them. |
193 | |
194 | \warning This method is not tested with Asian scripts |
195 | */ |
196 | std::vector<text_box> text_list() const; |
197 | |
198 | /* |
199 | * text_list_option_enum is a bitmask-style flags for text_list(), |
200 | * 0 means the default & simplest behaviour. |
201 | */ |
202 | enum text_list_option_enum |
203 | { |
204 | text_list_include_font = 1 // \since 0.89 |
205 | }; |
206 | |
207 | /** |
208 | Extended version of text_list() taking an option flag. |
209 | The option flag should be the multiple of text_list_option_enum. |
210 | |
211 | \since 0.89 |
212 | */ |
213 | std::vector<text_box> text_list(int opt_flag) const; |
214 | |
215 | private: |
216 | page(document_private *doc, int index); |
217 | |
218 | page_private *d; |
219 | friend class page_private; |
220 | friend class document; |
221 | }; |
222 | |
223 | } |
224 | |
225 | #endif |
226 | |